From b077aa1cabbe778c5868eb7e5df59d37d6b574d3 Mon Sep 17 00:00:00 2001
From: djc <1462492739@qq.com>
Date: Sun, 22 Aug 2021 16:26:45 +0800
Subject: [PATCH] [feat] [assistant] [I3T96T] add new Dataset operator
 CMUARCTICDataset

---
 CMakeLists.txt                                |   3 +
 build.sh                                      |  12 +-
 cmake/external_libs/flatbuffers.cmake         |   6 +-
 cmake/mind_expression.cmake                   |   1 -
 cmake/options.cmake                           |   1 +
 cmake/package.cmake                           |  13 +-
 cmake/package_script.cmake                    |  55 ++
 cmake/package_tar.cmake                       |  12 -
 cmake/package_win.cmake                       |  13 -
 docker/OWNERS                                 |   4 +-
 docker/mindspore-cpu/devel/Dockerfile         |   3 +
 docker/mindspore-cpu/runtime/Dockerfile       |   5 +-
 docker/mindspore-gpu/devel/Dockerfile         |   7 +-
 docker/mindspore-gpu/runtime/Dockerfile       |   7 +-
 include/api/context.h                         | 134 ++-
 include/api/model.h                           |  47 +-
 include/api/serialization.h                   |  32 +-
 include/api/types.h                           |  90 +-
 mindspore/_checkparam.py                      |   5 +
 .../graph_kernel/parallel_estimate.py         |   4 +-
 mindspore/_extends/graph_kernel/splitter.py   |   2 +-
 .../akg_compiler/akg_process.py               |   5 -
 mindspore/_extends/parse/parser.py            |   7 +-
 mindspore/_extends/parse/standard_method.py   |  12 +-
 mindspore/ccsrc/CMakeLists.txt                |   2 +-
 .../backend/kernel_compiler/CMakeLists.txt    |   1 +
 .../kernel_compiler/akg/akg_kernel_build.cc   | 417 ++++++++-
 .../kernel_compiler/akg/akg_kernel_build.h    |  75 ++
 .../akg/ascend/akg_ascend_kernel_build.cc     |   2 +
 .../akg/ascend/akg_ascend_kernel_mod.cc       |  13 +-
 .../akg/gpu/akg_gpu_kernel_build.cc           |   2 +
 .../akg/gpu/akg_gpu_kernel_mod.cc             |   8 +-
 .../akg/gpu/akg_gpu_kernel_mod.h              |   1 +
 .../backend/kernel_compiler/common_utils.cc   |  32 +-
 .../backend/kernel_compiler/common_utils.h    |   3 +-
 .../cpu/adam_weight_decay_cpu_kernel.cc       |  85 +-
 .../cpu/adam_weight_decay_cpu_kernel.h        |   2 +-
 .../cpu/apply_adagrad_cpu_kernel.cc           |  25 +-
 .../cpu/arithmetic_logic_cpu_kernel.cc        |  32 +-
 .../backend/kernel_compiler/cpu/cpu_kernel.h  |   2 +-
 .../kernel_compiler/cpu/cpu_kernel_factory.cc |   6 +
 .../cpu/dropout_grad_kernel.cc                |  11 +-
 .../kernel_compiler/cpu/dropout_grad_kernel.h |   2 +-
 .../cpu/eltwise_grad_cpu_kernel.cc            |  97 ++-
 .../cpu/eltwise_grad_cpu_kernel.h             |   7 +-
 .../cpu/mkldnn/eltwise_cpu_kernel.cc          |  52 +-
 .../cpu/mkldnn/eltwise_cpu_kernel.h           |   4 +-
 .../kernel_compiler/cpu/nnacl/CMakeLists.txt  |  18 +
 .../cpu/nnacl/assembly/opt/MatmulDpInt8.S     |   3 +-
 .../cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S  |   5 +-
 .../cpu/nnacl/base/batch_to_space_base.c      |  39 +-
 .../cpu/nnacl/base/broadcast_to.c             |   2 +-
 .../cpu/nnacl/base/concat_base.c              |   9 +-
 .../cpu/nnacl/base/depth_to_space_base.c      |  16 +-
 .../nnacl/base/minimal_filtering_generator.c  |   4 +-
 .../cpu/nnacl/base/slice_base.c               |  64 +-
 .../cpu/nnacl/base/split_base.c               |   4 -
 .../cpu/nnacl/base/tile_base.c                |  10 +-
 .../cpu/nnacl/base/tile_base.h                |  14 +-
 .../cpu/nnacl/base/transpose_base.c           |   4 +-
 .../kernel_compiler/cpu/nnacl/common_func.c   |  10 +-
 .../kernel_compiler/cpu/nnacl/common_func.h   |   7 +-
 .../kernel_compiler/cpu/nnacl/fp16/pad_fp16.c |  14 +-
 .../cpu/nnacl/fp32/adam_fp32.c                |  20 +-
 .../cpu/nnacl/fp32/adam_fp32.h                |   8 +-
 .../cpu/nnacl/fp32/arg_min_max_fp32.c         |  50 +-
 .../cpu/nnacl/fp32/common_func_fp32.c         |   4 +-
 .../cpu/nnacl/fp32/conv_depthwise_fp32.c      |   7 +-
 .../cpu/nnacl/fp32/deconv_fp32.c              |   2 +-
 .../cpu/nnacl/fp32/embedding_lookup_fp32.c    |   4 +-
 .../cpu/nnacl/fp32/gatherNd_fp32.c            |   2 +-
 .../cpu/nnacl/fp32/lstm_fp32.c                |   2 +-
 .../cpu/nnacl/fp32/matmul_fp32.c              |  85 +-
 .../kernel_compiler/cpu/nnacl/fp32/pad_fp32.c |  17 +-
 .../cpu/nnacl/fp32/resize_fp32.c              |   4 +-
 .../cpu/nnacl/fp32/reverse_fp32.c             |   4 +-
 .../cpu/nnacl/fp32/scatter_nd_fp32.c          |   2 +-
 .../cpu/nnacl/fp32/splice_fp32.c              |   2 +-
 .../cpu/nnacl/fp32/strided_slice_fp32.c       |   4 +-
 .../cpu/nnacl/fp32/transpose_fp32.c           |   8 +-
 .../cpu/nnacl/fp32/winograd_transform.c       |   4 +-
 .../cpu/nnacl/fp32_grad/activation_grad.c     |  27 +-
 .../cpu/nnacl/fp32_grad/activation_grad.h     |   3 +-
 .../cpu/nnacl/fp32_grad/gemm.c                |  15 +-
 .../cpu/nnacl/fp32_grad/reduce_grad.c         |   4 +-
 .../cpu/nnacl/fp32_grad/softmax_grad.c        |   4 +-
 .../cpu/nnacl/fp32_grad/strided_slice_grad.c  |   4 +-
 .../cpu/nnacl/infer/addn_infer.c              |   6 +-
 .../cpu/nnacl/infer/affine_infer.c            |   4 +-
 .../cpu/nnacl/infer/argmin_max_infer.c        |   4 +-
 .../cpu/nnacl/infer/arithmetic_grad_infer.c   |  10 +-
 .../cpu/nnacl/infer/audio_spectrogram_infer.c |   2 +-
 .../cpu/nnacl/infer/bias_grad_infer.c         |   4 +-
 .../cpu/nnacl/infer/broadcast_to_infer.c      |   4 +-
 .../cpu/nnacl/infer/common_infer.c            | 113 +--
 .../cpu/nnacl/infer/common_infer.h            |  11 +-
 .../cpu/nnacl/infer/concat_infer.c            |   9 +-
 .../cpu/nnacl/infer/constant_of_shape_infer.c |   2 +-
 .../nnacl/infer/conv2d_grad_filter_infer.c    |   5 +-
 .../cpu/nnacl/infer/conv2d_grad_input_infer.c |   8 +-
 .../cpu/nnacl/infer/conv2d_infer.c            |   2 +
 .../cpu/nnacl/infer/deconv2d_infer.c          |   6 +-
 .../nnacl/infer/dedepthwise_conv2d_infer.c    |   2 +
 .../cpu/nnacl/infer/depthwise_conv2d_infer.c  |   2 +
 .../cpu/nnacl/infer/embedding_lookup_infer.c  |   3 +
 .../cpu/nnacl/infer/expand_dims_infer.c       |   5 +-
 .../cpu/nnacl/infer/fill_infer.c              |   4 +-
 .../cpu/nnacl/infer/full_connection_infer.c   |   4 +-
 .../cpu/nnacl/infer/gather_infer.c            |  10 +-
 .../cpu/nnacl/infer/gather_nd_infer.c         |   4 +-
 .../infer/group_conv2d_grad_input_infer.c     |   8 +-
 .../cpu/nnacl/infer/infer_register.c          |  26 +-
 .../cpu/nnacl/infer/infer_register.h          |   3 +-
 .../cpu/nnacl/infer/layer_norm_grad_infer.c   |   5 +-
 .../cpu/nnacl/infer/layer_norm_infer.c        |   6 +-
 .../cpu/nnacl/infer/lin_space_infer.c         |   3 +
 .../cpu/nnacl/infer/matmul_infer.c            |   5 +-
 .../cpu/nnacl/infer/max_min_grad_infer.c      |  10 +-
 .../cpu/nnacl/infer/mean_infer.c              |   5 +-
 .../cpu/nnacl/infer/pad_infer.c               |   4 +-
 .../cpu/nnacl/infer/prior_box_infer.c         |  11 +-
 .../cpu/nnacl/infer/range_infer.c             |   4 +-
 .../cpu/nnacl/infer/reduce_infer.c            |   9 +-
 .../cpu/nnacl/infer/reshape_infer.c           |  56 +-
 .../cpu/nnacl/infer/resize_infer.c            |   5 +-
 .../cpu/nnacl/infer/scatter_nd_infer.h        |   1 -
 .../cpu/nnacl/infer/select_infer.c            |   4 +
 .../cpu/nnacl/infer/slice_infer.c             |  55 +-
 .../cpu/nnacl/infer/space_to_batch_infer.c    |   2 +-
 .../cpu/nnacl/infer/space_to_batch_nd_infer.c |   2 +-
 .../cpu/nnacl/infer/split_infer.c             |  11 +-
 .../cpu/nnacl/infer/squeeze_infer.c           |   2 +-
 .../cpu/nnacl/infer/stack_infer.c             |   4 +-
 .../nnacl/infer/strided_slice_grad_infer.c    |  20 +-
 .../cpu/nnacl/infer/strided_slice_infer.c     |  52 +-
 .../cpu/nnacl/infer/tile_infer.c              |  20 +-
 .../cpu/nnacl/infer/transpose_infer.c         |   8 +-
 .../cpu/nnacl/infer/uniform_real_infer.c      |   2 +-
 .../nnacl/infer/unsorted_segment_sum_infer.c  |   2 +-
 .../cpu/nnacl/infer/unsqueeze_infer.c         |  11 +-
 .../cpu/nnacl/infer/unstack_infer.c           |   6 +-
 .../cpu/nnacl/infer/where_infer.c             |  11 +-
 .../cpu/nnacl/int8/arg_min_max_int8.c         |  52 +-
 .../cpu/nnacl/int8/arithmetic_self_int8.c     |   2 +-
 .../cpu/nnacl/int8/conv1x1_int8.c             |   5 +-
 .../cpu/nnacl/int8/conv1x1_int8.h             |   5 +-
 .../cpu/nnacl/int8/conv3x3_int8.c             |   6 +-
 .../cpu/nnacl/int8/deconv_int8.c              |  10 +-
 .../cpu/nnacl/int8/fixed_point.c              |   4 +-
 .../cpu/nnacl/int8/hswish_int8.c              |   2 +-
 .../cpu/nnacl/int8/matmul_int8.c              |  22 +-
 .../cpu/nnacl/int8/matmul_int8.h              |  12 +-
 .../kernel_compiler/cpu/nnacl/int8/mul_int8.c |  14 +-
 .../kernel_compiler/cpu/nnacl/int8/mul_int8.h |   7 +-
 .../cpu/nnacl/int8/pack_int8.c                |   3 +-
 .../cpu/nnacl/int8/pack_int8.h                |   2 +-
 .../kernel_compiler/cpu/nnacl/int8/pad_int8.c |   6 +-
 .../cpu/nnacl/int8/quant_dtype_cast_int8.c    |   2 +-
 .../cpu/nnacl/int8/resize_int8.c              |   8 +-
 .../cpu/nnacl/int8/scale_int8.c               |   4 +-
 .../kernel_compiler/cpu/nnacl/int8/sub_int8.c |   8 +-
 .../cpu/nnacl/int8/transpose_int8.c           |   8 +-
 .../cpu/nnacl/int8/unsqueeze_int8.c           |   2 +-
 .../cpu/nnacl/matmul_parameter.h              |  13 +-
 .../kernel_compiler/cpu/nnacl/op_base.h       |   2 +
 .../kernel_compiler/cpu/nnacl/pad_parameter.h |   8 +-
 .../kernel_compiler/cpu/ps/pserver_kernel.cc  |   5 +
 .../cpu/pyfunc/py_func_cpu_kernel.cc          |   3 +-
 .../cpu/searchsorted_cpu_kernel.cc            |  14 +-
 .../cpu/searchsorted_cpu_kernel.h             |   4 +-
 .../kernel_compiler/cpu/sgd_cpu_kernel.cc     |  18 +-
 .../kernel_compiler/cpu/sgd_cpu_kernel.h      |   4 +-
 .../kernel_compiler/cpu/sort_cpu_kernel.cc    |   6 +-
 .../kernel_compiler/cpu/split_cpu_kernel.cc   |   3 +-
 .../cpu/transpose_cpu_kernel.cc               |  14 +-
 .../kernel_compiler/cpu/unpack_cpu_kernel.cc  |  12 +-
 .../gpu/arrays/array_reduce_gpu_kernel.h      |   2 +-
 .../gpu/arrays/cast_gpu_kernel.h              |  16 +-
 .../gpu/arrays/concatv2_gpu_kernel.h          |  22 +-
 .../gpu/arrays/dynamic_range_gpu_kernel.h     |   2 +-
 .../gpu/arrays/dynamic_shape_gpu_kernel.h     |   4 +-
 .../gpu/arrays/one_hot_gpu_kernel.h           |   6 +-
 .../gpu/arrays/slice_gpu_kernel.h             |   2 +-
 .../gpu/arrays/slice_grad_gpu_kernel.h        |   4 +-
 .../gpu/arrays/strided_slice_gpu_common.h     |  18 +-
 .../gpu/arrays/strided_slice_gpu_kernel.h     |   4 +-
 .../backend/kernel_compiler/gpu/gpu_kernel.h  |  13 +
 .../gpu/nn/batch_norm_gpu_kernel.h            |  10 +-
 .../gpu/nn/batch_norm_grad_gpu_kernel.h       |  10 +-
 .../gpu/nn/bias_add_grad_gpu_kenel.h          |   4 +-
 .../gpu/nn/conv2d_gpu_kernel.h                |   5 +-
 .../gpu/nn/conv2d_grad_filter_gpu_kernel.h    |   7 +-
 .../gpu/nn/conv2d_grad_input_gpu_kernel.h     |   7 +-
 .../gpu/nn/conv3d_gpu_kernel.h                |   7 +-
 .../gpu/nn/conv3d_grad_filter_gpu_kernel.h    |   7 +-
 .../gpu/nn/conv3d_grad_input_gpu_kernel.h     |   5 +-
 .../gpu/nn/conv3d_transpose_gpu_kernel.h      |   7 +-
 .../gpu/nn/instance_norm_gpu_kernel.h         |   5 +-
 .../gpu/nn/instance_norm_grad_gpu_kernel.h    |   5 +-
 .../gpu/nn/l2normalize_gpu_kernel.h           |   4 +-
 .../gpu/nn/l2normalize_grad_gpu_kernel.h      |   8 +-
 .../kernel_compiler/gpu/nn/lstm_gpu_kernel.h  |   2 +-
 .../gpu/nn/lstm_grad_data_gpu_kernel.h        |   2 +-
 .../gpu/nn/nll_loss_gpu_kernel.h              |   2 +-
 .../gpu/other/assign_gpu_kernel.cc            |   3 +
 .../neighbor_list_update_new_kernel.cc        |   2 +-
 .../kernel_compiler/hccl/hccl_kernel.cc       |  40 +-
 .../kernel_compiler/hccl/hccl_kernel.h        |   4 +-
 .../hccl/hccl_kernel_metadata.cc              |  10 +-
 .../hccl/hcom_all_broadcast.cc                |   1 +
 .../kernel_compiler/hccl/hcom_all_gather.cc   |  22 +-
 .../kernel_compiler/hccl/hcom_all_gather.h    |   1 -
 .../kernel_compiler/hccl/hcom_all_reduce.cc   |  11 +-
 .../hccl/hcom_all_reduce_scatter.cc           |  22 +-
 .../hccl/hcom_all_reduce_scatter.h            |   1 -
 .../kernel_compiler/hccl/hcom_receive.cc      |  20 +-
 .../backend/kernel_compiler/hccl/hcom_send.cc |  21 +-
 .../backend/kernel_compiler/hccl/hcom_util.cc |  53 +-
 .../backend/kernel_compiler/hccl/hcom_util.h  |   6 +-
 .../kernel_compiler/kash/kernel_pack.cc       |  10 +
 .../backend/kernel_compiler/kernel_query.cc   |   6 +
 .../tbe/tbe_dynaminc_shape_util.cc            |   4 +-
 .../tbe/tbe_json/tbe_json_creator.cc          |   6 +-
 .../tbe/tbe_json/tbe_json_creator.h           |   2 +
 .../enhancer/concat_outputs_for_all_gather.cc |   2 +-
 .../insert_tensor_move_for_hccl_op.cc         |  30 +-
 .../enhancer/insert_tensor_move_for_hccl_op.h |   2 +-
 .../change_axis_of_reduce_kernel.cc           |  15 +
 .../ascend/mindir/all_to_all_unify_mindir.cc  |   2 +-
 .../ccsrc/backend/optimizer/common/helper.cc  |  14 +-
 .../backend/optimizer/cpu/insert_cast_cpu.cc  |  38 +
 .../gpu/concat_outputs_for_all_gather.cc      |   2 +-
 .../gpu/insert_format_transform_op.cc         |  10 +-
 .../graph_kernel/add_atomic_clean.cc          |   2 +-
 .../add_stitch_atomic_clean_gpu.cc            |   2 +-
 .../graph_kernel/arithmetic_simplify.cc       |  48 +-
 .../graph_kernel/graph_kernel_cse.cc          |   4 +-
 .../graph_kernel/graph_kernel_helper.cc       |   2 +-
 .../graph_kernel/graph_kernel_optimization.cc |  20 +-
 .../graph_kernel/graph_kernel_splitter.cc     |   7 +-
 .../graph_kernel/model/lite_graph.cc          |  32 +-
 .../optimizer/graph_kernel/model/lite_graph.h |  22 -
 .../optimizer/graph_kernel/model/node.h       |  12 +
 .../optimizer/graph_kernel/model/op_node.cc   | 350 +++++++-
 .../optimizer/graph_kernel/model/op_node.h    | 163 +++-
 .../mem_reuse/mem_dynamic_allocator.cc        |  31 +-
 .../mem_reuse/mem_dynamic_allocator.h         |   5 +-
 .../optimizer/pass/communication_op_fusion.cc |   2 +-
 .../pass/convert_const_input_to_attr.cc       |  23 +-
 .../convert_tuple_input_to_dynamic_input.cc   |   2 +-
 .../optimizer/pass/optimize_dependence.cc     |   3 +
 .../backend/session/anf_runtime_algorithm.cc  | 106 +--
 .../ccsrc/backend/session/ascend_session.cc   |   8 +-
 mindspore/ccsrc/backend/session/executor.cc   |   4 +-
 .../ccsrc/backend/session/gpu_session.cc      |   8 +-
 .../ccsrc/backend/session/kernel_graph.cc     |   9 +-
 .../ccsrc/backend/session/kernel_graph.h      |   9 +-
 .../ccsrc/backend/session/session_basic.cc    |  45 +-
 .../ccsrc/backend/session/session_basic.h     |   3 +-
 mindspore/ccsrc/common/trans.cc               |   6 +-
 .../ccsrc/cxx_api/graph/acl/acl_graph_impl.cc |   9 +-
 .../ccsrc/cxx_api/graph/acl/model_process.cc  |  17 +-
 .../ccsrc/cxx_api/graph/acl/model_process.h   |   2 +-
 .../cxx_api/model/acl/model_converter.cc      |   4 +-
 .../ccsrc/cxx_api/model/acl/model_converter.h |   5 +-
 mindspore/ccsrc/cxx_api/model/model.cc        |   8 +-
 .../model_converter_utils/multi_process.cc    |  39 +-
 .../model_converter_utils/multi_process.h     |   8 +-
 .../model_converter_utils/shared_memory.h     |   4 +-
 mindspore/ccsrc/cxx_api/types.cc              |  14 +-
 mindspore/ccsrc/debug/anf_ir_dump.cc          |   6 +-
 mindspore/ccsrc/debug/anf_ir_utils.cc         |   3 +-
 mindspore/ccsrc/debug/common.cc               |  48 +-
 .../ccsrc/debug/data_dump/dump_json_parser.cc |  35 +-
 mindspore/ccsrc/debug/debug_services.cc       | 128 +--
 .../ccsrc/debug/debugger/debug_grpc.proto     |   6 +
 mindspore/ccsrc/debug/debugger/debugger.cc    |  68 +-
 mindspore/ccsrc/debug/debugger/debugger.h     |   7 +
 mindspore/ccsrc/debug/debugger/grpc_client.cc |  15 +
 mindspore/ccsrc/debug/debugger/grpc_client.h  |   3 +
 .../debugger/offline_debug/dbg_services.cc    |   2 +-
 .../debugger/offline_debug/offline_logger.h   |  18 +-
 .../ccsrc/debug/debugger/proto_exporter.cc    |   3 +-
 mindspore/ccsrc/debug/dump_proto.cc           |   3 +-
 mindspore/ccsrc/debug/env_config_parser.cc    |   3 +-
 mindspore/ccsrc/debug/tensor_data.h           |  19 +-
 mindspore/ccsrc/debug/trace.cc                |  20 +-
 .../ccsrc/fl/server/consistent_hash_ring.cc   |   2 +
 .../fl/server/distributed_count_service.cc    |   1 +
 mindspore/ccsrc/fl/server/executor.cc         |  17 +-
 mindspore/ccsrc/fl/server/executor.h          |  12 +-
 mindspore/ccsrc/fl/server/iteration_timer.cc  |   4 +-
 .../server/kernel/dense_grad_accum_kernel.h   |   2 +
 .../ccsrc/fl/server/kernel/fed_avg_kernel.h   |   8 +
 .../ccsrc/fl/server/kernel/optimizer_kernel.h |   2 +-
 .../server/kernel/round/pull_weight_kernel.cc |   2 +-
 .../server/kernel/round/push_weight_kernel.cc |   2 +-
 .../round/reconstruct_secrets_kernel.cc       |  10 +-
 mindspore/ccsrc/fl/server/model_store.cc      |   4 -
 .../ccsrc/fl/server/parameter_aggregator.cc   |  31 +-
 .../ccsrc/fl/server/parameter_aggregator.h    |  12 +-
 mindspore/ccsrc/fl/server/round.cc            |   6 +-
 mindspore/ccsrc/fl/server/server.h            |  10 +-
 .../ccsrc/frontend/optimizer/ad/dfunctor.cc   |  58 +-
 mindspore/ccsrc/frontend/optimizer/irpass.cc  |  25 +-
 mindspore/ccsrc/frontend/optimizer/irpass.h   |  11 +-
 .../optimizer/irpass/symbol_resolver.h        | 118 +--
 .../optimizer/irpass/updatestate_eliminate.cc | 174 ++--
 .../optimizer/irpass/updatestate_eliminate.h  |  39 +-
 mindspore/ccsrc/frontend/optimizer/opt.cc     |  62 +-
 mindspore/ccsrc/frontend/optimizer/opt.h      |  28 +-
 .../rec_core/rec_generate_strategy.cc         |  49 +-
 .../rec_core/rec_generate_strategy.h          |   3 +
 .../parallel/graph_util/generate_graph.cc     |   2 +-
 .../parallel/graph_util/generate_graph.h      |   2 +-
 .../frontend/parallel/ops_info/conv2d_info.cc | 195 +++--
 .../frontend/parallel/ops_info/conv2d_info.h  |   9 +-
 .../parallel/ops_info/gatherd_info.cc         |  16 +
 .../frontend/parallel/ops_info/gatherd_info.h |   1 +
 .../frontend/parallel/ops_info/ops_utils.h    |   3 +
 .../parallel/ops_info/virtual_output_info.cc  |  10 +-
 .../ccsrc/frontend/parallel/step_parallel.cc  |  48 +-
 .../ccsrc/frontend/parallel/step_parallel.h   |   9 +
 .../ccsrc/minddata/dataset/CMakeLists.txt     |   2 +
 mindspore/ccsrc/minddata/dataset/api/audio.cc | 112 +++
 .../ccsrc/minddata/dataset/api/datasets.cc    |  43 +-
 .../dataset/audio/kernels/ir/bindings.cc      |  90 ++
 .../dataset/engine/ir/datasetops/bindings.cc  |  22 +-
 .../engine/ir/datasetops/source/bindings.cc   |  14 +-
 .../dataset/kernels/ir/image/bindings.cc      |  12 +
 .../ccsrc/minddata/dataset/api/vision.cc      |  14 +
 .../minddata/dataset/audio/ir/CMakeLists.txt  |   2 +
 .../dataset/audio/ir/kernels/CMakeLists.txt   |   7 +
 .../audio/ir/kernels/allpass_biquad_ir.cc     |   9 +-
 .../audio/ir/kernels/allpass_biquad_ir.h      |   5 +-
 .../audio/ir/kernels/amplitude_to_db_ir.cc    |  10 +-
 .../dataset/audio/ir/kernels/angle_ir.cc      |   3 +-
 .../dataset/audio/ir/kernels/angle_ir.h       |   3 +-
 .../audio/ir/kernels/bandpass_biquad_ir.cc    |   7 +-
 .../audio/ir/kernels/bandpass_biquad_ir.h     |   4 +-
 .../audio/ir/kernels/bandreject_biquad_ir.cc  |   6 +-
 .../audio/ir/kernels/bandreject_biquad_ir.h   |   4 +-
 .../audio/ir/kernels/bass_biquad_ir.cc        |   6 +-
 .../dataset/audio/ir/kernels/bass_biquad_ir.h |   1 +
 .../audio/ir/kernels/time_stretch_ir.cc       |  13 +-
 .../minddata/dataset/audio/ir/validators.cc   |  72 +-
 .../minddata/dataset/audio/ir/validators.h    |  28 +
 .../dataset/audio/kernels/CMakeLists.txt      |   9 +-
 .../audio/kernels/allpass_biquad_op.cc        |  18 +-
 .../dataset/audio/kernels/allpass_biquad_op.h |   1 -
 .../audio/kernels/amplitude_to_db_op.cc       |  13 +-
 .../audio/kernels/amplitude_to_db_op.h        |   1 -
 .../dataset/audio/kernels/angle_op.cc         |   6 +-
 .../minddata/dataset/audio/kernels/angle_op.h |   1 -
 .../dataset/audio/kernels/audio_utils.cc      | 254 ++----
 .../dataset/audio/kernels/audio_utils.h       |  48 ++
 .../audio/kernels/bandpass_biquad_op.cc       |  17 +-
 .../audio/kernels/bandpass_biquad_op.h        |   3 +-
 .../audio/kernels/bandreject_biquad_op.cc     |  19 +-
 .../audio/kernels/bandreject_biquad_op.h      |   1 -
 .../dataset/audio/kernels/bass_biquad_op.cc   |  11 +-
 .../dataset/audio/kernels/bass_biquad_op.h    |   3 +-
 .../dataset/audio/kernels/time_stretch_op.cc  |  13 +-
 .../dataset/audio/kernels/time_stretch_op.h   |   4 +-
 .../dataset/callback/py_ds_callback.cc        |  12 +-
 .../dataset/callback/py_ds_callback.h         |  12 +-
 .../ccsrc/minddata/dataset/core/cv_tensor.cc  |  20 +-
 .../ccsrc/minddata/dataset/core/cv_tensor.h   |   3 +-
 .../ccsrc/minddata/dataset/core/data_type.cc  |   2 +-
 .../ccsrc/minddata/dataset/core/de_tensor.cc  |   2 +-
 .../ccsrc/minddata/dataset/core/tensor.cc     |  98 ++-
 .../ccsrc/minddata/dataset/core/tensor.h      |  17 +-
 .../dataset/engine/cache/cache_grpc_client.cc |   2 +-
 .../dataset/engine/consumers/tree_consumer.cc |   2 +-
 .../minddata/dataset/engine/data_schema.cc    |   2 +-
 .../dataset/engine/datasetops/rename_op.cc    |  19 +-
 .../engine/datasetops/source/CMakeLists.txt   |   1 +
 .../engine/datasetops/source/album_op.cc      |  18 +-
 .../engine/datasetops/source/album_op.h       |  18 +-
 .../engine/datasetops/source/cifar_op.cc      |   2 +-
 .../engine/datasetops/source/cmu_arctic_op.cc | 254 ++++++
 .../engine/datasetops/source/cmu_arctic_op.h  | 126 +++
 .../engine/datasetops/source/flickr_op.cc     |  26 +-
 .../engine/datasetops/source/mindrecord_op.cc |   2 +-
 .../engine/ir/cache/dataset_cache_impl.cc     |  16 +-
 .../engine/ir/datasetops/dataset_node.h       |   3 +-
 .../engine/ir/datasetops/epoch_ctrl_node.cc   |   4 +-
 .../engine/ir/datasetops/epoch_ctrl_node.h    |   4 +-
 .../ir/datasetops/source/CMakeLists.txt       |   1 +
 .../engine/ir/datasetops/source/album_node.cc |   2 +-
 .../engine/ir/datasetops/source/album_node.h  |   2 +-
 .../ir/datasetops/source/cmu_arctic_node.cc   | 107 +++
 .../ir/datasetops/source/cmu_arctic_node.h    |  76 ++
 .../engine/ir/datasetops/source/mnist_node.cc |   2 +-
 .../engine/ir/datasetops/source/mnist_node.h  |   2 +-
 .../ir/datasetops/source/random_node.cc       |   2 +-
 .../engine/ir/datasetops/source/random_node.h |   2 +-
 .../ir/datasetops/source/tf_record_node.cc    |   4 +-
 .../ir/datasetops/source/tf_record_node.h     |   4 +-
 .../engine/ir/datasetops/transfer_node.cc     |   3 +
 .../dataset/engine/opt/post/repeat_pass.cc    |   2 +-
 .../dataset/engine/opt/post/repeat_pass.h     |   2 +-
 .../ccsrc/minddata/dataset/engine/serdes.cc   | 246 +++---
 .../ccsrc/minddata/dataset/engine/serdes.h    |  21 +-
 .../minddata/dataset/include/dataset/audio.h  | 154 ++++
 .../dataset/include/dataset/constants.h       |   6 +
 .../dataset/include/dataset/datasets.h        | 166 ++--
 .../dataset/include/dataset/samplers.h        |   3 +-
 .../minddata/dataset/include/dataset/vision.h |  26 +-
 .../dataset/kernels/image/CMakeLists.txt      |   1 +
 .../dataset/kernels/image/adjust_gamma_op.cc  |   6 +-
 .../minddata/dataset/kernels/image/crop_op.cc |  12 +-
 .../dataset/kernels/image/hwc_to_chw_op.cc    |   8 +-
 .../dataset/kernels/image/image_utils.cc      |  77 +-
 .../dataset/kernels/image/image_utils.h       |  10 +
 .../kernels/image/lite_cv/image_process.cc    |   6 +-
 .../kernels/image/lite_cv/warp_affine.cc      |   4 +-
 .../dataset/kernels/image/posterize_op.cc     |   3 +-
 .../dataset/kernels/image/random_color_op.cc  |   2 +-
 .../image/random_crop_and_resize_op.cc        |  12 +-
 .../dataset/kernels/image/random_crop_op.cc   |  12 +-
 .../dataset/kernels/image/resize_op.cc        |  12 +-
 .../dataset/kernels/image/sharpness_op.cc     |   2 +-
 ..._dvpp_decode_random_crop_resize_jpeg_op.cc |   3 +-
 .../soft_dvpp_decode_resize_jpeg_op.cc        |   3 +-
 .../dataset/kernels/image/solarize_op.cc      |   2 +-
 .../dataset/kernels/image/uniform_aug_op.h    |   2 +-
 .../dataset/kernels/ir/data/transforms_ir.cc  |  21 +
 .../dataset/kernels/ir/data/transforms_ir.h   |  13 +-
 .../minddata/dataset/kernels/ir/validators.cc |   5 +
 .../minddata/dataset/kernels/ir/validators.h  |   3 +
 .../dataset/kernels/ir/vision/CMakeLists.txt  |   1 +
 .../kernels/ir/vision/adjust_gamma_ir.cc      |   2 +
 .../dataset/kernels/ir/vision/affine_ir.cc    |  12 +-
 .../kernels/ir/vision/auto_contrast_ir.cc     |   4 +-
 .../kernels/ir/vision/center_crop_ir.cc       |   2 +-
 .../dataset/kernels/ir/vision/crop_ir.cc      |  15 +
 .../dataset/kernels/ir/vision/crop_ir.h       |   4 +
 .../kernels/ir/vision/cutmix_batch_ir.cc      |   6 +-
 .../dataset/kernels/ir/vision/cutout_ir.cc    |   4 +-
 .../dataset/kernels/ir/vision/decode_ir.cc    |   2 +-
 .../kernels/ir/vision/gaussian_blur_ir.cc     |   4 +-
 .../kernels/ir/vision/mixup_batch_ir.cc       |   2 +-
 .../kernels/ir/vision/normalize_pad_ir.cc     |   6 +-
 .../dataset/kernels/ir/vision/pad_ir.cc       |   6 +-
 .../kernels/ir/vision/random_affine_ir.cc     |  13 +-
 .../ir/vision/random_color_adjust_ir.cc       |   9 +-
 .../kernels/ir/vision/random_color_ir.cc      |   2 +-
 .../ir/vision/random_crop_decode_resize_ir.cc |  10 +-
 .../kernels/ir/vision/random_crop_ir.cc       |  10 +-
 .../ir/vision/random_crop_with_bbox_ir.cc     |  10 +-
 .../ir/vision/random_horizontal_flip_ir.cc    |   2 +-
 .../random_horizontal_flip_with_bbox_ir.cc    |   2 +-
 .../kernels/ir/vision/random_posterize_ir.cc  |   2 +-
 .../kernels/ir/vision/random_resize_ir.cc     |   2 +-
 .../ir/vision/random_resize_with_bbox_ir.cc   |   2 +-
 .../ir/vision/random_resized_crop_ir.cc       |  10 +-
 .../random_resized_crop_with_bbox_ir.cc       |  10 +-
 .../kernels/ir/vision/random_rotation_ir.cc   |  10 +-
 .../kernels/ir/vision/random_sharpness_ir.cc  |   2 +-
 .../kernels/ir/vision/random_solarize_ir.cc   |   4 +-
 .../ir/vision/random_vertical_flip_ir.cc      |   2 +-
 .../random_vertical_flip_with_bbox_ir.cc      |   2 +-
 .../dataset/kernels/ir/vision/rescale_ir.cc   |   4 +-
 .../dataset/kernels/ir/vision/resize_ir.cc    |   4 +-
 .../ir/vision/resize_preserve_ar_ir.cc        |   6 +-
 .../kernels/ir/vision/resize_with_bbox_ir.cc  |   4 +-
 .../kernels/ir/vision/rgb_to_bgr_ir.cc        |   5 +
 .../dataset/kernels/ir/vision/rgb_to_bgr_ir.h |   2 +
 .../kernels/ir/vision/rgb_to_gray_ir.cc       |   6 +
 .../kernels/ir/vision/rgb_to_gray_ir.h        |   2 +
 .../kernels/ir/vision/rgba_to_bgr_ir.cc       |   1 -
 .../dataset/kernels/ir/vision/rotate_ir.cc    |  12 +-
 .../kernels/ir/vision/slice_patches_ir.cc     |  12 +
 .../kernels/ir/vision/slice_patches_ir.h      |   2 +
 ...tdvpp_decode_random_crop_resize_jpeg_ir.cc |  10 +-
 .../vision/softdvpp_decode_resize_jpeg_ir.cc  |   4 +-
 .../kernels/ir/vision/vertical_flip_ir.cc     |   6 +
 .../kernels/ir/vision/vertical_flip_ir.h      |   2 +
 .../minddata/dataset/kernels/tensor_op.h      |   8 +
 .../dataset/text/ir/kernels/text_ir.cc        |   7 +
 .../dataset/text/ir/kernels/text_ir.h         |   2 +
 .../mindrecord/io/shard_index_generator.cc    |   2 +-
 .../minddata/mindrecord/io/shard_reader.cc    |   4 +-
 mindspore/ccsrc/pipeline/jit/action.cc        |  47 +-
 .../pipeline/jit/parse/function_block.cc      |  15 +-
 .../ccsrc/pipeline/jit/parse/function_block.h |   2 +-
 mindspore/ccsrc/pipeline/jit/parse/parse.cc   | 267 ++++--
 mindspore/ccsrc/pipeline/jit/parse/parse.h    | 148 ++--
 mindspore/ccsrc/pipeline/jit/parse/resolve.cc |  16 +-
 mindspore/ccsrc/pipeline/jit/parse/resolve.h  |   2 +-
 mindspore/ccsrc/pipeline/jit/pass.cc          |  59 +-
 mindspore/ccsrc/pipeline/jit/pipeline.cc      |   5 +-
 mindspore/ccsrc/pipeline/jit/resource.h       |   4 +
 .../jit/static_analysis/order_enforce.cc      |  12 +-
 .../pipeline/pynative/pynative_execute.cc     |   1 +
 mindspore/ccsrc/profiler/device/data_saver.cc |  13 +-
 mindspore/ccsrc/profiler/device/data_saver.h  |   9 +
 .../profiler/device/gpu/gpu_data_saver.cc     |   4 +
 .../profiler/device/gpu/gpu_profiling.cc      |   4 +
 .../ps/core/communicator/http_communicator.cc |   8 +-
 .../ps/core/communicator/tcp_communicator.cc  |  16 +-
 .../ps/core/communicator/tcp_communicator.h   |   2 +-
 mindspore/ccsrc/ps/optimizer_info.cc          |  47 +-
 mindspore/ccsrc/ps/optimizer_info_builder.cc  |   3 +
 .../ccsrc/ps/ps_cache/ps_cache_manager.cc     |  15 +-
 .../ps/ps_cache/ps_data/ps_data_prefetch.cc   |   1 +
 mindspore/ccsrc/ps/ps_context.cc              |   3 +-
 mindspore/ccsrc/ps/ps_context.h               |   7 +-
 .../ccsrc/pybind_api/ir/param_info_py.cc      |   1 +
 mindspore/ccsrc/runtime/device/CMakeLists.txt |   2 +-
 .../runtime/device/ascend/ascend_bucket.cc    |   4 +-
 .../device/ascend/ascend_device_address.cc    |   3 +
 .../runtime/device/ascend/ascend_event.cc     |   4 +
 .../device/ascend/ascend_kernel_runtime.cc    |  59 +-
 .../device/ascend/ascend_stream_assign.cc     |  36 +-
 .../device/ascend/ascend_stream_assign.h      |   3 +-
 .../ascend/executor/ai_core_dynamic_kernel.cc |  19 +-
 .../ascend/executor/ai_cpu_dynamic_kernel.cc  |   2 +-
 .../executor/tiling/op_tiling_adapter.cc      |  14 +-
 .../executor/tiling/op_tiling_adapter.h       |   4 +-
 .../device/ascend/kernel_select_ascend.cc     |   2 +-
 .../profiling/profiling_callback_register.h   |   2 +-
 .../ascend/profiling/profiling_manager.cc     |   1 -
 .../ascend/profiling/profiling_manager.h      |   2 +-
 .../runtime/device/cpu/kernel_select_cpu.cc   |  74 +-
 .../runtime/device/cpu/kernel_select_cpu.h    |   2 +
 .../ccsrc/runtime/device/gpu/blocking_queue.h |   2 +-
 .../runtime/device/gpu/cuda_env_checker.cc    |   4 +
 .../device/gpu/distribution/mpi_wrapper.cc    |   6 +-
 .../runtime/device/gpu/gpu_buffer_mgr.cc      |  12 +-
 .../runtime/device/gpu/gpu_device_address.cc  |   1 +
 .../runtime/device/gpu/gpu_kernel_build.cc    |   4 +-
 .../runtime/device/gpu/gpu_kernel_runtime.cc  |  54 +-
 .../runtime/device/gpu/gpu_stream_assign.cc   |   5 +
 .../runtime/device/gpu/kernel_info_setter.cc  |   5 +
 .../ccsrc/runtime/device/kernel_runtime.cc    |  10 +-
 .../runtime/framework/actor/debug_actor.cc    |   1 -
 .../runtime/framework/actor/gather_actor.cc   |  10 +-
 .../runtime/framework/actor/gather_actor.h    |  10 +-
 .../runtime/framework/actor/kernel_actor.cc   |   2 +-
 .../runtime/framework/actor/switch_actor.cc   |  10 +-
 .../runtime/framework/actor/switch_actor.h    |  12 +-
 .../runtime/framework/control_node_parser.cc  |   2 +-
 .../ccsrc/runtime/framework/graph_compiler.cc |  13 +-
 .../runtime/framework/graph_scheduler.cc      |  65 +-
 .../ccsrc/runtime/framework/graph_scheduler.h |  30 +-
 .../hardware/cpu/cpu_device_context.cc        |   8 +
 .../runtime/hardware/cpu/cpu_device_context.h |   2 +
 .../runtime/hardware/cpu/cpu_memory_pool.cc   |   4 +-
 .../hardware/gpu/gpu_device_context.cc        |   9 -
 .../runtime/hccl_adapter/hccl_adapter.cc      |  95 ++-
 .../ccsrc/runtime/hccl_adapter/hccl_adapter.h |  17 +-
 .../runtime/hccl_adapter/plugin/hccl_plugin.h |   6 +
 .../transform/express_ir/mindir_exporter.cc   |  71 +-
 .../transform/express_ir/onnx_exporter.cc     | 660 +++++++++++++--
 mindspore/ccsrc/transform/graph_ir/convert.h  |   9 +-
 .../ccsrc/utils/context/graph_kernel_flags.cc |   2 +
 .../ccsrc/utils/context/graph_kernel_flags.h  |   7 +
 mindspore/ccsrc/utils/utils.h                 |   8 +
 mindspore/ccsrc/vm/transform.cc               |   7 +
 mindspore/common/parameter.py                 |  14 +-
 mindspore/common/seed.py                      |   4 +-
 mindspore/common/tensor.py                    |  41 +-
 mindspore/context.py                          |  40 +-
 mindspore/core/abstract/abstract_value.cc     |   8 +-
 mindspore/core/abstract/analysis_context.cc   |  27 +-
 mindspore/core/abstract/analysis_context.h    |  10 +-
 mindspore/core/abstract/prim_arrays.cc        |   2 +-
 mindspore/core/abstract/prim_structures.cc    |   7 +-
 .../core/abstract/primitive_infer_map.cc      |  12 +-
 mindspore/core/base/core_ops.h                |  10 +
 mindspore/core/ir/param_info.h                |   5 +
 .../core/load_mindir/anf_model_parser.cc      | 162 +++-
 mindspore/core/load_mindir/anf_model_parser.h |   2 +
 mindspore/core/load_mindir/load_model.cc      |   4 +-
 mindspore/core/mindrt/src/actor/actormgr.cc   |  30 +-
 mindspore/core/mindrt/src/actor/actormgr.h    |  24 +-
 .../mindrt/src/thread/actor_threadpool.cc     |  57 +-
 .../core/mindrt/src/thread/actor_threadpool.h |   8 +-
 .../core/mindrt/src/thread/core_affinity.cc   |  20 +-
 .../core/mindrt/src/thread/core_affinity.h    |   2 +
 mindspore/core/mindrt/src/thread/threadlog.h  |   9 +
 .../core/mindrt/src/thread/threadpool.cc      |  42 +-
 mindspore/core/mindrt/src/thread/threadpool.h |   9 +-
 mindspore/core/ops/apply_momentum.cc          |   3 +
 mindspore/core/ops/arg_min.cc                 |   1 +
 mindspore/core/ops/asin.cc                    |   1 +
 mindspore/core/ops/assert.cc                  |   3 +
 mindspore/core/ops/batch_to_space_nd.cc       |   2 +-
 mindspore/core/ops/batch_to_space_nd.h        |   2 +-
 mindspore/core/ops/conv2d.cc                  |   3 +
 mindspore/core/ops/cos.cc                     |   2 +-
 .../core/ops/fake_quant_with_min_max_vars.cc  |   2 +-
 mindspore/core/ops/grad/hshrink_grad.h        |   2 +-
 .../core/ops/grad/soft_margin_loss_grad.h     |   2 +-
 mindspore/core/ops/hshrink.h                  |   2 +-
 mindspore/core/ops/logical_not.cc             |   1 +
 mindspore/core/ops/lrn.cc                     |   3 +-
 mindspore/core/ops/max_pool.cc                |   3 +-
 mindspore/core/ops/ones_like.cc               |   2 +
 mindspore/core/ops/pack.cc                    |   1 +
 mindspore/core/ops/rank.cc                    |   1 +
 mindspore/core/ops/reduce_sum.cc              | 168 +++-
 mindspore/core/ops/reduce_sum.h               |   4 +-
 mindspore/core/ops/round.cc                   |   1 +
 mindspore/core/ops/scatter_nd_update.h        |   2 +-
 mindspore/core/ops/soft_margin_loss.h         |   2 +-
 mindspore/core/ops/space_to_batch_nd.cc       |   2 +-
 mindspore/core/ops/space_to_batch_nd.h        |   2 +-
 mindspore/core/ops/squeeze.cc                 |   2 +-
 mindspore/core/ops/stack.cc                   |   3 +
 mindspore/core/ops/strided_slice.cc           |  63 +-
 mindspore/core/ops/topk.cc                    |   3 +
 mindspore/core/ops/unpack.cc                  |   1 +
 mindspore/core/ops/unsorted_segment_sum.cc    |   3 +
 mindspore/core/ops/unstack.cc                 |   1 +
 mindspore/core/proto/mind_ir.proto            |   6 +
 mindspore/core/utils/check_convert_utils.cc   |  15 +
 mindspore/core/utils/check_convert_utils.h    |   1 +
 mindspore/core/utils/log_adapter.cc           |   4 +-
 mindspore/core/utils/parallel_node_check.cc   |   2 +-
 mindspore/core/utils/trace_info.h             |   8 +
 mindspore/dataset/audio/transforms.py         | 215 ++++-
 mindspore/dataset/audio/utils.py              |   2 -
 mindspore/dataset/audio/validators.py         | 130 ++-
 mindspore/dataset/core/validator_helpers.py   |  13 +-
 mindspore/dataset/engine/__init__.py          |   2 +-
 mindspore/dataset/engine/datasets.py          |  10 +-
 .../dataset/engine/serializer_deserializer.py | 350 +-------
 mindspore/dataset/engine/validators.py        |   6 +-
 mindspore/dataset/vision/c_transforms.py      |  33 +-
 mindspore/dataset/vision/py_transforms.py     |  42 +-
 .../dataset/vision/py_transforms_util.py      |  28 +-
 mindspore/dataset/vision/validators.py        |  24 +-
 mindspore/lite/CMakeLists.txt                 |  44 +-
 mindspore/lite/OWNERS                         |  16 +-
 mindspore/lite/build_lite.sh                  |   2 +-
 .../models/densenet_train_export.py           |   3 +-
 .../lite/examples/quick_start_cpp/build.sh    |   4 +-
 .../lite/examples/quick_start_cpp/main.cc     | 160 ++--
 mindspore/lite/examples/runtime_cpp/build.sh  |   8 +-
 mindspore/lite/examples/runtime_cpp/main.cc   | 674 +++++++--------
 .../train_lenet_java/prepare_and_run.sh       |   4 +
 .../lite/include/registry/kernel_interface.h  |  18 -
 .../lite/include/registry/register_kernel.h   |  28 -
 mindspore/lite/micro/cmake/file_list.cmake    |   1 +
 .../generator/component/weight_component.cc   |   1 -
 mindspore/lite/micro/coder/graph.cc           |   7 +-
 .../cmsis-nn/int8/conv2d_int8_coder.cc        |   5 +-
 .../opcoders/nnacl/fp32/addn_fp32_coder.cc    |   6 +-
 .../nnacl/fp32/batchnorm_fp32_coder.cc        |   2 +
 .../opcoders/nnacl/fp32/biasadd_fp32_coder.cc |   1 +
 .../fp32/convolution_depthwise_fp32_coder.cc  |   3 +-
 .../fp32/convolution_winograd_fp32_coder.cc   |   4 +-
 .../nnacl/fp32/full_connection_fp32_coder.cc  |   1 +
 .../opcoders/nnacl/fp32/gather_fp32_coder.cc  |   6 +-
 .../nnacl/fp32/matmul_fp32_base_coder.cc      |   2 +
 .../opcoders/nnacl/fp32/softmax_fp32_coder.cc |   3 +-
 .../opcoders/nnacl/int8/conv2d_int8_coder.cc  |   2 +-
 .../int8/convolution_depthwise_int8_coder.cc  |   2 +-
 .../opcoders/nnacl/int8/reduce_int8_coder.cc  |   2 +-
 .../opcoders/nnacl/int8/reduce_int8_coder.h   |  16 +-
 .../opcoders/nnacl/int8/softmax_int8_coder.cc |   5 +-
 mindspore/lite/micro/coder/train.cc           |   4 +
 .../wrapper/base/optimize_handler_wrapper.c   |  22 +-
 .../wrapper/base/optimize_handler_wrapper.h   |  13 +-
 .../wrapper/int8/conv1x1_init_int8_wrapper.c  |   5 +-
 .../lite/minddata/example/CMakeLists.txt      |   4 +-
 .../lite/minddata/example/testlitecv.cpp      |  20 +-
 mindspore/lite/minddata/wrapper/MDToDApi.cc   |   4 -
 .../lite/minddata/wrapper/album_op_android.cc |  18 +-
 .../lite/minddata/wrapper/album_op_android.h  |  18 +-
 mindspore/lite/schema/ops.fbs                 |   4 +
 mindspore/lite/src/CMakeLists.txt             |  23 +-
 .../lite/src/common/dynamic_library_loader.cc |   4 +-
 .../lite/src/common/dynamic_library_loader.h  |   4 +-
 mindspore/lite/src/common/log_adapter.h       |  14 +
 mindspore/lite/src/common/string_util.cc      |  79 +-
 mindspore/lite/src/common/string_util.h       |   5 +-
 mindspore/lite/src/common/tensor_util.cc      |  49 +-
 mindspore/lite/src/common/tensor_util.h       |   6 +-
 mindspore/lite/src/common/utils.cc            |  33 +-
 mindspore/lite/src/common/utils.h             |   4 +-
 .../lite/src/cxx_api/model/model_impl.cc      |   6 +-
 .../lite/src/cxx_api/tensor/tensor_impl.cc    |  10 +
 .../lite/src/cxx_api/tensor/tensor_impl.h     |  12 +-
 .../lite/src/delegate/npu/npu_delegate.cc     |   8 +
 mindspore/lite/src/delegate/npu/npu_graph.cc  |   2 +-
 .../lite/src/delegate/npu/npu_manager.cc      |   6 +-
 .../lite/src/delegate/npu/op/resize_npu.cc    |   1 +
 .../tensorrt/op/activation_tensorrt.cc        |   5 +
 .../delegate/tensorrt/op/concate_tensorrt.cc  |   6 +-
 .../tensorrt/op/convolution_tensorrt.cc       |  15 +-
 .../tensorrt/op/deconvolution_tensorrt.cc     |  15 +-
 .../tensorrt/op/elementwise_tensorrt.cc       |  41 +-
 .../tensorrt/op/elementwise_tensorrt.h        |   6 +-
 .../delegate/tensorrt/op/gather_tensorrt.cc   |   5 +
 .../delegate/tensorrt/op/matmul_tensorrt.cc   |  14 +-
 .../src/delegate/tensorrt/op/pad_tensorrt.cc  |   5 +
 .../src/delegate/tensorrt/op/pool_tensorrt.cc |   5 +
 .../delegate/tensorrt/op/reduce_tensorrt.cc   |   4 +
 .../delegate/tensorrt/op/scale_tensorrt.cc    |   4 +
 .../delegate/tensorrt/op/shape_tensorrt.cc    |   5 +
 .../delegate/tensorrt/op/shuffle_tensorrt.cc  | 101 ++-
 .../delegate/tensorrt/op/shuffle_tensorrt.h   |   1 +
 .../delegate/tensorrt/op/slice_tensorrt.cc    |  15 +-
 .../src/delegate/tensorrt/op/slice_tensorrt.h |   2 +
 .../delegate/tensorrt/op/softmax_tensorrt.cc  |   4 +
 .../src/delegate/tensorrt/op/tensorrt_op.cc   |  11 +
 .../src/delegate/tensorrt/op/tensorrt_op.h    |   2 +
 .../delegate/tensorrt/op/unary_tensorrt.cc    |   4 +
 .../delegate/tensorrt/tensorrt_delegate.cc    |  19 +-
 .../delegate/tensorrt/tensorrt_subgraph.cc    |   2 +
 .../src/delegate/tensorrt/tensorrt_subgraph.h |   6 +-
 .../src/delegate/tensorrt/tensorrt_utils.cc   |   2 +-
 .../src/delegate/tensorrt/tensorrt_utils.h    |   2 +-
 mindspore/lite/src/huffman_decode.h           |   1 -
 mindspore/lite/src/inner_context.cc           |  24 +-
 mindspore/lite/src/inner_kernel.cc            |  35 +
 mindspore/lite/src/inner_kernel.h             |  47 +-
 mindspore/lite/src/kernel_registry.cc         |  41 +-
 mindspore/lite/src/kernel_registry.h          |   5 +-
 mindspore/lite/src/lite_kernel.cc             |  13 +-
 mindspore/lite/src/lite_kernel.h              |  10 +-
 mindspore/lite/src/lite_kernel_util.cc        |   4 +-
 mindspore/lite/src/lite_kernel_util.h         |   2 +
 mindspore/lite/src/lite_mindrt.cc             | 341 ++++----
 mindspore/lite/src/lite_mindrt.h              |   6 +-
 mindspore/lite/src/lite_model.cc              |   1 -
 mindspore/lite/src/lite_session.cc            |  20 +-
 mindspore/lite/src/ops/CMakeLists.txt         |  36 +
 .../ops/compat/v0/expand_dims_compat_v0.cc    |   2 +-
 .../lite/src/ops/compat/v0/slice_compat_v0.cc |   2 +-
 .../ops/compat/v0/strided_slice_compat_v0.cc  |   2 +-
 .../lite/src/ops/compat/v0/topk_compat_v0.cc  |   2 +-
 mindspore/lite/src/ops/ops_def.cc             |   4 +
 mindspore/lite/src/ops/ops_func_declare.h     |   2 +
 mindspore/lite/src/ops/ops_utils.cc           |   6 +
 .../lite/src/ops/populate/adder_populate.cc   |   4 +-
 .../lite/src/ops/populate/conv2d_populate.cc  |  10 +-
 .../src/ops/populate/deconv2d_populate.cc     |  10 +-
 .../detection_post_process_populate.cc        |   7 +-
 .../lite/src/ops/populate/pooling_populate.cc |  16 +-
 .../lite/src/ops/populate/populate_register.h |   4 +
 .../src/ops/populate/prior_box_populate.cc    |   6 +-
 .../lite/src/ops/populate/split_populate.cc   |   8 +-
 .../src/ops/populate/v0/split_populate_v0.cc  |   9 +-
 .../lite/src/registry/register_kernel.cc      |   4 -
 .../lite/src/registry/register_kernel_impl.h  |   1 +
 mindspore/lite/src/registry/register_utils.cc |  25 +
 mindspore/lite/src/registry/register_utils.h  |  59 ++
 .../runtime/gpu/opencl/opencl_allocator.cc    |   5 +
 .../src/runtime/gpu/opencl/opencl_runtime.cc  |  35 +-
 .../src/runtime/gpu/opencl/opencl_runtime.h   |   2 +-
 mindspore/lite/src/runtime/infer_manager.cc   |  10 +
 .../src/runtime/kernel/arm/CMakeLists.txt     |  19 +-
 .../src/runtime/kernel/arm/base/carry_data.cc |   9 +-
 .../src/runtime/kernel/arm/base/carry_data.h  |   2 +
 .../kernel/arm/base/convolution_base.cc       |  50 +-
 .../kernel/arm/base/convolution_base.h        |  18 +-
 .../kernel/arm/base/group_convolution_base.cc |   1 +
 .../kernel/arm/base/group_convolution_base.h  |   2 +-
 .../kernel/arm/base/quant_dtype_cast.cc       |   5 +-
 .../runtime/kernel/arm/base/reshape_base.cc   |  58 +-
 .../runtime/kernel/arm/base/reshape_base.h    |  13 +-
 .../src/runtime/kernel/arm/base/slice_base.cc |   4 +-
 .../runtime/kernel/arm/base/softmax_base.cc   |   2 +
 .../src/runtime/kernel/arm/base/stack_base.cc |   6 +-
 .../kernel/arm/control/tensorlist_reserve.cc  |   6 +-
 .../kernel/arm/fp16/activation_fp16.cc        |   2 +
 .../arm/fp16/arithmetic_compare_fp16.cc       |   4 +-
 .../kernel/arm/fp16/arithmetic_fp16.cc        |   6 +-
 .../kernel/arm/fp16/arithmetic_self_fp16.cc   |  16 +-
 .../runtime/kernel/arm/fp16/batchnorm_fp16.cc |   6 +-
 .../runtime/kernel/arm/fp16/biasadd_fp16.cc   |  12 +-
 .../src/runtime/kernel/arm/fp16/cast_fp16.cc  |  26 +-
 .../runtime/kernel/arm/fp16/concat_fp16.cc    |   5 +
 .../kernel/arm/fp16/convolution_1x1_fp16.cc   |  91 +-
 .../kernel/arm/fp16/convolution_1x1_fp16.h    |  10 +-
 .../arm/fp16/convolution_delegate_fp16.cc     |   6 +-
 .../arm/fp16/convolution_delegate_fp16.h      |   4 +-
 .../fp16/convolution_depthwise_3x3_fp16.cc    |  50 +-
 .../arm/fp16/convolution_depthwise_3x3_fp16.h |   9 +-
 .../arm/fp16/convolution_depthwise_fp16.cc    |  47 +-
 .../arm/fp16/convolution_depthwise_fp16.h     |   9 +-
 .../convolution_depthwise_slidewindow_fp16.cc |  56 +-
 .../convolution_depthwise_slidewindow_fp16.h  |   7 +-
 .../kernel/arm/fp16/convolution_fp16.cc       |  41 +-
 .../kernel/arm/fp16/convolution_fp16.h        |  17 +-
 .../arm/fp16/convolution_winograd_fp16.cc     |  60 +-
 .../arm/fp16/convolution_winograd_fp16.h      |  20 +-
 .../src/runtime/kernel/arm/fp16/crop_fp16.cc  |   5 +-
 .../arm/fp16/deconvolution_depthwise_fp16.cc  |  43 +-
 .../arm/fp16/deconvolution_depthwise_fp16.h   |   7 +-
 .../kernel/arm/fp16/deconvolution_fp16.cc     |  67 +-
 .../kernel/arm/fp16/deconvolution_fp16.h      |   7 +-
 .../arm/fp16/deconvolution_winograd_fp16.cc   |  60 +-
 .../arm/fp16/deconvolution_winograd_fp16.h    |   4 +-
 .../kernel/arm/fp16/fullconnection_fp16.cc    |   3 +
 .../kernel/arm/fp16/fused_batchnorm_fp16.cc   |   8 +-
 .../runtime/kernel/arm/fp16/gather_fp16.cc    |  45 +-
 .../src/runtime/kernel/arm/fp16/gather_fp16.h |   1 -
 .../kernel/arm/fp16/group_convolution_fp16.cc |   2 +
 .../src/runtime/kernel/arm/fp16/gru_fp16.cc   |  13 +-
 .../kernel/arm/fp16/instance_norm_fp16.cc     |   8 +
 .../src/runtime/kernel/arm/fp16/lstm_fp16.cc  |  15 +-
 .../kernel/arm/fp16/matmul_base_fp16.cc       |   8 +
 .../runtime/kernel/arm/fp16/matmul_fp16.cc    |   3 +
 .../src/runtime/kernel/arm/fp16/pad_fp16.cc   |  21 +-
 .../runtime/kernel/arm/fp16/pooling_fp16.cc   |   3 +-
 .../src/runtime/kernel/arm/fp16/power_fp16.cc |  13 +-
 .../kernel/arm/fp16/quant_dtype_cast_fp16.cc  |  16 +-
 .../runtime/kernel/arm/fp16/reduce_fp16.cc    |   9 +-
 .../src/runtime/kernel/arm/fp16/scale_fp16.cc |  10 +-
 .../runtime/kernel/arm/fp16/softmax_fp16.cc   |  14 +-
 .../src/runtime/kernel/arm/fp16/stack_fp16.cc |   6 +-
 .../kernel/arm/fp16_grad/bn_fp16_grad.cc      |  28 +-
 .../kernel/arm/fp16_grad/dropout_fp16_grad.cc |   1 -
 .../arm/fp16_grad/layernorm_fp16_grad.cc      |  26 +-
 .../kernel/arm/fp16_grad/pooling_fp16_grad.cc |  18 +-
 .../kernel/arm/fp16_grad/resize_fp16_grad.cc  |   2 -
 .../kernel/arm/fp32/activation_fp32.cc        |   2 +
 .../src/runtime/kernel/arm/fp32/adder_fp32.cc |  10 +-
 .../src/runtime/kernel/arm/fp32/adder_fp32.h  |   2 +-
 .../src/runtime/kernel/arm/fp32/addn_fp32.cc  |   6 +-
 .../kernel/arm/fp32/arithmetic_fp32.cc        |   2 +
 .../kernel/arm/fp32/arithmetic_self_fp32.cc   |   2 +
 .../kernel/arm/fp32/batch_to_space_fp32.cc    |   2 +
 .../runtime/kernel/arm/fp32/batchnorm_fp32.cc |   2 +
 .../src/runtime/kernel/arm/fp32/bias_fp32.cc  |   6 +-
 .../kernel/arm/fp32/broadcast_to_fp32.cc      |   2 +
 .../src/runtime/kernel/arm/fp32/cast_fp32.cc  |   2 +
 .../runtime/kernel/arm/fp32/concat_fp32.cc    |   1 +
 .../kernel/arm/fp32/convolution_1x1_fp32.cc   | 105 +--
 .../kernel/arm/fp32/convolution_1x1_fp32.h    |  11 +-
 .../arm/fp32/convolution_delegate_fp32.cc     |   5 +
 .../arm/fp32/convolution_delegate_fp32.h      |   4 +-
 .../fp32/convolution_depthwise_3x3_fp32.cc    |  96 +--
 .../arm/fp32/convolution_depthwise_3x3_fp32.h |   9 +-
 .../arm/fp32/convolution_depthwise_fp32.cc    |  84 +-
 .../arm/fp32/convolution_depthwise_fp32.h     |  10 +-
 .../convolution_depthwise_indirect_fp32.cc    | 114 +--
 .../convolution_depthwise_indirect_fp32.h     |   8 +-
 .../convolution_depthwise_slidewindow_fp32.cc |  88 +-
 .../convolution_depthwise_slidewindow_fp32.h  |   8 +-
 ...volution_depthwise_slidewindow_x86_fp32.cc |  81 +-
 ...nvolution_depthwise_slidewindow_x86_fp32.h |  10 +-
 .../kernel/arm/fp32/convolution_fp32.cc       | 108 +--
 .../kernel/arm/fp32/convolution_fp32.h        |  18 +-
 .../arm/fp32/convolution_slidewindow_fp32.cc  |  89 +-
 .../arm/fp32/convolution_slidewindow_fp32.h   |  19 +-
 .../arm/fp32/convolution_winograd_fp32.cc     | 156 ++--
 .../arm/fp32/convolution_winograd_fp32.h      |  21 +-
 .../kernel/arm/fp32/crop_and_resize_fp32.cc   |   2 +
 .../src/runtime/kernel/arm/fp32/crop_fp32.cc  |   2 +
 .../arm/fp32/deconvolution_depthwise_fp32.cc  |  77 +-
 .../arm/fp32/deconvolution_depthwise_fp32.h   |   9 +-
 .../kernel/arm/fp32/deconvolution_fp32.cc     | 104 ++-
 .../kernel/arm/fp32/deconvolution_fp32.h      |   8 +-
 .../arm/fp32/deconvolution_winograd_fp32.cc   |  66 +-
 .../arm/fp32/deconvolution_winograd_fp32.h    |   4 +-
 .../kernel/arm/fp32/depth_to_space_fp32.cc    |   2 +
 .../arm/fp32/detection_post_process_fp32.cc   |   1 +
 .../src/runtime/kernel/arm/fp32/elu_fp32.cc   |   2 +
 .../kernel/arm/fp32/embedding_lookup_fp32.cc  |   2 +
 .../src/runtime/kernel/arm/fp32/exp_fp32.cc   |   2 +
 .../src/runtime/kernel/arm/fp32/fill_fp32.cc  |   2 +
 .../kernel/arm/fp32/fullconnection_fp32.cc    |   3 +
 .../kernel/arm/fp32/fused_batchnorm_fp32.cc   |   7 +-
 .../runtime/kernel/arm/fp32/gatherNd_fp32.cc  |   2 +
 .../runtime/kernel/arm/fp32/gather_fp32.cc    |   6 +-
 .../src/runtime/kernel/arm/fp32/glu_fp32.cc   |   5 +-
 .../kernel/arm/fp32/group_convolution_fp32.cc |   2 +
 .../src/runtime/kernel/arm/fp32/gru_fp32.cc   |   2 +
 .../kernel/arm/fp32/instance_norm_fp32.cc     |   2 +
 .../arm/fp32/invert_permutation_fp32.cc       |   2 +
 .../runtime/kernel/arm/fp32/l2_norm_fp32.cc   |   2 +
 .../kernel/arm/fp32/layer_norm_fp32.cc        |   2 +
 .../arm/fp32/local_response_norm_fp32.cc      |   6 +-
 .../src/runtime/kernel/arm/fp32/lstm_fp32.cc  |   2 +
 .../runtime/kernel/arm/fp32/matmul_fp32.cc    |   2 +
 .../kernel/arm/fp32/matmul_fp32_base.cc       |  32 +-
 .../arm/fp32/non_max_suppression_fp32.cc      |  13 +-
 .../runtime/kernel/arm/fp32/nonzero_fp32.cc   |   2 +
 .../src/runtime/kernel/arm/fp32/pad_fp32.cc   |  92 +-
 .../src/runtime/kernel/arm/fp32/pad_fp32.h    |   6 +-
 .../runtime/kernel/arm/fp32/pooling_fp32.cc   |   2 +
 .../src/runtime/kernel/arm/fp32/power_fp32.cc |   6 +-
 .../src/runtime/kernel/arm/fp32/prelu_fp32.cc |   2 +
 .../src/runtime/kernel/arm/fp32/range_fp32.cc |   2 +
 .../src/runtime/kernel/arm/fp32/rank_fp32.cc  |   6 +-
 .../runtime/kernel/arm/fp32/reduce_fp32.cc    |   2 +
 .../fp32/relative_position_attention_fp32.cc  |   2 +
 .../runtime/kernel/arm/fp32/resize_fp32.cc    |  19 +-
 .../runtime/kernel/arm/fp32/reverse_fp32.cc   |  10 +-
 .../kernel/arm/fp32/reverse_sequence_fp32.cc  |   2 +
 .../kernel/arm/fp32/roi_pooling_fp32.cc       |   6 +-
 .../src/runtime/kernel/arm/fp32/scale_fp32.cc |   6 +-
 .../kernel/arm/fp32/scatter_nd_fp32.cc        |   2 +
 .../src/runtime/kernel/arm/fp32/shape_fp32.cc |   6 +-
 .../src/runtime/kernel/arm/fp32/size_fp32.cc  |   6 +-
 .../runtime/kernel/arm/fp32/softmax_fp32.cc   |   2 +
 .../kernel/arm/fp32/space_to_batch_fp32.cc    |   2 +
 .../kernel/arm/fp32/space_to_depth_fp32.cc    |   2 +
 .../kernel/arm/fp32/sparse_to_dense_fp32.cc   |   9 +-
 .../runtime/kernel/arm/fp32/splice_fp32.cc    |   6 +-
 .../src/runtime/kernel/arm/fp32/topk_fp32.cc  |   5 +-
 .../runtime/kernel/arm/fp32/transpose_fp32.cc |   4 +-
 .../runtime/kernel/arm/fp32/transpose_fp32.h  |   2 +-
 .../kernel/arm/fp32/uniform_real_fp32.cc      |  21 +-
 .../runtime/kernel/arm/fp32/unique_fp32.cc    |   6 +-
 .../runtime/kernel/arm/fp32/unstack_fp32.cc   |   6 +-
 .../src/runtime/kernel/arm/fp32/where_fp32.cc |  12 +-
 .../runtime/kernel/arm/fp32/zeroslike_fp32.cc |   6 +-
 .../runtime/kernel/arm/int8/arithmetic_int8.h |   2 +-
 .../runtime/kernel/arm/int8/batchnorm_int8.cc |   8 +-
 .../runtime/kernel/arm/int8/concat_int8.cc    |  21 +-
 .../src/runtime/kernel/arm/int8/concat_int8.h |   2 +-
 .../kernel/arm/int8/convolution_1x1_int8.h    |   2 +-
 .../kernel/arm/int8/convolution_3x3_int8.cc   |   4 +-
 .../kernel/arm/int8/convolution_3x3_int8.h    |   4 +-
 .../int8/convolution_depthwise_3x3_int8.cc    |  13 +-
 .../arm/int8/convolution_depthwise_3x3_int8.h |   2 +-
 .../arm/int8/convolution_depthwise_int8.cc    |   2 +-
 .../arm/int8/convolution_depthwise_int8.h     |   2 +-
 .../convolution_depthwise_slidewindow_int8.cc |  37 +-
 .../convolution_depthwise_slidewindow_int8.h  |   2 +-
 .../kernel/arm/int8/convolution_int8.cc       |   8 +-
 .../kernel/arm/int8/convolution_int8.h        |   2 +-
 .../src/runtime/kernel/arm/int8/crop_int8.cc  |   9 +-
 .../src/runtime/kernel/arm/int8/crop_int8.h   |   2 +-
 .../arm/int8/deconvolution_depthwise_int8.h   |   2 +-
 .../kernel/arm/int8/deconvolution_int8.h      |   2 +-
 .../runtime/kernel/arm/int8/gatherNd_int8.cc  |  10 +-
 .../runtime/kernel/arm/int8/gatherNd_int8.h   |   2 +-
 .../kernel/arm/int8/group_convolution_int8.cc |   4 +-
 .../src/runtime/kernel/arm/int8/hswish_int8.h |   2 +-
 .../runtime/kernel/arm/int8/leaky_relu_int8.h |   2 +-
 .../src/runtime/kernel/arm/int8/mul_int8.cc   |  23 +-
 .../src/runtime/kernel/arm/int8/mul_int8.h    |   4 +-
 .../runtime/kernel/arm/int8/opt_op_handler.cc |  13 +-
 .../runtime/kernel/arm/int8/opt_op_handler.h  |  21 +-
 .../src/runtime/kernel/arm/int8/pad_int8.cc   |   5 +-
 .../src/runtime/kernel/arm/int8/reduce_int8.h |   2 +-
 .../src/runtime/kernel/arm/int8/relux_int8.h  |   2 +-
 .../runtime/kernel/arm/int8/reshape_int8.cc   |  12 +-
 .../runtime/kernel/arm/int8/reshape_int8.h    |   2 +-
 .../runtime/kernel/arm/int8/resize_int8.cc    |  12 +
 .../src/runtime/kernel/arm/int8/resize_int8.h |   4 +-
 .../src/runtime/kernel/arm/int8/split_int8.cc |   4 +-
 .../runtime/kernel/arm/int8/squeeze_int8.cc   |  11 +-
 .../runtime/kernel/arm/int8/squeeze_int8.h    |   2 +-
 .../src/runtime/kernel/arm/int8/tanh_int8.h   |   2 +-
 .../runtime/kernel/arm/int8/transpose_int8.cc |   2 +-
 .../runtime/kernel/arm/int8/transpose_int8.h  |   3 +-
 .../src/runtime/kernel/opencl/cl/pooling2d.cl |  97 +--
 .../kernel/opencl/kernel/activation.cc        |  42 +-
 .../runtime/kernel/opencl/kernel/activation.h |   2 +-
 .../runtime/kernel/opencl/kernel/argminmax.cc |  71 +-
 .../runtime/kernel/opencl/kernel/argminmax.h  |   2 +-
 .../kernel/opencl/kernel/arithmetic.cc        |  69 +-
 .../runtime/kernel/opencl/kernel/arithmetic.h |   2 +-
 .../kernel/opencl/kernel/arithmetic_self.cc   |  22 +-
 .../kernel/opencl/kernel/arithmetic_self.h    |   8 +-
 .../kernel/opencl/kernel/batch_to_space_nd.cc |  47 +-
 .../kernel/opencl/kernel/batch_to_space_nd.h  |   2 +-
 .../runtime/kernel/opencl/kernel/batchnorm.cc | 130 ++-
 .../runtime/kernel/opencl/kernel/batchnorm.h  |   4 +-
 .../src/runtime/kernel/opencl/kernel/cast.cc  |  32 +-
 .../src/runtime/kernel/opencl/kernel/cast.h   |   2 +-
 .../runtime/kernel/opencl/kernel/concat.cc    |  60 +-
 .../src/runtime/kernel/opencl/kernel/concat.h |   2 +-
 .../runtime/kernel/opencl/kernel/conv2d.cc    | 114 ++-
 .../src/runtime/kernel/opencl/kernel/conv2d.h |   6 +-
 .../kernel/opencl/kernel/conv2d_transpose.cc  |  93 +-
 .../kernel/opencl/kernel/conv2d_transpose.h   |   2 +-
 .../kernel/opencl/kernel/depthwise_conv2d.cc  |  79 +-
 .../kernel/opencl/kernel/depthwise_conv2d.h   |   2 +-
 .../src/runtime/kernel/opencl/kernel/fill.cc  |   7 +-
 .../src/runtime/kernel/opencl/kernel/fill.h   |   2 +-
 .../kernel/opencl/kernel/fullconnection.cc    |  93 +-
 .../kernel/opencl/kernel/fullconnection.h     |   2 +-
 .../kernel/opencl/kernel/fusion_eltwise.cc    |  62 +-
 .../kernel/opencl/kernel/fusion_eltwise.h     |   2 +-
 .../runtime/kernel/opencl/kernel/gather.cc    |  81 +-
 .../src/runtime/kernel/opencl/kernel/gather.h |   2 +-
 .../opencl/kernel/int8/arithmetic_int8.cc     |  79 +-
 .../opencl/kernel/int8/arithmetic_int8.h      |   2 +-
 .../kernel/opencl/kernel/layer_norm.cc        | 120 ++-
 .../runtime/kernel/opencl/kernel/layer_norm.h |   2 +-
 .../runtime/kernel/opencl/kernel/matmul.cc    |  81 +-
 .../src/runtime/kernel/opencl/kernel/matmul.h |   4 +-
 .../runtime/kernel/opencl/kernel/one_hot.cc   |  60 +-
 .../runtime/kernel/opencl/kernel/one_hot.h    |   2 +-
 .../src/runtime/kernel/opencl/kernel/pad.cc   |  48 +-
 .../src/runtime/kernel/opencl/kernel/pad.h    |   2 +-
 .../runtime/kernel/opencl/kernel/pooling2d.cc | 120 ++-
 .../runtime/kernel/opencl/kernel/pooling2d.h  |  11 +-
 .../src/runtime/kernel/opencl/kernel/power.cc |  55 +-
 .../src/runtime/kernel/opencl/kernel/power.h  |   2 +-
 .../src/runtime/kernel/opencl/kernel/prelu.cc |  56 +-
 .../src/runtime/kernel/opencl/kernel/prelu.h  |   2 +-
 .../runtime/kernel/opencl/kernel/reduce.cc    |  38 +-
 .../src/runtime/kernel/opencl/kernel/reduce.h |   2 +-
 .../runtime/kernel/opencl/kernel/reshape.cc   |  42 +-
 .../runtime/kernel/opencl/kernel/reshape.h    |   2 +-
 .../runtime/kernel/opencl/kernel/resize.cc    |  45 +-
 .../src/runtime/kernel/opencl/kernel/resize.h |   2 +-
 .../src/runtime/kernel/opencl/kernel/scale.cc | 100 ++-
 .../src/runtime/kernel/opencl/kernel/scale.h  |   2 +-
 .../runtime/kernel/opencl/kernel/softmax.cc   |  35 +-
 .../runtime/kernel/opencl/kernel/softmax.h    |   2 +-
 .../kernel/opencl/kernel/space_to_batch_nd.cc |  47 +-
 .../kernel/opencl/kernel/space_to_batch_nd.h  |   2 +-
 .../kernel/opencl/kernel/space_to_depth.cc    |  50 +-
 .../kernel/opencl/kernel/space_to_depth.h     |   2 +-
 .../kernel/opencl/kernel/sparse_to_dense.cc   |  86 +-
 .../kernel/opencl/kernel/sparse_to_dense.h    |   2 +-
 .../src/runtime/kernel/opencl/kernel/split.cc |  81 +-
 .../src/runtime/kernel/opencl/kernel/split.h  |   4 +-
 .../src/runtime/kernel/opencl/kernel/stack.cc |  57 +-
 .../src/runtime/kernel/opencl/kernel/stack.h  |   2 +-
 .../runtime/kernel/opencl/kernel/strassen.cc  | 255 ++++--
 .../runtime/kernel/opencl/kernel/strassen.h   |  24 +-
 .../kernel/opencl/kernel/strided_slice.cc     |  55 +-
 .../kernel/opencl/kernel/strided_slice.h      |   2 +-
 .../runtime/kernel/opencl/kernel/to_format.cc |  35 +-
 .../runtime/kernel/opencl/kernel/to_format.h  |   2 +-
 .../runtime/kernel/opencl/kernel/transpose.cc |  40 +-
 .../runtime/kernel/opencl/kernel/transpose.h  |   2 +-
 .../runtime/kernel/opencl/kernel/winograd.cc  | 151 +++-
 .../runtime/kernel/opencl/kernel/winograd.h   |   6 +-
 .../runtime/kernel/opencl/opencl_kernel.cc    |  21 +-
 .../src/runtime/kernel/opencl/opencl_kernel.h |   4 +-
 .../runtime/kernel/opencl/opencl_subgraph.cc  |   2 +
 mindspore/lite/src/runtime/runtime_pass.cc    | 112 +--
 mindspore/lite/src/runtime/runtime_pass.h     |  14 +-
 mindspore/lite/src/scheduler.cc               | 176 ++--
 mindspore/lite/src/scheduler.h                |  21 +-
 mindspore/lite/src/sub_graph_kernel.cc        |  12 +-
 mindspore/lite/src/sub_graph_kernel.h         |  10 +-
 mindspore/lite/src/tensor.cc                  |   4 +-
 mindspore/lite/src/tensor.h                   |  14 +-
 mindspore/lite/src/tensorlist.h               |   4 +-
 mindspore/lite/src/train/train_session.cc     | 114 ++-
 mindspore/lite/src/train/train_session.h      |   3 +
 mindspore/lite/src/weight_decoder.cc          |  22 +-
 mindspore/lite/src/weight_decoder.h           |   2 +
 mindspore/lite/test/CMakeLists.txt            |   3 +-
 .../lite/test/config/models_ms_train.cfg      |   3 +
 mindspore/lite/test/config/models_npu.cfg     |   4 +
 mindspore/lite/test/config/models_onnx.cfg    |   5 +
 .../lite/test/config/models_onnx_fp16.cfg     |   1 +
 .../lite/test/config/models_posttraining.cfg  |   4 +-
 mindspore/lite/test/config/models_tf.cfg      |   4 +-
 mindspore/lite/test/config/models_tf_fp16.cfg |   3 +
 mindspore/lite/test/runtest.sh                |   3 +
 mindspore/lite/test/st/run_benchmark_nets.sh  |  13 +-
 .../lite/test/st/scripts/base_functions.sh    |  12 +-
 .../st/scripts/nnie/run_converter_nnie.sh     |   4 +-
 .../test/st/scripts/run_benchmark_arm64.sh    |  17 +-
 .../lite/test/st/scripts/run_benchmark_x86.sh |  22 +-
 .../lite/test/st/scripts/run_net_train.sh     | 101 ++-
 .../custom_extract_features_infer_test.cc     |   2 +-
 .../infer/custom_normalize_infer_test.cc      |   2 +-
 .../nnacl/infer/custom_predict_infer_test.cc  |   2 +-
 .../infer/hashtable_lookup_infer_test.cc      |   2 +-
 .../nnacl/infer/lsh_projection_infer_test.cc  |   2 +-
 .../ut/nnacl/infer/skip_gram_infer_test.cc    |   2 +-
 .../infer/tensorlist_fromtensor_infer_test.cc |   2 +-
 .../infer/tensorlist_getitem_infer_test.cc    |   2 +-
 .../infer/tensorlist_reserve_infer_test.cc    |   2 +-
 .../infer/tensorlist_setitem_infer_test.cc    |   2 +-
 .../infer/tensorlist_stack_infer_test.cc      |   2 +-
 .../runtime/kernel/arm/fp32/skip_gram_fp32.cc |   2 +-
 .../runtime/kernel/arm/string/normalize.cc    |   2 +-
 .../test/ut/src/runtime/runtime_pass_tests.cc |  62 +-
 .../lite/tools/benchmark/benchmark_base.cc    |   4 +-
 .../lite/tools/benchmark_train/net_train.cc   |   6 +-
 mindspore/lite/tools/common/flag_parser.cc    |   4 +
 mindspore/lite/tools/common/flag_parser.h     |   5 +-
 .../lite/tools/common/func_graph_subgraph.cc  |   4 +-
 mindspore/lite/tools/common/graph_util.cc     |  24 +
 mindspore/lite/tools/common/graph_util.h      |   2 +
 mindspore/lite/tools/common/node_util.cc      | 155 +---
 mindspore/lite/tools/common/node_util.h       |   2 +
 mindspore/lite/tools/converter/CMakeLists.txt |   6 +-
 .../lite/tools/converter/anf_transform.cc     |   3 -
 .../lite/tools/converter/converter_flags.cc   |  10 +-
 .../lite/tools/converter/export_model.cc      |   2 +-
 .../converter/import/mindspore_importer.cc    |  98 +--
 .../converter/import/mindspore_importer.h     |   3 +-
 .../graph/batchnorm_convert_scale_pass.cc     |  41 +-
 .../parser/caffe/caffe_model_parser.cc        | 111 +--
 .../parser/caffe/caffe_model_parser.h         |   4 -
 .../converter/parser/conv1d_inout_adjust.cc   |   2 +
 .../parser/onnx/onnx_conv_transpose_parser.cc |  37 +-
 .../parser/onnx/onnx_inputs_adjust.cc         |   1 +
 .../parser/onnx/onnx_model_parser.cc          | 142 +---
 .../converter/parser/onnx/onnx_model_parser.h |   5 +-
 .../converter/parser/onnx/onnx_pad_adjust.cc  |   4 +-
 .../tools/converter/parser/parser_utils.cc    | 300 ++++---
 .../tools/converter/parser/parser_utils.h     |  17 +-
 .../converter/parser/tf/tf_model_parser.cc    | 136 +--
 .../converter/parser/tf/tf_model_parser.h     |   7 -
 .../parser/tflite/tflite_model_parser.cc      | 117 +--
 .../parser/tflite/tflite_model_parser.h       |   4 -
 .../tools/converter/parser/unify_format.cc    | 182 +++-
 .../tools/converter/parser/unify_format.h     |  10 +-
 .../quantizer/post_training_quantizer.cc      |  10 +-
 .../tools/converter/quantizer/quant_cast.cc   |   9 +-
 .../attention_quant_type_determiner.cc        |  13 +-
 .../conv_quant_param_propogator.cc            |   1 -
 ...default_quant_all_quant_type_determiner.cc |   1 -
 .../only_need_inputs_quant_type_determiner.cc |   1 -
 .../quant_helper/quant_node_helper.cc         |   1 -
 .../converter/quantizer/weight_quantizer.cc   |   2 +-
 .../registry/model_parser_registry.cc         |   1 -
 .../tools/cropper/build_cropper_config.sh     |  93 +-
 mindspore/lite/tools/cropper/cropper.cc       |   4 +-
 .../lite/tools/dataset/cropper/build_lib.py   |   6 +-
 .../dataset/cropper/cropper_configure.py      |   8 +-
 .../lite/tools/optimizer/common/gllo_utils.cc |  13 +-
 .../format/delete_redundant_transpose.cc      |  31 +
 .../format/delete_redundant_transpose.h       |   1 +
 .../tools/optimizer/format/to_format_base.cc  |  79 +-
 .../tools/optimizer/format/to_format_base.h   |   8 +-
 .../tools/optimizer/format/to_nchw_format.cc  |  24 +-
 .../tools/optimizer/format/to_nchw_format.h   |   4 +-
 .../tools/optimizer/format/to_nhwc_format.cc  |  23 +-
 .../tools/optimizer/format/to_nhwc_format.h   |   4 +-
 .../optimizer/fusion/batchmatmul_fusion.cc    |   1 -
 .../optimizer/fusion/conv_conv_fusion.cc      |   4 +
 .../fusion/multi_head_attention_fusion.cc     |   4 +-
 ...ite_rel_pos_multi_head_attention_fusion.cc |  24 +-
 .../tools/optimizer/graph/node_infershape.cc  |  15 -
 .../optimizer/graph/slice_prepose_pass.cc     |   4 +
 mindspore/nn/acc/base.py                      |  68 +-
 mindspore/nn/acc/grad_freeze.py               |   1 +
 mindspore/nn/acc/less_batch_normalization.py  |   1 +
 mindspore/nn/cell.py                          |   8 +-
 mindspore/nn/layer/activation.py              |  47 ++
 mindspore/nn/loss/__init__.py                 |   4 +-
 mindspore/nn/loss/loss.py                     |  53 +-
 mindspore/nn/wrap/grad_reducer.py             |   8 -
 mindspore/nn/wrap/loss_scale.py               |  31 +-
 mindspore/numpy/array_creations.py            | 123 +--
 mindspore/numpy/array_ops.py                  |  32 +-
 mindspore/numpy/math_ops.py                   | 163 ++--
 mindspore/numpy/utils_const.py                |   2 +
 mindspore/ops/_grad/grad_array_ops.py         |  10 +-
 .../ops/_grad_experimental/grad_nn_ops.py     |  25 +
 mindspore/ops/_op_impl/akg/ascend/__init__.py |   1 +
 mindspore/ops/_op_impl/cpu/__init__.py        |   1 +
 mindspore/ops/_op_impl/tbe/__init__.py        |   5 +
 .../ops/bprop_mindir/Identity_bprop.mindir    |  12 +-
 mindspore/ops/bprop_mindir/ReLU_bprop.mindir  |  16 +-
 mindspore/ops/composite/random_ops.py         |   4 +-
 mindspore/ops/functional.py                   |   4 +
 mindspore/ops/operations/__init__.py          |  50 +-
 mindspore/ops/operations/_grad_ops.py         |  43 +
 mindspore/ops/operations/_thor_ops.py         |  51 ++
 mindspore/ops/operations/array_ops.py         |  33 +-
 mindspore/ops/operations/inner_ops.py         |   3 +-
 mindspore/ops/operations/math_ops.py          |  84 +-
 mindspore/ops/operations/nn_ops.py            |  99 ++-
 mindspore/ops/operations/other_ops.py         |   3 +-
 mindspore/ops/operations/sponge_ops.py        | 302 -------
 mindspore/ops/operations/sponge_update_ops.py |   2 +-
 mindspore/ops/primitive.py                    |  28 +-
 .../profiler/common/exceptions/error_code.py  |   4 +-
 .../profiler/common/exceptions/exceptions.py  |   1 -
 .../profiler/parser/aicpu_data_parser.py      |  11 +-
 mindspore/profiler/parser/container.py        |   5 +
 mindspore/profiler/parser/flops_parser.py     |  14 +-
 mindspore/profiler/parser/hccl_parser.py      |  25 +-
 mindspore/profiler/parser/hwts_log_parser.py  |   5 +-
 mindspore/profiler/parser/integrator.py       |   3 +
 .../profiler/parser/memory_usage_parser.py    |   8 +-
 .../profiler/parser/minddata_analyzer.py      |   2 +
 mindspore/profiler/parser/minddata_parser.py  |   2 +
 .../parser/minddata_pipeline_parser.py        |   4 +
 mindspore/profiler/parser/optime_parser.py    |  11 +-
 .../profiler/parser/step_trace_parser.py      |   4 +-
 mindspore/profiler/profiling.py               |  83 +-
 mindspore/run_check/_check_version.py         |   2 +-
 .../train/callback/_lr_scheduler_callback.py  |   2 +-
 .../train/callback/_summary_collector.py      |   4 +-
 mindspore/train/loss_scale_manager.py         |   3 +-
 mindspore/train/model.py                      |  12 +-
 mindspore/train/serialization.py              |  19 +-
 mindspore/train/train_thor/convert_utils.py   |   2 +-
 .../official/cv/centerface/src/centerface.py  |   4 +-
 model_zoo/official/cv/cnnctc/src/cnn_ctc.py   |   6 +-
 .../official/cv/crnn/src/crnn_for_train.py    |   3 +-
 .../cv/crnn_seq2seq_ocr/src/attention_ocr.py  |   3 +-
 .../cv/crnn_seq2seq_ocr/src/seq2seq.py        |   2 +-
 .../official/cv/ctpn/default_config.yaml      |   4 +-
 model_zoo/official/cv/ctpn/src/ctpn.py        |   4 +-
 .../official/cv/ctpn/src/network_define.py    |   4 +-
 .../cv/deeptext/src/network_define.py         |   4 +-
 .../cv/faster_rcnn/default_config.yaml        |   2 +-
 .../cv/faster_rcnn/default_config_101.yaml    |   2 +-
 .../cv/faster_rcnn/default_config_152.yaml    |   2 +-
 .../cv/faster_rcnn/src/network_define.py      |   4 +-
 .../cv/maskrcnn/src/network_define.py         |   5 +-
 .../src/network_define.py                     |   3 +-
 .../official/cv/nasnet/src/nasnet_a_mobile.py |   3 +-
 model_zoo/official/cv/openpose/src/loss.py    |   3 +-
 model_zoo/official/cv/psenet/README.md        | 181 ++--
 model_zoo/official/cv/psenet/README_CN.md     | 207 +++--
 model_zoo/official/cv/psenet/requirements.txt |   2 +
 .../cv/psenet/src/ETSNET/pse/Makefile         |   3 +-
 .../cv/psenet/src/ETSNET/pse/adaptor.cpp      |   2 +-
 .../official/cv/psenet/src/network_define.py  |   4 +-
 model_zoo/official/cv/psenet/train.py         |   2 +-
 model_zoo/official/cv/resnet/README.md        |  87 +-
 model_zoo/official/cv/resnet/README_CN.md     |  86 +-
 .../cv/resnet/scripts/run_distribute_train.sh |   4 +-
 .../scripts/run_distribute_train_gpu.sh       |   4 +-
 .../official/cv/resnet/scripts/run_eval.sh    |   4 +-
 .../cv/resnet/scripts/run_eval_gpu.sh         |   4 +-
 .../official/cv/resnet/scripts/run_infer.sh   |   4 +-
 .../cv/resnet/scripts/run_infer_310.sh        |   2 +-
 .../scripts/run_parameter_server_train.sh     |   8 +-
 .../scripts/run_parameter_server_train_gpu.sh |   8 +-
 .../cv/resnet/scripts/run_standalone_train.sh |   4 +-
 .../scripts/run_standalone_train_gpu.sh       |   4 +-
 .../cv/resnet/src/model_utils/config.py       |   4 +-
 model_zoo/official/cv/resnet/src/resnet.py    |  10 +-
 model_zoo/official/cv/resnet/train.py         |  59 +-
 .../cv/retinaface_resnet50/src/network.py     |   4 +-
 .../official/cv/retinanet/src/retinanet.py    |   3 +-
 model_zoo/official/cv/shufflenetv1/eval.py    |   2 +-
 model_zoo/official/cv/shufflenetv1/export.py  |   2 +-
 model_zoo/official/cv/shufflenetv1/train.py   |   2 +-
 model_zoo/official/cv/ssd/src/ssd.py          |   3 +-
 model_zoo/official/cv/unet/README.md          |   6 +-
 model_zoo/official/cv/unet/README_CN.md       |   6 +-
 .../official/cv/unet/preprocess_dataset.py    |   2 +-
 model_zoo/official/cv/vgg16/README.md         |  35 +
 model_zoo/official/cv/vgg16/README_CN.md      |  34 +
 .../ascend310_quant_infer/run_quant_infer.sh  |   1 -
 .../cv/warpctc/src/warpctc_for_train.py       |   3 +-
 .../official/cv/yolov3_darknet53/src/yolo.py  |   3 +-
 .../cv/yolov3_darknet53_quant/src/yolo.py     |   3 +-
 .../cv/yolov3_resnet18/scripts/run_eval.sh    |   2 +-
 .../official/cv/yolov3_resnet18/src/yolov3.py |   3 +-
 model_zoo/official/cv/yolov4/src/yolo.py      |   7 +-
 model_zoo/official/cv/yolov5/README.md        |   2 +-
 model_zoo/official/cv/yolov5/README_CN.md     |  88 +-
 model_zoo/official/cv/yolov5/src/yolo.py      |   3 +-
 model_zoo/official/gnn/gat/src/utils.py       |   4 +-
 model_zoo/official/nlp/bert/README.md         |  12 +
 model_zoo/official/nlp/bert/README_CN.md      |   8 +
 .../nlp/bert/src/bert_for_finetune.py         |  18 +-
 .../nlp/bert/src/bert_for_pre_training.py     |  31 +-
 .../bert_thor/src/bert_for_pre_training.py    |  31 +-
 model_zoo/official/nlp/cpm/src/cpm_train.py   |  17 +-
 .../official/nlp/dgu/src/bert_for_finetune.py |  18 +-
 .../nlp/dgu/src/bert_for_pre_training.py      |  31 +-
 .../nlp/emotect/src/ernie_for_finetune.py     |   9 +-
 .../nlp/fasttext/src/fasttext_train.py        |   4 +-
 .../gnmt_v2/src/gnmt_model/gnmt_for_train.py  |   9 +-
 .../official/nlp/gpt/src/gpt_wrapcell.py      |   9 +-
 model_zoo/official/nlp/gru/README.md          | 108 ++-
 .../official/nlp/gru/default_config.yaml      |   2 +
 .../official/nlp/gru/model_utils/config.py    |  24 +-
 .../nlp/gru/scripts/create_dataset.sh         |   1 -
 .../scripts/run_distribute_train_ascend.sh    |   5 +-
 .../official/nlp/gru/src/gru_for_train.py     |  54 +-
 model_zoo/official/nlp/gru/src/seq2seq.py     |  38 +-
 model_zoo/official/nlp/gru/src/weight_init.py |  16 +-
 model_zoo/official/nlp/gru/train.py           |  66 +-
 .../src/transformer/transformer_for_train.py  |   9 +-
 .../official/nlp/pangu_alpha/src/dataset.py   |   5 +-
 .../pangu_alpha/src/pangu_alpha_wrapcell.py   |  17 +-
 .../official/nlp/pangu_alpha/src/utils.py     |  12 +
 model_zoo/official/nlp/pangu_alpha/train.py   | 146 ++--
 model_zoo/official/nlp/q8bert/src/q8bert.py   |  26 +-
 .../nlp/tinybert/src/tinybert_for_gd_td.py    |  26 +-
 .../transformer/src/transformer_for_train.py  |  26 +-
 model_zoo/official/recommend/ncf/src/ncf.py   |   4 +-
 model_zoo/official/rl/dqn/README.md           |  39 +-
 model_zoo/official/rl/dqn/README_CN.md        |   8 +-
 model_zoo/official/rl/dqn/eval.py             |  42 +-
 .../dqn/scripts/run_standalone_train_gpu.sh   |   3 +-
 model_zoo/official/rl/dqn/src/agent.py        | 128 +--
 model_zoo/official/rl/dqn/src/config.py       |  11 +-
 model_zoo/official/rl/dqn/src/dqn.py          |  17 +-
 model_zoo/official/rl/dqn/train.py            |  84 +-
 .../cv/AVA_cifar/src/network_define.py        |   4 +-
 .../cv/AVA_hpa/src/network_define_pretrain.py |   4 +-
 .../cv/AVA_hpa/src/network_define_train.py    |   4 +-
 model_zoo/research/cv/AttGAN/src/cell.py      |   6 +-
 .../cv/FaceDetection/src/network_define.py    |   6 +-
 model_zoo/research/cv/ICNet/README.md         |  41 +-
 model_zoo/research/cv/ICNet/eval.py           |   5 +-
 .../research/cv/ICNet/scripts/run_eval.sh     |   8 +-
 .../cv/ICNet/src/model_utils/icnet.yaml       |   2 +-
 model_zoo/research/cv/IPT/src/loss.py         |   9 +-
 model_zoo/research/cv/IPT/src/utils.py        |   4 +-
 .../cv/LearningToSeeInTheDark/src/myutils.py  |  10 +-
 .../cv/MaskedFaceRecognition/model/model.py   |   4 +-
 model_zoo/research/cv/ProtoNet/README.md      |  36 +-
 model_zoo/research/cv/ProtoNet/eval.py        |   5 +-
 .../scripts/run_distribution_ascend.sh        |  14 +-
 .../research/cv/ProtoNet/src/parser_util.py   |   2 +-
 .../cv/SRGAN/src/trainonestep/train_gan.py    |   6 +-
 .../cv/SRGAN/src/trainonestep/train_psnr.py   |   3 +-
 .../research/cv/STGAN/src/models/networks.py  |   6 +-
 model_zoo/research/cv/SiamFC/readme.md        | 195 +++++
 .../research/cv/advanced_east/src/model.py    |   4 +-
 model_zoo/research/cv/arcface/README_CN.md    |  10 +-
 .../arcface/scripts/run_distribute_train.sh   |   2 +-
 model_zoo/research/cv/arcface/train.py        |  27 +-
 .../cv/centernet/src/centernet_pose.py        |  10 +-
 .../cv/centernet_det/src/centernet_det.py     |  14 +-
 .../src/centernet_det.py                      |  14 +-
 model_zoo/research/cv/dem/src/demnet.py       |   3 +-
 .../research/cv/glore_res200/README_CN.md     |  22 +-
 .../scripts/run_distribute_train.sh           |  35 +-
 .../research/cv/glore_res200/src/config.py    |   2 +-
 model_zoo/research/cv/glore_res200/train.py   |   2 +
 model_zoo/research/cv/hardnet/README_CN.md    |   4 +-
 .../hardnet/scripts/run_distribute_train.sh   |  42 +-
 model_zoo/research/cv/midas/src/midas_net.py  |   4 +-
 .../research/cv/resnext152_64x4d/README.md    |  74 +-
 .../research/cv/resnext152_64x4d/README_CN.md |  66 +-
 .../scripts/run_distribute_train.sh           |   3 +-
 .../scripts/run_standalone_train.sh           |   3 +-
 .../research/cv/resnext152_64x4d/train.py     |   5 +-
 .../cv/retinanet_resnet101/src/retinahead.py  |   3 +-
 .../cv/retinanet_resnet152/src/retinahead.py  |   3 +-
 .../research/cv/simple_baselines/README.md    |  14 +-
 .../scripts/run_distribute_train.sh           |  31 +-
 model_zoo/research/cv/squeezenet1_1/README.md |  11 +-
 model_zoo/research/cv/squeezenet1_1/eval.py   |   9 +-
 model_zoo/research/cv/squeezenet1_1/train.py  |   4 +-
 .../cv/ssd_ghostnet/src/ssd_ghostnet.py       |   3 +-
 .../research/cv/ssd_mobilenetV2/src/ssd.py    |   3 +-
 .../cv/ssd_mobilenetV2_FPNlite/src/ssd.py     |   3 +-
 model_zoo/research/cv/ssd_resnet50/src/ssd.py |   3 +-
 model_zoo/research/cv/wideresnet/README_CN.md |  36 +-
 model_zoo/research/hpc/sponge/main.py         |  10 +-
 model_zoo/research/hpc/sponge/src/angle.py    |  46 +-
 model_zoo/research/hpc/sponge/src/bond.py     |  44 +-
 model_zoo/research/hpc/sponge/src/dihedral.py |  71 +-
 .../hpc/sponge/src/langevin_liujian_md.py     |  65 +-
 .../research/hpc/sponge/src/lennard_jones.py  |  99 ++-
 .../research/hpc/sponge/src/md_information.py | 203 ++++-
 model_zoo/research/hpc/sponge/src/nb14.py     |  48 +-
 .../research/hpc/sponge/src/neighbor_list.py  |  40 +-
 .../hpc/sponge/src/particle_mesh_ewald.py     |  23 +-
 .../research/hpc/sponge/src/simulation.py     | 799 ++++++++++++++----
 .../nlp/gpt2/src/gpt2_for_finetune.py         |   9 +-
 .../src/seq2seq_model/seq2seq_for_train.py    |  12 +-
 .../nlp/seq2seq/src/utils/optimizer.py        |   3 -
 model_zoo/research/nlp/seq2seq/train.py       |   4 +-
 .../research/nlp/skipgram/src/dataset.py      |   2 +
 .../recommend/Fat-DeepFFM/src/fat_deepffm.py  |   4 +-
 .../research/recommend/autodis/src/autodis.py |   6 +-
 model_zoo/utils/hccl_tools/hccl_tools.py      |  10 +-
 .../auto_monad/test_auto_monad_mindtester.py  |   3 +-
 tests/st/control/inner/test_002_single_for.py |   3 +-
 tests/st/control/inner/test_010_if_in_if.py   |   3 +
 tests/st/control/inner/test_012_if_in_for.py  |   3 +
 tests/st/control/inner/test_032_for_in_for.py |  12 +-
 .../control/inner/test_101_if_after_while.py  |   2 +
 .../inner/test_110_if_after_if_in_if.py       |  10 +-
 .../inner/test_121_if_after_while_in_while.py |   7 +-
 .../inner/test_122_if_after_while_in_for.py   |  53 +-
 .../inner/test_330_for_after_for_in_if.py     |   2 +
 tests/st/control/test_cont_grad.py            | 251 ++++--
 tests/st/dump/test_data_dump.py               |   2 +-
 tests/st/fl/albert/src/cell_wrapper.py        |   4 +-
 .../resnet50/test_resnet50_cifar10.py         |   4 +-
 .../transformer/test_transformer.py           |   2 +-
 tests/st/model_zoo_tests/yolov3/src/yolov3.py |   3 +-
 .../yolov3_darknet53/src/yolo.py              |   7 +-
 .../models/bert/src/bert_for_pre_training.py  |  13 +-
 tests/st/networks/models/bert/src/utils.py    |   9 +-
 tests/st/ops/cpu/test_softplus_grad_op.py     |  18 +-
 tests/st/ops/cpu/test_softplus_op.py          |  34 +-
 tests/ut/cpp/CMakeLists.txt                   |   1 +
 tests/ut/cpp/dataset/CMakeLists.txt           |   3 +
 .../ut/cpp/dataset/c_api_audio_a_to_q_test.cc | 392 ++++++++-
 .../ut/cpp/dataset/c_api_audio_r_to_z_test.cc |  74 +-
 .../cpp/dataset/c_api_vision_a_to_q_test.cc   |  96 +++
 tests/ut/cpp/dataset/cmu_arctic_test.cc       | 145 ++++
 tests/ut/cpp/dataset/common/bboxop_common.cc  |   4 +-
 tests/ut/cpp/dataset/common/cvop_common.cc    |   6 +-
 tests/ut/cpp/dataset/common/cvop_common.h     |   1 +
 tests/ut/cpp/dataset/deserialize_test.cc      |  56 +-
 tests/ut/cpp/dataset/execute_test.cc          | 278 ++++++
 tests/ut/cpp/dataset/random_color_op_test.cc  |   4 +-
 tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc   |   2 +-
 tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc   |   2 +-
 tests/ut/cpp/dataset/tensor_test.cc           |  56 +-
 tests/ut/cpp/runtest.sh                       |   2 +
 .../stub/dynamic_shape/dynamic_shape_stub.cc  |   6 -
 tests/ut/cpp/stub/ge/ge_mock.cc               |   2 -
 tests/ut/cpp/stub/ge/ge_task_launch_stub.cc   |  17 +-
 tests/ut/python/dataset/test_adjustgamma.py   |  32 +-
 .../ut/python/dataset/test_allpass_biquad.py  |  48 +-
 .../ut/python/dataset/test_amplitude_to_db.py |  19 +-
 tests/ut/python/dataset/test_angle.py         |  23 +-
 .../ut/python/dataset/test_bandpass_biquad.py |  25 +-
 .../python/dataset/test_bandreject_biquad.py  |  27 +-
 tests/ut/python/dataset/test_bass_biquad.py   |  22 +-
 .../python/dataset/test_datasets_cmuarctic.py | 203 +++++
 tests/ut/python/dataset/test_datasets_sbd.py  |   2 +-
 tests/ut/python/dataset/test_schema.py        |   2 +-
 .../ut/python/dataset/test_serdes_dataset.py  |  40 +-
 tests/ut/python/dataset/test_skip.py          |   6 +-
 tests/ut/python/dataset/test_slice_patches.py |  52 ++
 tests/ut/python/dataset/test_take.py          |   2 +-
 tests/ut/python/dataset/test_time_stretch.py  |  39 +-
 tests/ut/python/exec/test_train_with_lars.py  |   6 +-
 tests/ut/python/ops/test_ops.py               |  15 +
 tests/ut/python/optimizer/test_auto_grad.py   | 109 +++
 tests/ut/python/parallel/test_conv2d.py       |  27 +
 .../python/parallel/test_conv2d_transpose.py  |  34 +
 .../python/parallel/test_dataset_interface.py |   5 +-
 .../parallel/test_gather_v2_primitive.py      |   4 +-
 tests/ut/python/parallel/test_gatherd.py      |   8 +
 tests/ut/python/parallel/test_loss_scale.py   |   9 +-
 tests/ut/python/parallel/test_reshape.py      |   4 +-
 third_party/patch/icu4c/icu4c.patch01         |   4 +-
 third_party/patch/sqlite/sqlite.patch001      | 138 ++-
 version.txt                                   |   2 +-
 1432 files changed, 21434 insertions(+), 10802 deletions(-)
 create mode 100644 mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc
 create mode 100644 mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h
 create mode 100644 mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc
 create mode 100644 mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h
 create mode 100644 mindspore/lite/src/registry/register_utils.cc
 create mode 100644 mindspore/lite/src/registry/register_utils.h
 create mode 100644 model_zoo/research/cv/SiamFC/readme.md
 create mode 100644 tests/ut/cpp/dataset/cmu_arctic_test.cc
 create mode 100644 tests/ut/python/dataset/test_datasets_cmuarctic.py

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0b61c1987f1..098b8af2383 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,6 +24,9 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
         -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare \
         -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move \
         -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
+elseif(ENABLE_SYM_FILE)
+    set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -g -ggdb -Wl,--allow-shlib-undefined \
+        -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
 else()
     set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined \
         -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2")
diff --git a/build.sh b/build.sh
index 17dc8f19629..83ea081fca1 100755
--- a/build.sh
+++ b/build.sh
@@ -27,7 +27,7 @@ usage()
   echo "              [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K] \\"
   echo "              [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\"
   echo "              [-A on|off] [-S on|off] [-k on|off] [-W sse|neon|avx|avx512|off] \\"
-  echo "              [-L Tensor-RT path]  \\"
+  echo "              [-L Tensor-RT path] [-y on|off]  \\"
   echo ""
   echo "Options:"
   echo "    -d Debug mode"
@@ -64,6 +64,7 @@ usage()
   echo "    -W Enable x86_64 SSE or AVX instruction set, use [sse|neon|avx|avx512|off], default off for lite and avx for CPU"
   echo "    -H Enable hidden"
   echo "    -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
+  echo "    -y Compile the symbol table switch and save the symbol table to the directory output"
 }
 
 # check value of input is 'on' or 'off'
@@ -122,8 +123,9 @@ checkopts()
   TENSORRT_HOME=""
   USER_ENABLE_DUMP_IR=false
   USER_ENABLE_DEBUGGER=false
+  ENABLE_SYM_FILE="off"
   # Process the options
-  while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:' opt
+  while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:y' opt
   do
     CASE_SENSIVE_ARG=${OPTARG}
     OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]')
@@ -140,6 +142,9 @@ checkopts()
           exit 1
         fi
         ;;
+      y)
+        ENABLE_SYM_FILE="on"
+        ;;
       r)
         DEBUG_MODE="off"
         ;;
@@ -442,6 +447,9 @@ build_mindspore()
     if [[ -n "$TRAIN_MODE" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_${TRAIN_MODE}=ON"
     fi
+    if [[ "X$ENABLE_SYM_FILE" = "Xon" ]]; then
+        CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SYM_FILE=ON"
+    fi
     if [[ "X$ENABLE_ASAN" = "Xon" ]]; then
         CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ASAN=ON"
     fi
diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake
index 182632b09f1..72b68bf6446 100644
--- a/cmake/external_libs/flatbuffers.cmake
+++ b/cmake/external_libs/flatbuffers.cmake
@@ -1,10 +1,10 @@
 if(MSVC)
     set(flatbuffers_CXXFLAGS "${CMAKE_CXX_FLAGS}")
-    set(flatbuffers_CFLAGS "${CMAKE_CXX_FLAGS}")
+    set(flatbuffers_CFLAGS "${CMAKE_C_FLAGS}")
     set(flatbuffers_LDFLAGS "${CMAKE_SHARED_LINKER_FLAGS}")
 else()
-    set(flatbuffers_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2")
-    set(flatbuffers_CFLAGS "-D_FORTIFY_SOURCE=2 -O2")
+    set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong")
+    set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong")
 endif()
 
 if(WIN32)
diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake
index b1c6cf50ec8..69ee8b0c295 100644
--- a/cmake/mind_expression.cmake
+++ b/cmake/mind_expression.cmake
@@ -89,7 +89,6 @@ if(ENABLE_MINDDATA)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tinyxml2.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/cppjieba.cmake)
     include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sentencepiece.cmake)
-    include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ffmpeg.cmake)
 endif()
 
 if(ENABLE_MINDDATA)
diff --git a/cmake/options.cmake b/cmake/options.cmake
index c4bd42b3223..59d5861c5ed 100644
--- a/cmake/options.cmake
+++ b/cmake/options.cmake
@@ -25,6 +25,7 @@ option(ENABLE_ACL "enable acl" OFF)
 option(ENABLE_GLIBCXX "enable_glibcxx" OFF)
 option(MODE_ASCEND_ALL "supports all ascend platform" OFF)
 option(MODE_ASCEND_ACL "supports ascend acl mode only" OFF)
+option(ENABLE_SYM_FILE "enable sym file" OFF)
 
 if(NOT ENABLE_D AND NOT ENABLE_TESTCASES AND NOT ENABLE_ACL AND NOT ENABLE_GE)
     set(ENABLE_GLIBCXX ON)
diff --git a/cmake/package.cmake b/cmake/package.cmake
index 69b8ecbcd2a..506f5ee86dc 100644
--- a/cmake/package.cmake
+++ b/cmake/package.cmake
@@ -12,6 +12,8 @@ set(CPACK_TEMPORARY_PACKAGE_FILE_NAME ${BUILD_PATH}/package/mindspore)
 set(CPACK_TEMPORARY_INSTALL_DIRECTORY ${BUILD_PATH}/package/mindspore)
 set(CPACK_PACK_ROOT_DIR ${BUILD_PATH}/package/)
 set(CPACK_CMAKE_SOURCE_DIR ${CMAKE_SOURCE_DIR})
+set(CPACK_ENABLE_SYM_FILE ${ENABLE_SYM_FILE})
+set(CPACK_CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE})
 if(ENABLE_GE)
     set(CPACK_MS_BACKEND "ge")
     set(CPACK_MS_TARGET "ascend or cpu")
@@ -125,17 +127,6 @@ if(ENABLE_MINDDATA)
       DESTINATION ${INSTALL_LIB_DIR} RENAME libicudata.so.67 COMPONENT mindspore)
     install(FILES ${icu4c_LIBPATH}/libicui18n.so.67.1
       DESTINATION ${INSTALL_LIB_DIR} RENAME libicui18n.so.67 COMPONENT mindspore)
-
-    install(FILES ${ffmpeg_LIBPATH}/libavcodec.so.58.91.100
-            DESTINATION ${INSTALL_LIB_DIR} RENAME libavcodec.so.58 COMPONENT mindspore)
-    install(FILES ${ffmpeg_LIBPATH}/libavformat.so.58.45.100
-            DESTINATION ${INSTALL_LIB_DIR} RENAME libavformat.so.58 COMPONENT mindspore)
-    install(FILES ${ffmpeg_LIBPATH}/libavutil.so.56.51.100
-            DESTINATION ${INSTALL_LIB_DIR} RENAME libavutil.so.56 COMPONENT mindspore)
-    install(FILES ${ffmpeg_LIBPATH}/libswresample.so.3.7.100
-            DESTINATION ${INSTALL_LIB_DIR} RENAME libswresample.so.3 COMPONENT mindspore)
-    install(FILES ${ffmpeg_LIBPATH}/libswscale.so.5.7.100
-            DESTINATION ${INSTALL_LIB_DIR} RENAME libswscale.so.5 COMPONENT mindspore)
 endif()
 
 if(ENABLE_CPU)
diff --git a/cmake/package_script.cmake b/cmake/package_script.cmake
index edef651b414..bdfcd13314d 100644
--- a/cmake/package_script.cmake
+++ b/cmake/package_script.cmake
@@ -77,6 +77,48 @@ set(ENV{BACKEND_TARGET} ${CPACK_MS_TARGET})
 set(ENV{MS_PACKAGE_NAME} ${CPACK_MS_PACKAGE_NAME})
 set(ENV{COMMIT_ID} ${GIT_COMMIT_ID})
 
+file(GLOB DEBUG_SYM
+    ${MS_PACK_ROOT_DIR}/mindspore/*.so
+    ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so
+)
+
+file(GLOB DEBUG_STRIP_SYM
+    ${MS_PACK_ROOT_DIR}/mindspore/*.so
+    ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so*
+)
+
+set(CMAKE_OBJCOPY $ENV{CROSS_COMPILE}objcopy)
+set(CMAKE_STRIP $ENV{CROSS_COMPILE}strip)
+
+if(CPACK_ENABLE_SYM_FILE)
+    foreach(schema ${DEBUG_SYM})
+        execute_process(
+            COMMAND ${CMAKE_OBJCOPY} "--only-keep-debug" ${schema} ${schema}.sym
+            WORKING_DIRECTORY ${MS_PACK_ROOT_DIR}
+    )
+    endforeach()
+endif()
+
+if("${CPACK_CMAKE_BUILD_TYPE}" STREQUAL "Release")
+    foreach(schema ${DEBUG_STRIP_SYM})
+    execute_process(
+        COMMAND ${CMAKE_STRIP} ${schema}
+        WORKING_DIRECTORY ${MS_PACK_ROOT_DIR}
+    )
+    endforeach()
+endif()
+
+file(GLOB DEBUG_SYM_FILE
+    ${MS_PACK_ROOT_DIR}/mindspore/*.sym
+    ${MS_PACK_ROOT_DIR}/mindspore/lib/*.sym
+)
+
+if(CPACK_ENABLE_SYM_FILE)
+    file(MAKE_DIRECTORY ${MS_ROOT_DIR}/debug_info)
+    file(COPY ${DEBUG_SYM_FILE} DESTINATION ${MS_ROOT_DIR}/debug_info/)
+    file(REMOVE_RECURSE ${DEBUG_SYM_FILE})
+endif()
+
 execute_process(
     COMMAND ${PYTHON} ${MS_ROOT_DIR}/setup.py "bdist_wheel"
     WORKING_DIRECTORY ${MS_PACK_ROOT_DIR}
@@ -104,3 +146,16 @@ file(COPY ${MS_PACK_ROOT_DIR}/${NEW_FILE_NAME} DESTINATION ${MS_ROOT_DIR}/output
 
 file(SHA256 ${MS_ROOT_DIR}/output/${NEW_FILE_NAME} SHA256_VAR)
 file(WRITE ${MS_ROOT_DIR}/output/${NEW_FILE_NAME}.sha256 ${SHA256_VAR} " " ${NEW_FILE_NAME})
+set(CMAKE_TAR $ENV{CROSS_COMPILE}tar)
+if(CPACK_ENABLE_SYM_FILE)
+    file(MAKE_DIRECTORY ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG})
+    file(COPY ${MS_ROOT_DIR}/debug_info/ DESTINATION
+        ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/)
+    execute_process(COMMAND
+        ${CMAKE_COMMAND} -E ${CMAKE_TAR} cfv
+        ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}.zip
+        ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/ --format=zip
+        WORKING_DIRECTORY ${MS_ROOT_DIR})
+    file(REMOVE_RECURSE ${MS_ROOT_DIR}/debug_info)
+    file(REMOVE_RECURSE ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG})
+endif()
diff --git a/cmake/package_tar.cmake b/cmake/package_tar.cmake
index 57fc5deba9f..1f04942d82e 100644
--- a/cmake/package_tar.cmake
+++ b/cmake/package_tar.cmake
@@ -91,18 +91,6 @@ if(ENABLE_MINDDATA)
             DESTINATION ${INSTALL_LIB_DIR}
             COMPONENT mindspore
     )
-    file(GLOB_RECURSE FFMPEG_LIB_LIST
-            ${ffmpeg_LIBPATH}/libavcodec*
-            ${ffmpeg_LIBPATH}/libavformat*
-            ${ffmpeg_LIBPATH}/libavutil*
-            ${ffmpeg_LIBPATH}/libswresample*
-            ${ffmpeg_LIBPATH}/libswscale*
-            )
-    install(
-            FILES ${FFMPEG_LIB_LIST}
-            DESTINATION ${INSTALL_LIB_DIR}
-            COMPONENT mindspore
-    )
 endif()
 
 # CPU mode
diff --git a/cmake/package_win.cmake b/cmake/package_win.cmake
index d17cf1236e9..bbed4e0ff07 100644
--- a/cmake/package_win.cmake
+++ b/cmake/package_win.cmake
@@ -42,7 +42,6 @@ set(opencv_LIBPATH ${opencv_LIBPATH}/../bin/)
 set(jpeg_turbo_LIBPATH ${jpeg_turbo_LIBPATH}/../bin/)
 set(sqlite_LIBPATH ${sqlite_LIBPATH}/../bin/)
 set(tinyxml2_LIBPATH ${tinyxml2_LIBPATH}/../bin/)
-set(ffmpeg_LIBPATH ${ffmpeg_LIBPATH}/../bin/)
 
 message("offline debugger does not support windows system temporarily")
 
@@ -98,18 +97,6 @@ if(ENABLE_MINDDATA)
     DESTINATION ${INSTALL_LIB_DIR}
     COMPONENT mindspore
   )
-  file(GLOB_RECURSE FFMPEG_LIB_LIST
-    ${ffmpeg_LIBPATH}/libavcodec*
-    ${ffmpeg_LIBPATH}/libavformat*
-    ${ffmpeg_LIBPATH}/libavutil*
-    ${ffmpeg_LIBPATH}/libswresample*
-    ${ffmpeg_LIBPATH}/libswscale*
-    )
-  install(
-    FILES ${FFMPEG_LIB_LIST}
-    DESTINATION ${INSTALL_LIB_DIR}
-    COMPONENT mindspore
-  )
 endif()
 
 if(ENABLE_CPU)
diff --git a/docker/OWNERS b/docker/OWNERS
index 36d9fc6ffe5..7c5cab59d6b 100644
--- a/docker/OWNERS
+++ b/docker/OWNERS
@@ -1,2 +1,4 @@
+approvers:
+- zhoufeng54
 reviewers:
-- HW_KK
+- HW_KK
\ No newline at end of file
diff --git a/docker/mindspore-cpu/devel/Dockerfile b/docker/mindspore-cpu/devel/Dockerfile
index ec611bc7ea9..148265abbd0 100644
--- a/docker/mindspore-cpu/devel/Dockerfile
+++ b/docker/mindspore-cpu/devel/Dockerfile
@@ -58,8 +58,11 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
+    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
+    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
 
diff --git a/docker/mindspore-cpu/runtime/Dockerfile b/docker/mindspore-cpu/runtime/Dockerfile
index b84ac946152..ad61f9b3bec 100644
--- a/docker/mindspore-cpu/runtime/Dockerfile
+++ b/docker/mindspore-cpu/runtime/Dockerfile
@@ -51,13 +51,16 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && tar -xvf v3.7.5.tar.gz \
     && cd /tmp/cpython-3.7.5 \
     && mkdir -p ${PYTHON_ROOT_PATH} \
-    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
     && make -j4 \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
+    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
+    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
 
diff --git a/docker/mindspore-gpu/devel/Dockerfile b/docker/mindspore-gpu/devel/Dockerfile
index f8f4bf7ffa0..9983f3ad8a9 100644
--- a/docker/mindspore-gpu/devel/Dockerfile
+++ b/docker/mindspore-gpu/devel/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
 
 MAINTAINER leonwanghui <leon.wanghui@huawei.com>
 
@@ -43,7 +43,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \
     libnuma-dev
 
 # Configure cuDNN (v7.6.5)
-RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 /usr/local/cuda/lib64/libcudnn.so
+RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5 /usr/local/cuda/lib64/libcudnn.so
 
 # Set bash
 RUN echo "dash dash/sh boolean false" | debconf-set-selections
@@ -62,8 +62,11 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
+    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
+    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
 
diff --git a/docker/mindspore-gpu/runtime/Dockerfile b/docker/mindspore-gpu/runtime/Dockerfile
index 9ff9b71a246..5a2ed3cdbe1 100644
--- a/docker/mindspore-gpu/runtime/Dockerfile
+++ b/docker/mindspore-gpu/runtime/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04
 
 MAINTAINER leonwanghui <leon.wanghui@huawei.com>
 
@@ -53,13 +53,16 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \
     && tar -xvf v3.7.5.tar.gz \
     && cd /tmp/cpython-3.7.5 \
     && mkdir -p ${PYTHON_ROOT_PATH} \
-    && ./configure --prefix=${PYTHON_ROOT_PATH} \
+    && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \
     && make -j4 \
     && make install -j4 \
     && rm -f /usr/local/bin/python \
     && rm -f /usr/local/bin/pip \
+    && rm -f /usr/local/lib/libpython3.7m.so.1.0 \
     && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \
     && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \
+    && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \
+    && ldconfig \
     && rm -rf /tmp/cpython-3.7.5 \
     && rm -f /tmp/v3.7.5.tar.gz
 
diff --git a/include/api/context.h b/include/api/context.h
index ec02b93598c..d1b525ef713 100644
--- a/include/api/context.h
+++ b/include/api/context.h
@@ -38,12 +38,19 @@ class Allocator;
 class Delegate;
 class DeviceInfoContext;
 
+/// \brief Context is used to store environment variables during execution.
 class MS_API Context {
  public:
   Context();
   ~Context() = default;
 
+  /// \brief Set the number of threads at runtime. This option is only valid for MindSpore Lite.
+  ///
+  /// \param[in] thread_num the number of threads at runtime.
   void SetThreadNum(int32_t thread_num);
+  /// \brief Get the current thread number setting.
+  ///
+  /// \return The current thread number setting.
   int32_t GetThreadNum() const;
 
   /// \brief Set the thread affinity to CPU cores.
@@ -60,6 +67,10 @@ class MS_API Context {
   void SetDelegate(const std::shared_ptr<Delegate> &delegate);
   std::shared_ptr<Delegate> GetDelegate() const;
 
+  /// \brief Get a mutable reference of DeviceInfoContext vector in this context. Only MindSpore Lite supports
+  /// heterogeneous scenarios with multiple members in the vector.
+  ///
+  /// \return Mutable reference of DeviceInfoContext vector in this context.
   std::vector<std::shared_ptr<DeviceInfoContext>> &MutableDeviceInfo();
 
  private:
@@ -67,14 +78,24 @@ class MS_API Context {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief DeviceInfoContext defines different device contexts.
 class MS_API DeviceInfoContext : public std::enable_shared_from_this<DeviceInfoContext> {
  public:
   struct Data;
 
   DeviceInfoContext();
   virtual ~DeviceInfoContext() = default;
+
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   virtual enum DeviceType GetDeviceType() const = 0;
 
+  /// \brief A similar function to RTTI is provided when the -fno-rtti compilation option is turned on, which converts
+  /// DeviceInfoContext to a shared pointer of type T, and returns nullptr if the conversion fails.
+  ///
+  /// \param T Type
+  /// \return A pointer of type T after conversion. If the conversion fails, it will be nullptr.
   template <class T>
   std::shared_ptr<T> Cast() {
     static_assert(std::is_base_of<DeviceInfoContext, T>::value, "Wrong cast type.");
@@ -98,27 +119,60 @@ class MS_API DeviceInfoContext : public std::enable_shared_from_this<DeviceInfoC
   std::shared_ptr<Data> data_;
 };
 
+/// \brief Derived from DeviceInfoContext, The configuration of the model running on the CPU. This option is only valid
+/// for MindSpore Lite.
 class MS_API CPUDeviceInfo : public DeviceInfoContext {
  public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kCPU; };
 
+  /// \brief Set enables to perform the float16 inference
+  ///
+  /// \param[in] is_fp16 Enable float16 inference or not.
   void SetEnableFP16(bool is_fp16);
+  /// \brief Get enables to perform the float16 inference
+  ///
+  /// \return Whether enable float16 inference.
   bool GetEnableFP16() const;
 };
 
+/// \brief Derived from DeviceInfoContext, The configuration of the model running on the NPU. This option is only valid
+/// for MindSpore Lite.
 class MS_API KirinNPUDeviceInfo : public DeviceInfoContext {
  public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kKirinNPU; };
 
+  /// \brief Set the NPU frequency.
+  ///
+  /// \param[in] frequency Can be set to 1 (low power consumption), 2 (balanced), 3 (high performance), 4 (extreme
+  /// performance), default as 3.
   void SetFrequency(int frequency);
+  /// \brief Get the NPU frequency.
+  ///
+  /// \return NPU frequency
   int GetFrequency() const;
 };
 
+/// \brief Derived from DeviceInfoContext, The configuration of the model running on the GPU.
 class MS_API GPUDeviceInfo : public DeviceInfoContext {
  public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kGPU; };
 
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
   uint32_t GetDeviceID() const;
 
   void SetGpuTrtInferMode(bool gpu_trt_infer_mode);
@@ -127,8 +181,15 @@ class MS_API GPUDeviceInfo : public DeviceInfoContext {
   inline void SetPrecisionMode(const std::string &precison_mode);
   inline std::string GetPrecisionMode() const;
 
+  /// \brief Set enables to perform the float16 inference
+  ///
+  /// \param[in] is_fp16 Enable float16 inference or not.
   void SetEnableFP16(bool is_fp16);
+  /// \brief Get enables to perform the float16 inference
+  ///
+  /// \return Whether enable float16 inference.
   bool GetEnableFP16() const;
+
  private:
   void SetPrecisionMode(const std::vector<char> &precision_mode);
   std::vector<char> GetPrecisionModeChar() const;
@@ -139,52 +200,113 @@ void GPUDeviceInfo::SetPrecisionMode(const std::string &precision_mode) {
 }
 std::string GPUDeviceInfo::GetPrecisionMode() const { return CharToString(GetPrecisionModeChar()); }
 
+/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend910. This option is
+/// invalid for MindSpore Lite.
 class MS_API Ascend910DeviceInfo : public DeviceInfoContext {
  public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kAscend910; };
 
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
   uint32_t GetDeviceID() const;
 };
 
+/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend310. This option is
+/// invalid for MindSpore Lite.
 class MS_API Ascend310DeviceInfo : public DeviceInfoContext {
  public:
+  /// \brief Get the type of this DeviceInfoContext.
+  ///
+  /// \return Type of this DeviceInfoContext.
   enum DeviceType GetDeviceType() const override { return DeviceType::kAscend310; };
 
+  /// \brief Set device id.
+  ///
+  /// \param[in] device_id The device id.
   void SetDeviceID(uint32_t device_id);
+  /// \brief Get the device id.
+  ///
+  /// \return The device id.
   uint32_t GetDeviceID() const;
 
   inline void SetDumpConfigPath(const std::string &cfg_path);
   inline std::string GetDumpConfigPath() const;
 
-  // aipp config file
+  /// \brief Set AIPP configuration file path.
+  ///
+  /// \param[in] cfg_path AIPP configuration file path.
   inline void SetInsertOpConfigPath(const std::string &cfg_path);
+  /// \brief Get AIPP configuration file path.
+  ///
+  /// \return AIPP configuration file path.
   inline std::string GetInsertOpConfigPath() const;
 
-  // nchw or nhwc
+  /// \brief Set format of model inputs.
+  ///
+  /// \param[in] format Optional "NCHW", "NHWC", etc.
   inline void SetInputFormat(const std::string &format);
+  /// \brief Get format of model inputs.
+  ///
+  /// \return The format of model inputs.
   inline std::string GetInputFormat() const;
 
-  // Mandatory while dynamic batch: e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1"
+  /// \brief Set shape of model inputs.
+  ///
+  /// \param[in] shape e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1".
   inline void SetInputShape(const std::string &shape);
+  /// \brief Get shape of model inputs.
+  ///
+  /// \return The shape of model inputs.
   inline std::string GetInputShape() const;
 
+  /// \brief Set shape of model inputs.
+  ///
+  /// \param[in] shape e.g. {{1, {1,2,3,4}}, {2, {4,3,2,1}}} means the first input shape 1,2,3,4 and the second input
+  /// shape 4,3,2,1.
   void SetInputShapeMap(const std::map<int, std::vector<int>> &shape);
+  /// \brief Get shape of model inputs.
+  ///
+  /// \return The shape of model inputs.
   std::map<int, std::vector<int>> GetInputShapeMap() const;
 
   void SetDynamicBatchSize(const std::vector<size_t> &dynamic_batch_size);
   inline std::string GetDynamicBatchSize() const;
 
-  // FP32, UINT8 or FP16, default as FP32
+  /// \brief Set type of model outputs.
+  ///
+  /// \param[in] output_type FP32, UINT8 or FP16, default as FP32.
   void SetOutputType(enum DataType output_type);
+  /// \brief Get type of model outputs.
+  ///
+  /// \return The set type of model outputs.
   enum DataType GetOutputType() const;
 
-  // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16"
+  /// \brief Set precision mode of model.
+  ///
+  /// \param[in] precision_mode Optional "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" and
+  /// "allow_mix_precision", "force_fp16" is set as default
   inline void SetPrecisionMode(const std::string &precision_mode);
+  /// \brief Get precision mode of model.
+  ///
+  /// \return The set type of model outputs
   inline std::string GetPrecisionMode() const;
 
-  // Optional "high_performance" and "high_precision", "high_performance" is set as default
+  /// \brief Set op select implementation mode.
+  ///
+  /// \param[in] op_select_impl_mode Optional "high_performance" and "high_precision", "high_performance" is set as
+  /// default.
   inline void SetOpSelectImplMode(const std::string &op_select_impl_mode);
+  /// \brief Get op select implementation mode.
+  ///
+  /// \return The set op select implementation mode.
   inline std::string GetOpSelectImplMode() const;
 
   inline void SetFusionSwitchConfigPath(const std::string &cfg_path);
diff --git a/include/api/model.h b/include/api/model.h
index 9c0b434f0ab..53dfdb0d51d 100644
--- a/include/api/model.h
+++ b/include/api/model.h
@@ -37,32 +37,75 @@ class Metrics;
 namespace dataset {
 class Dataset;
 }  // namespace dataset
-
+/// \brief The Model class is used to define a MindSpore model, facilitating computational graph management.
 class MS_API Model {
  public:
   Model();
   ~Model();
   Model(const Model &) = delete;
   void operator=(const Model &) = delete;
-
+  /// \brief Builds a model so that it can run on a device.
+  ///
+  /// \param[in] graph GraphCell is a derivative of Cell. Cell is not available currently. GraphCell can be constructed
+  /// from Graph, for example, model.Build(GraphCell(graph), context).
+  /// \param[in] model_context A context used to store options during execution.
+  /// \param[in] train_cfg A config used by training.
+  ///
+  /// \return Status.
   Status Build(GraphCell graph, const std::shared_ptr<Context> &model_context = nullptr,
                const std::shared_ptr<TrainCfg> &train_cfg = nullptr);
+
+  /// \brief Resizes the shapes of inputs.
+  ///
+  /// \param[in] inputs A vector that includes all input tensors in order.
+  /// \param[in] dims Defines the new shapes of inputs, should be consistent with inputs.
+  ///
+  /// \return Status.
   Status Resize(const std::vector<MSTensor> &inputs, const std::vector<std::vector<int64_t>> &dims);
 
+  /// \brief Inference model.
+  ///
+  /// \param[in] inputs A vector where model inputs are arranged in sequence.
+  /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence.
+  /// \param[in] before CallBack before predict.
+  /// \param[in] after CallBack after predict.
+  ///
+  /// \return Status.
   Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs,
                  const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr);
 
+  /// \brief Obtains all input tensors of the model.
+  ///
+  /// \return The vector that includes all input tensors.
   std::vector<MSTensor> GetInputs();
+  /// \brief Obtains the input tensor of the model by name.
+  ///
+  /// \return The input tensor with the given name, if the name is not found, an invalid tensor is returned.
   inline MSTensor GetInputByTensorName(const std::string &tensor_name);
 
   Status InitMetrics(std::vector<Metrics *> metrics);
   std::vector<Metrics *> GetMetrics();
 
+  /// \brief Obtains all output tensors of the model.
+  ///
+  /// \return The vector that includes all output tensors.
   std::vector<MSTensor> GetOutputs();
+  /// \brief Obtains names of all output tensors of the model.
+  ///
+  /// \return A vector that includes names of all output tensors.
   inline std::vector<std::string> GetOutputTensorNames();
+  /// \brief Obtains the output tensor of the model by name.
+  ///
+  /// \return The output tensor with the given name, if the name is not found, an invalid tensor is returned.
   inline MSTensor GetOutputByTensorName(const std::string &tensor_name);
   inline std::vector<MSTensor> GetOutputsByNodeName(const std::string &tensor_name);
 
+  /// \brief Inference model.
+  ///
+  /// \param[in] device_type Device type，options are kGPU, kAscend910, etc.
+  /// \param[in] model_type The type of model file, options are ModelType::kMindIR, ModelType::kOM.
+  ///
+  /// \return Is supported or not.
   static bool CheckModelSupport(enum DeviceType device_type, ModelType model_type);
 
   Status SetTrainMode(bool train);
diff --git a/include/api/serialization.h b/include/api/serialization.h
index c56e67fc2e9..dcb0a4762ae 100644
--- a/include/api/serialization.h
+++ b/include/api/serialization.h
@@ -27,13 +27,43 @@
 #include "include/api/dual_abi_helper.h"
 
 namespace mindspore {
-
+/// \brief The Serialization class is used to summarize methods for reading and writing model files.
 class MS_API Serialization {
  public:
+  /// \brief Loads a model file from memory buffer.
+  ///
+  /// \param[in] model_data A buffer filled by model file.
+  /// \param[in] data_size The size of the buffer.
+  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
+  /// \param[out] graph The output parameter, an object saves graph data.
+  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
+  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
+  ///
+  /// \return Status.
   inline static Status Load(const void *model_data, size_t data_size, ModelType model_type, Graph *graph,
                             const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
+
+  /// \brief Loads a model file from path, is not supported on MindSpore Lite.
+  ///
+  /// \param[in] file The path of model file.
+  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
+  /// \param[out] graph The output parameter, an object saves graph data.
+  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
+  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
+  ///
+  /// \return Status.
   inline static Status Load(const std::string &file, ModelType model_type, Graph *graph, const Key &dec_key = {},
                             const std::string &dec_mode = kDecModeAesGcm);
+
+  /// \brief Load multiple models from multiple files, MindSpore Lite does not provide this feature.
+  ///
+  /// \param[in] files The path of model files.
+  /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM.
+  /// \param[out] graph The output parameter, an object saves graph data.
+  /// \param[in] dec_key The decryption key, key length is 16, 24, or 32.
+  /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC.
+  ///
+  /// \return Status.
   inline static Status Load(const std::vector<std::string> &files, ModelType model_type, std::vector<Graph> *graphs,
                             const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm);
   static Status SetParameters(const std::map<std::string, Buffer> &parameters, Model *model);
diff --git a/include/api/types.h b/include/api/types.h
index 383ba5cf9ac..77f200bda5c 100644
--- a/include/api/types.h
+++ b/include/api/types.h
@@ -25,11 +25,17 @@
 #include "include/api/dual_abi_helper.h"
 #include "include/api/format.h"
 
+#ifndef MS_API
 #ifdef _WIN32
+#ifdef BUILDING_DLL
 #define MS_API __declspec(dllexport)
 #else
+#define MS_API __declspec(dllimport)
+#endif
+#else
 #define MS_API __attribute__((visibility("default")))
 #endif
+#endif
 
 namespace mindspore {
 enum ModelType : uint32_t {
@@ -64,18 +70,64 @@ struct QuantParam {
 };
 
 class Allocator;
+/// \brief The MSTensor class defines a tensor in MindSpore.
 class MS_API MSTensor {
  public:
   class Impl;
-
+  /// \brief Creates a MSTensor object, whose data need to be copied before accessed by Model, must be used in pairs
+  /// with DestroyTensorPtr.
+  ///
+  /// \param[in] name The name of the MSTensor.
+  /// \param[in] type The data type of the MSTensor.
+  /// \param[in] shape The shape of the MSTensor.
+  /// \param[in] data The data pointer that points to allocated memory.
+  /// \param[in] data_len The length of the memory, in bytes.
+  ///
+  /// \return A pointer of MSTensor.
   static inline MSTensor *CreateTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                        const void *data, size_t data_len) noexcept;
+  /// \brief Creates a MSTensor object, whose data can be directly accessed by Model, must be used in pairs with
+  /// DestroyTensorPtr.
+  ///
+  /// \param[in] name The name of the MSTensor.
+  /// \param[in] type The data type of the MSTensor.
+  /// \param[in] shape The shape of the MSTensor.
+  /// \param[in] data The data pointer that points to allocated memory.
+  /// \param[in] data_len The length of the memory, in bytes.
+  ///
+  /// \return A pointer of MSTensor.
   static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                           const void *data, size_t data_len) noexcept;
+  /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with
+  /// DestroyTensorPtr.
+  ///
+  /// \param[in] name The name of the MSTensor.
+  /// \param[in] type The data type of the MSTensor.
+  /// \param[in] shape The shape of the MSTensor.
+  /// \param[in] data The data pointer that points to device memory.
+  /// \param[in] data_len The length of the memory, in bytes.
+  ///
+  /// \return A pointer of MSTensor.
   static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                           const void *data, size_t data_len) noexcept;
+  /// \brief Create a string type MSTensor object whose data can be accessed by Model only after being copied, must be
+  /// used in pair with DestroyTensorPtr.
+  ///
+  /// \param[in] name The name of the MSTensor.
+  /// \param[in] str A vector container containing several strings.
+  ///
+  /// \return A pointer of MSTensor.
   static inline MSTensor *StringsToTensor(const std::string &name, const std::vector<std::string> &str);
+  /// \brief Parse the string type MSTensor object into strings.
+  ///
+  /// \param[in] tensor A MSTensor object.
+  ///
+  /// \return A vector container containing several strings.
   static inline std::vector<std::string> TensorToStrings(const MSTensor &tensor);
+  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do
+  /// not use it to destroy MSTensor from other sources.
+  ///
+  /// \param[in] tensor A MSTensor object.
   static void DestroyTensorPtr(MSTensor *tensor) noexcept;
 
   MSTensor();
@@ -85,19 +137,51 @@ class MS_API MSTensor {
   explicit MSTensor(std::nullptr_t);
   ~MSTensor();
 
+  /// \brief Obtains the name of the MSTensor.
+  ///
+  /// \return The name of the MSTensor.
   inline std::string Name() const;
+  /// \brief Obtains the data type of the MSTensor.
+  ///
+  /// \return The data type of the MSTensor.
   enum DataType DataType() const;
+  /// \brief Obtains the shape of the MSTensor.
+  ///
+  /// \return The shape of the MSTensor.
   const std::vector<int64_t> &Shape() const;
+  /// \brief Obtains the number of elements of the MSTensor.
+  ///
+  /// \return The number of elements of the MSTensor.
   int64_t ElementNum() const;
 
+  /// \brief Obtains a shared pointer to the copy of data of the MSTensor. The data can be read on host.
+  ///
+  /// \return A shared pointer to the copy of data of the MSTensor.
   std::shared_ptr<const void> Data() const;
+  /// \brief Obtains the pointer to the data of the MSTensor. If the MSTensor is a device tensor, the data cannot be
+  /// accessed directly on host.
+  ///
+  /// \return A pointer to the data of the MSTensor.
   void *MutableData();
+  /// \brief Obtains the length of the data of the MSTensor, in bytes.
+  ///
+  /// \return The length of the data of the MSTensor, in bytes.
   size_t DataSize() const;
-
+  /// \brief Gets the boolean value that indicates whether the memory of MSTensor is on device.
+  ///
+  /// \return The boolean value that indicates whether the memory of MSTensor is on device.
   bool IsDevice() const;
-
+  /// \brief Gets a deep copy of the MSTensor, must be used in pair with DestroyTensorPtr.
+  ///
+  /// \return A pointer points to a deep copy of the MSTensor.
   MSTensor *Clone() const;
+  /// \brief Gets the boolean value that indicates whether the MSTensor is valid.
+  ///
+  /// \return The boolean value that indicates whether the MSTensor is valid.
   bool operator==(std::nullptr_t) const;
+  /// \brief Gets the boolean value that indicates whether the MSTensor is valid.
+  ///
+  /// \return The boolean value that indicates whether the MSTensor is valid.
   bool operator!=(std::nullptr_t) const;
   bool operator==(const MSTensor &tensor) const;
 
diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py
index 978256756a1..58cec1666a4 100644
--- a/mindspore/_checkparam.py
+++ b/mindspore/_checkparam.py
@@ -23,6 +23,7 @@ from itertools import repeat, zip_longest
 from collections import deque
 from collections.abc import Iterable
 import numpy as np
+from mindspore import context
 from mindspore import log as logger
 from mindspore.common import dtype as mstype
 from mindspore._c_expression import Tensor as Tensor_
@@ -846,6 +847,10 @@ class Validator:
         """Returns an empty Tensor."""
         return Tensor_(dtype, shape)
 
+    @staticmethod
+    def check_type_support(dtype, device, supported_dtypes):
+        return dtype in supported_dtypes or not context.get_context('device_target') == device
+
 
 def check_input_format(input_param):
     """Judge input format."""
diff --git a/mindspore/_extends/graph_kernel/parallel_estimate.py b/mindspore/_extends/graph_kernel/parallel_estimate.py
index a1f7d7a0952..0cf1a954966 100644
--- a/mindspore/_extends/graph_kernel/parallel_estimate.py
+++ b/mindspore/_extends/graph_kernel/parallel_estimate.py
@@ -21,7 +21,7 @@ from . import model
 
 
 def estimate_ops(json_str: str):
-    """Call costmodel to estimate ops."""
+    """Call cost model to estimate ops."""
     try:
         json_obj = json.loads(json_str)
         graph_descs = json_obj["graph_desc"]
@@ -38,7 +38,7 @@ def estimate_ops(json_str: str):
 
 
 def estimate_calulation_amount(json_str: str):
-    """Call costmodel to estimate calculation amount of op."""
+    """Call cost model to estimate calculation amount of op."""
     try:
         graph_desc = json.loads(json_str)
         comp = model.load_composite(graph_desc)
diff --git a/mindspore/_extends/graph_kernel/splitter.py b/mindspore/_extends/graph_kernel/splitter.py
index c622159ac1c..87b7da1260a 100644
--- a/mindspore/_extends/graph_kernel/splitter.py
+++ b/mindspore/_extends/graph_kernel/splitter.py
@@ -24,7 +24,7 @@ from . import utils
 
 
 def split_with_json(json_str, flags_str):
-    """Call costmodel to split GraphKernel"""
+    """Call cost model to split GraphKernel"""
     try:
         graph_desc = json.loads(json_str)
         flags = json.loads(flags_str)
diff --git a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
index c6487c9f17c..d3f0bbf1641 100644
--- a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
+++ b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py
@@ -50,11 +50,6 @@ def _compile_akg_task_gpu(json_strs, attrs):
         if not res:
             raise ValueError("Compile error, args: {}! build attrs: {}".format(json_str, attrs))
 
-    pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid()))
-    if os.path.exists(pid_path):
-        copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid())))
-        shutil.rmtree(pid_path)
-
 
 def _compile_akg_task_ascend(json_strs, attrs):
     """
diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py
index 3af474860cc..e3b0afee226 100644
--- a/mindspore/_extends/parse/parser.py
+++ b/mindspore/_extends/parse/parser.py
@@ -159,12 +159,17 @@ def resolve_symbol(namespace, symbol):
         if getattr(resolve_, "__hash__") is None:
             return resolve_
 
+        # Raise NotImplementedError when parsing the numpy methods, but not the numpy constant.
+        if namespace.name == "numpy" and isinstance(resolve_, (types.FunctionType, types.MethodType, types.ModuleType)):
+            raise NotImplementedError(
+                f"MindSpore does not support to use the numpy methods in the function construct with the graph mode.")
+
         # If need trope the obj
         if resolve_ in convert_object_map:
             resolve_ = convert_object_map.get(resolve_)
             logger.debug("convert resolve = %r", resolve_)
             if resolve_ == NO_IMPLEMENT:
-                raise NotImplementedError(f"Not support for `{symbol}`")
+                raise NotImplementedError(f"Not support for `{symbol}`.")
     except Exception as e:
         if isinstance(e, NotImplementedError):
             raise e
diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py
index 40e13001493..efd29dfc760 100644
--- a/mindspore/_extends/parse/standard_method.py
+++ b/mindspore/_extends/parse/standard_method.py
@@ -1312,7 +1312,8 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab
         >>> print(input_x.sum(axis=1))
         [10. 35.]
     """
-    dtype = x.dtype if dtype is None else dtype
+    input_x = x.astype(mstype.int32) if x.dtype == mstype.bool_ else x
+    dtype = input_x.dtype if dtype is None else dtype
     if not isinstance(keepdims, int):
         const_utils.raise_type_error("integer argument expected")
     if initial is not None and not isinstance(initial, (int, float, bool)):
@@ -1322,14 +1323,14 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab
     else:
         axis = check_and_canonicalize_axes(axis, x.ndim)
 
-    if x.dtype == mstype.bool_:
-        x = x.astype("int32")
+    if not check_type_support(input_x.dtype, 'GPU', (mstype.float64, mstype.float32, mstype.float16)):
+        input_x = input_x.astype(mstype.float32)
     if 0 in x.shape:
         x = const_utils.make_tensor([0], x.dtype)
     if keepdims:
-        res = _reduce_sum_keepdims(x, axis)
+        res = _reduce_sum_keepdims(input_x, axis)
     else:
-        res = _reduce_sum_default(x, axis)
+        res = _reduce_sum_default(input_x, axis)
     if initial is not None:
         res += initial
     return res.astype(dtype)
@@ -1648,6 +1649,7 @@ get_log2_size = constexpr(validator.get_log2_size)
 check_axis_type = constexpr(validator.check_axis_type)
 check_and_canonicalize_axes = constexpr(validator.check_and_canonicalize_axes)
 empty_compile = constexpr(validator.empty_compile)
+check_type_support = constexpr(validator.check_type_support)
 
 
 def tensor_bool(x):
diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt
index 7027396063c..444d08a5edd 100644
--- a/mindspore/ccsrc/CMakeLists.txt
+++ b/mindspore/ccsrc/CMakeLists.txt
@@ -325,7 +325,7 @@ endif()
 set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
 set_property(SOURCE "pipeline/jit/init.cc" PROPERTY
             COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE)
-pybind11_add_module(_c_expression "pipeline/jit/init.cc")
+pybind11_add_module(_c_expression NO_EXTRAS "pipeline/jit/init.cc")
 
 MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}")
 if(CMAKE_SYSTEM_NAME MATCHES "Linux")
diff --git a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt
index 5622013fa27..954402e5c9e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt
+++ b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt
@@ -35,6 +35,7 @@ if(ENABLE_CPU)
         "cpu/fl/*.cc"
         "cpu/ps/*.cc"
         "cpu/quantum/*.cc"
+        "cpu/pyfunc/*.cc"
     )
 
     if(NOT ENABLE_MPI)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc
index 8b047f153a0..500be4de4ad 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc
@@ -16,6 +16,11 @@
 
 #include "backend/kernel_compiler/akg/akg_kernel_build.h"
 
+#include <stdio.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
 #include <algorithm>
 #include <map>
 #include <memory>
@@ -23,6 +28,7 @@
 #include <unordered_set>
 #include <utility>
 #include <vector>
+#include <iostream>
 #include "nlohmann/json.hpp"
 #include "ir/dtype.h"
 #include "ir/func_graph.h"
@@ -34,9 +40,320 @@
 
 namespace mindspore {
 namespace kernel {
+
+#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) \
+  std::set<size_t> set_var(kernel_lists_[list_idx], kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]);
+
+#define LIST_BEGIN(list_idx) kernel_lists_[list_idx]
+#define LIST_END(list_idx) (kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_])
+#define RESET_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] = val
+
+#define INCREASE_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] += val
+
 constexpr int32_t PROCESS_NUM = 16;
 constexpr int32_t TIME_OUT = 300;
 
+bool AkgKernelPool::LockMng::TryLock() {
+  // Try to lock 100 times. Return errno if lock unsuccessfully
+  uint32_t trial = 100;
+
+  int32_t ret = -1;
+  while (trial > 0) {
+    ret = lockf(fd_, F_TLOCK, 0);
+    if (ret == 0 || (errno != EACCES && errno != EAGAIN)) {
+      break;
+    }
+
+    trial--;
+    usleep(5000);
+  }
+
+  if (ret == -1) {
+    MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << ".";
+    return false;
+  }
+
+  return true;
+}
+
+void AkgKernelPool::LockMng::Unlock() {
+  auto ret = lockf(fd_, F_ULOCK, 0);
+  if (ret == -1) {
+    MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno);
+  }
+}
+
+std::string AkgKernelPool::GetCurrentPath() {
+  char cwd[PATH_MAX];
+  char *ret = getcwd(cwd, sizeof(cwd));
+  if (ret == nullptr) {
+    MS_LOG(ERROR) << "Get current work directory failed, errno:" << strerror(errno);
+    return "";
+  }
+
+  char abspath[PATH_MAX];
+  char *res = realpath(cwd, abspath);
+  if (res == nullptr) {
+    MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno);
+    return "";
+  }
+
+  return std::string(abspath);
+}
+
+void *AkgKernelPool::CreateSharedMem(const std::string &path) {
+  is_creator_ = false;
+
+  auto hash_id = std::hash<std::string>()(path);
+  auto key_id = static_cast<key_t>(hash_id);
+  auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512;
+
+  {
+    LockMng lock(fd_);
+    if (!lock.locked_) {
+      MS_LOG(ERROR) << "Failed to acquire lock.";
+      return nullptr;
+    }
+
+    // check if the shared memory exists or not.
+    // remove shared memory if exists and the nattach is 0
+    struct shmid_ds buf;
+    auto id = shmget(key_id, mem_size, 0);
+    if (id != -1) {
+      auto ret = shmctl(id, IPC_STAT, &buf);
+      if (ret == -1) {
+        MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno);
+        return nullptr;
+      }
+
+      if (buf.shm_nattch == 0) {
+        ret = shmctl(id, IPC_RMID, nullptr);
+        if (ret < 0) {
+          MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno);
+        }
+      }
+    }
+  }
+
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return nullptr;
+  }
+
+  shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600);
+  if (shm_id_ == -1) {
+    if (errno == EEXIST) {
+      shm_id_ = shmget(key_id, mem_size, 0);
+    }
+
+    if (shm_id_ == -1) {
+      MS_LOG(ERROR) << "Create shared_mem failed, error no:" << strerror(errno);
+      return nullptr;
+    }
+  } else {
+    is_creator_ = true;
+  }
+
+  auto local_addr = shmat(shm_id_, nullptr, 0);
+  if (local_addr == reinterpret_cast<void *>(-1)) {
+    MS_LOG(ERROR) << "Attach to shared_mem failed, error no:" << strerror(errno);
+    return nullptr;
+  }
+
+  if (is_creator_) {
+    (void)memset(local_addr, 0, mem_size);
+  }
+
+  return local_addr;
+}
+
+int32_t AkgKernelPool::Init(const std::vector<JsonNodePair> &build_args) {
+  auto cp = GetCurrentPath();
+  if (cp.empty()) {
+    return -1;
+  }
+
+  fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+  if (fd_ == -1) {
+    MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno);
+    return -1;
+  }
+
+  auto addr = CreateSharedMem(cp);
+  if (addr == nullptr) {
+    return -1;
+  }
+
+  InitKernelLists(addr);
+
+  auto ret = AddKernels(build_args);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool AddKernels failed.";
+    return false;
+  }
+
+  return 0;
+}
+
+AkgKernelPool::~AkgKernelPool() {
+  // Detach shared memory
+  auto ret = shmdt(reinterpret_cast<void *>(kernel_lists_[0]));
+  if (ret < 0) {
+    MS_LOG(EXCEPTION) << "Shared_mem detach failed, errno:" << strerror(errno);
+  }
+
+  // Realse shared_memroy
+  if (is_creator_) {
+    ret = shmctl(shm_id_, IPC_RMID, nullptr);
+    if (ret < 0) {
+      MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno);
+    }
+  }
+
+  // Close key file
+  if (fd_ != -1) {
+    (void)close(fd_);
+  }
+}
+
+int32_t AkgKernelPool::AddKernels(const std::vector<JsonNodePair> &build_args) {
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return -1;
+  }
+
+  INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_);
+  INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
+  INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
+
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto hash_id = std::hash<std::string>()(kernel_name);
+    if (self_kernel_ids_.count(hash_id) != 0) {
+      MS_LOG(ERROR) << "Duplicated hash_id in list.";
+      return -1;
+    }
+
+    self_kernel_ids_.emplace(hash_id);
+  }
+
+  std::set<size_t> diff_from_todo;
+  std::set<size_t> diff_from_doing;
+  std::set<size_t> diff_from_done;
+
+  // add the unique kernel only once, so need to check if it exists in todo_list, doing_list, or done_list
+  std::set_difference(self_kernel_ids_.begin(), self_kernel_ids_.end(), todo_list.begin(), todo_list.end(),
+                      std::inserter(diff_from_todo, diff_from_todo.begin()));
+  std::set_difference(diff_from_todo.begin(), diff_from_todo.end(), doing_list.begin(), doing_list.end(),
+                      std::inserter(diff_from_doing, diff_from_doing.begin()));
+  std::set_difference(diff_from_doing.begin(), diff_from_doing.end(), done_list.begin(), done_list.end(),
+                      std::inserter(diff_from_done, diff_from_done.begin()));
+
+  auto new_kernel_size = diff_from_done.size();
+  if (new_kernel_size + todo_list.size() > static_cast<size_t>(kMaxKernelNum_)) {
+    MS_LOG(ERROR) << "The size of kernels is " << new_kernel_size << ", while the left space of the pool is "
+                  << kMaxKernelNum_ - todo_list.size();
+    return -1;
+  }
+
+  std::copy(diff_from_done.begin(), diff_from_done.end(), LIST_END(kToDoIdx_));
+  INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size);
+
+  return 0;
+}
+
+int32_t AkgKernelPool::FetchKernels(std::set<size_t> *out) {
+  LockMng lock(fd_);
+  if (!lock.locked_) {
+    MS_LOG(ERROR) << "Failed to acquire lock.";
+    return -1;
+  }
+
+  std::set<size_t> left_in_todo_list;
+
+  // filter out kernels which belongs to other processes
+  auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) {
+    if (this->self_kernel_ids_.count(id) != 0) {
+      out->emplace(id);
+    } else {
+      left_in_todo_list.emplace(id);
+    }
+  };
+
+  std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList);
+
+  std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_));
+  INCREASE_LIST_SIZE(kDoingIdx_, out->size());
+
+  std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_));
+  RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size());
+
+  return 0;
+}
+
+int32_t AkgKernelPool::UpdateAndWait(const std::set<size_t> &ids) {
+  if (!ids.empty()) {
+    LockMng lock(fd_);
+    if (!lock.locked_) {
+      MS_LOG(ERROR) << "Failed to acquire lock.";
+      return -1;
+    }
+
+    // update the state of finished kernels to `done`
+    std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_));
+    INCREASE_LIST_SIZE(kDoneIdx_, ids.size());
+
+    // delete the finished kernels from doing_list
+    std::vector<size_t> left_in_doing_list;
+    INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_);
+    std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(),
+                        std::inserter(left_in_doing_list, left_in_doing_list.begin()));
+
+    std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_));
+    RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size());
+  }
+
+  auto ret = Wait();
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool Wait failed.";
+    return -1;
+  }
+
+  return 0;
+}
+
+int32_t AkgKernelPool::Wait() {
+  // wait until all the kernels which belong to this process finish compiling
+  uint32_t trials = 1000;
+
+  while (trials > 0) {
+    {
+      LockMng lock(fd_);
+      if (!lock.locked_) {
+        MS_LOG(ERROR) << "Failed to acquire lock.";
+        return -1;
+      }
+
+      INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_);
+
+      if (std::all_of(self_kernel_ids_.begin(), self_kernel_ids_.end(),
+                      [&done_list](size_t id) { return done_list.count(id) != 0; })) {
+        return 0;
+      }
+    }
+
+    usleep(1000000);
+    trials--;
+  }
+
+  MS_LOG(ERROR) << "Time out while wait kernel compiling";
+  return -1;
+}
+
 std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args) {
   // Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess.
   std::vector<std::string> jsons;
@@ -66,6 +383,31 @@ std::vector<std::string> AkgKernelBuilder::GetNotCachedKernelJsons(const std::ve
   return jsons;
 }
 
+std::vector<JsonNodePair> AkgKernelBuilder::GetNotCachedKernels(const std::vector<JsonNodePair> &build_args) {
+  std::unordered_set<std::string> kernel_name_set;
+  std::vector<JsonNodePair> new_build_args;
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto cached_kernel_pack = AkgSearchCache(kernel_name);
+    if (cached_kernel_pack != nullptr) {
+      MS_LOG(DEBUG) << "Use cached kernel, kernel_name[" << kernel_name << "], fullname_with_scope["
+                    << anf_node->fullname_with_scope() << "].";
+      AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node);
+      continue;
+    }
+
+    if (kernel_name_set.count(kernel_name) != 0) {
+      repeat_nodes_.push_back({json_generator, anf_node});
+      continue;
+    }
+    kernel_name_set.insert(kernel_name);
+    new_build_args.push_back({json_generator, anf_node});
+  }
+  return new_build_args;
+}
+
 bool AkgKernelBuilder::InsertToCache(const std::vector<JsonNodePair> &build_args) {
   for (const auto &[json_generator, anf_node] : build_args) {
     auto kernel_name = json_generator.kernel_name();
@@ -97,32 +439,77 @@ bool AkgKernelBuilder::HandleRepeatNodes() {
   return true;
 }
 
+std::vector<std::string> AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
+                                                                  std::set<size_t> fetched_ids) {
+  std::vector<std::string> jsons;
+  for (const auto &[json_generator, anf_node] : build_args) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto kernel_name = json_generator.kernel_name();
+
+    auto hash_id = std::hash<std::string>()(kernel_name);
+
+    if (fetched_ids.count(hash_id) == 0) {
+      continue;
+    }
+
+    auto kernel_json = json_generator.kernel_json_str();
+    AkgSaveJsonInfo(kernel_name, kernel_json);
+    jsons.push_back(kernel_json);
+  }
+  return jsons;
+}
+
 bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args) {
   repeat_nodes_.clear();
-  auto jsons = GetNotCachedKernelJsons(build_args);
-  if (jsons.empty()) {
+  auto new_build_args = GetNotCachedKernels(build_args);
+  if (new_build_args.empty()) {
     return true;
   }
 
-  auto client = GetClient();
-  MS_EXCEPTION_IF_NULL(client);
-  if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
-    MS_LOG(ERROR) << "Akg start failed.";
+  AkgKernelPool kp;
+  auto ret = kp.Init(new_build_args);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool init failed.";
     return false;
   }
-  auto attrs = CollectBuildAttrs();
-  if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
-    MS_LOG(ERROR) << "Akg send attr failed.";
+
+  std::set<size_t> fetched_ids;
+  ret = kp.FetchKernels(&fetched_ids);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed.";
     return false;
   }
-  if (!client->AkgSendData(jsons)) {
-    MS_LOG(ERROR) << "Akg send data failed.";
-    return false;
-  }
-  if (!client->AkgWait()) {
-    MS_LOG(ERROR) << "Akg compile failed.";
+
+  if (!fetched_ids.empty()) {
+    auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids);
+
+    auto client = GetClient();
+    MS_EXCEPTION_IF_NULL(client);
+    if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) {
+      MS_LOG(ERROR) << "Akg start failed.";
+      return false;
+    }
+    auto attrs = CollectBuildAttrs();
+    if (!attrs.empty() && !client->AkgSendAttr(attrs)) {
+      MS_LOG(ERROR) << "Akg send attr failed.";
+      return false;
+    }
+    if (!client->AkgSendData(jsons)) {
+      MS_LOG(ERROR) << "Akg send data failed.";
+      return false;
+    }
+    if (!client->AkgWait()) {
+      MS_LOG(ERROR) << "Akg compile failed.";
+      return false;
+    }
+  }
+
+  ret = kp.UpdateAndWait(fetched_ids);
+  if (ret != 0) {
+    MS_LOG(ERROR) << "AkgKernelPool UpdateAndWait failed.";
     return false;
   }
+
   // All unique done here, cache them and set kernel.
   if (!InsertToCache(build_args)) {
     MS_LOG(ERROR) << "Insert cache failed.";
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h
index c0012ece6ff..9f9958f1464 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h
@@ -17,10 +17,13 @@
 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_
 
+#include <sys/shm.h>
+
 #include <string>
 #include <utility>
 #include <vector>
 #include <map>
+#include <set>
 #include "ir/anf.h"
 #include "backend/kernel_compiler/kernel.h"
 #include "backend/session/kernel_build_client.h"
@@ -45,12 +48,84 @@ class AkgKernelBuilder {
 
  private:
   std::vector<std::string> GetNotCachedKernelJsons(const std::vector<JsonNodePair> &build_args);
+  std::vector<JsonNodePair> GetNotCachedKernels(const std::vector<JsonNodePair> &build_args);
+  std::vector<std::string> GetKernelJsonsByHashId(const std::vector<JsonNodePair> &build_args,
+                                                  std::set<size_t> fetched_ids);
   bool InsertToCache(const std::vector<JsonNodePair> &build_args);
   bool HandleRepeatNodes();
   bool AkgOpParallelBuild(const std::vector<JsonNodePair> &build_args);
   std::vector<JsonNodePair> repeat_nodes_;
   std::string CollectBuildAttrs();
 };
+
+class AkgKernelPool {
+ public:
+  class LockMng {
+   public:
+    explicit LockMng(int32_t fd) {
+      fd_ = fd;
+      locked_ = TryLock();
+    }
+
+    virtual ~LockMng() {
+      if (locked_) {
+        Unlock();
+      }
+    }
+
+    bool locked_{false};
+
+   private:
+    bool TryLock();
+    void Unlock();
+
+    int32_t fd_{-1};
+  };
+
+ public:
+  AkgKernelPool() = default;
+  virtual ~AkgKernelPool();
+
+  int32_t Init(const std::vector<JsonNodePair> &build_args);
+  int32_t FetchKernels(std::set<size_t> *out);
+  int32_t UpdateAndWait(const std::set<size_t> &ids);
+
+  constexpr inline static size_t kMaxKernelNum_{1000};
+  constexpr inline static key_t kSharedMemKey_{0x57565845};
+
+  // allocate memory for todo_list, doing_list, done_list
+  constexpr inline static size_t kListNum_{3};
+
+  constexpr inline static auto kKeyName_ = "./akg_build_tmp.key";
+
+  constexpr inline static int32_t kToDoIdx_ = 0;
+  constexpr inline static int32_t kDoingIdx_ = 1;
+  constexpr inline static int32_t kDoneIdx_ = 2;
+
+ private:
+  void *CreateSharedMem(const std::string &path);
+  std::string GetCurrentPath();
+
+  inline void InitKernelLists(void *addr) {
+    kernel_lists_[kToDoIdx_] = reinterpret_cast<size_t *>(addr);
+    kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1;
+    kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1;
+  }
+
+  int32_t AddKernels(const std::vector<JsonNodePair> &kernel_jsons);
+  int32_t Wait();
+
+  int32_t shm_id_{-1};
+  bool is_creator_{false};
+  int32_t fd_{-1};
+
+  // includes 3 lists: todo_list, doing_list, done_list.
+  // each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list
+  // is stored in kernel_lists_[xx][kMaxKernelNum_]
+  size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr};
+
+  std::set<size_t> self_kernel_ids_;
+};
 }  // namespace kernel
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc
index 34641fc481e..4f0b619848c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc
@@ -44,8 +44,10 @@ KernelPackPtr AkgAscendKernelBuilder::AkgInsertCache(const std::string &kernel_n
 void AkgAscendKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack,
                                              const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) {
   auto kernel_mod_ptr = std::make_shared<AkgKernelMod>(kernel_pack);
+  auto kernel_json_info = kernel_pack->kernel_json_info();
   kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list());
   kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list());
+  kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces);
   AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc
index 4761f359ae5..856106fec7b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc
@@ -49,7 +49,7 @@ const std::vector<size_t> &AkgKernelMod::GetOutputSizeList() const { return outp
 
 const std::vector<size_t> &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
 
-bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
   if (stream_ptr == nullptr) {
     MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
@@ -74,6 +74,10 @@ bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
                        [](const AddressPtr &input) -> void * { return input->addr; });
   (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args),
                        [](const AddressPtr &output) -> void * { return output->addr; });
+  if (!workspace.empty()) {
+    (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtime_args),
+                         [](const AddressPtr &addr) -> void * { return addr->addr; });
+  }
 
   rtL2Ctrl_t *l2ctrl = nullptr;
   auto stream = static_cast<rtStream_t *>(stream_ptr);
@@ -86,7 +90,8 @@ bool AkgKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
   return true;
 }
 
-std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &inputs,
+                                               const std::vector<AddressPtr> &workspace,
                                                const std::vector<AddressPtr> &outputs, uint32_t stream_id) {
   if (kernel_pack_ == nullptr) {
     MS_LOG(EXCEPTION) << "kernel pack should not be nullptr.";
@@ -107,6 +112,10 @@ std::vector<TaskInfoPtr> AkgKernelMod::GenTask(const std::vector<AddressPtr> &in
                        [](const AddressPtr &input) -> void * { return input->addr; });
   (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs),
                        [](const AddressPtr &output) -> void * { return output->addr; });
+  if (!workspace.empty()) {
+    (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(workspace_addrs),
+                         [](const AddressPtr &workspace) -> void * { return workspace->addr; });
+  }
 
   uint32_t block_dim = DEFAULT_BLOCK_DIM;  // default blockdim equal to 1.
   auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc
index d39e75e2917..47d5c0f31ba 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc
@@ -39,8 +39,10 @@ KernelPackPtr AkgGpuKernelBuilder::AkgInsertCache(const std::string &kernel_name
 void AkgGpuKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack,
                                           const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) {
   auto kernel_mod_ptr = std::make_shared<GpuKernelMod>(kernel_pack);
+  auto kernel_json_info = kernel_pack->kernel_json_info();
   kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list());
   kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list());
+  kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces);
   AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get());
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc
index 3cdb095ab41..0971bdcf42b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc
@@ -92,13 +92,15 @@ void GpuKernelMod::SetInputSizeList(const std::vector<size_t> &size_list) { inpu
 
 void GpuKernelMod::SetOutputSizeList(const std::vector<size_t> &size_list) { output_size_list_ = size_list; }
 
+void GpuKernelMod::SetWorkspaceSizeList(const std::vector<size_t> &size_list) { workspace_size_list_ = size_list; }
+
 const std::vector<size_t> &GpuKernelMod::GetInputSizeList() const { return input_size_list_; }
 
 const std::vector<size_t> &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; }
 
 const std::vector<size_t> &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; }
 
-bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                           const std::vector<AddressPtr> &outputs, void *stream_ptr) {
   if (stream_ptr == 0) {
     MS_LOG(ERROR) << "stream_ptr should not be nullptr.";
@@ -122,6 +124,10 @@ bool GpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vect
                        [](const AddressPtr &input) -> void * { return reinterpret_cast<void *>(&(input->addr)); });
   (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
                        [](const AddressPtr &output) -> void * { return reinterpret_cast<void *>(&(output->addr)); });
+  if (!workspace.empty()) {
+    (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs),
+                         [](const AddressPtr &addr) -> void * { return addr->addr; });
+  }
   result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4],
                           thread_info[5], 0, reinterpret_cast<CUstream>(stream_ptr),
                           reinterpret_cast<void **>(&runtimeargs[0]), 0);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h
index b87d223f7f3..5e9d17acfd1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h
@@ -60,6 +60,7 @@ class GpuKernelMod : public KernelMod {
 
   void SetInputSizeList(const std::vector<size_t> &size_list);
   void SetOutputSizeList(const std::vector<size_t> &size_list);
+  void SetWorkspaceSizeList(const std::vector<size_t> &size_list);
   const std::vector<size_t> &GetInputSizeList() const override;
   const std::vector<size_t> &GetOutputSizeList() const override;
   const std::vector<size_t> &GetWorkspaceSizeList() const override;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
index edc94673083..b9124449dd8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc
@@ -141,14 +141,8 @@ FusionType GetFusionTypeByName(const std::string &name) {
   return iter->first;
 }
 
-void KernelMeta::Initialize(int pid) {
-  if (pid == -1) {
-    kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/";
-  } else {
-    kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/";
-  }
-  // remove old kernel cache
-  RemoveKernelCache();
+void KernelMeta::Initialize() {
+  kernel_meta_path_ = std::string(kGpuKernelMeta) + "/";
 
 #if defined(_WIN32) || defined(_WIN64)
   auto ret = mkdir(kernel_meta_path_.c_str());
@@ -161,21 +155,6 @@ void KernelMeta::Initialize(int pid) {
   initialized_ = true;
 }
 
-void KernelMeta::RemoveKernelCache() {
-  DIR *dir = opendir(kernel_meta_path_.c_str());
-  if (dir == nullptr) {
-    return;
-  }
-  struct dirent *entry;
-  while ((entry = readdir(dir)) != nullptr) {
-    std::string kernel_file = entry->d_name;
-    std::string kernel_file_realpath = kernel_meta_path_ + kernel_file;
-    (void)remove(kernel_file_realpath.c_str());
-  }
-  (void)closedir(dir);
-  (void)rmdir(kernel_meta_path_.c_str());
-}
-
 std::string KernelMeta::Search(const std::string &kernel_name) const {
   if (!initialized_) {
     return "";
@@ -227,7 +206,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro
     KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
     // just a tmp solution.
     if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
-      MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "].";
+      MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "].";
       return nullptr;
     } else {
       return kernel_pack;
@@ -250,7 +229,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro
   (void)kernel_json.append(kernel_name).append(kJsonSuffix);
   KernelPackPtr kernel_pack = std::make_shared<KernelPack>();
   if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) {
-    MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "].";
+    MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "].";
     return nullptr;
   }
 
@@ -714,6 +693,9 @@ void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vector<AnfNode
       for (size_t input_idx = 1; input_idx < cnode->inputs().size(); ++input_idx) {
         auto input_node = cnode->input(input_idx);
         MS_EXCEPTION_IF_NULL(input_node);
+        if (input_node->isa<CNode>() && AnfAlgo::GetInputTensorNum(input_node) == 0) {
+          continue;
+        }
         output_list->push_back(AnfAlgo::VisitKernel(input_node, 0).first);
       }
     } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
index 9c50ea0213f..507517954bd 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h
@@ -55,8 +55,7 @@ using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
 class KernelMeta {
  public:
   KernelMeta() = default;
-  void Initialize(int pid);
-  void RemoveKernelCache();
+  void Initialize();
   std::string Search(const std::string &kernel_name) const;
   bool Insert(const std::string &kernel_name, const std::string &kernel_json);
   std::string kernel_meta_path() const { return kernel_meta_path_; }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc
index ae3182d97f7..2bdbc7fcc26 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc
@@ -26,46 +26,26 @@ namespace mindspore {
 namespace kernel {
 constexpr size_t kSizeFloat16 = sizeof(float16);
 constexpr size_t kSizeFloat32 = sizeof(float);
+constexpr size_t kScalarIndex = 0;
 constexpr size_t kAdamWeightDecayInputSize = 9;
 constexpr size_t kAdamWeightDecayOutputSize = 3;
 
-void AdamWeightDecayCPUKernel::ParallelForAdam(const CTask &task, size_t count) {
-  auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
-  const float block_size = 128.0;
-  const float align_size = 16.0;
-  size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
-  std::vector<common::Task> tasks;
-  size_t start = 0;
-  size_t once_compute_size = align_size * std::ceil(count / (align_size * thread_num));
-  while (start < count) {
-    size_t end = (start + once_compute_size) > count ? count : (start + once_compute_size);
-    auto block = [&, start, end]() {
-      task(start, end);
-      return common::SUCCESS;
-    };
-    tasks.emplace_back(block);
-    start += once_compute_size;
-  }
-  common::ThreadPool::GetInstance().SyncRun(tasks);
-}
-
 template <typename T, typename S>
-void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector<AddressPtr> &inputs,
-                                               const std::vector<AddressPtr> &outputs) {
-  auto var = reinterpret_cast<T *>(inputs[0]->addr);
-  auto m = reinterpret_cast<T *>(inputs[1]->addr);
-  auto v = reinterpret_cast<T *>(inputs[2]->addr);
-  auto lr = reinterpret_cast<T *>(inputs[3]->addr)[0];
-  auto beta1 = reinterpret_cast<T *>(inputs[4]->addr)[0];
-  auto beta2 = reinterpret_cast<T *>(inputs[5]->addr)[0];
-  auto epsilon = reinterpret_cast<T *>(inputs[6]->addr)[0];
-  auto decay = reinterpret_cast<T *>(inputs[7]->addr)[0];
-  auto gradient16 = reinterpret_cast<S *>(inputs[8]->addr);
+void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &) {
+  auto var = reinterpret_cast<T *>(inputs[VAR]->addr);
+  auto m = reinterpret_cast<T *>(inputs[M]->addr);
+  auto v = reinterpret_cast<T *>(inputs[V]->addr);
+  auto lr = reinterpret_cast<T *>(inputs[LR]->addr)[kScalarIndex];
+  auto beta1 = reinterpret_cast<T *>(inputs[BETA1]->addr)[kScalarIndex];
+  auto beta2 = reinterpret_cast<T *>(inputs[BETA2]->addr)[kScalarIndex];
+  auto epsilon = reinterpret_cast<T *>(inputs[EPSILON]->addr)[kScalarIndex];
+  auto decay = reinterpret_cast<T *>(inputs[DECAY]->addr)[kScalarIndex];
+  auto gradient16 = reinterpret_cast<S *>(inputs[GRAD]->addr);
   const auto beta1_minus = 1 - beta1;
   const auto beta2_minus = 1 - beta2;
 
   // multithreading
-  size_t lens = inputs[0]->size > 0 ? static_cast<size_t>(inputs[0]->size / sizeof(float)) : 1;
+  size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(float)) : 1;
   std::function<void(size_t, size_t)> task;
 
   task = [&](size_t start, size_t end) {
@@ -81,28 +61,27 @@ void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector<AddressPtr> &in
       var[i] -= lr * update;
     }
   };
-  ParallelForAdam(task, lens);
+  CPUKernelUtils::ParallelFor(task, lens);
 }
 
 template <typename T>
 void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector<AddressPtr> &inputs,
-                                                     const std::vector<AddressPtr> &outputs) {
-  auto var = reinterpret_cast<T *>(inputs[0]->addr);
-  auto m = reinterpret_cast<T *>(inputs[1]->addr);
-  auto v = reinterpret_cast<T *>(inputs[2]->addr);
-  auto lr = reinterpret_cast<T *>(inputs[3]->addr)[0];
-  auto beta1 = reinterpret_cast<T *>(inputs[4]->addr)[0];
-  auto beta2 = reinterpret_cast<T *>(inputs[5]->addr)[0];
-  auto epsilon = reinterpret_cast<T *>(inputs[6]->addr)[0];
-  auto decay = reinterpret_cast<T *>(inputs[7]->addr)[0];
-  auto gradient = reinterpret_cast<T *>(inputs[8]->addr);
+                                                     const std::vector<AddressPtr> &) {
+  auto var = reinterpret_cast<T *>(inputs[VAR]->addr);
+  auto m = reinterpret_cast<T *>(inputs[M]->addr);
+  auto v = reinterpret_cast<T *>(inputs[V]->addr);
+  auto lr = reinterpret_cast<T *>(inputs[LR]->addr)[kScalarIndex];
+  auto beta1 = reinterpret_cast<T *>(inputs[BETA1]->addr)[kScalarIndex];
+  auto beta2 = reinterpret_cast<T *>(inputs[BETA2]->addr)[kScalarIndex];
+  auto epsilon = reinterpret_cast<T *>(inputs[EPSILON]->addr)[kScalarIndex];
+  auto decay = reinterpret_cast<T *>(inputs[DECAY]->addr)[kScalarIndex];
+  auto gradient = reinterpret_cast<T *>(inputs[GRAD]->addr);
   const auto beta1_minus = 1 - beta1;
   const auto beta2_minus = 1 - beta2;
 
   // multithreading
-  size_t lens = inputs[0]->size > 0 ? static_cast<size_t>(inputs[0]->size / sizeof(float)) : 1;
+  size_t lens = inputs[VAR]->size > 0 ? static_cast<size_t>(inputs[VAR]->size / sizeof(float)) : 1;
   std::function<void(size_t, size_t)> task;
-
   task = [&](size_t start, size_t end) {
     size_t i = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end);
     // remaining
@@ -114,14 +93,14 @@ void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector<AddressPt
       var[i] -= lr * update;
     }
   };
-  ParallelForAdam(task, lens);
+  CPUKernelUtils::ParallelFor(task, lens);
 }
 
 void AdamWeightDecayCPUKernel::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
-  std::vector<size_t> var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
-  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
-  gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 8);
+  std::vector<size_t> var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, VAR);
+  dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, VAR);
+  gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, GRAD);
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
   if (input_num != kAdamWeightDecayInputSize) {
     MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but AdamWeightDecay needs 9 inputs.";
@@ -155,12 +134,12 @@ void AdamWeightDecayCPUKernel::CheckParam(const std::vector<kernel::AddressPtr>
   }
   size_t elem1_size = elem_num_ * kSizeFloat32;
   size_t elem2_size = gradient_dtype_ == kNumberTypeFloat16 ? elem_num_ * kSizeFloat16 : elem1_size;
-  if (inputs[0]->size != elem1_size || inputs[1]->size != elem1_size || inputs[2]->size != elem1_size ||
-      inputs[8]->size != elem2_size) {
+  if (inputs[VAR]->size != elem1_size || inputs[M]->size != elem1_size || inputs[V]->size != elem1_size ||
+      inputs[GRAD]->size != elem2_size) {
     MS_LOG(EXCEPTION) << "Error input data size!";
   }
-  if (inputs[3]->size != kSizeFloat32 || inputs[4]->size != kSizeFloat32 || inputs[5]->size != kSizeFloat32 ||
-      inputs[6]->size != kSizeFloat32 || inputs[7]->size != kSizeFloat32) {
+  if (inputs[LR]->size != kSizeFloat32 || inputs[BETA1]->size != kSizeFloat32 || inputs[BETA2]->size != kSizeFloat32 ||
+      inputs[EPSILON]->size != kSizeFloat32 || inputs[DECAY]->size != kSizeFloat32) {
     MS_LOG(EXCEPTION) << "The attribute beta, lr, epsilon and weight decay must be float!";
   }
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h
index 34c56bed352..fe6f309e38e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h
@@ -32,7 +32,6 @@ class AdamWeightDecayCPUKernel : public CPUKernel {
               const std::vector<AddressPtr> &outputs) override;
 
  private:
-  void ParallelForAdam(const CTask &task, size_t count);
   void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
   template <typename T, typename S>
   void LaunchFusedAdam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
@@ -41,6 +40,7 @@ class AdamWeightDecayCPUKernel : public CPUKernel {
   size_t elem_num_{0};
   TypeId dtype_{kTypeUnknown};
   TypeId gradient_dtype_{kTypeUnknown};
+  enum input_list_ { VAR, M, V, LR, BETA1, BETA2, EPSILON, DECAY, GRAD };
 };
 
 MS_REG_CPU_KERNEL(AdamWeightDecay,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc
index 238b5c5e9a3..578eda21a66 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc
@@ -76,27 +76,10 @@ void ApplyAdagradCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
 
   // multithreading
   size_t length = inputs[0]->size / sizeof(T);
-  size_t max_thread_num = std::thread::hardware_concurrency();
-  size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num;
-  std::vector<std::thread> threads;
-  threads.reserve(use_thread_num);
-  size_t start = 0;
-  const size_t batch_size = (length + use_thread_num - 1) / use_thread_num;
-
-  if (batch_size == 0) {
-    MS_LOG(EXCEPTION) << "Error occur in launch kernel";
-    return;
-  }
-  while (start < length) {
-    size_t end = (start + batch_size) > length ? length : (start + batch_size);
-    threads.emplace_back(
-      std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad<T *>, this, var, accum, lr, gradient, start, end));
-    start += batch_size;
-  }
-
-  for (auto &it : threads) {
-    it.join();
-  }
+  auto task = [this, &var, &accum, lr, gradient](size_t start, size_t end) {
+    LaunchApplyAdagrad(var, accum, lr, gradient, start, end);
+  };
+  CPUKernelUtils::ParallelForAutoSearch(task, length, &parallel_search_info_);
 
   // Copy result to output tensor
   auto output_var = reinterpret_cast<T *>(outputs[0]->addr);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc
index 85fdec3c565..5e85be5fe6c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc
@@ -13,10 +13,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
 #include <cmath>
 #include <string>
 #include <map>
-#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h"
+#include <functional>
 #include "runtime/device/cpu/cpu_device_address.h"
 
 namespace mindspore {
@@ -29,7 +31,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
       auto iter = base_iter;
       iter.SetPos(start);
       for (size_t i = start; i < end; i++) {
-        out[i] = input1[iter.GetInputPosA()] < input2[iter.GetInputPosB()];
+        auto x = input1[iter.GetInputPosA()];
+        auto y = input2[iter.GetInputPosB()];
+        out[i] = std::less<T>()(x, y);
         iter.GenNextPos();
       }
     };
@@ -37,7 +41,9 @@ void ArithmeticLogicCPUKernel<T>::Less(const T *input1, const T *input2, bool *o
   } else {
     base_iter.SetPos(0);
     for (size_t i = 0; i < output_size_; i++) {
-      out[i] = input1[base_iter.GetInputPosA()] < input2[base_iter.GetInputPosB()];
+      auto x = input1[base_iter.GetInputPosA()];
+      auto y = input2[base_iter.GetInputPosB()];
+      out[i] = std::less<T>()(x, y);
       base_iter.GenNextPos();
     }
   }
@@ -50,7 +56,9 @@ void ArithmeticLogicCPUKernel<T>::Equal(const T *input1, const T *input2, bool *
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] == input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::equal_to<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -64,7 +72,9 @@ void ArithmeticLogicCPUKernel<T>::NotEqual(const T *input1, const T *input2, boo
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] != input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::not_equal_to<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -106,7 +116,9 @@ void ArithmeticLogicCPUKernel<T>::Greater(const T *input1, const T *input2, bool
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] > input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::greater<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -120,7 +132,9 @@ void ArithmeticLogicCPUKernel<T>::GreaterEqual(const T *input1, const T *input2,
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] >= input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::greater_equal<T>()(x, y);
       iter.GenNextPos();
     }
   };
@@ -134,7 +148,9 @@ void ArithmeticLogicCPUKernel<T>::LessEqual(const T *input1, const T *input2, bo
     auto iter = base_iter;
     iter.SetPos(start);
     for (size_t i = start; i < end; i++) {
-      out[i] = input1[iter.GetInputPosA()] <= input2[iter.GetInputPosB()];
+      auto x = input1[iter.GetInputPosA()];
+      auto y = input2[iter.GetInputPosB()];
+      out[i] = std::less_equal<T>()(x, y);
       iter.GenNextPos();
     }
   };
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
index 7241f6163cf..b85568f505e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
index 12ae560be86..eee6e6f4985 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "runtime/device/kernel_info.h"
+#include "runtime/device/cpu/kernel_select_cpu.h"
 
 namespace mindspore {
 namespace kernel {
@@ -111,6 +112,11 @@ std::pair<bool, size_t> CPUKernelFactory::CPUKernelAttrCheck(const std::string &
     MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!";
     return std::make_pair(false, 0);
   }
+
+  if (device::cpu::IsDynamicParamKernel(kernel_name)) {
+    return std::make_pair(true, 0);
+  }
+
   auto kernel_attrs = GetSupportedKernelAttrList(kernel_name);
   if (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0) {
     auto op_info_ptr = mindspore::kernel::OpLib::FindOp(kernel_name, kernel::OpImplyType::kCPU);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc
index 743fef0cdb0..2f458845f70 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc
@@ -43,9 +43,9 @@ void DropoutGradCpuBwdKernel::InitKernel(const CNodePtr &kernel_node) {
 bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                      const std::vector<AddressPtr> &outputs) {
   if (dtype_ == kNumberTypeFloat16) {
-    DropoutBackwardKernel<float16>(inputs, outputs, num_count_, keep_prob_);
+    DropoutBackwardKernel<float16>(inputs, outputs, keep_prob_);
   } else if (dtype_ == kNumberTypeFloat32) {
-    DropoutBackwardKernel<float>(inputs, outputs, num_count_, keep_prob_);
+    DropoutBackwardKernel<float>(inputs, outputs, keep_prob_);
   } else {
     MS_LOG(ERROR) << "Input data type: " << dtype_ << " is not supported for DropoutGrad kernel for CPU.";
   }
@@ -55,8 +55,7 @@ bool DropoutGradCpuBwdKernel::Launch(const std::vector<AddressPtr> &inputs, cons
 
 template <typename T>
 void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr> &inputs,
-                                                    const std::vector<AddressPtr> &outputs, size_t num_count,
-                                                    float keep_prob) {
+                                                    const std::vector<AddressPtr> &outputs, float keep_prob) {
   auto *output = reinterpret_cast<T *>(outputs[0]->addr);
   const auto *input = reinterpret_cast<T *>(inputs[0]->addr);
   const auto *mask = reinterpret_cast<T *>(inputs[1]->addr);
@@ -70,7 +69,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
       input_tmp[i] = static_cast<float>(input[i]);
       mask_tmp[i] = static_cast<float>(mask[i]);
     }
-    DropoutGrad(input_tmp, mask_tmp, output_tmp, num_count_, scale);
+    DropoutGrad(input_tmp, mask_tmp, output_tmp, SizeToInt(num_count_), scale);
     for (size_t i = 0; i < num_count_; ++i) {
       output[i] = static_cast<float16>(output_tmp[i]);
     }
@@ -78,7 +77,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector<AddressPtr
     delete[] output_tmp;
     delete[] mask_tmp;
   } else if constexpr (std::is_same_v<T, float>) {
-    DropoutGrad(input, mask, output, num_count_, scale);
+    DropoutGrad(input, mask, output, SizeToInt(num_count_), scale);
   }
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h
index ab5889dc4b8..e7931d08303 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h
@@ -40,7 +40,7 @@ class DropoutGradCpuBwdKernel : public CPUKernel {
   TypeId dtype_{kTypeUnknown};
   template <typename T>
   void DropoutBackwardKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs,
-                             size_t num_count, float keep_prob);
+                             float keep_prob);
 };
 
 MS_REG_CPU_KERNEL(DropoutGrad, KernelAttr(), DropoutGradCpuBwdKernel);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc
index 394fcbbd786..926d8e172ef 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <map>
+
 #include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h"
+#include <string>
+#include <map>
 #include "common/thread_pool.h"
 #include "runtime/device/cpu/cpu_device_address.h"
 #include "nnacl/fp32_grad/activation_grad.h"
@@ -25,50 +27,50 @@ namespace mindspore {
 namespace kernel {
 template <typename T>
 void EltWiseGradCPUKernel<T>::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
-  if constexpr (std::is_same_v<T, float>) {
-    int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start);
-    if (ret == NNACL_ERR) {
-      MS_LOG(EXCEPTION) << "ReLUGrad failed.";
-    }
-  } else {
+  if constexpr (!std::is_same<T, float>::value) {
     MS_LOG(EXCEPTION) << "ReLUGrad only support float";
   }
+
+  int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "ReLUGrad execute failed.";
+  }
 }
 
 template <typename T>
 void EltWiseGradCPUKernel<T>::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
-  if constexpr (std::is_same_v<T, float>) {
-    int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start);
-    if (ret == NNACL_ERR) {
-      MS_LOG(EXCEPTION) << "ReLU6Grad failed.";
-    }
-  } else {
+  if constexpr (!std::is_same<T, float>::value) {
     MS_LOG(EXCEPTION) << "ReLU6Grad only support float";
   }
+
+  int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "ReLU6Grad execute failed.";
+  }
 }
 
 template <typename T>
 void EltWiseGradCPUKernel<T>::AbsGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
-  if constexpr (std::is_same_v<T, float>) {
-    int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start);
-    if (ret == NNACL_ERR) {
-      MS_LOG(EXCEPTION) << "AbsGrad failed.";
-    }
-  } else {
+  if constexpr (!std::is_same<T, float>::value) {
     MS_LOG(EXCEPTION) << "AbsGrad only support float";
   }
+
+  int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "AbsGrad execute failed.";
+  }
 }
 
 template <typename T>
 void EltWiseGradCPUKernel<T>::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
-  if constexpr (std::is_same_v<T, float>) {
-    int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start);
-    if (ret == NNACL_ERR) {
-      MS_LOG(EXCEPTION) << "SigmoidGrad failed.";
-    }
-  } else {
+  if constexpr (!std::is_same<T, float>::value) {
     MS_LOG(EXCEPTION) << "SigmoidGrad only support float";
   }
+
+  int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "SigmoidGrad execute failed.";
+  }
 }
 
 template <typename T>
@@ -80,14 +82,14 @@ void EltWiseGradCPUKernel<T>::SqrtGrad(const T *input1, const T *input2, T *out,
 
 template <typename T>
 void EltWiseGradCPUKernel<T>::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
-  if constexpr (std::is_same_v<T, float>) {
-    int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start);
-    if (ret == NNACL_ERR) {
-      MS_LOG(EXCEPTION) << "TanhGrad failed.";
-    }
-  } else {
+  if constexpr (!std::is_same<T, float>::value) {
     MS_LOG(EXCEPTION) << "TanhGrad only support float";
   }
+
+  int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "TanhGrad execute failed.";
+  }
 }
 
 template <typename T>
@@ -207,6 +209,18 @@ void EltWiseGradCPUKernel<T>::AcoshGrad(const T *input1, const T *input2, T *out
   }
 }
 
+template <typename T>
+void EltWiseGradCPUKernel<T>::SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const {
+  if constexpr (!std::is_same<T, float>::value) {
+    MS_LOG(EXCEPTION) << "SoftplusGrad only support float";
+  }
+
+  int ret = ::SoftplusGrad(input1 + start, input2 + start, end - start, out + start);
+  if (ret == NNACL_ERR) {
+    MS_LOG(EXCEPTION) << "SoftplusGrad execute failed.";
+  }
+}
+
 template <typename T>
 void EltWiseGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   MS_EXCEPTION_IF_NULL(kernel_node);
@@ -219,12 +233,19 @@ bool EltWiseGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inpu
                                      const std::vector<kernel::AddressPtr> &outputs) {
   static const std::map<std::string,
                         std::function<void(EltWiseGradCPUKernel *, const T *, const T *, T *, size_t, size_t)>>
-    elt_map{{"ReluGrad", &EltWiseGradCPUKernel<T>::ReluGrad},       {"ReLU6Grad", &EltWiseGradCPUKernel<T>::ReLU6Grad},
-            {"SigmoidGrad", &EltWiseGradCPUKernel<T>::SigmoidGrad}, {"AbsGrad", &EltWiseGradCPUKernel<T>::AbsGrad},
-            {"TanhGrad", &EltWiseGradCPUKernel<T>::TanhGrad},       {"SqrtGrad", &EltWiseGradCPUKernel<T>::SqrtGrad},
-            {"GeLUGrad", &EltWiseGradCPUKernel<T>::GeluGrad},       {"AsinGrad", &EltWiseGradCPUKernel<T>::AsinGrad},
-            {"ACosGrad", &EltWiseGradCPUKernel<T>::ACosGrad},       {"AtanGrad", &EltWiseGradCPUKernel<T>::AtanGrad},
-            {"AsinhGrad", &EltWiseGradCPUKernel<T>::AsinhGrad},     {"AcoshGrad", &EltWiseGradCPUKernel<T>::AcoshGrad}};
+    elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCPUKernel<T>::ReluGrad},
+            {prim::kPrimRelu6Grad->name(), &EltWiseGradCPUKernel<T>::ReLU6Grad},
+            {prim::kPrimSigmoidGrad->name(), &EltWiseGradCPUKernel<T>::SigmoidGrad},
+            {prim::kPrimAbsGrad->name(), &EltWiseGradCPUKernel<T>::AbsGrad},
+            {prim::kPrimTanhGrad->name(), &EltWiseGradCPUKernel<T>::TanhGrad},
+            {prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel<T>::SqrtGrad},
+            {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel<T>::GeluGrad},
+            {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel<T>::AsinGrad},
+            {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel<T>::ACosGrad},
+            {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel<T>::AtanGrad},
+            {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel<T>::AsinhGrad},
+            {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel<T>::AcoshGrad},
+            {prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel<T>::SoftplusGrad}};
   if (inputs.size() < 2 || outputs.size() != 1) {
     MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size()
                   << " inputs and " << outputs.size() << " output.";
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h
index f085a9a80d6..9f434981f75 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -48,6 +48,7 @@ class EltWiseGradCPUKernel : public CPUKernel {
   void AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
   void AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
   void AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
+  void SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const;
 
   std::string kernel_name_ = "";
 };
@@ -103,6 +104,10 @@ MS_REG_CPU_KERNEL_T(
   AcoshGrad,
   KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
   EltWiseGradCPUKernel, float);
+MS_REG_CPU_KERNEL_T(
+  SoftplusGrad,
+  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+  EltWiseGradCPUKernel, float);
 }  // namespace kernel
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc
index ecb66469d0d..0d76cff47a9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc
@@ -13,39 +13,47 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h"
+#include <string>
+#include <unordered_map>
 #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h"
 #include "runtime/device/cpu/cpu_device_address.h"
 #include "utils/ms_utils.h"
 
 namespace mindspore {
 namespace kernel {
+namespace {
+struct DescParam {
+  dnnl::algorithm algorithm;
+  float alpha = 0.f;
+  float beta = 0.f;
+};
+}  // namespace
+
 dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node,
                                                                     const dnnl::memory::desc src_desc) {
+  static const std::unordered_map<std::string, DescParam> eltWiseOpDescMap{
+    {prim::kPrimRelu->name(), DescParam{dnnl::algorithm::eltwise_relu}},
+    {prim::kPrimRelu6->name(), DescParam{dnnl::algorithm::eltwise_clip, 0.f, 6.f}},
+    {prim::kPrimAbs->name(), DescParam{dnnl::algorithm::eltwise_abs}},
+    {prim::kPrimExp->name(), DescParam{dnnl::algorithm::eltwise_exp}},
+    {prim::kPrimLog->name(), DescParam{dnnl::algorithm::eltwise_log}},
+    {prim::kPrimSigmoid->name(), DescParam{dnnl::algorithm::eltwise_logistic}},
+    {prim::kPrimSqrt->name(), DescParam{dnnl::algorithm::eltwise_sqrt}},
+    {prim::kPrimSquare->name(), DescParam{dnnl::algorithm::eltwise_square}},
+    {prim::kPrimTanh->name(), DescParam{dnnl::algorithm::eltwise_tanh}},
+    {prim::kPrimElu->name(), DescParam{dnnl::algorithm::eltwise_elu, 1.f, 0.f}},
+    {prim::kPrimSoftplus->name(), DescParam{dnnl::algorithm::eltwise_soft_relu}},
+  };
+
   std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
-  if (kernel_name == "ReLU") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0);
-  } else if (kernel_name == "ReLU6") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, src_desc, 0.0, 6.0);
-  } else if (kernel_name == "Abs") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc);
-  } else if (kernel_name == "Exp") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc);
-  } else if (kernel_name == "Log") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc);
-  } else if (kernel_name == "Sigmoid") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc);
-  } else if (kernel_name == "Sqrt") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc);
-  } else if (kernel_name == "Square") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc);
-  } else if (kernel_name == "Tanh") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc);
-  } else if (kernel_name == "Elu") {
-    return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_elu, src_desc, 1.0);
-  } else {
-    MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name;
+  const auto desc_pair = eltWiseOpDescMap.find(kernel_name);
+  if (desc_pair == eltWiseOpDescMap.end()) {
+    MS_LOG(EXCEPTION) << "EltWiseCPUKernel does not support " << kernel_name;
   }
+  return dnnl::eltwise_forward::desc(DnnlForward, desc_pair->second.algorithm, src_desc, desc_pair->second.alpha,
+                                     desc_pair->second.beta);
 }
 
 void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h
index 18d0ae24548..cd695e2a9e6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,6 +56,8 @@ MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutpu
                   EltWiseCPUKernel);
 MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
                   EltWiseCPUKernel);
+MS_REG_CPU_KERNEL(Softplus, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                  EltWiseCPUKernel);
 }  // namespace kernel
 }  // namespace mindspore
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt
index c9986d8a7bb..1b4f1e4d969 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt
@@ -36,6 +36,24 @@ file(GLOB KERNEL_SRC
     ${NNACL_DIR}/fp32_grad/*.c
 )
 
+if(MSLITE_STRING_KERNEL)
+    file(GLOB KERNEL_SRC_INFER_STRING
+            ${NNACL_DIR}/infer/string/*.c
+            )
+    set(KERNEL_SRC
+            ${KERNEL_SRC}
+            ${KERNEL_SRC_INFER_STRING}
+            )
+endif()
+if(MSLITE_CONTROL_TENSORLIST)
+    file(GLOB KERNEL_SRC_INFER_CONTROL_TENSORLIST
+            ${NNACL_DIR}/infer/control/*.c
+            )
+    set(KERNEL_SRC
+            ${KERNEL_SRC}
+            ${KERNEL_SRC_INFER_CONTROL_TENSORLIST}
+            )
+endif()
 if(PLATFORM_ARM64)
     file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
     set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S
index 8dceae7ac54..8bfaa90a5b1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S
@@ -5,7 +5,8 @@
 
 //void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, 
 //                      const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
-//                      int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, int peroc);
+//                      const int *multiplier, const int *left_shift, const int *right_shift, int row,
+//                      int col, int stride, int peroc);
 
 // x0: a(left matrix ptr)
 // x1: b(right matrix ptr)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S
index c3f473880b1..36546f26853 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S
@@ -4,8 +4,9 @@
 .align 5
 
 //void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums,
-//                     const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift,
-//                     int32_t *right_shift, size_t stride, size_t filter_peroc, int32_t *filter_zp)
+//                     const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier,
+//                     const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc,
+//                     const int32_t *filter_zp)
 
 // x0: a(left matrix ptr)
 // x1: b(right matrix ptr)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c
index fe6bb74906e..d8900df0b44 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c
@@ -23,19 +23,19 @@ void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_sh
   int in_h = in_shape[1];
   int in_w = in_shape[2];
   int in_c = in_shape[3];
-  size_t stride_h = block_w * out_n;
-  size_t output_offset = 0;
-  size_t copy_size = in_c * data_size;
-  size_t in_stride_h = in_w * in_c;
-  size_t in_stride_n = in_stride_h * in_h;
+  int stride_h = block_w * out_n;
+  int output_offset = 0;
+  int copy_size = in_c * data_size;
+  int in_stride_h = in_w * in_c;
+  int in_stride_n = in_stride_h * in_h;
   for (int n = 0; n < out_n; ++n) {
     for (int h = 0; h < in_h; ++h) {
-      size_t h_offset = h * in_stride_h;
+      int h_offset = h * in_stride_h;
       for (int bh = 0; bh < block_h; ++bh) {
         for (int w = 0; w < in_w; ++w) {
-          size_t w_offset = w * in_c;
+          int w_offset = w * in_c;
           for (int bw = 0; bw < block_w; ++bw) {
-            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
+            int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
             memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size);
             output_offset += copy_size;
           }
@@ -49,6 +49,9 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i
                          const int *crops, int data_size) {
   int block_h = block[0];
   int block_w = block[1];
+  if (block_h == 0 || block_w == 0) {
+    return;
+  }
   int in_h = in_shape[1];
   int in_w = in_shape[2];
   int in_c = in_shape[3];
@@ -61,27 +64,27 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i
   int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w);
   int w_valid_end = in_w * block_w - crops[3] - 1;
 
-  size_t stride_h = block_w * out_n;
-  size_t output_offset = 0;
-  size_t copy_size = in_c * data_size;
-  size_t in_stride_h = in_w * in_c;
-  size_t in_stride_n = in_stride_h * in_h;
+  int stride_h = block_w * out_n;
+  int output_offset = 0;
+  int copy_size = in_c * data_size;
+  int in_stride_h = in_w * in_c;
+  int in_stride_n = in_stride_h * in_h;
   for (int n = 0; n < out_n; ++n) {
     for (int h = h_start; h < h_end; ++h) {
-      size_t h_offset = h * in_stride_h;
+      int h_offset = h * in_stride_h;
       for (int bh = 0; bh < block_h; ++bh) {
-        size_t h_index = h * block_h + bh;
+        int h_index = h * block_h + bh;
         if (h_index < h_valid_begin || h_index > h_valid_end) {
           continue;
         }
         for (int w = w_start; w < w_end; ++w) {
-          size_t w_offset = w * in_c;
+          int w_offset = w * in_c;
           for (int bw = 0; bw < block_w; ++bw) {
-            size_t w_index = w * block_w + bw;
+            int w_index = w * block_w + bw;
             if (w_index < w_valid_begin || w_index > w_valid_end) {
               continue;
             }
-            size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
+            int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset;
             memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size);
             output_offset += copy_size;
           }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c
index ede7fc7166a..a4ea4318d58 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c
@@ -62,7 +62,7 @@ void pad_input_shape(int *input_shape, int input_shape_len, int output_shape_len
     shape_info->input_shape_size_ = dim_max + 1;                                                       \
                                                                                                        \
     size_t before_dim_elements_num = accumulate(input_shape, 0, dim_max - 1);                          \
-    size_t after_dim_elements_num = input_shape[dim_max];                                              \
+    size_t after_dim_elements_num = (size_t)(input_shape[dim_max]);                                    \
     size_t dim_broadcast_rate = (size_t)(output_shape[dim_max] / input_shape[dim_max]);                \
     for (size_t i = 0; i < before_dim_elements_num; ++i) {                                             \
       const type *in_ptr = input + i * after_dim_elements_num;                                         \
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c
index 747139835dc..bfef2732099 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c
@@ -24,15 +24,18 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si
   }
 
   int after_axis_size = data_size;
-  for (size_t i = axis + 1; i < shape_size; ++i) {
+  for (size_t i = (size_t)(axis) + 1; i < shape_size; ++i) {
     after_axis_size *= inputs_output_shape[0][i];
   }
   int axis_offset = 0;
   uint8_t *dst_base = (output);
-  size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis];
+  int output_stride = after_axis_size * inputs_output_shape[input_num][axis];
   for (int i = 0; i < input_num; ++i) {
     const uint8_t *src_base = (input[i]);
-    size_t input_stride = after_axis_size * inputs_output_shape[i][axis];
+    if (inputs_output_shape[i] == NULL) {
+      continue;
+    }
+    int input_stride = after_axis_size * inputs_output_shape[i][axis];
     int offset = UP_DIV(input_stride, thread_num);
     int count = input_stride - offset * task_id;
     if (count <= 0) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c
index e2b16837e44..bc3d3a3c1fe 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c
@@ -22,17 +22,17 @@ void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, c
   int32_t in_shape_dim1 = in_shape[1];
   size_t copy_size = block_size * param->out_stride_dim2_ * param->data_type_size_;
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_offset_n = i * param->in_stride_dim0_;
-    size_t out_offset_n = i * param->out_stride_dim0_;
+    int in_offset_n = i * param->in_stride_dim0_;
+    int out_offset_n = i * param->out_stride_dim0_;
     for (int j = 0; j < in_shape_dim1; ++j) {
-      size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_;
-      size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_;
+      int in_offset_h = in_offset_n + j * param->in_stride_dim1_;
+      int out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_;
       for (int k = 0; k < in_shape_dim2; ++k) {
-        size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_;
-        size_t out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_;
+        int in_offset_w = in_offset_h + k * param->in_stride_dim2_;
+        int out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_;
         for (int l = 0; l < block_size; ++l) {
-          size_t out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_;
-          size_t in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_;
+          int out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_;
+          int in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_;
           memcpy((int8_t *)output + out_offset, (int8_t *)input + in_offset, copy_size);
         }
       }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
index b17000d3573..85d7c630562 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c
@@ -118,7 +118,9 @@ int B(const float *poly_array, float *matrix_b, int in_unit) {
   float matrix_t[MAX_LEN];   // n * in_unit
 
   T(poly_array, matrix_t, n);
-  LT(poly_array, matrix_lt, n);
+  if (LT(poly_array, matrix_lt, n) != NNACL_OK) {
+    return NNACL_ERR;
+  }
   MatrixTranspose(matrix_lt, matrix_l, n, n);
   MatrixMultiply(matrix_l, matrix_t, matrix_b, n, n, in_unit);
   matrix_b[in_unit * in_unit - 1] = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
index e252a696165..5773c6d74c7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c
@@ -47,43 +47,43 @@ void DoSlice(const void *input, void *output, SliceParameter *param, int thread_
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
-  size_t out_stride[8];
+  int out_stride[8];
   out_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     out_stride[i] = out_stride[i + 1] * param->size_[i + 1];
   }
 
-  size_t count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_);
-  size_t thread_begin = thread_id * count_per_thread;
-  size_t thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread);
-  size_t copy_size = param->size_[7] * data_size;
-  size_t in_stride[8];
+  int count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_);
+  int thread_begin = thread_id * count_per_thread;
+  int thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread);
+  int copy_size = param->size_[7] * data_size;
+  int in_stride[8];
   in_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     in_stride[i] = param->shape_[i + 1] * in_stride[i + 1];
   }
 
   for (int ii = 0; ii < param->size_[0]; ++ii) {
-    size_t out_offset0 = ii * out_stride[0];
-    size_t in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7];
+    int out_offset0 = ii * out_stride[0];
+    int in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7];
     for (int jj = 0; jj < param->size_[1]; ++jj) {
-      size_t out_offset1 = jj * out_stride[1] + out_offset0;
-      size_t in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0;
+      int out_offset1 = jj * out_stride[1] + out_offset0;
+      int in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0;
       for (int kk = 0; kk < param->size_[2]; ++kk) {
-        size_t out_offset2 = kk * out_stride[2] + out_offset1;
-        size_t in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1;
+        int out_offset2 = kk * out_stride[2] + out_offset1;
+        int in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1;
         for (int ll = 0; ll < param->size_[3]; ++ll) {
-          size_t out_offset3 = ll * out_stride[3] + out_offset2;
-          size_t in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2;
+          int out_offset3 = ll * out_stride[3] + out_offset2;
+          int in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2;
           for (int i = 0; i < param->size_[4]; ++i) {
-            size_t out_offset4 = i * out_stride[4] + out_offset3;
-            size_t in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3;
-            for (size_t j = thread_begin; j < thread_end; ++j) {
-              size_t out_offset5 = j * out_stride[5] + out_offset4;
-              size_t in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4;
+            int out_offset4 = i * out_stride[4] + out_offset3;
+            int in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3;
+            for (int j = thread_begin; j < thread_end; ++j) {
+              int out_offset5 = j * out_stride[5] + out_offset4;
+              int in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4;
               for (int k = 0; k < param->size_[6]; ++k) {
-                size_t out_offset6 = k * out_stride[6] + out_offset5;
-                size_t in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5;
+                int out_offset6 = k * out_stride[6] + out_offset5;
+                int in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5;
                 memcpy(int8_out + out_offset6 * data_size, int8_in + in_offset6 * data_size, copy_size);
               }
             }
@@ -105,8 +105,8 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i
   int8_t *int8_in = (int8_t *)input;
   int8_t *int8_out = (int8_t *)output;
 
-  size_t copy_size = param->size_[7] * data_size;
-  size_t in_stride[8];
+  int copy_size = param->size_[7] * data_size;
+  int in_stride[8];
   in_stride[7] = 1;
   for (int i = 6; i >= 0; --i) {
     in_stride[i] = param->shape_[i + 1] * in_stride[i + 1];
@@ -115,9 +115,9 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i
   for (int i = 0; i < DIMENSION_8D; ++i) {
     axis_copy_flag[i] = WhetherCopyByAxis(param->begin_, param->end_, param->shape_, i);
   }
-  size_t out_offset = 0;
+  int out_offset = 0;
   for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) {
-    size_t in_offset0 = dim0 * in_stride[0] + param->begin_[7];
+    int in_offset0 = dim0 * in_stride[0] + param->begin_[7];
 #define FAST_COPY_IF_NEED(rank)                                                      \
   if (axis_copy_flag[rank]) {                                                        \
     int left_block_num = param->end_[rank] - dim##rank;                              \
@@ -128,24 +128,24 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i
     continue;                                                                        \
   }
     FAST_COPY_IF_NEED(0);
-    for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) {
-      size_t in_offset1 = dim1 * in_stride[1] + in_offset0;
+    for (int dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) {
+      int in_offset1 = dim1 * in_stride[1] + in_offset0;
       FAST_COPY_IF_NEED(1);
       for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) {
-        size_t in_offset2 = in_offset1 + dim2 * in_stride[2];
+        int in_offset2 = in_offset1 + dim2 * in_stride[2];
         FAST_COPY_IF_NEED(2);
         for (int32_t dim3 = param->begin_[3]; dim3 < param->end_[3]; ++dim3) {
-          size_t in_offset3 = in_offset2 + dim3 * in_stride[3];
+          int in_offset3 = in_offset2 + dim3 * in_stride[3];
           FAST_COPY_IF_NEED(3);
           for (int32_t dim4 = param->begin_[4]; dim4 < param->end_[4]; ++dim4) {
-            size_t in_offset4 = in_offset3 + dim4 * in_stride[4];
+            int in_offset4 = in_offset3 + dim4 * in_stride[4];
             FAST_COPY_IF_NEED(4);
             for (int32_t dim5 = param->begin_[5]; dim5 < param->end_[5]; ++dim5) {
-              size_t in_offset5 = in_offset4 + dim5 * in_stride[5];
+              int in_offset5 = in_offset4 + dim5 * in_stride[5];
               FAST_COPY_IF_NEED(5);
 #undef FAST_COPY_IF_NEED
               for (int32_t dim6 = param->begin_[6]; dim6 < param->end_[6]; ++dim6) {
-                size_t in_offset6 = in_offset5 + dim6 * in_stride[6];
+                int in_offset6 = in_offset5 + dim6 * in_stride[6];
                 memcpy(int8_out + out_offset * data_size, int8_in + in_offset6 * data_size, copy_size);
                 out_offset += param->size_[7];
               }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c
index 9c20b5af481..9f7f70bab58 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c
@@ -21,10 +21,6 @@
 
 int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit,
             SplitParameter *split_param, int data_size) {
-  if (in_data == NULL || out_data == NULL) {
-    return NNACL_ERR;
-  }
-
   int8_t *int8_in = (int8_t *)in_data;
 
   int num_split = split_param->num_split_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c
index dc2711237df..b7771693ce3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c
@@ -26,15 +26,15 @@ void DoCopyData(const uint8_t *input_data, uint8_t *output_data, size_t size, si
 }
 
 int DoTileOneDimension(uint8_t *input_data, uint8_t *output_data, size_t dim, const TileParameter *parameter) {
-  size_t src_dim_size = parameter->in_shape_[dim];
+  int src_dim_size = parameter->in_shape_[dim];
   if (dim == parameter->in_dim_ - 1) {
     DoCopyData(input_data, output_data, src_dim_size, parameter->data_size_, parameter->multiples_[dim]);
     return 0;
   }
-  for (size_t i = 0; i < src_dim_size; ++i) {
-    for (size_t j = 0; j < parameter->multiples_[dim]; ++j) {
-      size_t in_pos = parameter->in_strides_[dim] * i;
-      size_t out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size);
+  for (int i = 0; i < src_dim_size; ++i) {
+    for (int j = 0; j < parameter->multiples_[dim]; ++j) {
+      int in_pos = parameter->in_strides_[dim] * i;
+      int out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size);
       DoTileOneDimension(input_data + in_pos * parameter->data_size_, output_data + out_pos * parameter->data_size_,
                          dim + 1, parameter);
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h
index ccd91d1663d..b91bae0ced9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h
@@ -18,20 +18,20 @@
 #define MINDSPORE_NNACL_BASE_TILE_H_
 
 #include "nnacl/op_base.h"
-
+#define MAX_TILE_DIM_SIZE 8
 typedef struct TileParameter {
   // primitive parameter
   OpParameter op_parameter_;
-  int multiples_[8];
-  int dims_[8];
+  int multiples_[MAX_TILE_DIM_SIZE];
+  int dims_[MAX_TILE_DIM_SIZE];
   size_t dims_size_;
   size_t multiples_size_;
 
   // shape correlative
-  int in_shape_[8];
-  int out_shape_[8];
-  int in_strides_[8];
-  int out_strides_[8];
+  int in_shape_[MAX_TILE_DIM_SIZE];
+  int out_shape_[MAX_TILE_DIM_SIZE];
+  int in_strides_[MAX_TILE_DIM_SIZE];
+  int out_strides_[MAX_TILE_DIM_SIZE];
 
   // other parameter
   int in_dim_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c
index d47051d981b..de5c507a14d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c
@@ -184,7 +184,7 @@
     for (int i = dims - 1; i > 0; --i) {                                                               \
       *(size + i - 1) = *(size + i) * output_shape[i];                                                 \
     }                                                                                                  \
-    for (size_t idx = 0; idx < (*size) * output_shape[0]; ++idx) {                                     \
+    for (int idx = 0; idx < (*size) * output_shape[0]; ++idx) {                                        \
       int pos = idx;                                                                                   \
       int output_idx = 0;                                                                              \
       int input_idx = 0;                                                                               \
@@ -215,7 +215,7 @@
       return;                                                                                        \
     }                                                                                                \
     count = MSMIN(offset_size, count);                                                               \
-    for (size_t idx = task_offset; idx < task_offset + count; ++idx) {                               \
+    for (int idx = task_offset; idx < task_offset + count; ++idx) {                                  \
       int pos = idx;                                                                                 \
       int output_idx = 0;                                                                            \
       int input_idx = 0;                                                                             \
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c
index a6e3f265939..7f4e7817a93 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c
@@ -16,15 +16,19 @@
 
 #include "nnacl/common_func.h"
 
-int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
+int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) {
   return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3;
 }
 
-int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
+int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2) {
   return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3];
 }
 
-int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); }
+int Offset4d(const int *shape, const int *dims) { return Offset(shape, dims[0], dims[1], dims[2], dims[3]); }
+
+int Offset6d(const int *shape, const int *dims) {
+  return ((OffsetComm(shape, dims[0], dims[1], dims[2]) + dims[3]) * shape[4] + dims[4]) * shape[5];
+}
 
 int8_t MinInt8(int8_t a, int8_t b) { return b ^ ((a ^ b) & -(a < b)); }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h
index f7ca4f0b2c6..74f418d430a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h
@@ -36,9 +36,10 @@ void ReluFp32C8(float *data, float *dst, int ele_num);
 void Relu6Fp32C8(float *data, float *dst, int ele_num);
 #endif
 #endif
-int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3);
-int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2);
-int offset4d(const int *shape, const int *dims);
+int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3);
+int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2);
+int Offset4d(const int *shape, const int *dims);
+int Offset6d(const int *shape, const int *dims);
 
 static inline bool isAddOverflow(int32_t x, int32_t y) {
   int32_t sum = x + y;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
index 0dd833af6bc..e0d69be8409 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c
@@ -19,16 +19,22 @@
 
 void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape,
              const int *paddings, const int tid, const int thread_num) {
-  int in[4], out[4];
+  int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS];
   for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
     out[0] = in[0] + paddings[0];
     for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) {
       out[1] = in[1] + paddings[2];
       for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
         out[2] = in[2] + paddings[4];
-        float16_t *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
-        const float16_t *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
-        memcpy(dst, src, input_shape[3] * sizeof(float16_t));
+        for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
+          out[3] = in[3] + paddings[6];
+          for (in[4] = 0; in[4] < input_shape[4]; in[4]++) {
+            out[4] = in[4] + paddings[8];
+            float16_t *dst = output_data + Offset6d(output_shape, out) + paddings[10];
+            const float16_t *src = input_data + Offset6d(input_shape, in);
+            memcpy(dst, src, input_shape[5] * sizeof(float16_t));
+          }
+        }
       }
     }
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
index 728a38964a7..19b0b7bd428 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c
@@ -152,17 +152,15 @@ int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float
   return NNACL_OK;
 }
 
-int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                        const float *gradient, size_t start, size_t end) {
+size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
+                           float decay, const float *gradient, size_t start, size_t end) {
   size_t c1 = start;
 #ifdef ENABLE_AVX512
-  const float beta1_minus = 1 - beta1;
-  const float beta2_minus = 1 - beta2;
   struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r;
   beta1_r.data = _mm512_set1_ps(beta1);
   beta2_r.data = _mm512_set1_ps(beta2);
-  beta1_minus_r.data = _mm512_set1_ps(beta1_minus);
-  beta2_minus_r.data = _mm512_set1_ps(beta2_minus);
+  beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1);
+  beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2);
   lr_neg_r.data = _mm512_set1_ps(-lr);
   epsilon_r.data = _mm512_set1_ps(epsilon);
   decay_r.data = _mm512_set1_ps(decay);
@@ -260,17 +258,15 @@ int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, f
   return c1;
 }
 
-int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                  const int16_t *gradient16, size_t start, size_t end) {
+size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                     const int16_t *gradient16, size_t start, size_t end) {
   size_t c1 = start;
 #ifdef ENABLE_AVX512
-  const float beta1_minus = 1 - beta1;
-  const float beta2_minus = 1 - beta2;
   struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r;
   beta1_r.data = _mm512_set1_ps(beta1);
   beta2_r.data = _mm512_set1_ps(beta2);
-  beta1_minus_r.data = _mm512_set1_ps(beta1_minus);
-  beta2_minus_r.data = _mm512_set1_ps(beta2_minus);
+  beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1);
+  beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2);
   lr_neg_r.data = _mm512_set1_ps(-lr);
   epsilon_r.data = _mm512_set1_ps(epsilon);
   decay_r.data = _mm512_set1_ps(decay);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
index 3690cd646e6..b4f02754d27 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h
@@ -71,10 +71,10 @@ int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2,
              size_t start, size_t end, bool use_nesterov);
 int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
                   const float *gradient, size_t start, size_t end, bool use_nesterov);
-int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                        const float *gradient, size_t start, size_t end);
-int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
-                  const int16_t *gradient16, size_t start, size_t end);
+size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon,
+                           float decay, const float *gradient, size_t start, size_t end);
+size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay,
+                     const int16_t *gradient16, size_t start, size_t end);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
index 21ea9658088..cb3523edfea 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c
@@ -49,8 +49,8 @@ void ArgMaxTopK1(const float *input, void *output, float *output_value, const Ar
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
-    size_t output_offset = i * after_axis_count;
-    size_t input_offset = output_offset * axis_count;
+    int output_offset = i * after_axis_count;
+    int input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = -FLT_MAX;
       int index = 0;
@@ -79,8 +79,8 @@ void ArgMinTopK1(const float *input, void *output, float *output_value, const Ar
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < pre_axis_count; ++i) {
-    size_t output_offset = i * after_axis_count;
-    size_t input_offset = output_offset * axis_count;
+    int output_offset = i * after_axis_count;
+    int input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = FLT_MAX;
       int index = 0;
@@ -109,13 +109,13 @@ void ArgMinMaxDim0(const float *input, void *output, float *output_value, const
   int *outputint = (int *)output;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
+      int offset = param->in_strides_[0] * j + i;
       param->arg_elements_[j].index_ = j;
       param->arg_elements_[j].data_.f_data_ = input[offset];
     }
     qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func);
     for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
+      int out_offset = j * param->out_strides_[0] + i;
       if (param->out_value_) {
         outputfp32[out_offset] = param->arg_elements_[j].data_.f_data_;
       } else {
@@ -135,17 +135,17 @@ void ArgMinMaxDim1(const float *input, void *output, float *output_value, const
   int *outputint = (int *)output;
   int in_shape1 = in_shape[1];
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < param->in_strides_[1]; ++j) {
       for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
+        int offset = param->in_strides_[1] * k + in_dim0_offset + j;
         param->arg_elements_[k].index_ = k;
         param->arg_elements_[k].data_.f_data_ = input[offset];
       }
       qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func);
       for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
+        int out_offset = out_dim0_offset + j + k * param->out_strides_[1];
         if (param->out_value_) {
           outputfp32[out_offset] = param->arg_elements_[k].data_.f_data_;
         } else {
@@ -167,20 +167,20 @@ void ArgMinMaxDim2(const float *input, void *output, float *output_value, const
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
       for (int k = 0; k < param->in_strides_[2]; ++k) {
         for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
+          int offset = param->in_strides_[2] * l + k + in_dim1_offset;
           param->arg_elements_[l].index_ = l;
           param->arg_elements_[l].data_.f_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
+          int out_offset = out_dim1_offset + k + l * param->out_strides_[2];
           if (param->out_value_) {
             outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
           } else {
@@ -203,26 +203,26 @@ void ArgMinMaxDim3(const float *input, void *output, float *output_value, const
   float *outputfp32 = (float *)output;
   int *outputint = (int *)output;
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
       for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
+        int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
+        int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
         for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
+          int offset = l + in_dim2_offset;
           param->arg_elements_[l].index_ = l;
           param->arg_elements_[l].data_.f_data_ = input[offset];
         }
         qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func);
         for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
+          int out_offset = out_dim2_offset + l;
           if (param->out_value_) {
             outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_;
           } else {
-            outputint[out_offset] = param->arg_elements_[l].index_;
+            outputint[out_offset] = (int)(param->arg_elements_[l].index_);
           }
           if (output_value != NULL) {
             output_value[out_offset] = param->arg_elements_[l].data_.f_data_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
index a7040ce33ee..1379226d7ef 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c
@@ -21,10 +21,10 @@ void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_p
   if (size == 0) {
     return;
   }
-  for (int oc = 0; oc < output_channel; oc++) {
+  for (size_t oc = 0; oc < output_channel; oc++) {
     int oc_div = oc / size;
     int oc_mod = oc % size;
-    for (int hw = 0; hw < plane_size; hw++) {
+    for (int hw = 0; hw < (int)plane_size; hw++) {
       int src_index = oc_div * size * plane_stride + hw * size + oc_mod;
       int dst_index = hw * oc_stride + oc;
       float value = src_ptr_[src_index];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
index 4b4bfa43257..621abed1dcc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c
@@ -52,7 +52,8 @@ int ConvDw(float *output_data, const float *input_data, const float *weight_data
       int end_kh = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->input_h_ - ih_origin, conv_param->dilation_h_));
 
       for (int ow = 0; ow < conv_param->output_w_; ow++) {
-        memcpy(dst_data + ow * conv_param->output_channel_, bias_data, conv_param->output_channel_ * sizeof(float));
+        memcpy(dst_data + ow * conv_param->output_channel_, bias_data,
+               conv_param->output_channel_ * (int)(sizeof(float)));
       }
       for (int kh = start_kh; kh < end_kh; kh++) {
         int ih = ih_origin + conv_param->dilation_w_ * kh;
@@ -764,10 +765,10 @@ void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, c
                            int output_width, int input_stride, bool relu, bool relu6, int kernel) {
   do {
     float **in = input;
-    size_t c = channels;
+    size_t c = (size_t)channels;
     const float *w = weights;
     float *out = output;
-    memcpy(out, bias, channels * sizeof(float));
+    memcpy(out, bias, channels * (int)sizeof(float));
     for (; c >= C4NUM; c -= C4NUM) {
       for (int i = 0; i < C4NUM; i++) {
         for (int k = 0; k < kernel; k++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
index 31cc38b5606..c7c457c5fe9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c
@@ -61,7 +61,7 @@ void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *ds
   for (int c = 0; c < oc8; c += 8) {
     float *dst_ptr = tmp + c * output_plane;
     const float *src_ptr = src + c * in_plane_round * kernel_plane;
-    memset(dst_ptr, 0, output_plane * C8NUM * sizeof(float));
+    memset(dst_ptr, 0, output_plane * C8NUM * (int)sizeof(float));
 
     for (int ih = 0; ih < conv_param->input_h_; ih++) {
       for (int iw = 0; iw < conv_param->input_w_; iw++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
index 8664ec56c5f..9fdfd4eae5b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c
@@ -43,7 +43,7 @@ int CopyData(float *input_data, const int *ids, float *output_data, int num,
     parameter->is_regulated_[ids[num]] = true;
   }
 
-  memcpy(out_data, in_data, sizeof(float) * parameter->layer_size_);
+  memcpy(out_data, in_data, sizeof(float) * (size_t)(parameter->layer_size_));
   return NNACL_OK;
 }
 
@@ -52,7 +52,7 @@ int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const
   if (parameter->op_parameter_.thread_num_ == 0) {
     return NNACL_PARAM_INVALID;
   }
-  for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
+  for (int i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) {
     int ret = CopyData(input_data, ids, output_data, i, parameter);
     if (ret != NNACL_OK) {
       return ret;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
index 88cfdacf2a0..d1165298265 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c
@@ -21,7 +21,7 @@
 int GatherNd(const float *input, float *output, const int *in_offset, int area, int count) {
   int i = 0;
   for (i = 0; i < count; i++) {
-    (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float));
+    (void)memcpy(output + area * i, input + in_offset[i], (size_t)(area) * sizeof(float));
   }
   return NNACL_OK;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c
index 9a94c35e46e..41a9b1ffaf4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c
@@ -41,7 +41,7 @@ void PackLstmBias(float *dst, const float *src, int batch, int col, int col_alig
   for (int i = 0; i < unidirectional_batch; i++) {
     const float *src_batch = src + i * col;
     float *dst_batch = dst + i * col_align;
-    memcpy(dst_batch, src_batch, col * sizeof(float));
+    memcpy(dst_batch, src_batch, col * (int)sizeof(float));
   }
   if (is_bidirectional) {
     const float *backward_src = src + batch * col;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c
index 530fd6c6ac3..4b1702e55b5 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c
@@ -263,9 +263,9 @@ void RowMajor2Col12Major_arm32(const float *src_c, float *dst_c, size_t col) {
 void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) {
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
-  size_t ri = 0;
+  int ri = 0;
   for (; ri < (row / C12NUM * C12NUM); ri += C12NUM) {
-    size_t ci = 0;
+    int ci = 0;
     for (; ci < (col / C4NUM * C4NUM); ci += C4NUM) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C12NUM;
@@ -340,7 +340,7 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col)
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C12NUM;
-      for (size_t i = 0; i < C12NUM; i++) {
+      for (int i = 0; i < C12NUM; i++) {
         dst_c[i] = src_c[i * col];
       }
     }
@@ -348,16 +348,15 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col)
     dst_r += C12NUM * col;
   }
   for (; ri < row; ri++, dst_r++, src_r += col) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C12NUM] = src_r[i];
     }
   }
   for (; ri < UP_ROUND(row, C12NUM); ri++, dst_r++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C12NUM] = 0;
     }
   }
-  return;
 }
 
 #ifdef ENABLE_ARM64
@@ -532,20 +531,20 @@ void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) {
 #endif
 #endif
 void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) {
-  size_t row8 = row / C8NUM * C8NUM;
+  int row8 = row / C8NUM * C8NUM;
 #ifdef ENABLE_ARM64
-  size_t col_skip = col / C8NUM * C8NUM;
+  int col_skip = col / C8NUM * C8NUM;
   int skip_size = C8NUM;
 #else
-  size_t col_skip = col / C4NUM * C4NUM;
+  int col_skip = col / C4NUM * C4NUM;
   int skip_size = C4NUM;
 #endif
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
 
-  size_t ri = 0;
+  int ri = 0;
   for (; ri < row8; ri += C8NUM) {
-    size_t ci = 0;
+    int ci = 0;
     for (; ci < col_skip; ci += skip_size) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C8NUM;
@@ -593,7 +592,7 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col)
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C8NUM;
-      for (size_t i = 0; i < C8NUM; i++) {
+      for (int i = 0; i < C8NUM; i++) {
         dst_c[i] = src_c[i * col];
       }
     }
@@ -601,29 +600,28 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col)
     dst_r += C8NUM * col;
   }
   for (; ri < row; ri++, src_r += col, dst_r++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C8NUM] = src_r[i];
     }
   }
 
   for (; ri < UP_ROUND(row, C8NUM); ri++, dst_r++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C8NUM] = 0;
     }
   }
-  return;
 }
 
 void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) {
-  size_t row16 = row / C16NUM * C16NUM;
-  size_t col_skip = col / C4NUM * C4NUM;
+  int row16 = row / C16NUM * C16NUM;
+  int col_skip = col / C4NUM * C4NUM;
   int skip_size = C4NUM;
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
 
-  size_t ri = 0;
+  int ri = 0;
   for (; ri < row16; ri += C16NUM) {
-    size_t ci = 0;
+    int ci = 0;
     for (; ci < col_skip; ci += skip_size) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C16NUM;
@@ -636,7 +634,7 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col)
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C16NUM;
-      for (size_t i = 0; i < C16NUM; i++) {
+      for (int i = 0; i < C16NUM; i++) {
         dst_c[i] = src_c[i * col];
       }
     }
@@ -644,21 +642,20 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col)
     dst_r += C16NUM * col;
   }
   for (; ri < row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C16NUM] = src_r[i];
     }
     src_r += col;
     dst_r += 1;
   }
 
-  size_t total_row = UP_ROUND(row, C16NUM);
+  int total_row = UP_ROUND(row, C16NUM);
   for (; ri < total_row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C16NUM] = 0;
     }
     dst_r += 1;
   }
-  return;
 }
 
 void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col) {
@@ -680,15 +677,15 @@ void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col)
 }
 
 void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) {
-  size_t totalRow = UP_ROUND(row, C6NUM);
-  size_t row6 = row / C6NUM * C6NUM;
-  size_t col8 = col / C8NUM * C8NUM;
+  int totalRow = UP_ROUND(row, C6NUM);
+  int row6 = row / C6NUM * C6NUM;
+  int col8 = col / C8NUM * C8NUM;
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
 
-  size_t ri = 0;
+  int ri = 0;
   for (; ri < row6; ri += C6NUM) {
-    size_t ci = 0;
+    int ci = 0;
     for (; ci < col8; ci += C8NUM) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C6NUM;
@@ -753,7 +750,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col)
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C6NUM;
-      for (size_t i = 0; i < C6NUM; i++) {
+      for (int i = 0; i < C6NUM; i++) {
         dst_c[i] = src_c[i * col];
       }
     }
@@ -762,7 +759,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col)
   }
 
   for (; ri < row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C6NUM] = src_r[i];
     }
     src_r += col;
@@ -770,30 +767,29 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col)
   }
 
   for (; ri < totalRow; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C6NUM] = 0;
     }
     dst_r += 1;
   }
-  return;
 }
 
 void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) {
-  size_t total_row = UP_ROUND(row, C4NUM);
-  size_t row4 = row / C4NUM * C4NUM;
-  size_t col4 = col / C4NUM * C4NUM;
+  int total_row = UP_ROUND(row, C4NUM);
+  int row4 = row / C4NUM * C4NUM;
+  int col4 = col / C4NUM * C4NUM;
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
 
-  size_t ri = 0;
+  int ri = 0;
   for (; ri < row4; ri += C4NUM) {
-    size_t ci = 0;
+    int ci = 0;
     for (; ci < col4; ci += C4NUM) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C4NUM;
 
 #ifdef ENABLE_ARM32
-      size_t stride = col * 4;
+      int stride = col * 4;
       asm volatile(
         "mov r10, %[src_c]\n"
         "mov r12, %[dst_c]\n"
@@ -840,8 +836,8 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col)
       _mm_storeu_ps(dst_c + 8, dst2);
       _mm_storeu_ps(dst_c + 12, dst3);
 #else
-      for (int tr = 0; tr < C4NUM; tr++) {
-        for (int tc = 0; tc < C4NUM; tc++) {
+      for (size_t tr = 0; tr < C4NUM; tr++) {
+        for (size_t tc = 0; tc < C4NUM; tc++) {
           dst_c[tc * C4NUM + tr] = src_c[tr * col + tc];
         }
       }
@@ -850,7 +846,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col)
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C4NUM;
-      for (size_t i = 0; i < C4NUM; i++) {
+      for (int i = 0; i < C4NUM; i++) {
         dst_c[i] = src_c[i * col];
       }
     }
@@ -858,7 +854,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col)
     dst_r += C4NUM * col;
   }
   for (; ri < row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C4NUM] = src_r[i];
     }
     src_r += col;
@@ -866,12 +862,11 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col)
   }
 
   for (; ri < total_row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C4NUM] = 0;
     }
     dst_r += 1;
   }
-  return;
 }
 
 #ifndef ENABLE_ARM
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c
index f80bb5657d3..2daaed1bf27 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c
@@ -23,16 +23,22 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co
   if (thread_num == 0) {
     return;
   }
-  int in[4], out[4];
+  int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS];
   for (in[0] = 0; in[0] < input_shape[0]; in[0]++) {
     out[0] = in[0] + paddings[0];
     for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) {
       out[1] = in[1] + paddings[2];
       for (in[2] = 0; in[2] < input_shape[2]; in[2]++) {
         out[2] = in[2] + paddings[4];
-        float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]);
-        const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0);
-        memcpy(dst, src, input_shape[3] * sizeof(float));
+        for (in[3] = 0; in[3] < input_shape[3]; in[3]++) {
+          out[3] = in[3] + paddings[6];
+          for (in[4] = 0; in[4] < input_shape[4]; in[4]++) {
+            out[4] = in[4] + paddings[8];
+            float *dst = output_data + Offset6d(output_shape, out) + paddings[10];
+            const float *src = input_data + Offset6d(input_shape, in);
+            memcpy(dst, src, input_shape[5] * (int)(sizeof(float)));
+          }
+        }
       }
     }
   }
@@ -57,8 +63,7 @@ int TransOut2InputDimIndex(int out_dim_index, int left_pad, int in_dim, int offs
 
 int GetInputFlattenIndex(int out_flatten_index, const int *input_shape, const PadParameter *pad_param) {
   int in_flatten_index = 0;
-  int i;
-  for (i = 0; i < COMM_SHAPE_SIZE; ++i) {
+  for (int i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
     int left_pad = pad_param->paddings_[i * 2];
     NNACL_CHECK_ZERO_RETURN_ERR(pad_param->out_strides[i])
     int out_dim_index = out_flatten_index / pad_param->out_strides[i];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c
index 13f98915e35..89de95ff7f5 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c
@@ -510,8 +510,8 @@ int ResizeNearestNeighbor(const float *input_data, float *output_data, const int
         } else {
           input_x = (int)(floorf(actual_x));
         }
-        int in_offset = offset(input_shape, batch, input_y, input_x, 0);
-        int out_offset = offset(output_shape, batch, y, x, 0);
+        int in_offset = Offset(input_shape, batch, input_y, input_x, 0);
+        int out_offset = Offset(output_shape, batch, y, x, 0);
         memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float));
       }
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c
index 45aa7179d6d..7125f13a19b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c
@@ -20,10 +20,8 @@
 #include "nnacl/nnacl_utils.h"
 
 int Reverse(const float *input, float *output, size_t elem_size, int *index) {
-  for (int i = 0; i < elem_size; i++) {
+  for (size_t i = 0; i < elem_size; i++) {
     NNACL_ASSERT(index[i] >= 0);
-  }
-  for (int i = 0; i < elem_size; i++) {
     output[index[i]] = input[i];
   }
   return NNACL_OK;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c
index 33db0194d73..3ad61bf142a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c
@@ -23,7 +23,7 @@ int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets
     return NNACL_ERR;
   }
   for (int i = 0; i < num_units; i++) {
-    (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, unit_size * sizeof(float));
+    (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, (size_t)(unit_size) * sizeof(float));
   }
   return NNACL_OK;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c
index f0c1ca8c711..a329c448248 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c
@@ -25,7 +25,7 @@ void SpliceFp32(const float *src_data, int src_row, int src_col, const SplicePar
       forward_index++;
       const float *tmp_src_data = src_data + r_off * src_col;
       float *tmp_dst_data = dst_row_data + off * src_col;
-      memcpy(tmp_dst_data, tmp_src_data, src_col * sizeof(float));
+      memcpy(tmp_dst_data, tmp_src_data, (size_t)(src_col) * sizeof(float));
     }
   }
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c
index d510cacccd1..1e63955173c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c
@@ -70,7 +70,7 @@ int DoStridedSliceIntFp64Bool(const void *in_data, void *out_data, StridedSliceP
   if (param->num_axes_ < DIMENSION_8D) {
     PadStridedSliceParameterTo8D(param);
   }
-  size_t dim_offset[DIMENSION_8D - 1];
+  int dim_offset[DIMENSION_8D - 1];
   dim_offset[6] = in_shape[7];
   dim_offset[5] = in_shape[6] * dim_offset[6];
   dim_offset[4] = in_shape[5] * dim_offset[5];
@@ -132,7 +132,7 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p
   if (param->num_axes_ < DIMENSION_8D) {
     PadStridedSliceParameterTo8D(param);
   }
-  size_t dim_offset[DIMENSION_8D - 1];
+  int dim_offset[DIMENSION_8D - 1];
   dim_offset[6] = in_shape[7];
   dim_offset[5] = in_shape[6] * dim_offset[6];
   dim_offset[4] = in_shape[5] * dim_offset[5];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c
index fa73291a318..820f6a8b2ed 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c
@@ -180,15 +180,15 @@ void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_
   int *strides = (int *)(transpose_param->strides_);
   int *out_strides = (int *)(transpose_param->out_strides_);
   int num_axes = transpose_param->num_axes_;
-  size_t data_size = (*out_strides) * output_shape[0];
-  size_t offset_size = UP_DIV(data_size, thread_num);
-  size_t task_offset = offset_size * task_id;
+  int data_size = (*out_strides) * output_shape[0];
+  int offset_size = UP_DIV(data_size, thread_num);
+  int task_offset = offset_size * task_id;
   int count = data_size - task_offset;
   if (count <= 0) {
     return;
   }
   count = MSMIN(offset_size, count);
-  for (size_t idx = task_offset; idx < task_offset + count; ++idx) {
+  for (int idx = task_offset; idx < task_offset + count; ++idx) {
     int pos = idx;
     int output_idx = 0;
     int input_idx = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c
index afdd1ab3b73..e23023dfa2f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c
@@ -45,7 +45,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
     int dst_plane_offset = c * in_channel;
     for (int ic = 0; ic < ic4; ic++) {
       // clear tmp buffer
-      memset(tmp_data, 0, input_unit * input_unit * C4NUM * sizeof(float));
+      memset(tmp_data, 0, input_unit * input_unit * C4NUM * (int)(sizeof(float)));
 
       int real_c = in_channel - ic * C4NUM;
       real_c = real_c > C4NUM ? C4NUM : real_c;
@@ -87,7 +87,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float *
       // input transform
       const int tile_num = C12NUM;
       int dst_ic4_offset = dst_plane_offset + ic * C4NUM;
-      size_t dst_step = tile_num * in_channel;
+      int dst_step = tile_num * in_channel;
       float *trans_input_ptr = trans_input + dst_ic4_offset;
       func(tmp_data, trans_input_ptr, C4NUM, dst_step, real_c);
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c
index 488d413727b..366d1a9cf6a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #include <math.h>
 #include "nnacl/op_base.h"
 #include "nnacl/fp32/arithmetic_fp32.h"
+#include "nnacl/fp32/exp_fp32.h"
 #include "nnacl/fp32_grad/activation_grad.h"
 #include "nnacl/errorcode.h"
 
@@ -110,3 +111,27 @@ int GeluGrad(const float *src0, const float *src1, size_t length, float *dst) {
   }
   return NNACL_OK;
 }
+
+int SoftplusGrad(const float *src0, const float *src1, int length, float *dst) {
+  int i = 0;
+#if defined(ENABLE_AVX)
+  for (; i <= length - C8NUM; i += C8NUM) {
+    simd_exp_avx(-(MS_LD256_F32(src1 + i)), dst + i);
+    MS_ST256_F32(dst + i,
+                 MS_DIV256_F32(MS_LD256_F32(src0 + i), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i))));
+  }
+#endif
+
+#if defined(ENABLE_ARM) || defined(ENABLE_SSE)
+  for (; i <= length - C4NUM; i += C4NUM) {
+    simd_exp(MS_SUBQ_F32(MS_MOVQ_F32(0.0f), MS_LDQ_F32(src1 + i)), dst + i);
+    MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_LDQ_F32(src0 + i), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(dst + i))));
+  }
+#endif
+
+  for (; i < length; ++i) {
+    single_exp(-src1[i], dst + i);
+    dst[i] = src0[i] / (1.0f + dst[i]);
+  }
+  return NNACL_OK;
+}
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h
index e88b27addb5..7f493215fe3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,6 +39,7 @@ int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst);
 int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst);
 int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha);
 int GeluGrad(const float *src0, const float *src1, size_t length, float *dst);
+int SoftplusGrad(const float *src, const float *src1, int length, float *dst);
 
 #ifdef __cplusplus
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c
index 3523c1476c6..8df87bc4bdb 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c
@@ -231,7 +231,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C12NUM;
-      for (size_t i = 0; i < C12NUM; i++) {
+      for (int i = 0; i < C12NUM; i++) {
         dst_c[i] = src_c[i * lead];
       }
     }
@@ -240,7 +240,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size
   }
 
   for (; ri < row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C12NUM] = src_r[i];
     }
     src_r += lead;
@@ -248,12 +248,11 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size
   }
 
   for (; ri < row_up_12; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C12NUM] = 0;
     }
     dst_r += 1;
   }
-  return;
 }
 #endif
 
@@ -261,10 +260,10 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
   size_t row8 = row / C8NUM * C8NUM;
 #ifdef ENABLE_ARM64
   size_t col_skip = col / C8NUM * C8NUM;
-  int skip_size = C8NUM;
+  size_t skip_size = C8NUM;
 #else
   size_t col_skip = col / C4NUM * C4NUM;
-  int skip_size = C4NUM;
+  size_t skip_size = C4NUM;
 #endif
   const float *src_r = src_ptr;
   float *dst_r = dst_ptr;
@@ -450,7 +449,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
     for (; ci < col; ci++) {
       const float *src_c = src_r + ci;
       float *dst_c = dst_r + ci * C8NUM;
-      for (size_t i = 0; i < C8NUM; i++) {
+      for (int i = 0; i < C8NUM; i++) {
         dst_c[i] = src_c[i * lead];
       }
     }
@@ -458,7 +457,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
     dst_r += C8NUM * col;
   }
   for (; ri < row; ri++) {
-    for (size_t i = 0; i < col; i++) {
+    for (int i = 0; i < col; i++) {
       dst_r[i * C8NUM] = src_r[i];
     }
     src_r += lead;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c
index d71b8356972..f7bb275591c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c
@@ -64,11 +64,11 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c
     if (output_dims[idx] != input_dims[idx]) same_shape = 0;
   }
   if (same_shape) {
-    memcpy(output, input, num_outputs * sizeof(float));
+    memcpy(output, input, (size_t)(num_outputs) * sizeof(float));
     return;
   }
 
-  memset(output, 0, num_outputs * sizeof(float));  // zero output
+  memset(output, 0, (size_t)(num_outputs) * sizeof(float));  // zero output
 
   int input_iter[8] = {0};
   int axes[5] = {0};
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
index 1e5ac7ccc76..d6991dbb071 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c
@@ -37,13 +37,13 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr,
   for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0;
   for (int i = 0; i < n_dim; i++) dim *= input_shape[i];
   dim /= outter_size;
-  memcpy(output_ptr, yt_ptr, ele_size * sizeof(float));
+  memcpy(output_ptr, yt_ptr, (size_t)(ele_size) * sizeof(float));
 
   const int M = input_shape[axis];
   const int N = inner_size;
   for (int i = 0; i < outter_size; i++) {
     int outter_offset = i * dim;
-    memset(sum_data, 0.0f, inner_size * sizeof(float));
+    memset(sum_data, 0, (size_t)(inner_size) * sizeof(float));
     for (int k = 0; k < inner_size; k++) {
       int inner_offset = outter_offset + k;
       for (int j = 0; j < input_shape[axis]; j++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c
index b7f1f94b3b8..e3ed62cb9e0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c
@@ -20,7 +20,7 @@
 static size_t CalcIndex(const int *shape, size_t size, int i, size_t pos) {
   size_t res = 1;
   for (size_t j = 0; j < size; j++) {
-    res *= shape[(i + 1) + j];
+    res *= shape[((size_t)(i) + 1) + j];
   }
   return (pos / res % shape[i]);
 }
@@ -37,7 +37,7 @@ int DoStridedSliceGrad(const float *inputs, float *output, const int *dx_shape,
   const int *s = param->strides_;
   const int *b = param->begins_;
   for (int i = 0; i < DIMENSION_8D; i++) {
-    size *= param->in_shape_[i];
+    size *= (size_t)(param->in_shape_[i]);
   }
 
   for (size_t pos = 0; pos < size; pos++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c
index b92fe1fd2e7..b0609b97abf 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c
@@ -56,13 +56,13 @@ int AddnInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   for (size_t d = 0; d < inputs[max_dims_idx]->shape_size_; ++d) {
     size_t max_dim = 0;
     for (size_t i = 0; i < inputs_size; ++i) {
-      size_t shift = max_dims - inputs[i]->shape_size_;
-      size_t dim = (i < shift) ? 1 : inputs[i]->shape_[d];
+      size_t shift = max_dims - (size_t)(inputs[i]->shape_size_);
+      size_t dim = (i < shift) ? 1 : (size_t)(inputs[i]->shape_[d]);
       if (dim > max_dim) {
         max_dim = dim;
       }
     }
-    output->shape_[d] = max_dim;  // set the biggest dimension in the output tensor
+    output->shape_[d] = (int)(max_dim);  // set the biggest dimension in the output tensor
   }
 
   return NNACL_OK;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c
index 07ad84871ae..1513b841778 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c
@@ -17,8 +17,8 @@
 #include "nnacl/infer/affine_infer.h"
 #include "nnacl/infer/infer_register.h"
 
-int MatmulInfer(AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size, int b_shape[MAX_SHAPE_SIZE],
-                size_t b_shape_size) {
+int MatmulInfer(const AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size,
+                int b_shape[MAX_SHAPE_SIZE], size_t b_shape_size) {
   MatMulParameter *matmul_param = param->matmul_parameter_;
   if (matmul_param->a_transpose_) {
     if (a_shape_size < 2) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c
index 44cae261f29..3608e762e1d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c
@@ -56,8 +56,8 @@ int ArgMinMaxInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   int output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
   ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_);
-  size_t input_shape_size = input->shape_size_;
-  int axis = param->axis_ < 0 ? param->axis_ + (int)input_shape_size : param->axis_;
+  int input_shape_size = (int)input->shape_size_;
+  int axis = param->axis_ < 0 ? param->axis_ + input_shape_size : param->axis_;
   if (axis >= input_shape_size || axis < 0) {
     return NNACL_PARAM_INVALID;
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c
index 9971a6c2cd6..83987ccfe2f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c
@@ -55,10 +55,10 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T
 
   if (GetElementNum(dx1) < GetElementNum(dx2)) {
     param->ndim_ = in_shape1_size;
-    param->in_elements_num0_ = param->ndim_;
-    param->in_elements_num1_ = param->ndim_;
-    param->out_elements_num_ = param->ndim_;
-    int fill_dim_num = in_shape1_size - in_shape0_size;  // This will not work for batch!
+    param->in_elements_num0_ = (int)param->ndim_;
+    param->in_elements_num1_ = (int)param->ndim_;
+    param->out_elements_num_ = (int)param->ndim_;
+    size_t fill_dim_num = in_shape1_size - in_shape0_size;  // This will not work for batch!
     int j = 0;
     for (unsigned int i = 0; i < in_shape1_size; i++) {
       if (i < fill_dim_num) {
@@ -76,7 +76,7 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T
     param->out_elements_num_ = param->ndim_;
     param->broadcasting_ = true;
     int j = 0;
-    int fill_dim_num = in_shape0_size - in_shape1_size;
+    size_t fill_dim_num = in_shape0_size - in_shape1_size;
     for (unsigned int i = 0; i < in_shape0_size; i++) {
       if (i < fill_dim_num) {
         param->in_shape1_[i] = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c
index af020b85d57..959a4af64d6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c
@@ -66,7 +66,7 @@ int AudioSpectrogramInferShape(const TensorC *const *inputs, size_t inputs_size,
   int sample_sub_window = input->shape_[0] - param->window_size_;
   output_shape[1] = sample_sub_window < 0 ? 0 : 1 + sample_sub_window / param->stride_;
   // compute fft length
-  int fft_length = GetFftLength(param->window_size_);
+  int fft_length = (int)GetFftLength(param->window_size_);
   output_shape[2] = fft_length / 2 + 1;
   SetShapeArray(output, output_shape, 3);
   return NNACL_OK;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c
index fb3f72300d0..5a78919b6ff 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c
@@ -33,8 +33,8 @@ int BiasGradInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   int inshape[MAX_SHAPE_SIZE];
   size_t inshape_size = 0;
   ShapeSet(inshape, &inshape_size, in0->shape_, in0->shape_size_);
-  int ndim = inshape_size;
-  for (int i = 0; i < ndim - 1; i++) {
+  size_t ndim = inshape_size;
+  for (size_t i = 0; i < ndim - 1; i++) {
     inshape[i] = 1;
   }
   SetDataTypeFormat(out, in0);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c
index 5caedb299e8..31e36427ad0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c
@@ -111,12 +111,12 @@ int BroadcastToInferShape(const TensorC *const *inputs, size_t inputs_size, Tens
   const int *input_shape = input->shape_;
   size_t input_shape_size = input->shape_size_;
   int shape[MAX_SHAPE_SIZE];
-  int input_shape_index = input_shape_size - 1;
+  int input_shape_index = (int)(input_shape_size)-1;
   if (input_shape_size > dst_shape_size) {
     return NNACL_ERR;
   }
 
-  for (int i = dst_shape_size - 1; i >= 0; --i) {
+  for (int i = (int)(dst_shape_size)-1; i >= 0; --i) {
     if (dst_shape[i] < 0) {
       return NNACL_ERR;
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c
index 497287eaa94..30c75d62d1e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c
@@ -18,6 +18,7 @@
 #include <string.h>
 #include "nnacl/infer/infer_register.h"
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape) {
   // This function will create a new tensors_
   // Your must to set shape(param2: tensor_shape) and data_type_(tensors_data_type_ = param1: dtype) of each tensor in
@@ -35,7 +36,7 @@ int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector
     return NNACL_NULL_PTR;
   }
   memset(tensor_list->tensors_, 0, tensor_list->element_num_ * sizeof(TensorC));
-  for (int i = 0; i < tensor_list->element_num_; ++i) {
+  for (size_t i = 0; i < tensor_list->element_num_; ++i) {
     tensor_list->tensors_[i].format_ = Format_NHWC;
     tensor_list->tensors_[i].data_type_ = dtype;
     ShapeSet(tensor_list->tensors_[i].shape_, &(tensor_list->tensors_[i].shape_size_), tensor_shape->shape_[i],
@@ -69,6 +70,7 @@ bool TensorListIsFullyDefined(const int *shape, size_t shape_size) {
   }
   return true;
 }
+#endif
 
 int CheckAugmentNull(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
                      const OpParameter *parameter) {
@@ -157,7 +159,7 @@ void SetShapeTensor(TensorC *dst, const TensorC *src) {
 }
 
 void SetShapeArray(TensorC *dst, const int *src, size_t src_size) {
-  for (size_t i = 0; i < src_size; i++) {
+  for (size_t i = 0; i < src_size && i < MAX_SHAPE_SIZE; i++) {
     dst->shape_[i] = src[i];
   }
   dst->shape_size_ = src_size;
@@ -286,13 +288,17 @@ int GetDimensionSize(const TensorC *tensor, const size_t index) {
 }
 
 void ShapeSet(int *dst_shape, size_t *dst_shape_size, const int *src_shape, size_t src_shape_size) {
-  for (size_t i = 0; i < src_shape_size; i++) {
+  size_t i = 0;
+  for (; i < src_shape_size && i < MAX_SHAPE_SIZE; i++) {
     dst_shape[i] = src_shape[i];
   }
-  *dst_shape_size = src_shape_size;
+  *dst_shape_size = i;
 }
 
 void ShapePush(int *shape, size_t *shape_size, int value) {
+  if (*shape_size >= MAX_SHAPE_SIZE) {
+    return;
+  }
   shape[*shape_size] = value;
   *shape_size = *shape_size + 1;
 }
@@ -301,6 +307,9 @@ int ShapeInsert(int *shape, size_t *shape_size, int index, int value) {
   if (index < 0 || index > *shape_size) {
     return NNACL_ERR;
   }
+  if (*shape_size >= MAX_SHAPE_SIZE) {
+    return NNACL_ERR;
+  }
   for (int i = *shape_size; i > index; i--) {
     shape[i] = shape[i - 1];
   }
@@ -325,7 +334,7 @@ bool ShapeEqual(const int *shape0, size_t shape0_size, const int *shape1, size_t
   if (shape0_size != shape1_size) {
     return false;
   }
-  for (int i = 0; i < shape0_size; i++) {
+  for (size_t i = 0; i < shape0_size; i++) {
     if (shape0[i] != shape1[i]) {
       return false;
     }
@@ -401,96 +410,6 @@ int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou
   return NNACL_OK;
 }
 
-int VectorCInit(VectorC *vc, size_t per_malloc_size) {
-  if (per_malloc_size == 0) {
-    return NNACL_ERR;
-  }
-  vc->data_ = (int *)malloc(per_malloc_size * sizeof(int));
-  if (vc->data_ == NULL) {
-    return NNACL_ERR;
-  }
-  vc->size_ = 0;
-  vc->max_size_ = per_malloc_size;
-  vc->per_malloc_size_ = per_malloc_size;
-  return NNACL_OK;
-}
-
-int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size) {
-  if (src_shape_size == 0) {
-    vc->size_ = 0;
-  } else {
-    free(vc->data_);
-    if (vc->per_malloc_size_ == 0) {
-      return NNACL_ERR;
-    }
-    vc->max_size_ = (src_shape_size / vc->per_malloc_size_ + 1) * vc->per_malloc_size_;
-    vc->data_ = (int *)malloc(sizeof(int) * vc->max_size_);
-    if (vc->data_ == NULL) {
-      return NNACL_ERR;
-    }
-    for (size_t i = 0; i < src_shape_size; i++) {
-      vc->data_[i] = src_shape[i];
-    }
-    vc->size_ = src_shape_size;
-  }
-  return NNACL_OK;
-}
-
-int VectorCPush(VectorC *vc, int value) {
-  if (vc->size_ + 1 > vc->max_size_) {
-    int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int));
-    if (tmp == NULL) {
-      return NNACL_ERR;
-    }
-    memcpy(tmp, vc->data_, vc->size_ * sizeof(int));
-    free(vc->data_);
-    vc->data_ = tmp;
-    vc->max_size_ = vc->max_size_ + vc->per_malloc_size_;
-  }
-  vc->data_[vc->size_] = value;
-  vc->size_++;
-  return NNACL_OK;
-}
-
-int VectorCInsert(VectorC *vc, int index, int value) {
-  if (vc->size_ + 1 > vc->max_size_) {
-    int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int));
-    if (tmp == NULL) {
-      return NNACL_ERR;
-    }
-    memcpy(tmp, vc->data_, vc->size_ * sizeof(int));
-    free(vc->data_);
-    vc->data_ = tmp;
-    vc->max_size_ = vc->max_size_ + vc->per_malloc_size_;
-  }
-  memmove(vc->data_ + index + 1, vc->data_ + index, (vc->size_ - index) * sizeof(int));
-  vc->data_[index] = value;
-  vc->size_++;
-  return NNACL_OK;
-}
-
-void VectorCErase(VectorC *vc, int index) {
-  memmove(vc->data_ + index, vc->data_ + index + 1, (vc->size_ - index - 1) * sizeof(int));
-  vc->size_--;
-}
-
-bool VectorCEqual(const VectorC *vc1, const VectorC *vc2) {
-  if (vc1->size_ != vc2->size_) {
-    return false;
-  }
-  for (size_t i = 0; i < vc1->size_; i++) {
-    if (vc1->data_[i] != vc2->data_[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-void VectorCFree(VectorC *vc) {
-  free(vc->data_);
-  vc->data_ = NULL;
-}
-
 bool InferFlag(const TensorC *const *inputs, size_t inputs_size) {
   if (inputs == NULL) {
     return false;
@@ -499,18 +418,22 @@ bool InferFlag(const TensorC *const *inputs, size_t inputs_size) {
     if (inputs[i] == NULL) {
       return false;
     }
+#ifdef ENABLE_CONTROL_TENSORLIST
     if (inputs[i]->data_type_ == kObjectTypeTensorType) {
       TensorListC *input_tensor_list = (TensorListC *)inputs[i];
       if (input_tensor_list->shape_value_ == -1) {
         return false;
       }
     } else {
+#endif
       for (size_t j = 0; j < inputs[i]->shape_size_; ++j) {
         if (inputs[i]->shape_[j] == -1) {
           return false;
         }
       }
+#ifdef ENABLE_CONTROL_TENSORLIST
     }
+#endif
   }
   return true;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h
index 8e5a867cfd0..63e95a1203e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h
@@ -138,6 +138,7 @@ typedef struct vvector {
   size_t size_;      // number of shapes
 } vvector;
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 typedef struct TensorListC {
   bool is_ready_;
   int data_type_;
@@ -150,6 +151,7 @@ typedef struct TensorListC {
   size_t element_shape_size_;
   TensorC *tensors_;
 } TensorListC;
+#endif
 
 typedef struct VectorC {
   int *data_;
@@ -158,9 +160,11 @@ typedef struct VectorC {
   size_t per_malloc_size_;
 } VectorC;
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape);
 int TensorListMergeShape(int *element_shape, size_t *element_shape_size, const int *tmp, size_t tmp_size);
 bool TensorListIsFullyDefined(const int *shape, size_t shape_size);
+#endif
 
 int GetBatch(const TensorC *tensor);
 int GetHeight(const TensorC *tensor);
@@ -202,13 +206,6 @@ int CommonInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
 int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
                   const OpParameter *parameter);
 
-int VectorCInit(VectorC *vc, size_t per_malloc_size);
-int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size);
-int VectorCPush(VectorC *vc, int value);
-int VectorCInsert(VectorC *vc, int index, int value);
-void VectorCErase(VectorC *vc, int index);
-bool VectorCEqual(const VectorC *vc1, const VectorC *vc2);
-void VectorCFree(VectorC *vc);
 bool InferFlag(const TensorC *const *inputs, size_t inputs_size);
 
 #ifdef __cplusplus
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c
index 638e4a1a5fd..92692403fcc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c
@@ -54,8 +54,13 @@ int ConcatInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   }
   int output_axis_dim = input0_shape[axis];
   for (size_t i = 1; i < inputs_size; ++i) {
-    if (inputs[i]->shape_size_ != input0_shape_size) {
-      return NNACL_PARAM_INVALID;
+    size_t input_i_shape_size = inputs[i]->shape_size_;
+    if (input_i_shape_size != input0_shape_size) {
+      if (input_i_shape_size != 0) {
+        return NNACL_PARAM_INVALID;
+      } else {
+        continue;
+      }
     }
     int shape_tmp[MAX_SHAPE_SIZE] = {0};
     size_t shape_tmp_size = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c
index 967eb87c451..258fc03ffa7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c
@@ -37,7 +37,7 @@ int ConstantOfShapeInferShape(const TensorC *const *inputs, size_t inputs_size,
     return NNACL_ERR;
   }
   int out_shape[MAX_SHAPE_SIZE];
-  size_t out_shape_size = size;
+  int out_shape_size = size;
   switch (in_tensor->data_type_) {
     case kNumberTypeInt32: {
       int32_t *in_data = (int32_t *)(in_tensor->data_);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c
index fd40ccab871..c02ba325a62 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c
@@ -34,7 +34,10 @@ int Conv2dGradFilterInferShape(const TensorC *const *inputs, size_t inputs_size,
   if (inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) {
     return NNACL_ERR;
   }
-  size_t filter_shape_size = inputs[2]->shape_[0];
+  if (inputs[2]->shape_[0] < 0) {
+    return NNACL_ERR;
+  }
+  size_t filter_shape_size = (size_t)(inputs[2]->shape_[0]);
   if (filter_shape_size != 4) {
     return NNACL_ERR;
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c
index f6f5ec00109..60609c6f0e4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c
@@ -40,16 +40,16 @@ int Conv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_size,
   if (inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) {
     return NNACL_ERR;
   }
-  size_t shape_size = inputs[2]->shape_[0];
-  if (shape_size != 4) {
+  size_t data_size = (size_t)inputs[2]->shape_[0];
+  if (data_size != 4) {
     return NNACL_ERR;
   }
   int shape[MAX_SHAPE_SIZE];
   const int nchw2nhwc[4] = {0, 2, 3, 1};
-  for (int i = 0; i < shape_size; i++) {
+  for (size_t i = 0; i < data_size; i++) {
     shape[i] = *((int *)(inputs[2]->data_) + nchw2nhwc[i]);
   }
-  SetShapeArray(out, shape, shape_size);
+  SetShapeArray(out, shape, data_size);
 
   return NNACL_OK;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c
index 79678b7176c..4193630893e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c
@@ -89,6 +89,8 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   if (param->stride_h_ == 0 || param->stride_w_ == 0) {
     return NNACL_PARAM_INVALID;
   }
+  param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : weight_tensor->shape_[1];
+  param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : weight_tensor->shape_[2];
   ConvInferShape(input_h, input_w, &output_h, &output_w, param);
 
   int out_shape[MAX_SHAPE_SIZE];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c
index f030c7ce9b4..9c7d7a2fbf4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c
@@ -51,8 +51,8 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
     output_c = GetBatch(weight); /* depthwise */
   }
 
-  int kernel_w = param->kernel_w_;
-  int kernel_h = param->kernel_h_;
+  int kernel_w = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(weight);
+  int kernel_h = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(weight);
   int stride_w = param->stride_w_;
   int stride_h = param->stride_h_;
   int dilate_w = param->dilation_w_;
@@ -97,6 +97,8 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   param->output_h_ = output_h;
   param->output_w_ = output_w;
   param->output_channel_ = output_c;
+  param->kernel_h_ = kernel_h;
+  param->kernel_w_ = kernel_w;
   return NNACL_OK;
 }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c
index dff2324da77..f480f23bbc6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c
@@ -38,6 +38,8 @@ int DeDepthwiseConv2DInferShape(const TensorC *const *inputs, size_t inputs_size
   if (param->stride_h_ == 0 || param->stride_w_ == 0) {
     return NNACL_PARAM_INVALID;
   }
+  param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(inputs[kWeightIndex]);
+  param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(inputs[kWeightIndex]);
   output_h = param->stride_h_ * (input_h - 1) + param->kernel_h_ - param->pad_u_ - param->pad_d_;
   output_w = param->stride_w_ * (input_w - 1) + param->kernel_w_ - param->pad_l_ - param->pad_r_;
   if ((output_h + param->pad_u_ + param->pad_d_ - param->kernel_h_) % param->stride_h_ != 0) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c
index 6c79ffe945c..ba809ee8f38 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c
@@ -43,6 +43,8 @@ int DepthwiseConv2dInferShape(const TensorC *const *inputs, size_t inputs_size,
   if (param->stride_h_ == 0 || param->stride_w_ == 0) {
     return NNACL_PARAM_INVALID;
   }
+  param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(inputs[kWeightIndex]);
+  param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(inputs[kWeightIndex]);
   if (param->pad_mode_ == Pad_same) {
     output_h = ceil((float)(input_h) / (float)(param->stride_h_));
     output_w = ceil((float)(input_w) / (float)(param->stride_w_));
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c
index bcaecf4c583..110612f0fba 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c
@@ -49,6 +49,9 @@ int EmbeddingLookupInferShape(const TensorC *const *inputs, size_t inputs_size,
   size_t output_shape_size = 0;
   ShapeSet(output_shape, &output_shape_size, ids->shape_, ids->shape_size_);
   for (size_t i = 0; i < embedding_shape_size; ++i) {
+    if (output_shape_size >= MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
     ShapePush(output_shape, &output_shape_size, embedding_shape[i]);
   }
   for (size_t i = 1; i < inputs_size - 1; ++i) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c
index 39ed749343b..e80e648a65e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c
@@ -34,9 +34,12 @@ int ExpandDimsInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso
   if (inputs[1]->data_ == NULL) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
+  if (GetElementNum(inputs[1]) < 1) {
+    return NNACL_ERR;
+  }
   int dim = ((int32_t *)(inputs[1]->data_))[0];
   if (dim < 0) {
-    dim += input->shape_size_ + 1;
+    dim += (int)(input->shape_size_) + 1;
   }
   if (dim > (int)(input->shape_size_)) {
     return NNACL_INPUT_TENSOR_ERROR;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c
index bd889bf1897..583b281e0c0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c
@@ -29,7 +29,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   SetDataTypeFormat(output, input);
   const TensorC *dst_shape_tensor = inputs[1];
   const int32_t *dst_shape = (int32_t *)(dst_shape_tensor->data_);
-  size_t num_dims = 1;
+  int num_dims = 1;
   for (size_t i = 0; i < dst_shape_tensor->shape_size_; ++i) {
     num_dims *= dst_shape_tensor->shape_[i];
   }
@@ -44,7 +44,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   }
   int output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
-  for (size_t i = 0; i < num_dims; i++) {
+  for (int i = 0; i < num_dims; i++) {
     ShapePush(output_shape, &output_shape_size, dst_shape[i]);
   }
   SetShapeArray(output, output_shape, output_shape_size);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c
index eccdd195b3a..7e3d7f66a16 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c
@@ -40,7 +40,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T
   }
   int new_k = 1;
   if (param->use_axis_) {
-    for (size_t i = param->axis_; i < input0->shape_size_; ++i) {
+    for (size_t i = (size_t)(param->axis_); i < input0->shape_size_; ++i) {
       new_k *= input0->shape_[i];
     }
     if (new_k != input1->shape_[1]) {
@@ -61,7 +61,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T
   size_t out_shape_size = 0;
   ShapeSet(out_shape, &out_shape_size, inputs[0]->shape_, inputs[0]->shape_size_);
   if (param->use_axis_) {
-    out_shape_size = param->axis_ + 1;
+    out_shape_size = (size_t)(param->axis_) + 1;
     out_shape[param->axis_] = input1->shape_[0];
   } else {
     int total = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c
index b8ca877d4c2..66f1b2f6061 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c
@@ -43,6 +43,9 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   if (inputs[2]->data_ == NULL) {
     return NNACL_NULL_PTR;
   }
+  if (GetElementNum(inputs[2]) < 1) {
+    return NNACL_ERR;
+  }
   int axis = *((int *)inputs[2]->data_);
   if (axis < 0) {
     axis += input->shape_size_;
@@ -50,12 +53,11 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   int indices_shape[MAX_SHAPE_SIZE];
   size_t indices_shape_size = 0;
   ShapeSet(indices_shape, &indices_shape_size, indices->shape_, indices->shape_size_);
-  int indices_rank = indices_shape_size;
+  size_t indices_rank = indices_shape_size;
   int in_shape[MAX_SHAPE_SIZE] = {0};
   size_t in_shape_size = 0;
   ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_);
-  int in_rank = in_shape_size;
-  if (in_rank < axis + 1) {
+  if ((size_t)(in_shape_size) < axis + 1) {
     return NNACL_ERR;
   }
   int out_shape[MAX_SHAPE_SIZE] = {0};
@@ -65,7 +67,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   if (erase_ret != NNACL_OK) {
     return NNACL_ERR;
   }
-  for (int i = indices_rank - 1; i >= 0; --i) {
+  for (int i = (int)(indices_rank - 1); i >= 0; --i) {
     ret = ShapeInsert(out_shape, &out_shape_size, axis, indices_shape[i]);
     if (ret != NNACL_OK) {
       return NNACL_ERR;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c
index 3511190718e..37c9fb88e27 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c
@@ -35,8 +35,8 @@ int GatherNdInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   if (input->shape_size_ > MAX_SHAPE_SIZE || indices->shape_size_ > MAX_SHAPE_SIZE) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
-  int in_rank = input->shape_size_;
-  int indices_rank = indices->shape_size_;
+  int in_rank = (int)(input->shape_size_);
+  int indices_rank = (int)(indices->shape_size_);
   if (indices->shape_[indices_rank - 1] > in_rank) {
     return NNACL_OK;
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c
index 64ac57b30a1..de5bf3faaf6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c
@@ -31,15 +31,15 @@ int GroupConv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_s
 
   SetDataTypeFormat(out, in0);
 
-  size_t shape_size_ = in0->shape_size_;
-  if (shape_size_ > MAX_SHAPE_SIZE) {
+  size_t shape_size = in0->shape_size_;
+  if (shape_size > MAX_SHAPE_SIZE) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
   int shape_[MAX_SHAPE_SIZE];
-  for (int i = 0; i < shape_size_; i++) {
+  for (size_t i = 0; i < shape_size; i++) {
     shape_[i] = in0->shape_[i];
   }
-  SetShapeArray(out, shape_, shape_size_);
+  SetShapeArray(out, shape_, shape_size);
 
   return NNACL_OK;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
index 3943ca45903..84c5ffe545f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
@@ -43,9 +43,9 @@
 #include "nnacl/infer/crop_and_resize_infer.h"
 #include "nnacl/infer/crop_infer.h"
 #include "nnacl/infer/cumsum_infer.h"
-#include "nnacl/infer/custom_extract_features_infer.h"
-#include "nnacl/infer/custom_normalize_infer.h"
-#include "nnacl/infer/custom_predict_infer.h"
+#include "nnacl/infer/string/custom_extract_features_infer.h"
+#include "nnacl/infer/string/custom_normalize_infer.h"
+#include "nnacl/infer/string/custom_predict_infer.h"
 #include "nnacl/infer/deconv2d_infer.h"
 #include "nnacl/infer/dedepthwise_conv2d_infer.h"
 #include "nnacl/infer/depth_to_space_infer.h"
@@ -66,18 +66,18 @@
 #include "nnacl/infer/gather_nd_infer.h"
 #include "nnacl/infer/group_conv2d_grad_input_infer.h"
 #include "nnacl/infer/gru_infer.h"
-#include "nnacl/infer/hashtable_lookup_infer.h"
+#include "nnacl/infer/string/hashtable_lookup_infer.h"
 #include "nnacl/infer/invert_permutation_infer.h"
 #include "nnacl/infer/layer_norm_grad_infer.h"
 #include "nnacl/infer/layer_norm_infer.h"
 #include "nnacl/infer/lin_space_infer.h"
 #include "nnacl/infer/log_softmax_infer.h"
-#include "nnacl/infer/lsh_projection_infer.h"
+#include "nnacl/infer/string/lsh_projection_infer.h"
 #include "nnacl/infer/lstm_infer.h"
 #include "nnacl/infer/matmul_infer.h"
 #include "nnacl/infer/max_min_grad_infer.h"
 #include "nnacl/infer/mean_infer.h"
-#include "nnacl/infer/merge_infer.h"
+#include "nnacl/infer/control/merge_infer.h"
 #include "nnacl/infer/mfcc_infer.h"
 #include "nnacl/infer/non_max_suppression_infer.h"
 #include "nnacl/infer/one_hot_infer.h"
@@ -102,7 +102,7 @@
 #include "nnacl/infer/sgd_infer.h"
 #include "nnacl/infer/shape_infer.h"
 #include "nnacl/infer/size_infer.h"
-#include "nnacl/infer/skip_gram_infer.h"
+#include "nnacl/infer/string/skip_gram_infer.h"
 #include "nnacl/infer/slice_infer.h"
 #include "nnacl/infer/softmax_cross_entropy_infer.h"
 #include "nnacl/infer/softmax_infer.h"
@@ -117,12 +117,12 @@
 #include "nnacl/infer/stack_infer.h"
 #include "nnacl/infer/strided_slice_grad_infer.h"
 #include "nnacl/infer/strided_slice_infer.h"
-#include "nnacl/infer/switch_infer.h"
-#include "nnacl/infer/tensorlist_fromtensor_infer.h"
-#include "nnacl/infer/tensorlist_getitem_infer.h"
-#include "nnacl/infer/tensorlist_reserve_infer.h"
-#include "nnacl/infer/tensorlist_setitem_infer.h"
-#include "nnacl/infer/tensorlist_stack_infer.h"
+#include "nnacl/infer/control/switch_infer.h"
+#include "nnacl/infer/control/tensorlist_fromtensor_infer.h"
+#include "nnacl/infer/control/tensorlist_getitem_infer.h"
+#include "nnacl/infer/control/tensorlist_reserve_infer.h"
+#include "nnacl/infer/control/tensorlist_setitem_infer.h"
+#include "nnacl/infer/control/tensorlist_stack_infer.h"
 #include "nnacl/infer/tile_infer.h"
 #include "nnacl/infer/topk_infer.h"
 #include "nnacl/infer/transpose_infer.h"
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h
index 351e4f70086..5929f8e3f4f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h
@@ -227,8 +227,9 @@ enum PrimType {
   PrimType_Affine = 200,
   PrimType_Attention = 201,
   PrimType_LSTMGrad = 202,
+  PrimType_ScatterNdUpdate = 203,
   PrimType_MIN = PrimType_NONE,
-  PrimType_MAX = PrimType_LSTMGrad + 1
+  PrimType_MAX = PrimType_ScatterNdUpdate + 1
 };
 
 void RegInfer(int prim_type, InferShape func);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c
index f8791c23582..90d7bc6f5c0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c
@@ -35,13 +35,16 @@ int LayerNormGradInferShape(const TensorC *const *inputs, size_t inputs_size, Te
   SetShapeTensor(output_dx, input_x);
   int begin_params_axis = param->begin_params_axis_;
   if (param->begin_params_axis_ < 0) {
-    begin_params_axis += input_x->shape_size_;
+    begin_params_axis += (int)(input_x->shape_size_);
   }
   int size = 0;
   if (input_x->shape_size_ > MAX_SHAPE_SIZE) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
   for (int i = begin_params_axis; i < input_x->shape_size_; i++) {
+    if (size >= MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
     output_dg->shape_[size] = input_x->shape_[i];
     output_db->shape_[size] = input_x->shape_[i];
     size++;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c
index ed8103ef080..dd8a42782e4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c
@@ -39,7 +39,7 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
     return NNACL_INPUT_TENSOR_ERROR;
   }
   param->begin_norm_axis_ =
-    param->begin_norm_axis_ < 0 ? param->begin_norm_axis_ + input->shape_size_ : param->begin_norm_axis_;
+    param->begin_norm_axis_ < 0 ? param->begin_norm_axis_ + ((int)(input->shape_size_)) : param->begin_norm_axis_;
   SetShapeTensor(output, input);
   // take care of other outputs
   if (outputs_size == 3) {
@@ -52,8 +52,8 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
       output_mean->shape_[size] = input->shape_[size];
       output_var->shape_[size] = input->shape_[size];
     }
-    output_mean->shape_size_ = size;
-    output_var->shape_size_ = size;
+    output_mean->shape_size_ = (size_t)size;
+    output_var->shape_size_ = (size_t)size;
   }
 
   return NNACL_OK;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c
index 4ec6388b56c..7ab34e59d08 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c
@@ -32,6 +32,9 @@ int LinSpaceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   if (!InferFlag(inputs, inputs_size)) {
     return NNACL_INFER_INVALID;
   }
+  if (GetElementNum(inputs[2]) < 1) {
+    return NNACL_ERR;
+  }
   int *num = (int *)(inputs[2]->data_);
   if (num == NULL) {
     return NNACL_INFER_INVALID;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c
index 31f169c242d..a252684f19b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c
@@ -18,7 +18,10 @@
 #include "nnacl/infer/infer_register.h"
 
 int CheckMatmulInputShape(int *a_shape, size_t a_shape_size, int *b_shape, size_t b_shape_size,
-                          MatMulParameter *param) {
+                          const MatMulParameter *param) {
+  if (a_shape_size < 2 || b_shape_size < 2) {
+    return NNACL_PARAM_INVALID;
+  }
   for (size_t i = 0; i < (a_shape_size - 2) && i < (b_shape_size - 2); ++i) {
     if (a_shape[i] != b_shape[i]) {
       return NNACL_INPUT_TENSOR_ERROR;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c
index fe84e5a1a8b..37b3f387731 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c
@@ -41,11 +41,11 @@ int MaxMinGradInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso
   ArithmeticParameter *param = (ArithmeticParameter *)parameter;
 
   param->ndim_ = dy->shape_size_;
-  param->in_elements_num0_ = param->ndim_;
-  param->in_elements_num1_ = param->ndim_;
-  param->out_elements_num_ = param->ndim_;
-  int fillDimNum0 = dy->shape_size_ - x1->shape_size_;
-  int fillDimNum1 = dy->shape_size_ - x2->shape_size_;
+  param->in_elements_num0_ = (int)(param->ndim_);
+  param->in_elements_num1_ = (int)(param->ndim_);
+  param->out_elements_num_ = (int)(param->ndim_);
+  int fillDimNum0 = (int)(dy->shape_size_ - x1->shape_size_);
+  int fillDimNum1 = (int)(dy->shape_size_ - x2->shape_size_);
   int j0 = 0;
   int j1 = 0;
   for (unsigned int i = 0; i < dy->shape_size_; i++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c
index b2300f4ecb3..2c669ac4a9b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c
@@ -52,7 +52,10 @@ int MeanInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   // reduce on selected axes
   for (size_t i = 0; i < input->shape_size_; i++) {
     bool reduce_axis = false;
-    for (size_t idx = 0; idx < num_axes; ++idx) {
+    if (num_axes > MAX_SHAPE_SIZE || num_axes < 0) {
+      return NNACL_ERR;
+    }
+    for (int idx = 0; idx < num_axes; ++idx) {
       if (((size_t)(axes[idx])) == i) {
         reduce_axis = true;
         break;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c
index a436621f49c..f26ece39dc1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c
@@ -32,7 +32,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou
     return NNACL_INFER_INVALID;
   }
 
-  if (input->shape_size_ > 4) {
+  if (input->shape_size_ > DEFAULT_PAD_NDIMS) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
   const TensorC *paddings = inputs[1];
@@ -48,7 +48,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou
     param->paddings_[i] = ((int *)paddings->data_)[i];
   }
 
-  int output_shape[MAX_SHAPE_SIZE] = {0};
+  int output_shape[DEFAULT_PAD_NDIMS] = {0};
   size_t output_shape_size = 0;
   for (size_t i = 0; i < input->shape_size_; i++) {
     int shape = input->shape_[i] + param->paddings_[2 * i] + param->paddings_[2 * i + 1];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c
index a1aaee328b6..795ee4b019e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c
@@ -38,8 +38,11 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
 
   PriorBoxParameter *param = (PriorBoxParameter *)parameter;
   float *aspect_ratios = param->aspect_ratios;
-  size_t aspect_ratios_size = param->aspect_ratios_size;
-  for (size_t i = 0; i < aspect_ratios_size; i++) {
+  if (aspect_ratios == NULL) {
+    return NNACL_NULL_PTR;
+  }
+  int32_t aspect_ratios_size = param->aspect_ratios_size;
+  for (int32_t i = 0; i < aspect_ratios_size; i++) {
     float ratio = aspect_ratios[i];
     if (ratio == 0) {
       return NNACL_ERR;
@@ -62,8 +65,8 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
     }
   }
 
-  size_t min_sizes_size = param->min_sizes_size;
-  size_t max_sizes_size = param->max_sizes_size;
+  int32_t min_sizes_size = param->min_sizes_size;
+  int32_t max_sizes_size = param->max_sizes_size;
   int32_t num_priors_box = min_sizes_size * different_aspect_ratios_size + max_sizes_size;
   const int kPriorBoxPoints = 4;
   const int kPriorBoxN = 1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c
index 0d8bb85785c..0c2114f02c0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c
@@ -40,7 +40,9 @@ int RangeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
   if (!InferFlag(inputs, inputs_size)) {
     return NNACL_INFER_INVALID;
   }
-
+  if (GetElementNum(inputs[0]) < 1 || GetElementNum(inputs[1]) < 1 || GetElementNum(inputs[2]) < 1) {
+    return NNACL_ERR;
+  }
   int shape_size = 0;
   if (inputs_size == 3) {
     if ((inputs[0]->data_ == NULL) || (inputs[1]->data_ == NULL) || (inputs[2]->data_ == NULL)) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c
index 5058ab1c3ec..936339bf22d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c
@@ -33,7 +33,7 @@ int ReduceOnSelectedAxes(const TensorC *input, size_t num_axes, const int *actua
   for (size_t i = 0; i < input->shape_size_; i++) {
     bool reduce_axis = false;
     for (size_t idx = 0; idx < num_axes; ++idx) {
-      if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx] + input->shape_size_) == i) {
+      if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx]) + input->shape_size_ == i) {
         reduce_axis = true;
         break;
       }
@@ -79,7 +79,7 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
   if (axes == NULL) {
     return NNACL_NULL_PTR;
   }
-  size_t num_axes;
+  int num_axes;
   if (axes_input->shape_size_ == 1) {
     num_axes = axes_input->shape_[0];
   } else if (axes_input->shape_size_ == 0) {
@@ -102,7 +102,10 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
 
     int begin_axis;
     begin_axis = axes[0] < 0 ? axes[0] + rank : axes[0];
-    for (size_t i = begin_axis + 1; i < rank; ++i) {
+    if (rank > MAX_SHAPE_SIZE || rank < 0) {
+      return NNACL_ERR;
+    }
+    for (int i = begin_axis + 1; i < rank; ++i) {
       ShapePush(actual_axes, &actual_axes_size, i);
     }
     num_axes = rank - begin_axis;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c
index d04cc280158..6fc571263e0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c
@@ -17,7 +17,7 @@
 #include "nnacl/infer/reshape_infer.h"
 #include "nnacl/infer/infer_register.h"
 
-void CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) {
+int CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) {
   int input_count = GetElementNum(inputs[0]);
   int index = 0;
   int size = 1;
@@ -32,24 +32,28 @@ void CalShape(const int *data, const TensorC *const *inputs, int *out_shape, siz
     ShapePush(out_shape, out_shape_size, data[i]);
   }
   if (size == 0) {
-    return;
+    return NNACL_ERR;
   }
   if ((int)(data[index]) == -1) {
+    if (index >= MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
     out_shape[index] = input_count / size;
   }
+  return NNACL_OK;
 }
 
 int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) {
   size_t in_shape_size = 1;
   for (size_t i = 0; i < in_tensor->shape_size_; i++) {
-    in_shape_size *= in_tensor->shape_[i];
+    in_shape_size *= (size_t)(in_tensor->shape_[i]);
   }
   int64_t infer_index = -1;
   size_t out_shape_size_new = 1;
   for (size_t i = 0; i < out_shape_size; i++) {
     if (out_shape[i] == -1) {
       if (infer_index == -1) {
-        infer_index = i;
+        infer_index = (int64_t)(i);
       } else {
         return NNACL_ERR;
       }
@@ -64,7 +68,7 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size)
         break;
       }
     } else {
-      out_shape_size_new *= out_shape[i];
+      out_shape_size_new *= (size_t)(out_shape[i]);
     }
   }
   if (infer_index == -1 && out_shape_size_new != in_shape_size) {
@@ -74,7 +78,10 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size)
     if (out_shape_size_new == 0) {
       return NNACL_ERR;
     }
-    out_shape[infer_index] = in_shape_size / out_shape_size_new;
+    if (infer_index >= MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
+    out_shape[infer_index] = (int)(in_shape_size / out_shape_size_new);
   }
   return NNACL_OK;
 }
@@ -94,35 +101,55 @@ int CalShapeByType(const TensorC *const *inputs, size_t shape_size, int *out_sha
       for (size_t i = 0; i < shape_size; i++) {
         data_int[i] = data[i];
       }
-      CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      if (cal_ret != NNACL_OK) {
+        free(data_int);
+        return NNACL_ERR;
+      }
     } break;
     case kNumberTypeInt32: {
       int32_t *data = (int32_t *)(shape_tensor->data_);
       for (size_t i = 0; i < shape_size; i++) {
         data_int[i] = data[i];
       }
-      CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      if (cal_ret != NNACL_OK) {
+        free(data_int);
+        return NNACL_ERR;
+      }
     } break;
     case kNumberTypeInt64: {
       int64_t *data = (int64_t *)(shape_tensor->data_);
       for (size_t i = 0; i < shape_size; i++) {
         data_int[i] = data[i];
       }
-      CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      if (cal_ret != NNACL_OK) {
+        free(data_int);
+        return NNACL_ERR;
+      }
     } break;
     case kNumberTypeFloat: {
       float *data = (float *)(shape_tensor->data_);
       for (size_t i = 0; i < shape_size; i++) {
         data_int[i] = data[i];
       }
-      CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      if (cal_ret != NNACL_OK) {
+        free(data_int);
+        return NNACL_ERR;
+      }
     } break;
     case kNumberTypeUInt32: {
       uint32_t *data = (uint32_t *)(shape_tensor->data_);
       for (size_t i = 0; i < shape_size; i++) {
         data_int[i] = data[i];
       }
-      CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size);
+      if (cal_ret != NNACL_OK) {
+        free(data_int);
+        return NNACL_ERR;
+      }
     } break;
     default: {
       free(data_int);
@@ -162,7 +189,10 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
     if (shape_tensor->data_ == NULL) {
       return NNACL_INFER_INVALID;
     }
-    size_t shape_size = GetElementNum(shape_tensor);
+    int shape_size = GetElementNum(shape_tensor);
+    if (shape_size > MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
     int calRet = CalShapeByType(inputs, shape_size, out_shape, &out_shape_size);
     if (calRet != NNACL_OK) {
       return calRet;
@@ -171,7 +201,7 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
     if (param->shape_dim_ > MAX_SHAPE_SIZE) {
       return NNACL_PARAM_INVALID;
     }
-    for (size_t i = 0; i < param->shape_dim_; ++i) {
+    for (int i = 0; i < param->shape_dim_; ++i) {
       ShapePush(out_shape, &out_shape_size, param->shape_[i]);
     }
   } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c
index 2e718166262..da8d02756fa 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c
@@ -24,7 +24,7 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) {
   if (shape_tensor->data_ == NULL) {
     return NNACL_INFER_INVALID;
   }
-  size_t shape_size = GetElementNum(shape_tensor);
+  int shape_size = GetElementNum(shape_tensor);
   switch (shape_size) {
     case 4: {
       if (shape_tensor->data_type_ == kNumberTypeInt32) {
@@ -32,6 +32,9 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) {
         if (data == NULL) {
           return NNACL_INFER_INVALID;
         }
+        if (GetElementNum(shape_tensor) < 4) {
+          return NNACL_ERR;
+        }
         switch (shape_tensor->format_) {
           case Format_NCHW:
             param->new_height_ = data[2];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h
index 699405e831f..7b035b15a0e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h
@@ -17,7 +17,6 @@
 #define MINDSPORE_NNACL_SCATTER_ND_INFER_H
 
 #include "nnacl/infer/common_infer.h"
-#include "nnacl/softmax_parameter.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c
index d1f9a695d13..9708755ea64 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c
@@ -34,6 +34,7 @@ int SelectInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
     TensorC *output = outputs[i];
     SetDataTypeFormat(output, input);
     if (input->data_type_ == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
       TensorListC *input_tensorlist = (TensorListC *)(input);
       TensorListC *output_tensorlist = (TensorListC *)(output);
       output_tensorlist->element_shape_size_ = input_tensorlist->element_shape_size_;
@@ -47,6 +48,9 @@ int SelectInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC *
       for (size_t j = 0; j < output_tensorlist->element_num_; j++) {
         memcpy(&output_tensorlist->tensors_[j], &input_tensorlist->tensors_[j], sizeof(TensorC));
       }
+#else
+      return NNACL_ERR;
+#endif
     } else {
       SetShapeTensor(output, input);
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c
index 91a3121c048..b2d1e6678b3 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c
@@ -32,6 +32,35 @@ static bool CheckInputsDataType(const TensorC *const *inputs, size_t inputs_size
   return true;
 }
 
+int InitBeginAndSizeParam(const TensorC *const *inputs, SliceParameter *param) {
+  /* init begin parameter */
+  int slice_begin_size = GetElementNum(inputs[1]);
+  int *begin_ptr = (int *)(inputs[1]->data_);
+  if (slice_begin_size != param->param_length_ || begin_ptr == NULL) {
+    return NNACL_INFER_INVALID;
+  }
+  if (slice_begin_size > MAX_AXIS_SIZE) {
+    return NNACL_ERR;
+  }
+  for (size_t i = 0; i < slice_begin_size; i++) {
+    param->begin_[i] = begin_ptr[i];
+  }
+
+  /* init size parameter */
+  int slice_size_size = GetElementNum(inputs[2]);
+  int *size_ptr = (int *)(inputs[2]->data_);
+  if (slice_size_size != param->param_length_ || size_ptr == NULL) {
+    return NNACL_INFER_INVALID;
+  }
+  if (slice_size_size > MAX_AXIS_SIZE) {
+    return NNACL_ERR;
+  }
+  for (size_t i = 0; i < slice_size_size; i++) {
+    param->size_[i] = size_ptr[i];
+  }
+  return NNACL_OK;
+}
+
 int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
                     OpParameter *parameter) {
   int ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 3, 1);
@@ -54,38 +83,22 @@ int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
     return NNACL_INPUT_TENSOR_ERROR;
   }
   SliceParameter *param = (SliceParameter *)parameter;
-  param->param_length_ = input->shape_size_;
+  param->param_length_ = (int)(input->shape_size_);
   output->shape_size_ = input->shape_size_;
 
-  /* init begin parameter */
-  size_t slice_begin_size = GetElementNum(inputs[1]);
-  int *begin_ptr = (int *)(inputs[1]->data_);
-  if (slice_begin_size != param->param_length_ || begin_ptr == NULL) {
-    return NNACL_INFER_INVALID;
-  }
-  for (int i = 0; i < slice_begin_size; i++) {
-    param->begin_[i] = begin_ptr[i];
-  }
-
-  /* init size parameter */
-  size_t slice_size_size = GetElementNum(inputs[2]);
-  int *size_ptr = (int *)(inputs[2]->data_);
-  if (slice_size_size != param->param_length_ || size_ptr == NULL) {
-    return NNACL_INFER_INVALID;
-  }
-  for (int i = 0; i < slice_size_size; i++) {
-    param->size_[i] = size_ptr[i];
+  if (InitBeginAndSizeParam(inputs, param) != NNACL_OK) {
+    return NNACL_ERR;
   }
 
   /* infer output shape information */
   int begin[MAX_SHAPE_SIZE];
   int size[MAX_SHAPE_SIZE];
-  for (size_t i = 0; i < param->param_length_; ++i) {
+  for (int32_t i = 0; i < param->param_length_; ++i) {
     begin[param->axis_[i]] = param->begin_[i];
     size[param->axis_[i]] = param->size_[i];
   }
 
-  for (size_t i = 0; i < param->param_length_; ++i) {
+  for (int32_t i = 0; i < param->param_length_; ++i) {
     if (size[i] < 0 && size[i] != -1) {
       return NNACL_PARAM_INVALID;
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c
index c9b29b4415e..9a13e40c73a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c
@@ -38,7 +38,7 @@ int SpaceToBatchInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
   }
 
   int *block_shape = param->block_sizes_;
-  size_t block_shape_size = param->m_;
+  int block_shape_size = param->m_;
   int *paddings = param->paddings_;
   int padding_left = 0;
   int padding_right = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c
index 612d1408f1f..036168cd0cb 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c
@@ -29,7 +29,7 @@ int SpaceSetOutputShapeFromParam(const TensorC *const *inputs, size_t inputs_siz
   }
   SpaceToBatchParameter *param = (SpaceToBatchParameter *)parameter;
   int *block_shape = param->block_sizes_;
-  size_t block_shape_size = param->m_;
+  int block_shape_size = param->m_;
   int *padding = param->paddings_;
   int padding_left = 0;
   int padding_right = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c
index 533a32824f7..007b50d1d71 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c
@@ -31,7 +31,7 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
 
   SplitParameter *param = (SplitParameter *)parameter;
 
-  size_t num_split_ = param->num_split_ == 0 ? (int)(outputs_size) : param->num_split_;
+  int num_split_ = param->num_split_ == 0 ? (int)(outputs_size) : param->num_split_;
   if (num_split_ == 0) {
     return NNACL_ERR;
   }
@@ -43,8 +43,8 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
   if (input->shape_size_ > MAX_SHAPE_SIZE) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
-  size_t split_dim = param->split_dim_ < 0 ? input->shape_size_ + param->split_dim_ : param->split_dim_;
-  if (split_dim > input->shape_size_) {
+  int split_dim = param->split_dim_ < 0 ? ((int)(input->shape_size_)) + param->split_dim_ : param->split_dim_;
+  if (split_dim > (int)(input->shape_size_)) {
     return NNACL_ERR;
   }
   if ((int)(outputs_size) != num_split_) {
@@ -64,7 +64,10 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
     ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_);
     int split_dim_i = input->shape_[split_dim];
     if (i == num_split_ - 1 && param->split_sizes_[i] == -1) {
-      for (size_t j = 0; j < param->num_split_ - 1; ++j) {
+      if (param->num_split_ - 1 < 0) {
+        return NNACL_ERR;
+      }
+      for (int j = 0; j < param->num_split_ - 1; ++j) {
         split_dim_i -= param->split_sizes_[j];
       }
       param->split_sizes_[i] = split_dim_i;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c
index cf2137f8095..2d35201add1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c
@@ -40,7 +40,7 @@ int SqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   size_t out_shape_size = 0;
 
   for (size_t i = 0; i < param->axis_size_; i++) {
-    param->axis_[i] = param->axis_[i] >= 0 ? param->axis_[i] : param->axis_[i] + input->shape_size_;
+    param->axis_[i] = param->axis_[i] >= 0 ? param->axis_[i] : param->axis_[i] + (int)input->shape_size_;
   }
 
   if (param->axis_size_ == 0) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c
index d533441390d..340284aeae1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c
@@ -41,8 +41,8 @@ int StackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
   int32_t output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
   ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_);
-  int axis = param->axis_ < 0 ? param->axis_ + input->shape_size_ + 1 : param->axis_;
-  if (axis < 0 || axis > input->shape_size_) {
+  int axis = param->axis_ < 0 ? (int)(param->axis_) + (int)(input->shape_size_) + 1 : param->axis_;
+  if (axis < 0 || axis > (int)(input->shape_size_)) {
     return NNACL_PARAM_INVALID;
   }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c
index b4be741c3d4..ea124e94763 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c
@@ -32,8 +32,9 @@ bool StridedSliceCheckInputs(const TensorC *const *inputs, size_t inputs_size) {
   return true;  // note: the original code is ndim_ <= in_shape_size
 }
 
-void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, uint32_t *begins_mask, int *ends, uint32_t *ends_mask,
-                               uint32_t *ellipsis_mask, int *in_shape) {
+void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, const uint32_t *const begins_mask, int *ends,
+                               const uint32_t *const ends_mask, const uint32_t *const ellipsis_mask,
+                               const int *const in_shape) {
   for (size_t i = 0; i < ndim; i++) {
     if (begins_mask[i]) {
       begins[i] = 0;
@@ -84,8 +85,8 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size,
   int *end_data = (int *)(inputs[3]->data_);
   int *stride_data = (int *)(inputs[4]->data_);
 
-  size_t ndim_ = GetElementNum(begin_tensor);
-  for (int i = 0; i < ndim_; ++i) {
+  size_t ndim_ = (size_t)GetElementNum(begin_tensor);
+  for (size_t i = 0; i < ndim_; ++i) {
     ShapePush(begins_, &begins_size, begin_data[i]);
     ShapePush(ends_, &ends_size, end_data[i]);
     ShapePush(strides_, &strides_size, stride_data[i]);
@@ -104,9 +105,9 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size,
     ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i);
     new_axis_mask_[i] = (unsigned)(param->newAxisMask_) & (1 << i);
   }
-  param->num_axes_ = in_shape_size;
-  param->in_shape_length_ = in_shape_size;
-  for (int i = 0; i < ndim_; ++i) {
+  param->num_axes_ = (int)(in_shape_size);
+  param->in_shape_length_ = (int)(in_shape_size);
+  for (size_t i = 0; i < ndim_; ++i) {
     param->begins_[i] = begins_[i];
     param->ends_[i] = ends_[i];
     param->strides_[i] = strides_[i];
@@ -138,13 +139,16 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size,
     return NNACL_OK;
   }
 
-  size_t output_size = inputs[1]->shape_[0];
+  int output_size = inputs[1]->shape_[0];
   int output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
   if (inputs[1]->data_ == NULL) {
     return NNACL_ERR;
   }
 
+  if (output_size > MAX_SHAPE_SIZE) {
+    return NNACL_ERR;
+  }
   for (int i = 0; i < output_size; i++) {
     ShapePush(output_shape, &output_shape_size, ((int *)(inputs[1]->data_))[i]);
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c
index 442d95624d3..c8c6bf067b2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c
@@ -70,7 +70,7 @@ int HandleAxesInputNotExist(const TensorC *const *inputs, struct StridedSliceTra
     return NNACL_ERR;
   }
   transfer_buffer->ndim_ = GetElementNum(begin_tensor);
-  for (int i = 0; i < transfer_buffer->ndim_; ++i) {
+  for (int i = 0; i < (size_t)(transfer_buffer->ndim_); ++i) {
     ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, begin_data[i]);
     ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, end_data[i]);
     ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, stride_data[i]);
@@ -94,7 +94,7 @@ int GenerateAxes(const TensorC *axes_tensor, int *axes, int num, int ndim) {
       axes[i] = i;
     }
   } else {
-    for (size_t i = 0; i < num; i++) {
+    for (int i = 0; i < num; i++) {
       axes[i] = axes_data[i];
     }
     for (int i = 0; i < num; ++i) {
@@ -132,23 +132,29 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape,
   }
 
   const TensorC *axes_tensor = inputs[3];
-  int axes[MAX_SHAPE_SIZE];
+  int axes[MAX_SHAPE_SIZE] = {0};
   int ret = GenerateAxes(axes_tensor, axes, begin_ndim, *ndim);
   if (ret != NNACL_OK) {
     return ret;
   }
 
-  for (size_t i = 0; i < *ndim; i++) {
+  if (*ndim > MAX_SHAPE_SIZE || *ndim < 0) {
+    return NNACL_ERR;
+  }
+  for (int i = 0; i < *ndim; i++) {
     in_shape[i] = 0;
     begins[i] = 0;
     strides[i] = 0;
   }
-  for (size_t i = 0; i < *ndim; ++i) {
+  for (int i = 0; i < *ndim; ++i) {
     in_shape[i] = input_tensor->shape_[i];
   }
-  for (size_t i = 0; i < *ndim; ++i) {
+  for (int i = 0; i < *ndim; ++i) {
     int axes_it = 0;
-    for (size_t j = 0; j < begin_ndim; j++) {
+    if (begin_ndim > MAX_SHAPE_SIZE || begin_ndim < 0) {
+      return NNACL_ERR;
+    }
+    for (int j = 0; j < begin_ndim; j++) {
       if (axes[j] == i) {
         axes_it = j;
         break;
@@ -158,8 +164,12 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape,
     }
     if (axes_it != begin_ndim) {
       int axis = axes_it;
-      // begins or ends exceed limit will be set to limit
-      begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
+      if (begin_data[axis] > input_tensor->shape_[i] - 1) {
+        begins[i] = begin_data[axis];
+      } else {
+        begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]);
+      }
+      // ends exceed limit will be set to limit
       ends[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1);
       if (stride_data == NULL) {
         return NNACL_ERR;
@@ -190,7 +200,7 @@ int StrideSlicePreCheck(const TensorC *const *inputs, size_t inputs_size, Tensor
 }
 
 void Bit2Vector(StridedSliceTransferBuffer *transfer_buffer, const StridedSliceParameter *param) {
-  for (unsigned i = 0; i < (unsigned)transfer_buffer->ndim_; i++) {
+  for (unsigned i = 0; i < (unsigned)(size_t)(transfer_buffer->ndim_); i++) {
     transfer_buffer->begins_mask_[i] = (unsigned)(param->begins_mask_) & (1 << i);
     transfer_buffer->ends_mask_[i] = (unsigned)(param->ends_mask_) & (1 << i);
     transfer_buffer->ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i);
@@ -215,7 +225,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa
       transfer_buffer->strides_[i] = 1;
 
       ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, 0);
-      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[transfer_buffer->ndim_ - 1]);
+      ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[(size_t)(transfer_buffer->ndim_) - 1]);
       ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, 1);
 
       transfer_buffer->begins_mask_[i] = false;
@@ -228,7 +238,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa
 }
 
 void ApplyBeginMask(StridedSliceTransferBuffer *transfer_buffer) {
-  for (int i = 0; i < transfer_buffer->ndim_; i++) {
+  for (int i = 0; i < (size_t)(transfer_buffer->ndim_); i++) {
     if (transfer_buffer->begins_mask_[i]) {
       transfer_buffer->begins_[i] = 0;
     }
@@ -296,7 +306,7 @@ void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_sh
 
 int TransferBuffer2Param(const StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param,
                          const int *in_shape, size_t in_shape_size) {
-  if (transfer_buffer->ndim_ >= in_shape_size || param->in_shape_length_ >= in_shape_size) {
+  if (transfer_buffer->ndim_ >= (int)(in_shape_size) || param->in_shape_length_ >= (int)(in_shape_size)) {
     return NNACL_ERR;
   }
   for (int i = 0; i < transfer_buffer->ndim_; i++) {
@@ -325,12 +335,12 @@ void InitStridedSliceTransferBuffer(StridedSliceTransferBuffer *transfer_buffer)
 }
 
 void SetMaskSize(StridedSliceTransferBuffer *transfer_buffer) {
-  transfer_buffer->ellipsis_mask_size_ = transfer_buffer->ndim_;
-  transfer_buffer->new_axis_mask_size_ = transfer_buffer->ndim_;
-  transfer_buffer->shrink_axis_mask_size_ = transfer_buffer->ndim_;
-  transfer_buffer->begins_size_ = transfer_buffer->ndim_;
-  transfer_buffer->ends_size_ = transfer_buffer->ndim_;
-  transfer_buffer->strides_size_ = transfer_buffer->ndim_;
+  transfer_buffer->ellipsis_mask_size_ = (size_t)(transfer_buffer->ndim_);
+  transfer_buffer->new_axis_mask_size_ = (size_t)(transfer_buffer->ndim_);
+  transfer_buffer->shrink_axis_mask_size_ = (size_t)(transfer_buffer->ndim_);
+  transfer_buffer->begins_size_ = (size_t)(transfer_buffer->ndim_);
+  transfer_buffer->ends_size_ = (size_t)(transfer_buffer->ndim_);
+  transfer_buffer->strides_size_ = (size_t)(transfer_buffer->ndim_);
 }
 
 // note: begin, end, stride length are equal, but may less than rank of input
@@ -359,8 +369,8 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten
   InitStridedSliceTransferBuffer(&transfer_buffer);
 
   StridedSliceParameter *param = (StridedSliceParameter *)parameter;
-  param->num_axes_ = in_shape_size;
-  param->in_shape_length_ = in_shape_size;
+  param->num_axes_ = (int)(in_shape_size);
+  param->in_shape_length_ = (int)(in_shape_size);
 
   transfer_buffer.ndim_ = 0;
   if (inputs_size == kStridedSliceInputNum) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c
index ecf1db30156..77609e8b1a2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c
@@ -25,6 +25,9 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) {
       multiples_size_tmp[i] = 1;
     }
     for (size_t i = 0; i < param->dims_size_; i++) {
+      if (i >= MAX_TILE_DIM_SIZE) {
+        return;
+      }
       multiples_size_tmp[param->dims_[i]] = param->multiples_[i];
     }
     for (size_t i = 0; i < 5; i++) {
@@ -35,13 +38,10 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) {
 
 int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size,
                    OpParameter *parameter) {
-  int check_ret = CheckAugmentNull(inputs, inputs_size, outputs, outputs_size, parameter);
+  int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1);
   if (check_ret != NNACL_OK) {
     return check_ret;
   }
-  if (inputs_size != 2 || outputs_size < 1) {
-    return NNACL_INPUT_TENSOR_ERROR;
-  }
 
   const TensorC *input = inputs[0];
   TensorC *output = outputs[0];
@@ -51,7 +51,7 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
     return NNACL_INFER_INVALID;
   }
 
-  int out_shape[MAX_SHAPE_SIZE];
+  int out_shape[MAX_SHAPE_SIZE] = {0};
   size_t out_shape_size = 0;
   TileParameter *param = (TileParameter *)parameter;
 
@@ -60,7 +60,10 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   if (data_num > (int)(input->shape_size_) || input->shape_size_ > MAX_SHAPE_SIZE) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
-  multiples_size = data_num;
+  if (data_num > MAX_TILE_DIM_SIZE) {
+    return NNACL_ERR;
+  }
+  multiples_size = (size_t)(data_num);
   if (inputs[1]->data_type_ != kNumberTypeInt && inputs[1]->data_type_ != kNumberTypeInt32) {
     return NNACL_INPUT_TENSOR_ERROR;
   }
@@ -68,7 +71,7 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
   if (input1_data == NULL) {
     return NNACL_INFER_INVALID;
   }
-  for (size_t i = 0; i < data_num; i++) {
+  for (int i = 0; i < data_num; i++) {
     param->multiples_[i] = input1_data[i];
   }
 
@@ -91,6 +94,9 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o
     ShapePush(out_shape, &out_shape_size, input->shape_[i]);
   }
   for (size_t i = 0; i < dims_size; ++i) {
+    if (dims[i] >= MAX_SHAPE_SIZE || input->shape_[dims[i]] == 0) {
+      return NNACL_ERR;
+    }
     if (input->shape_[dims[i]] != 0 && param->multiples_[i] > INT_MAX / input->shape_[dims[i]]) {
       return NNACL_ERR;
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
index 36b083ca301..0f00b7280f1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c
@@ -17,8 +17,8 @@
 #include "nnacl/infer/transpose_infer.h"
 #include "nnacl/infer/infer_register.h"
 
-bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const size_t size) {
-  for (size_t i = 0; i < size; ++i) {
+bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const int size) {
+  for (int i = 0; i < size; ++i) {
     if (perm[i] != perm_transformat[i]) {
       return false;
     }
@@ -64,7 +64,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   SetDataTypeFormat(output, input);
   const TensorC *perm_tensor = inputs[1];
   const int32_t *perm_data = (int32_t *)perm_tensor->data_;
-  const size_t perms_num = (size_t)perm_tensor->shape_[0];
+  const int perms_num = perm_tensor->shape_[0];
   if (perm_tensor->shape_size_ == 0) {
     return NNACL_INFER_INVALID;
   }
@@ -73,7 +73,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   }
   int perm[MAX_TRANSPOSE_DIM_SIZE] = {0};
   size_t perm_size = 0;
-  for (size_t i = 0; i < perms_num; i++) {
+  for (int i = 0; i < perms_num; i++) {
     if (perm_data[i] >= perms_num) {
       return NNACL_ERR;
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c
index c5e9b8db1f3..57f1a0b06c1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c
@@ -37,7 +37,7 @@ int UniformRealInferShape(const TensorC *const *inputs, size_t inputs_size, Tens
     return NNACL_INPUT_TENSOR_ERROR;
   }
   int output_shape[MAX_SHAPE_SIZE];
-  size_t output_shape_size = input_num;
+  size_t output_shape_size = (size_t)(input_num);
   for (int i = 0; i < input_num; i++) {
     output_shape[i] = input_data[i];
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c
index 77a696baf1c..cee24a5e5f4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c
@@ -35,7 +35,7 @@ int UnsortedSegmentSumInferShape(const TensorC *const *inputs, size_t inputs_siz
   int output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
   ShapePush(output_shape, &output_shape_size, num_segments);
-  for (int index = segment_id->shape_size_; index < (int)(x->shape_size_); index++) {
+  for (int index = (int)(segment_id->shape_size_); index < (int)(x->shape_size_); index++) {
     if (output_shape_size >= MAX_SHAPE_SIZE) {
       return NNACL_ERR;
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c
index 0119718058e..930aff8c54e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c
@@ -33,7 +33,7 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
   }
 
   UnSqueezeParameter *param = (UnSqueezeParameter *)parameter;
-  int in_rank = input->shape_size_;
+  int in_rank = (int)(input->shape_size_);
   int dim_rank = param->num_dim_;
   int out_shape[MAX_SHAPE_SIZE] = {0};
   size_t out_shape_size = 0;
@@ -50,14 +50,17 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor
     int sz = in_rank + dim_rank;
     size_t in_itr = 0;
     size_t ax_itr = 0;
-    for (size_t i = 0; i < sz; i++) {
+    if (sz < 0) {
+      return NNACL_ERR;
+    }
+    for (int i = 0; i < sz; i++) {
       if (out_shape_size >= MAX_SHAPE_SIZE) {
         return NNACL_ERR;
       }
-      if (ax_itr < dim_rank && param->dims_[ax_itr] == (int)(i)) {
+      if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] == (int)(i)) {
         ShapePush(out_shape, &out_shape_size, 1);
         ax_itr++;
-      } else if (ax_itr < dim_rank && param->dims_[ax_itr] + sz == i) {
+      } else if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] + sz == i) {
         ShapePush(out_shape, &out_shape_size, 1);
         ax_itr++;
       } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c
index b4fd6165ed5..33eb635e278 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c
@@ -26,8 +26,8 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
 
   const TensorC *input = inputs[0];
   UnstackParameter *param = (UnstackParameter *)parameter;
-  int axis = param->axis_ < 0 ? param->axis_ + input->shape_size_ : param->axis_;
-  if (axis < 0 || axis >= input->shape_size_) {
+  int axis = param->axis_ < 0 ? param->axis_ + (int)(input->shape_size_) : param->axis_;
+  if (axis < 0 || axis >= (int)(input->shape_size_)) {
     return NNACL_PARAM_INVALID;
   }
   for (size_t i = 0; i < outputs_size; i++) {
@@ -40,7 +40,7 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC
   int output_shape[MAX_SHAPE_SIZE] = {0};
   size_t output_shape_size = 0;
   for (size_t i = 0; i < input->shape_size_; ++i) {
-    if (i != axis) {
+    if (i != (size_t)(axis)) {
       if (output_shape_size >= MAX_SHAPE_SIZE) {
         return NNACL_ERR;
       }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c
index b20e1e07e01..4c05f58bced 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c
@@ -38,14 +38,14 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
     return NNACL_INPUT_TENSOR_ERROR;
   }
 
-  SetDataTypeFormat(output, input);
+  const TensorC *input0 = inputs[0];
+  const TensorC *input1 = inputs[1];
+  const TensorC *input2 = inputs[2];
+  SetDataTypeFormat(output, input1);
   if (!InferFlag(inputs, inputs_size)) {
     return NNACL_INFER_INVALID;
   }
 
-  const TensorC *input0 = inputs[0];
-  const TensorC *input1 = inputs[1];
-  const TensorC *input2 = inputs[2];
   int num = GetElementNum(input0);
   int num1 = GetElementNum(input1);
   int num2 = GetElementNum(input2);
@@ -53,6 +53,9 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **
   int axisout = 0;
   size_t temp = 0;
   for (size_t j = 0; j < input0->shape_size_; j++) {
+    if (j >= MAX_SHAPE_SIZE) {
+      return NNACL_ERR;
+    }
     if (input0->shape_[j] == input1->shape_[j] && input0->shape_[j] != input2->shape_[j]) {
       axisout = j;
       break;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c
index 3b03088b3a5..6314b6b0d6f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c
@@ -39,8 +39,8 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output, const ArgMinMaxParame
   float bias = -in_quant_arg->zp_ * in_quant_arg->scale_;
   int32_t output_zp = out_quant_arg->zp_;
   for (int i = 0; i < pre_axis_count; ++i) {
-    size_t output_offset = i * after_axis_count;
-    size_t input_offset = output_offset * axis_count;
+    int output_offset = i * after_axis_count;
+    int input_offset = output_offset * axis_count;
     for (int j = 0; j < after_axis_count; ++j) {
       float value = -FLT_MAX;
       if (!param->get_max_) {
@@ -97,8 +97,8 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
   int32_t output_zp = out_quant_arg->zp_;
   for (int32_t i = 0; i < param->in_strides_[0]; ++i) {
     for (int j = 0; j < in_shape[0]; ++j) {
-      size_t offset = param->in_strides_[0] * j + i;
-      param->arg_elements_[j].index_ = j;
+      int offset = param->in_strides_[0] * j + i;
+      param->arg_elements_[j].index_ = (uint32_t)j;
       param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
     }
     if (param->get_max_) {
@@ -108,7 +108,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape,
     }
 
     for (int j = 0; j < param->topk_; ++j) {
-      size_t out_offset = j * param->out_strides_[0] + i;
+      int out_offset = j * param->out_strides_[0] + i;
       float real_out = out_value ? param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_;
       output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
     }
@@ -123,12 +123,12 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
   int32_t output_zp = out_quant_arg->zp_;
   int in_shape1 = in_shape[1];
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < param->in_strides_[1]; ++j) {
       for (int k = 0; k < in_shape1; ++k) {
-        size_t offset = param->in_strides_[1] * k + in_dim0_offset + j;
-        param->arg_elements_[k].index_ = k;
+        int offset = param->in_strides_[1] * k + in_dim0_offset + j;
+        param->arg_elements_[k].index_ = (size_t)k;
         param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
       }
       if (param->get_max_) {
@@ -138,7 +138,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape,
       }
 
       for (int k = 0; k < param->topk_; ++k) {
-        size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1];
+        int out_offset = out_dim0_offset + j + k * param->out_strides_[1];
         float real_out = out_value ? param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_;
         output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
       }
@@ -155,15 +155,15 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
   int in_shape1 = in_shape[1];
   int in_shape2 = in_shape[2];
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
       for (int k = 0; k < param->in_strides_[2]; ++k) {
         for (int l = 0; l < in_shape2; ++l) {
-          size_t offset = param->in_strides_[2] * l + k + in_dim1_offset;
-          param->arg_elements_[l].index_ = l;
+          int offset = param->in_strides_[2] * l + k + in_dim1_offset;
+          param->arg_elements_[l].index_ = (uint32_t)l;
           param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
         }
         if (param->get_max_) {
@@ -172,7 +172,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape,
           qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8);
         }
         for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2];
+          int out_offset = out_dim1_offset + k + l * param->out_strides_[2];
           float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
           output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
         }
@@ -191,17 +191,17 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
   int in_shape2 = in_shape[2];
   int in_shape3 = in_shape[3];
   for (int i = 0; i < in_shape[0]; ++i) {
-    size_t in_dim0_offset = i * param->in_strides_[0];
-    size_t out_dim0_offset = i * param->out_strides_[0];
+    int in_dim0_offset = i * param->in_strides_[0];
+    int out_dim0_offset = i * param->out_strides_[0];
     for (int j = 0; j < in_shape1; ++j) {
-      size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
-      size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
+      int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset;
+      int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset;
       for (int k = 0; k < in_shape2; ++k) {
-        size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
-        size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
+        int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset;
+        int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset;
         for (int l = 0; l < in_shape3; ++l) {
-          size_t offset = l + in_dim2_offset;
-          param->arg_elements_[l].index_ = l;
+          int offset = l + in_dim2_offset;
+          param->arg_elements_[l].index_ = (uint32_t)l;
           param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias;
         }
         if (param->get_max_) {
@@ -210,7 +210,7 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape,
           qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8);
         }
         for (int l = 0; l < param->topk_; ++l) {
-          size_t out_offset = out_dim2_offset + l;
+          int out_offset = out_dim2_offset + l;
           float real_out = out_value ? param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_;
           output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp);
         }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c
index 181dc9815f3..982d7c4da8a 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c
@@ -218,7 +218,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec,
 void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para,
                     int *index) {
   int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_);
-  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_);
+  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_);
 
   for (; (*index) <= element_size - 8; (*index) += 8) {
     int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c
index e3d6840d2b3..018346effb5 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c
@@ -18,7 +18,8 @@
 
 void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum,
                     const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift,
-                    int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) {
+                    int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func,
+                    const int *filter_zp) {
   int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1;
   matmul_func(packed_input, packed_weight, dst, row, col, deep4, conv_param->output_channel_, input_sum, bias,
               left_shift, right_shift, multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
@@ -29,7 +30,7 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int
 
 void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum,
                  const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift,
-                 int32_t *multiplier, ConvParameter *conv_param, int32_t *filter_zp) {
+                 int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp) {
   int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1;
   MatmulInt8Opt(packed_input, packed_weight, dst, row, col, deep16, input_sum, bias,
                 conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0],
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h
index f8339b54198..6cc8d0d22e4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h
@@ -33,10 +33,11 @@ extern "C" {
 
 void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum,
                  const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift,
-                 int32_t *multiplier, ConvParameter *conv_param, int32_t *filter_zp);
+                 int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp);
 void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum,
                     const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift,
-                    int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int32_t *filter_zp);
+                    int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func,
+                    const int32_t *filter_zp);
 
 #ifdef __cplusplus
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c
index b2f3da19cd1..6ad20cade63 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c
@@ -812,11 +812,11 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
       for (int j = real_y_start; j < real_y_end; j++) {
         const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start);
         int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start);
-        memcpy(dst, src, (real_x_end - real_x_start) * C8NUM * sizeof(int16_t));
+        memcpy(dst, src, (size_t)(real_x_end - real_x_start) * C8NUM * sizeof(int16_t));
       }
       // input transform
       int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM;
-      size_t dst_step = ic8 * C8NUM * TILE_NUM;
+      size_t dst_step = (size_t)ic8 * C8NUM * TILE_NUM;
       int16_t *trans_input_ptr = trans_input + dst_ic8_offset;
       Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp);
     }
@@ -826,7 +826,7 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input,
 void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) {
   int oc4 = UP_DIV(oc, C4NUM);
 #ifdef ENABLE_ARM
-  IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t));
+  IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, (size_t)oc4 * 4 * 16 * sizeof(int32_t));
 #else
   const int input_unit_square = 16;
   for (int c = 0; c < oc4; c++) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c
index 6b679514ed2..cfa160e0ac5 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c
@@ -20,9 +20,9 @@
 int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel,
                      const ConvParameter *conv_param) {
   /* row4x4-major(ih*iw x oc*kh*kw)  ->  row4-major(oh*ow x oc) */
-  size_t input_plane = conv_param->input_w_ * conv_param->input_h_;
-  size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
-  size_t output_plane = conv_param->output_w_ * conv_param->output_h_;
+  int input_plane = conv_param->input_w_ * conv_param->input_h_;
+  int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
+  int output_plane = conv_param->output_w_ * conv_param->output_h_;
   int oc4 = UP_DIV(output_channel, C4NUM);
   int in_plane4 = UP_ROUND(input_plane, C4NUM);
 
@@ -38,7 +38,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
   for (int c = 0; c < oc4; c++) {
     int32_t *dst_ptr = tmp + c * output_plane * C4NUM;
     const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM;
-    memset(dst_ptr, 0, output_plane * C4NUM * sizeof(int32_t));
+    memset(dst_ptr, 0, (size_t)output_plane * C4NUM * sizeof(int32_t));
 
     for (int ih = 0; ih < conv_param->input_h_; ih++) {
       for (int iw = 0; iw < conv_param->input_w_; iw++) {
@@ -81,7 +81,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8
     }       /*ih*/
   }         /*oc*/
 
-  PostFuncInt8C4(tmp, bias, out, output_channel, output_plane, conv_param->output_channel_,
+  PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_,
                  conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0],
                  conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_,
                  conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c
index 7635dfef316..ea6138ff8fa 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c
@@ -71,7 +71,7 @@ int MultiplyByMultiplierAndRightShift(int32_t value, int32_t multiplier, int32_t
   return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(value, multiplier), right_shift);
 }
 
-int FractionsBits(int integer_bits) { return 8 * sizeof(int32_t) - 1 - integer_bits; }
+int FractionsBits(int integer_bits) { return 8 * (int)(sizeof(int32_t)) - 1 - integer_bits; }
 
 int FixedPoint_One(int integer_bits, int fractions_bits) {
   return (integer_bits == 0 ? INT32_MAX : ((1) << (uint32_t)(integer_bits == 0 ? 0 : fractions_bits)));
@@ -129,7 +129,7 @@ int SaturatingRoundingMultiplyByPOT(int32_t x, int exponent) {
   if (exponent > 0) {
     const int min = INT32_MIN;
     const int max = INT32_MAX;
-    const int scalar_int_bits = 8 * sizeof(int32_t);
+    const int scalar_int_bits = 8 * (int)(sizeof(int32_t));
     const int threshold = ((1 << (uint32_t)(scalar_int_bits - 1 - exponent)) - 1);
     const int positive_mask = x > threshold ? BitNot(0) : 0;
     const int negative_mask = x < -threshold ? BitNot(0) : 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c
index 21d8909195e..3bd9bc88f39 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c
@@ -39,7 +39,7 @@ int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg)
     if (arg->relu6_multiplier_exponent < 0) {
       relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent);
     }
-    relu6_value = (relu6_value + (1 << 15)) >> 1;
+    relu6_value = (size_t)(relu6_value + (1 << 15)) >> 1;
     const int16_t preshift_output_value =
       SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale);
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c
index 3b9a893707d..ac2c3b04d13 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c
@@ -104,7 +104,7 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
 
   for (int ri = 0; ri < row_4div; ri += C4NUM) {
     for (int ci = 0; ci < col_16div; ci += C16NUM) {
-      size_t col_offset = col;
+      size_t col_offset = (size_t)col;
       int8_t *src_c = src_r + ci;
       int8_t *dst_c = dst_r + ci * C4NUM;
 #ifdef ENABLE_ARM64
@@ -207,7 +207,7 @@ void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
       int c2div = c / C2NUM, c2mod = c % C2NUM;
       size_t ci = r * stride + c;
       int32_t value = 0;
-      for (int d = 0; d < deep_16; d++) {
+      for (int d = 0; d < (int)deep_16; d++) {
         int d16div = d / C16NUM, d16mod = d % C16NUM;
         size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod;
         size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod;
@@ -269,9 +269,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
 #endif
 
 void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                      size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                      int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                      size_t per_channel) {
+                      size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                      const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                      int32_t maxi, size_t per_channel) {
   /*  row8x4-major * row4x8-major => (int8)row-major  */
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
@@ -279,7 +279,7 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
       int c8div = c / C8NUM, c8mod = c % C8NUM;
       size_t ci = r * stride + c;
       int32_t value = 0;
-      for (int d = 0; d < deep_4; d++) {
+      for (int d = 0; d < (int)deep_4; d++) {
         int d4div = d / C4NUM, d4mod = d % C4NUM;
         size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod;
         size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod;
@@ -302,9 +302,9 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row,
 }
 
 void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                       size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                       int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                       size_t per_channel, int32_t *filter_zp) {
+                       size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                       const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                       int32_t maxi, size_t per_channel, const int32_t *filter_zp) {
   /*  row4x4-major * row4x16-major => (int8)row-major  */
   for (int r = 0; r < row; r++) {
     for (int c = 0; c < col; c++) {
@@ -312,7 +312,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row
       int c16div = c / C16NUM, c16mod = c % C16NUM;
       size_t ci = r * stride + c;
       int32_t value = 0;
-      for (int d = 0; d < deep_4; d++) {
+      for (int d = 0; d < (int)deep_4; d++) {
         int d4div = d / C4NUM, d4mod = d % C4NUM;
         size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod;
         size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod;
@@ -453,7 +453,7 @@ void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input,
 #else
     int32_t tmp_sum_value[4] = {0};
     for (int ici = 0; ici < ic_4div; ici += C4NUM) {
-      for (int i = 0; i < C4NUM; i++) {
+      for (size_t i = 0; i < C4NUM; i++) {
         tmp_sum_value[i] += src_ic[0 + i * input_channel];
         tmp_sum_value[i] += src_ic[1 + i * input_channel];
         tmp_sum_value[i] += src_ic[2 + i * input_channel];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h
index f8fa9a85d72..87424e20098 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h
@@ -42,9 +42,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c
 /* optimize conv */
 void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col);
 void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                      size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                      int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                      size_t per_channel);
+                      size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                      const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                      int32_t maxi, size_t per_channel);
 
 /* 4x16 16x2 -> 4x2 */
 /* arm32 conv1x1 */
@@ -61,9 +61,9 @@ void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row,
 void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum,
                                  size_t input_channel, size_t plane_size, int32_t filter_zp);
 void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                       size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                       int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi,
-                       size_t per_channel, int32_t *filter_zp);
+                       size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift,
+                       const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini,
+                       int32_t maxi, size_t per_channel, const int32_t *filter_zp);
 
 #ifdef ENABLE_ARM64
 void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c
index fbda674d0cb..4ef53e8db1b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c
@@ -27,10 +27,10 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i
   return vqmovn_s32(raw_sum);
 }
 
-void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
-                 MulQuantArg *quant_arg, int *index) {
+void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
+                 const MulQuantArg *quant_arg, int *index) {
   int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_);
-  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_);
+  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)quant_arg->shift_left_);
   int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_);
   int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_);
   int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_);
@@ -104,8 +104,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data,
 }
 #endif
 
-void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count,
-             bool input1_broad, MulQuantArg *quant_arg) {
+void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
+             int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg) {
   // input0 need broadcast
   int32_t zp1 = quant_arg->in_quant_args_[0].zp_;
   int32_t zp2 = quant_arg->in_quant_args_[1].zp_;
@@ -215,8 +215,8 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int
   return;
 }
 
-void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
-         MulQuantArg *quant_arg) {
+void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
+         const MulQuantArg *quant_arg) {
   int index = 0;
 #ifdef ENABLE_NEON
   MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h
index f19d8e40f84..a02363a1e67 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h
@@ -28,9 +28,10 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg);
-void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count,
-             bool input1_broad, MulQuantArg *quant_arg);
+void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
+         const MulQuantArg *quant_arg);
+void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth,
+             int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg);
 #ifdef __cplusplus
 }
 #endif
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c
index cd5ffe72ce2..ea1b4c45731 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c
@@ -849,7 +849,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara
   }
 }
 
-void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) {
+void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data,
+                        const ConvParameter *conv_param) {
   // origin weight format : ohwi
   int input_channel = conv_param->input_channel_;
   int ic8 = input_channel / C8NUM * C8NUM;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h
index e63127b066d..ecac124e689 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h
@@ -40,7 +40,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t
                           const ConvParameter *conv_param);
 void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16);
 void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param);
-void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param);
+void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, const ConvParameter *conv_param);
 void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num,
                            int block_index, const int32_t *filter_zp, int32_t *input_sum,
                            const ConvParameter *conv_param, bool per_channel, bool is_optimize);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c
index 10f648882a7..e7c0c0eaad6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c
@@ -24,9 +24,9 @@ int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dim
   for (int n = 0; n < in_dims[0]; n++) {
     for (int h = tid; h < in_dims[1]; h += thread_num) {
       for (int w = 0; w < in_dims[2]; w++) {
-        const int8_t *in = in_data + offset(in_dims, n, h, w, 0);
-        int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
-        memcpy(out, in, copy_size * sizeof(int8_t));
+        const int8_t *in = in_data + Offset(in_dims, n, h, w, 0);
+        int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]);
+        memcpy(out, in, (size_t)copy_size * sizeof(int8_t));
       }
     }
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c
index 0ec6fc72f52..6c0620a6350 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c
@@ -112,7 +112,7 @@ int UInt8ToInt8(const uint8_t *real_values, int8_t *quant_values, int size) {
   }
 
   for (int i = 0; i < size; ++i) {
-    int temp = real_values[i] - 128;
+    int temp = (int)real_values[i] - 128;
     if (temp > 127) {
       quant_values[i] = 127;
     } else if (temp < -128) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c
index 31dd3e92b1d..1e7cb91c2a9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c
@@ -173,8 +173,8 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat
       for (x = 0; x < output_shape[2]; x++) {
         int input_x = 0;
         ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
-        int in_offset = offset(input_shape, batch, input_y, input_x, 0);
-        int out_offset = offset(output_shape, batch, y, x, 0);
+        int in_offset = Offset(input_shape, batch, input_y, input_x, 0);
+        int out_offset = Offset(output_shape, batch, y, x, 0);
         memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(int8_t));
       }
     }
@@ -214,8 +214,8 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con
         int input_x = 0;
         ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x);
         for (c = 0; c < output_shape[3]; c++) {
-          int in_offset = offset(input_shape, batch, input_y, input_x, c);
-          int out_offset = offset(output_shape, batch, y, x, c);
+          int in_offset = Offset(input_shape, batch, input_y, input_x, c);
+          int out_offset = Offset(output_shape, batch, y, x, c);
 
           int32_t out_value = MultiplyByQuantizedMultiplier(
                                 input_data[in_offset] - quant_in->zp_, multiplier->multiplier_,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c
index bb33c643f17..e007e6a7754 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c
@@ -34,8 +34,8 @@ int16x4_t ClacSumHalfWordMul3(int32x4_t scaled_input0, int32x4_t scaled_input1,
                               const ScaleParameter *scale_param) {
   int32x4_t output_multiplier_vec = vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_);
   int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_);
-  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << scale_param->scale_mul_arg_.left_shift_);
-  int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << scale_param->offset_mul_arg_.left_shift_);
+  int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)(scale_param->scale_mul_arg_.left_shift_));
+  int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << (size_t)(scale_param->offset_mul_arg_.left_shift_));
   int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1);
   int32x4_t raw_sum = RoundingDivideByPOTInt32x4(
     SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec),
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c
index ace1417b287..64a62152168 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c
@@ -24,7 +24,7 @@
 #ifdef ENABLE_NEON
 
 int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec,
-                            int32x4_t output_multiplier_vec, SubQuantArg *para) {
+                            int32x4_t output_multiplier_vec, const SubQuantArg *para) {
   int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1);
 
   raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec),
@@ -35,14 +35,14 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in
   return vqmovn_s32(raw_data);
 }
 
-void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
-                 SubQuantArg *para, int *index) {
+void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count,
+                 const SubQuantArg *para, int *index) {
   int32x4_t left_shift_result0_vec = vdupq_n_s32(para->left_shift_result0_);
   int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_);
   int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_);
   int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_);
   int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_);
-  int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_));
+  int32x4_t left_shift_out_vec = vdupq_n_s32((1 << (size_t)para->left_shift_out_));
   int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_);
   int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_);
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c
index bf3fd14d2c9..873c11857cf 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c
@@ -226,16 +226,16 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu
   const int *strides = transpose_param->strides_;
   const int *out_strides = transpose_param->out_strides_;
   int num_axes = transpose_param->num_axes_;
-  size_t data_size = (*out_strides) * output_shape[0];
+  size_t data_size = (size_t)((*out_strides) * output_shape[0]);
   size_t offset_size = UP_DIV(data_size, thread_num);
   size_t task_offset = offset_size * task_id;
-  int count = data_size - task_offset;
-  if (count <= 0) {
+  size_t count = data_size - task_offset;
+  if (data_size < task_offset) {
     return;
   }
   count = MSMIN(offset_size, count);
   for (size_t idx = task_offset; idx < task_offset + count; ++idx) {
-    int pos = idx;
+    int pos = (int)idx;
     int output_idx = 0;
     int input_idx = 0;
     for (int i = 0; i < num_axes; ++i) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c
index a46a3dfe864..e5f8c0aab59 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c
@@ -24,7 +24,7 @@ int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParamete
   float input_scale = para_->quant_arg.in_quant_args_.scale_;
   int8_t input_zp = para_->quant_arg.in_quant_args_.zp_;
 
-  for (int i = task_id; i < data_size; i += para_->thread_count_) {
+  for (int i = task_id; i < (int)data_size; i += para_->thread_count_) {
     output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp));
   }
   return 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h
index a513f4608b6..d11feea2207 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h
@@ -23,14 +23,15 @@ typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, i
                                    const int *input_sum, const int *bias);
 
 typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                  int32_t maxi, size_t per_channel);
+                                  size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                  const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                  int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
 
 typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                   int32_t maxi, size_t per_channel, int *filter_zp);
+                                   size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                   const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                   int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
+                                   const int *filter_zp);
 
 typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType;
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
index 1891dd7ff00..52241ba13c8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h
@@ -61,6 +61,8 @@
 #define DIMENSION_6D 6
 #define DIMENSION_7D 7
 #define DIMENSION_8D 8
+#define DIMENSION_10D 10
+#define DIMENSION_11D 11
 #define kInputIndex 0
 #define kWeightIndex 1
 #define kBiasIndex 2
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
index a6f2a1b5e41..c741599512c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h
@@ -18,8 +18,8 @@
 
 #include "nnacl/op_base.h"
 
-#define MAX_PAD_SIZE 8
-#define DEFAULT_PAD_NDIMS 4
+#define MAX_PAD_SIZE 12
+#define DEFAULT_PAD_NDIMS 6
 
 typedef struct PadQuantArg {
   QuantArg *in_quant_args_;
@@ -30,13 +30,13 @@ typedef struct PadQuantArg {
 typedef struct PadParameter {
   // Primitive parameter
   OpParameter op_parameter_;
-  int paddings_[MAX_SHAPE_SIZE];
+  int paddings_[MAX_PAD_SIZE];
   int pad_mode_;
   float constant_value_;
   // shape correlative
   int padding_length;
   // other parameter
-  int in_strides[COMM_SHAPE_SIZE];
+  int in_strides[DEFAULT_PAD_NDIMS];
   int out_strides[DEFAULT_PAD_NDIMS];
   int mirror_offset_;
   PadQuantArg pad_quant_arg_;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc
index e2074c70a4a..b3f879dde5e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc
@@ -20,6 +20,11 @@ namespace mindspore {
 namespace kernel {
 namespace ps {
 void PServerKernel::Shard(std::vector<size_t> *shape, int axis) {
+  MS_EXCEPTION_IF_NULL(shape);
+  if ((*shape).size() <= IntToSize(axis)) {
+    MS_LOG(EXCEPTION) << "Shape size is invalid.";
+    return;
+  }
   (*shape)[IntToSize(axis)] =
     LongToSize(Util::LocalShard(SizeToLong((*shape)[IntToSize(axis)]), SizeToLong(rank_id_), SizeToLong(pserver_num_)));
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc
index c790b4d926c..0d81da83a2e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc
@@ -135,8 +135,7 @@ void ScalarToRawMemory(const py::object &obj, const TypePtr &type, const Address
 void ArrayToRawMemory(const py::array &array, const AddressPtr &address) {
   if (static_cast<unsigned int>(array.flags()) & pybind11::detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_) {
     const py::buffer_info &buf_info = array.request();
-    CHECK_RET_WITH_EXCEPT(memcpy_s(address->addr, address->size, buf_info.ptr, buf_info.size * buf_info.itemsize), EOK,
-                          "memcpy failed.");
+    CHECK_RET_WITH_EXCEPT(memcpy_s(address->addr, address->size, buf_info.ptr, buf_info.size), EOK, "memcpy failed.");
   } else {
     // Transform numpy array to row major buffer.
     Py_buffer pybuf;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc
index 5ba93e43fcb..2a987be45d2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc
@@ -39,7 +39,7 @@ void SearchSortedCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) {
 template <typename S, typename T>
 const S *SearchSortedCPUKernel<S, T>::CustomizedLowerBound(const S *seq_start, const S *seq_end, const S key) {
   while (seq_start < seq_end) {
-    const S *mid = seq_start + ((seq_end - seq_start) >> 1);
+    const S *mid = seq_start + ((seq_end - seq_start) / 2);
     if (!(key <= *mid)) {
       seq_start = mid + 1;
     } else {
@@ -61,11 +61,12 @@ bool SearchSortedCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> &
   size_t seq_dim = sequence_shape_.size();
   size_t search_repeat = values_shape_.back();
 
-  auto task = [&](size_t start, size_t end) {
+  auto task = [this, &sequence, &values, &output, seq_dim, search_repeat](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       auto seq_start = (seq_dim == 1) ? sequence : sequence + (i / search_repeat) * search_len;
-      output[i] = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start
-                         : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start;
+      auto result = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start
+                           : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start;
+      output[i] = static_cast<T>(result);
     }
   };
   CPUKernelUtils::ParallelFor(task, elem_num);
@@ -92,8 +93,8 @@ void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inpu
   }
 
   auto sequence = reinterpret_cast<S *>(inputs[0]->addr);
-  size_t list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<int>());
-  auto task = [&](size_t start, size_t end) {
+  int list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies<int>());
+  auto task = [this, &sequence](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
       for (size_t j = 0; j < search_len - 1; j++) {
         if (sequence[i * search_len + j] > sequence[i * search_len + j + 1]) {
@@ -104,6 +105,5 @@ void SearchSortedCPUKernel<S, T>::CheckParam(const std::vector<AddressPtr> &inpu
   };
   CPUKernelUtils::ParallelFor(task, list_count);
 }
-
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
index 87cea83a5ef..9333e72dc96 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h
@@ -42,7 +42,7 @@ class SearchSortedCPUKernel : public CPUKernel {
   std::vector<size_t> sequence_shape_;
   std::vector<size_t> values_shape_;
   std::vector<size_t> output_shape_;
-  size_t search_len;
+  size_t search_len{0};
 };
 
 MS_REG_CPU_KERNEL_T_S(
@@ -104,8 +104,6 @@ MS_REG_CPU_KERNEL_T_S(
   SearchSorted,
   KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt64),
   SearchSortedCPUKernel, int8_t, int64_t);
-
 }  // namespace kernel
 }  // namespace mindspore
-
 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SEARCHSORTED_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
index 40814707d1d..32606a9a4e7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc
@@ -35,12 +35,12 @@ void SGDCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
 
 template <typename T>
 void SGDCPUKernel<T>::CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
-  // inputs: params, grad, lr, accum, momentum, stat
+  // inputs: param, grad, lr, accum, momentum, stat
   if (inputs.size() != kInputSize) {
     MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs.";
   }
 
-  // output: param
+  // output: output_param
   if (outputs.size() != kOutputSize) {
     MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs.";
   }
@@ -60,18 +60,20 @@ bool SGDCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::v
   auto output_param = reinterpret_cast<T *>(outputs[0]->addr);
   size_t elem_num = inputs[0]->size / sizeof(T);
 
-  auto task = [&](size_t start, size_t end) {
+  auto task = [this, &param, &grad, &lr, &accum, &momentum, &stat, &output_param](size_t start, size_t end) {
+    T ZERO = static_cast<T>(0);
+    T ONE = static_cast<T>(1);
     for (size_t i = start; i < end; i++) {
       T grad_new = grad[i];
-      if (weight_decay_ > 0) {
+      if (weight_decay_ > static_cast<float>(0.0)) {
         grad_new += param[i] * static_cast<T>(weight_decay_);
       }
-      if (momentum[0] > static_cast<T>(0)) {
-        if (stat[i] > static_cast<T>(0)) {
+      if (momentum[0] > ZERO) {
+        if (stat[i] > ZERO) {
           accum[i] = grad_new;
-          stat[i] = static_cast<T>(0);
+          stat[i] = ZERO;
         } else {
-          accum[i] = accum[i] * momentum[0] + static_cast<T>(1.0 - dampening_) * grad_new;
+          accum[i] = accum[i] * momentum[0] + (ONE - static_cast<T>(dampening_)) * grad_new;
         }
         if (nesterov_) {
           grad_new += accum[i] * momentum[0];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
index 93f25d1b657..95fb461f440 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h
@@ -36,8 +36,8 @@ class SGDCPUKernel : public CPUKernel {
 
  private:
   static void CheckParam(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
-  float dampening_;
-  float weight_decay_;
+  float dampening_{0.0};
+  float weight_decay_{0.0};
   bool nesterov_{true};
 };
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
index 837ddeca56c..10b605d4ad0 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc
@@ -24,12 +24,12 @@ template <typename T>
 void SortCpuKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node);
   if (input_count != 1) {
-    MS_LOG(EXCEPTION) << input_count << " inputs were provided, but SortCpuKernel expects 1.";
+    MS_LOG(EXCEPTION) << input_count << " inputs were provided, but Sort expects 1.";
   }
 
   size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node);
   if (output_count != 2) {
-    MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for SortCpuKernel.";
+    MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for Sort.";
   }
 
   auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -64,7 +64,7 @@ template <typename T>
 bool SortCpuKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                               const std::vector<AddressPtr> &outputs) {
   if (inputs.size() != 1 || outputs.size() != 2) {
-    MS_LOG(EXCEPTION) << "TopK needs 1 input and 2 outputs, but get inputs: " << inputs.size()
+    MS_LOG(EXCEPTION) << "Sort needs 1 input and 2 outputs, but get inputs: " << inputs.size()
                       << "outputs: " << outputs.size();
   }
   if (inputs[0]->size != outer_size_ * axis_size_ * inner_size_ * sizeof(T)) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
index 8f1dc225320..338ff4b405c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc
@@ -64,7 +64,8 @@ void SplitCPUKernel<T>::LaunchSplit(T *input, T **output, size_t size) {
     param.split_count_ *= input_shape_[i];
   }
   auto task = [&](size_t start, size_t end) {
-    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], start, end - start, &param, sizeof(T));
+    (void)DoSplit(input, reinterpret_cast<void **>(output), &input_shape_[0], SizeToInt(start), SizeToInt(end - start),
+                  &param, SizeToInt(sizeof(T)));
   };
   CPUKernelUtils::ParallelForAutoSearch(task, param.split_count_ * param.num_split_, &parallel_search_info_);
   return;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
index 4dba82b928b..a142c9ab695 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc
@@ -46,8 +46,8 @@ void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) {
   transpose_param_.strides_[num_axes - 1] = 1;
   transpose_param_.out_strides_[num_axes - 1] = 1;
   for (int i = num_axes - 2; i >= 0; i--) {
-    transpose_param_.strides_[i] = input_shape_[i + 1] * transpose_param_.strides_[i + 1];
-    transpose_param_.out_strides_[i] = output_shape_[i + 1] * transpose_param_.out_strides_[i + 1];
+    transpose_param_.strides_[i] = SizeToInt(input_shape_[i + 1]) * transpose_param_.strides_[i + 1];
+    transpose_param_.out_strides_[i] = SizeToInt(output_shape_[i + 1]) * transpose_param_.out_strides_[i + 1];
   }
   launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel<int8_t>;
   launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel<int16_t>;
@@ -87,7 +87,7 @@ void TransposeCPUFwdKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   }
   size_t data_count = (inputs[0]->size) / sizeof(T);
   if (axes_.size() <= DIMENSION_6D && data_count < MAX_TRANSPOSE_SERIAL_SIZE) {
-    int res = NNACL_ERR;
+    int res = static_cast<int>(NNACL_ERR);
     if constexpr (std::is_same_v<T, int8_t>) {
       res = DoTransposeInt8(input_addr, output_addr, output_shape, &transpose_param_);
     } else if constexpr (std::is_same_v<T, int16_t>) {
@@ -121,7 +121,7 @@ template <typename T>
 void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shape, size_t count) {
   auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum();
   const float block_size = 128.0;
-  size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num;
+  size_t thread_num = count < block_size * max_thread_num ? FloatToSize(std::ceil(count / block_size)) : max_thread_num;
   std::vector<common::Task> tasks;
   std::function<void(const T *, T *, const int *, TransposeParameter *, int, int)> TransposeDims;
 
@@ -147,13 +147,13 @@ void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, con
     TransposeDims = &TransposeDimsBool;
   }
   for (int task_id = 0; task_id < SizeToInt(thread_num); ++task_id) {
-    auto task = [&, task_id, thread_num]() {
+    auto task = [this, &TransposeDims, &input_addr, &output_addr, &output_shape, task_id, thread_num]() {
       TransposeDims(input_addr, output_addr, output_shape, &transpose_param_, task_id, SizeToInt(thread_num));
       return common::SUCCESS;
     };
-    tasks.emplace_back(task);
+    (void)tasks.emplace_back(task);
   }
-  common::ThreadPool::GetInstance().SyncRun(tasks);
+  (void)common::ThreadPool::GetInstance().SyncRun(tasks);
 }
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
index 39c113c26dd..d8ce599babc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc
@@ -29,18 +29,18 @@ void UnpackCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
   }
   output_num_ = LongToSize(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "num"));
   unstack_param_.num_ = SizeToInt(output_num_);
-  unstack_param_.axis_ = LongToSize(axis_tmp);
+  unstack_param_.axis_ = LongToInt(axis_tmp);
   unstack_param_.pre_dims_ = 1;
   unstack_param_.axis_dim_ = 1;
   unstack_param_.after_dims_ = 1;
 
   for (size_t i = 0; i < input_shape.size(); i++) {
-    if (static_cast<int>(i) < unstack_param_.axis_) {
-      unstack_param_.pre_dims_ *= input_shape[i];
-    } else if (static_cast<int>(i) > unstack_param_.axis_) {
-      unstack_param_.after_dims_ *= input_shape[i];
+    if (i < IntToSize(unstack_param_.axis_)) {
+      unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]);
+    } else if (i > IntToSize(unstack_param_.axis_)) {
+      unstack_param_.after_dims_ *= SizeToInt(input_shape[i]);
     } else {
-      unstack_param_.axis_dim_ = input_shape[i];
+      unstack_param_.axis_dim_ = SizeToInt(input_shape[i]);
     }
   }
   dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
index f38e1cace11..496c59e1392 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
@@ -49,7 +49,7 @@ class ArrayReduceGpuKernel : public GpuKernel {
     }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
-    T *workspace_addr = GetDeviceAddress<T>(workspace, 0);
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     T alpha = static_cast<T>(1.0f);
     T beta = static_cast<T>(0.0f);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h
index 78dc29941e5..d91c0514091 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,10 +36,18 @@ class CastGpuKernel : public GpuKernel {
 
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-    S *input_addr = GetDeviceAddress<S>(inputs, 0);
-    T *output_addr = GetDeviceAddress<T>(outputs, 0);
+    S *input_addr = GetPossiblyNullDeviceAddress<S>(inputs, 0);
+    T *output_addr = GetPossiblyNullDeviceAddress<T>(outputs, 0);
+
+    if (input_addr == nullptr && output_addr == nullptr) {
+      return true;
+    } else if (input_addr != nullptr && output_addr != nullptr) {
+      Cast(input_size_, input_addr, output_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
+    } else {
+      MS_LOG(EXCEPTION)
+        << "The input and output device addresses for CastGpuKernel should be both null or both not null.";
+    }
 
-    Cast(input_size_, input_addr, output_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
     return true;
   }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
index e261fcdfa00..0331cd85a20 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h
@@ -43,11 +43,20 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
 
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
+    if (input_num_ == 0) {
+      return true;
+    }
+
     T *output = GetDeviceAddress<T>(outputs, 0);
     T **inputs_device = GetDeviceAddress<T *>(workspace, 0);
     int *len_axis_device = GetDeviceAddress<int>(workspace, 1);
+    int current_dim = 0;
     for (size_t i = 0; i < inputs.size(); i++) {
-      inputs_host_[i] = GetDeviceAddress<T>(inputs, i);
+      T *input = GetPossiblyNullDeviceAddress<T>(inputs, i);
+      if (input != nullptr) {
+        inputs_host_[current_dim] = input;
+        current_dim++;
+      }
     }
     CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
                                cudaMemcpyAsync(inputs_device, inputs_host_.get(), sizeof(T *) * input_num_,
@@ -83,14 +92,21 @@ class ConcatV2GpuFwdKernel : public GpuKernel {
     input_num_ = SizeToInt(AnfAlgo::GetInputTensorNum(kernel_node));
     inputs_host_ = std::make_unique<T *[]>(input_num_);
     len_axis_ = std::make_unique<int[]>(input_num_);
+    int current_dim = 0;
     for (int i = 0; i < input_num_; i++) {
       size_t input_size = 1;
       auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i);
       for (size_t j = 0; j < input_shape.size(); j++) {
         input_size *= input_shape[j];
       }
-      input_size_list_.push_back(input_size * sizeof(T));
-      len_axis_[i] = SizeToInt(input_shape[axis_]);
+
+      if (input_size == 0) {
+        input_num_--;
+      } else {
+        input_size_list_.push_back(input_size * sizeof(T));
+        len_axis_[current_dim] = SizeToInt(input_shape[axis_]);
+        current_dim++;
+      }
     }
     workspace_size_list_.push_back(sizeof(T *) * input_num_);
     workspace_size_list_.push_back(sizeof(int) * input_num_);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h
index c3384f34e7e..474858fbe52 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h
@@ -102,7 +102,7 @@ class DynamicRangeGpuKernel : public GpuKernel {
                                "cudaStreamSynchronize failed");
 
     std::vector<TypeId> output_type = {AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), 0)};
-    std::vector<std::vector<size_t>> output_shape = {{(size_t)output_shape_}};
+    std::vector<std::vector<size_t>> output_shape = {{static_cast<size_t>(output_shape_)}};
     AnfAlgo::SetOutputInferTypeAndShape(output_type, output_shape, kernel_node_.lock().get());
   }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h
index dd15fa7d221..ba3c3230d52 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h
@@ -75,8 +75,8 @@ class DynamicShapeGpuKernel : public GpuKernel {
   }
 
   void ResetResource() noexcept override {
-    input_size_ = -1;
-    output_size_ = -1;
+    input_size_ = 0;
+    output_size_ = 0;
     prev_node_output_shape_.clear();
     input_size_list_.clear();
     output_size_list_.clear();
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h
index a6e25cac507..e5e64323aec 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h
@@ -49,8 +49,10 @@ class OneHotGpuFwdKernel : public GpuKernel {
     auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
     auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
     int64_t input_dims = static_cast<int64_t>(input_shape.size());
-    if (axis >= input_dims) {
-      MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size();
+    int64_t output_dims = static_cast<int64_t>(output_shape.size());
+    if (axis >= input_dims || axis >= output_dims) {
+      MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size()
+                    << " or output dims size: " << output_dims;
       return false;
     }
     const int64_t default_axis = -1;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h
index 28ede23470d..0aa4d397b3f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h
@@ -146,7 +146,7 @@ class SliceGpuFwdKernel : public GpuKernel {
     begin_ = GetAttr<std::vector<int64_t>>(kernel_node, "begin");
 
     for (size_t i = 0; i < input_shape.size(); i++) {
-      if (input_shape[i] <= 0 || size_[i] <= 0) {
+      if (i >= size_.size() || input_shape[i] <= 0 || size_[i] <= 0) {
         MS_LOG(WARNING) << "Slice output is null.";
         is_null_input_ = true;
       }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h
index 9ede49565c8..1866fe071fc 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h
@@ -105,12 +105,12 @@ class SliceGradGpuKernel : public GpuKernel {
       std::swap(size_[1], size_[2]);
     }
     for (size_t i = 0; i < begin_.size(); i++) {
-      if (begin_[i] < 0) {
+      if (begin_[i] < 0 && i < input_shape_.size()) {
         begin_[i] = begin_[i] + input_shape_[i];
       }
     }
     for (size_t i = 0; i < size_.size(); i++) {
-      if (size_[i] < 0) {
+      if (size_[i] < 0 && i < input_shape_.size()) {
         size_[i] = (size_[i] + input_shape_[i]) > 0 ? (size_[i] + input_shape_[i]) : 0;
       }
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h
index 71b3b9876be..5e22fe09f35 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h
@@ -43,6 +43,10 @@ class StridedSliceGpuCommon {
     strides_ = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "strides");
 
     for (size_t i = 0; i < MAX_DIMS; i++) {
+      if (i >= input_shape_.size()) {
+        input_shape_.push_back(1);
+      }
+
       if (i < begin_.size()) {
         int64_t dim = input_shape_[i];
         begin_[i] = std::min(begin_[i] < 0 ? std::max(begin_[i] + dim, static_cast<int64_t>(0)) : begin_[i], dim - 1);
@@ -60,10 +64,6 @@ class StridedSliceGpuCommon {
       if (i >= strides_.size()) {
         strides_.push_back(1);
       }
-
-      if (i >= input_shape_.size()) {
-        input_shape_.push_back(1);
-      }
     }
   }
 
@@ -71,7 +71,7 @@ class StridedSliceGpuCommon {
     auto begin_mask_int = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "begin_mask");
     auto begin_mask = Dec2Bin(begin_mask_int);
     for (size_t i = 0; i < begin_mask.size(); i++) {
-      if (begin_mask[i]) {
+      if (begin_mask[i] && i < MAX_DIMS) {
         begin_[i] = 0;
       }
     }
@@ -79,7 +79,7 @@ class StridedSliceGpuCommon {
     auto end_mask_int = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "end_mask");
     auto end_mask = Dec2Bin(end_mask_int);
     for (size_t j = 0; j < end_mask.size(); j++) {
-      if (end_mask[j]) {
+      if (end_mask[j] && j < MAX_DIMS) {
         end_[j] = input_shape_[j];
       }
     }
@@ -87,7 +87,7 @@ class StridedSliceGpuCommon {
     auto ellipsis_mask_int = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "ellipsis_mask");
     auto ellipsis_mask = Dec2Bin(ellipsis_mask_int);
     for (size_t k = 0; k < ellipsis_mask.size(); k++) {
-      if (ellipsis_mask[k]) {
+      if (ellipsis_mask[k] && k < MAX_DIMS) {
         begin_[k] = 0;
         end_[k] = input_shape_[k];
         strides_[k] = 1;
@@ -97,7 +97,7 @@ class StridedSliceGpuCommon {
     auto new_axis_mask_int = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "new_axis_mask");
     auto new_axis_mask = Dec2Bin(new_axis_mask_int);
     for (size_t l = 0; l < new_axis_mask.size(); l++) {
-      if (new_axis_mask[l]) {
+      if (new_axis_mask[l] && l < MAX_DIMS) {
         begin_[l] = 0;
         end_[l] = input_shape_[l];
         strides_[l] = 1;
@@ -107,7 +107,7 @@ class StridedSliceGpuCommon {
     auto shrink_axis_mask_int = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "shrink_axis_mask");
     auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_int);
     for (size_t m = 0; m < shrink_axis_mask.size(); m++) {
-      if (shrink_axis_mask[m]) {
+      if (shrink_axis_mask[m] && m < MAX_DIMS) {
         end_[m] = end_[m] > begin_[m] ? begin_[m] + 1 : begin_[m] - 1;
         strides_[m] = end_[m] > begin_[m] ? 1 : -1;
       }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h
index fc483c6f985..56e0aea1e60 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h
@@ -65,13 +65,13 @@ class StridedSliceGpuKernel : public GpuKernel, public StridedSliceGpuCommon {
  protected:
   void InitSizeLists() override {
     size_t size = sizeof(T);
-    for (size_t i = 0; i < MAX_DIMS; i++) {
+    for (size_t i = 0; i < input_shape_.size(); i++) {
       size *= input_shape_[i];
     }
     input_size_list_.push_back(size);
 
     size_t size1 = sizeof(T);
-    for (size_t i = 0; i < MAX_DIMS; i++) {
+    for (size_t i = 0; i < output_shape_.size(); i++) {
       size1 *= output_shape_[i];
     }
     output_size_list_.push_back(size1);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
index 2083dc63c17..c2f597c2958 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h
@@ -99,6 +99,19 @@ class GpuKernel : public KernelMod {
     if (index >= addr_list.size()) {
       MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
     }
+
+    if ((addr_list[index] == nullptr) || (addr_list[index]->addr == nullptr) || (addr_list[index]->size == 0)) {
+      MS_LOG(EXCEPTION) << "The device address is empty, address index: " << index;
+    }
+
+    return reinterpret_cast<T *>(addr_list[index]->addr);
+  }
+
+  template <typename T>
+  inline T *GetPossiblyNullDeviceAddress(const std::vector<AddressPtr> &addr_list, size_t index) {
+    if (index >= addr_list.size()) {
+      MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")";
+    }
     // Kernels may run normally without workspace, the addr_list[index] maybe nullptr.
     if ((addr_list[index] == nullptr) || (addr_list[index]->size == 0)) {
       return nullptr;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h
index e1d5277e7a1..1f27011014b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h
@@ -52,18 +52,16 @@ class BatchNormGpuKernel : public GpuKernel {
     auto running_variance = GetDeviceAddress<float>(inputs, 4);
     T *z = nullptr;
     if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) {
-      z = GetDeviceAddress<T>(inputs, 5);
+      z = GetPossiblyNullDeviceAddress<T>(inputs, 5);
     }
 
     auto y = GetDeviceAddress<T>(outputs, 0);
-    auto reserve_addr = GetDeviceAddress<float>(outputs, 2);
-    T *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+
     const float alpha = 1;
     const float beta = 0;
     if (is_train_) {
+      auto reserve_addr = GetPossiblyNullDeviceAddress<float>(outputs, 2);
       auto save_mean = GetDeviceAddress<float>(outputs, 3);
       auto save_variance = GetDeviceAddress<float>(outputs, 4);
       CHECK_CUDNN_RET_WITH_EXCEPT(
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h
index ae36b56df85..69720b5a854 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h
@@ -71,8 +71,6 @@ class BatchNormGradGpuKernel : public GpuKernel {
     auto scale = GetDeviceAddress<float>(inputs, 2);
     auto save_mean = GetDeviceAddress<float>(inputs, 3);
     auto save_variance = GetDeviceAddress<float>(inputs, 4);
-    auto reserve_addr = GetDeviceAddress<float>(inputs, 5);
-    reserve_size_ = inputs[5]->size;
     void *bias = nullptr;
     T *y = nullptr;
     if (bn_ops_ != CUDNN_BATCHNORM_OPS_BN) {
@@ -88,11 +86,11 @@ class BatchNormGradGpuKernel : public GpuKernel {
       dz = GetDeviceAddress<T>(outputs, 3);
     }
 
-    void *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    }
     if (is_train_) {
+      auto reserve_addr = GetPossiblyNullDeviceAddress<float>(inputs, 5);
+      reserve_size_ = inputs[5]->size;
+      void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+
       const float alpha_data_diff = 1;
       const float alpha_param_diff = 1;
       const float beta_param_diff = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h
index a50f2a38f3c..f007d5073a7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h
@@ -58,8 +58,8 @@ class BiasAddGradGpuKernel : public GpuKernel {
                                  "cudaMemcpyAsync failed.");
     } else {
       if (use_cudnn_) {  // shared memory not satisfied or num_dim > 4
-        T *indices_addr = GetDeviceAddress<T>(workspace, 0);
-        T *workspace_addr = GetDeviceAddress<T>(workspace, 1);
+        T *indices_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+        T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
         const float alpha = 1;
         const float beta = 0;
         CHECK_CUDNN_RET_WITH_EXCEPT(
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
index 46cd0de59ef..2c5708c3b24 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h
@@ -46,10 +46,7 @@ class Conv2dGpuFwdKernel : public GpuKernel {
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *filter_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
-    T *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     const float beta = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
index 7e5b955cc37..5ef67871fde 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h
@@ -71,16 +71,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel {
     T *dy = GetDeviceAddress<T>(inputs, 0);
     T *x = GetDeviceAddress<T>(inputs, 1);
     T *dw = GetDeviceAddress<T>(outputs, 0);
-    T *work_space = nullptr;
-    if (workspace_size_ != 0) {
-      work_space = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *work_space = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     const float beta = 0;
 
     if (use_pad_) {
-      T *padded = GetDeviceAddress<T>(workspace, 1);
+      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
       if (data_format_ == kOpFormat_NHWC) {
         CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_,
                    old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
index 2453d50df71..a4b64bac75c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h
@@ -74,14 +74,11 @@ class ConvGradInputGpuBkwKernel : public GpuKernel {
     T *dy = GetDeviceAddress<T>(inputs, 0);
     T *w = GetDeviceAddress<T>(inputs, 1);
     T *dx = GetDeviceAddress<T>(outputs, 0);
-    T *work_space = nullptr;
-    if (workspace_size_ != 0) {
-      work_space = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *work_space = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     if (use_pad_) {
-      T *padded = GetDeviceAddress<T>(workspace, 1);
+      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
 
       CHECK_CUDNN_RET_WITH_EXCEPT(
         kernel_node_,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h
index 3c01afaa5e4..3ba26473ad1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h
@@ -45,15 +45,12 @@ class Conv3dGpuKernel : public GpuKernel {
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *filter_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
-    T *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded_addr = GetDeviceAddress<T>(workspace, 1);
+      T *padded_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_,
                old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_,
                pad_left_, pad_value_, padded_addr, reinterpret_cast<cudaStream_t>(stream_ptr));
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h
index 7d04d31e098..f88e58ad5e7 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h
@@ -47,10 +47,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
     T *x = GetDeviceAddress<T>(inputs, 0);
     T *dy = GetDeviceAddress<T>(inputs, 1);
 
-    T *work_space = nullptr;
-    if (workspace_size_ != 0) {
-      work_space = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *work_space = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     T *dw = nullptr;
     float *dw_float32 = nullptr;
@@ -64,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel {
     const float alpha = 1;
     const float beta = 0;
     if (use_pad_) {
-      T *padded = GetDeviceAddress<T>(workspace, 1);
+      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
       CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_,
                old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded,
                reinterpret_cast<cudaStream_t>(stream_ptr));
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h
index 15020b4edd1..5c525a3992b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h
@@ -46,10 +46,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel {
     T *w = GetDeviceAddress<T>(inputs, 0);
     T *dy = GetDeviceAddress<T>(inputs, 1);
     T *dx = GetDeviceAddress<T>(outputs, 0);
-    T *work_space = nullptr;
-    if (workspace_size_ != 0) {
-      work_space = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *work_space = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     if (use_pad_) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h
index b6c4b985b15..6f8b2970e36 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h
@@ -46,14 +46,11 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel {
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *filter_addr = GetDeviceAddress<T>(inputs, 1);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
-    T *work_space = nullptr;
-    if (workspace_size_ != 0) {
-      work_space = GetDeviceAddress<T>(workspace, 0);
-    }
+    T *work_space = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     const float alpha = 1;
     if (use_pad_) {
-      T *padded = GetDeviceAddress<T>(workspace, 1);
+      T *padded = GetPossiblyNullDeviceAddress<T>(workspace, 1);
       CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_,
                                   cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr,
                                                                input_desc_, input_addr, conv_desc_, algo_, work_space,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h
index 60809ab376b..faf49129eb1 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h
@@ -75,10 +75,7 @@ class InstanceNormGpuKernel : public GpuKernel {
     float *ws_beta = GetDeviceAddress<float>(workspace, 1);
     float *ws_mean = GetDeviceAddress<float>(workspace, 2);
     float *ws_var = GetDeviceAddress<float>(workspace, 3);
-    T *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 4);
-    }
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 4);
 
     size_t N = input_shape_[0];
     size_t C = input_shape_[1];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h
index c1741ed1322..d4dfbbab94b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h
@@ -78,10 +78,7 @@ class InstanceNormGradGpuKernel : public GpuKernel {
     float *ws_gamma = GetDeviceAddress<float>(workspace, 0);
     float *ws_dgamma = GetDeviceAddress<float>(workspace, 1);
     float *ws_dbeta = GetDeviceAddress<float>(workspace, 2);
-    void *workspace_addr = nullptr;
-    if (workspace_size_ != 0) {
-      workspace_addr = GetDeviceAddress<T>(workspace, 3);
-    }
+    void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 3);
 
     size_t N = input_shape_[0];
     size_t C = input_shape_[1];
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h
index 02c3586a603..31074c7f399 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h
@@ -59,8 +59,8 @@ class L2NormalizeGpuKernel : public GpuKernel {
     }
     T *input_addr = GetDeviceAddress<T>(inputs, 0);
     T *output_addr = GetDeviceAddress<T>(outputs, 0);
-    T *reduce_workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    T *workspace_addr = GetDeviceAddress<T>(workspace, 1);
+    T *reduce_workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
 
     const float alpha = 1;
     const float beta = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h
index e1a9598cada..e4221c11cb6 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h
@@ -62,10 +62,10 @@ class L2NormalizeGradGpuKernel : public GpuKernel {
     T *y_addr = GetDeviceAddress<T>(inputs, 1);
     T *dy_addr = GetDeviceAddress<T>(inputs, 2);
     T *dx_addr = GetDeviceAddress<T>(outputs, 0);
-    T *reduce_workspace_addr = GetDeviceAddress<T>(workspace, 0);
-    T *reduce_y_dy_workspace_addr = GetDeviceAddress<T>(workspace, 1);
-    T *workspace_addr = GetDeviceAddress<T>(workspace, 2);
-    T *workspace_y_dy_addr = GetDeviceAddress<T>(workspace, 3);
+    T *reduce_workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
+    T *reduce_y_dy_workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 1);
+    T *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 2);
+    T *workspace_y_dy_addr = GetPossiblyNullDeviceAddress<T>(workspace, 3);
 
     const float alpha = 1;
     const float beta = 0;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h
index 8d2c0c073db..537ef654057 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h
@@ -70,7 +70,7 @@ class LstmGpuKernel : public GpuKernel {
     auto cy_addr = GetDeviceAddress<T>(outputs, 2);
     auto reserved_addr = GetDeviceAddress<T>(outputs, 3);
     auto states_addr = GetDeviceAddress<T>(outputs, 4);
-    void *workspace_addr = GetDeviceAddress<T>(workspace, 0);
+    void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     if (!states_init_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h
index 30e454a08be..4ca32d73634 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h
@@ -75,7 +75,7 @@ class LstmGradDataGpuKernel : public GpuKernel {
     auto dx_addr = GetDeviceAddress<T>(outputs, 0);
     auto dhx_addr = GetDeviceAddress<T>(outputs, 1);
     auto dcx_addr = GetDeviceAddress<T>(outputs, 2);
-    void *workspace_addr = GetDeviceAddress<T>(workspace, 0);
+    void *workspace_addr = GetPossiblyNullDeviceAddress<T>(workspace, 0);
 
     if (!states_init_) {
       CHECK_CUDNN_RET_WITH_EXCEPT(
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h
index f3ab1a2b90e..da2edb2178f 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h
@@ -44,7 +44,7 @@ class NLLLossGpuKernel : public GpuKernel {
     T *loss_device = GetDeviceAddress<T>(outputs, 0);
     S *total_weight_device = GetDeviceAddress<S>(outputs, 1);
 
-    T *tmp_loss_device = GetDeviceAddress<T>(workspace, 0);
+    T *tmp_loss_device = GetPossiblyNullDeviceAddress<T>(workspace, 0);
     S *tmp_target_weight_device = GetDeviceAddress<S>(workspace, 1);
 
     NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc
index 06a329dc2d0..73f8c19639d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc
@@ -36,5 +36,8 @@ MS_REG_GPU_KERNEL_ONE(
 MS_REG_GPU_KERNEL_ONE(
   Assign, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
   AssignGpuKernel, int64_t)
+MS_REG_GPU_KERNEL_ONE(
+  Assign, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
+  AssignGpuKernel, unsigned int)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
index 9a54908849f..dcd61375060 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc
@@ -18,7 +18,7 @@
 
 namespace mindspore {
 namespace kernel {
-MS_REG_GPU_KERNEL_TWO(NeighborListUpdateNew,
+MS_REG_GPU_KERNEL_TWO(NeighborListUpdate,
                       KernelAttr()
                         .AddInputAttr(kNumberTypeInt32)
                         .AddInputAttr(kNumberTypeInt32)
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc
index 01ba0fdae9f..77edb57eaba 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc
@@ -47,11 +47,11 @@ std::string MsOpNameToHcomOpType(const std::string &ms_op_type) {
 namespace mindspore {
 namespace kernel {
 void HcclKernelFactory::Register(const std::string &name, HcclKernelCreater &&fun) {
-  hcclKernelMap_.emplace(name, std::move(fun));
+  hccl_kernel_map_.emplace(name, fun);
 }
 
 std::shared_ptr<HcclKernel> HcclKernelFactory::Get(const std::string &name) {
-  const auto &map = Get().hcclKernelMap_;
+  const auto &map = Get().hccl_kernel_map_;
   auto it = map.find(name);
   if (it != map.end() && it->second) {
     return (it->second)();
@@ -64,14 +64,15 @@ HcclKernelFactory &HcclKernelFactory::Get() {
   return _this;
 }
 
-HcclKernel::HcclKernel() : hccl_count_(0), op_type_(HCCL_REDUCE_SUM), root_id_(0) {}
+HcclKernel::HcclKernel()
+    : hccl_count_(0), op_type_(::HcclReduceOp::HCCL_REDUCE_SUM), root_id_(0), src_rank_(0), dest_rank_(0) {}
 
 HcclKernel::~HcclKernel() {
   hccl_kernel_input_shape_list_.clear();
   hccl_kernel_output_shape_list_.clear();
   hccl_data_type_list_.clear();
   hccl_count_ = 0;
-  op_type_ = HCCL_REDUCE_SUM;
+  op_type_ = ::HcclReduceOp::HCCL_REDUCE_SUM;
   root_id_ = 0;
   input_size_list_.clear();
   output_size_list_.clear();
@@ -81,6 +82,18 @@ HcclKernel::~HcclKernel() {
 bool HcclKernel::Init(const AnfNodePtr &anf_node) {
   MS_EXCEPTION_IF_NULL(anf_node);
   op_name_ = AnfAlgo::GetCNodeName(anf_node);
+  if (op_name_ == kHcomSend) {
+    if (!HcomUtil::GetHcomDestRank(anf_node, &dest_rank_)) {
+      MS_LOG(ERROR) << "GetHcomDestRank fail!";
+      return false;
+    }
+  }
+  if (op_name_ == kReceive) {
+    if (!HcomUtil::GetHcomSrcRank(anf_node, &src_rank_)) {
+      MS_LOG(ERROR) << "GetHcomSrcRank fail!";
+      return false;
+    }
+  }
   if (!HcomUtil::GetKernelInputShape(anf_node, &hccl_kernel_input_shape_list_)) {
     MS_LOG(ERROR) << "GetKernelInputShape fail!";
     return false;
@@ -126,6 +139,10 @@ const std::vector<size_t> &HcclKernel::GetInputSizeList() const {
   if (!input_size_list_.empty()) {
     return input_size_list_;
   }
+  if (hccl_data_type_list_.size() != hccl_kernel_input_shape_list_.size()) {
+    MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size "
+                      << hccl_kernel_input_shape_list_.size();
+  }
   for (ulong i = 0; i < hccl_data_type_list_.size(); ++i) {
     if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[i], hccl_kernel_input_shape_list_[i], &size)) {
       MS_LOG(ERROR) << "GetHcclOpInputSize failed";
@@ -145,6 +162,7 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
     return output_size_list_;
   }
   auto cnode = anf_node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
   auto op_name = AnfAlgo::GetCNodeName(cnode);
   int64_t rank_size = 1;
   if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) {
@@ -154,6 +172,10 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
   if (AnfAlgo::HasNodeAttr(kAttrFusion, cnode)) {
     fusion = AnfAlgo::GetNodeAttr<int64_t>(cnode, kAttrFusion);
   }
+  if (hccl_data_type_list_.size() != hccl_kernel_input_shape_list_.size()) {
+    MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size "
+                      << hccl_kernel_input_shape_list_.size();
+  }
   ulong loop_size = hccl_data_type_list_.size();
   if (AnfAlgo::GetInputTensorNum(anf_node) > 1 && op_name == kAllGatherOpName && fusion >= 1) {
     loop_size *= static_cast<ulong>(rank_size);
@@ -171,10 +193,13 @@ const std::vector<size_t> &HcclKernel::GetOutputSizeList() const {
 }
 
 const std::vector<size_t> &HcclKernel::GetWorkspaceSizeList() const {
-  if (!workspace_size_list_.empty() || hccl_data_type_list_.empty()) {
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  bool is_task_sink = context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
+  auto mode = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE);
+  if (!workspace_size_list_.empty() || hccl_data_type_list_.empty() || (!is_task_sink && mode == kGraphMode)) {
     return workspace_size_list_;
   }
-
   workspace_size_list_.emplace_back(
     hccl::HcclAdapter::GetInstance().CalcWorkspaceSize(anf_node_.lock(), hccl_data_type_list_[0]));
   return workspace_size_list_;
@@ -204,6 +229,9 @@ std::vector<TaskInfoPtr> HcclKernel::GenTask(const std::vector<AddressPtr> &inpu
   MS_EXCEPTION_IF_NULL(outputs.at(0));
   auto output_data_addr = outputs.at(0)->addr;
   std::vector<uint8_t> private_def;
+  if (hccl_data_type_list_.empty()) {
+    MS_LOG(EXCEPTION) << "Hccl data type list is empty";
+  }
   HcclDataType data_type = hccl_data_type_list_[0];
   std::vector<hccl::HcclTaskInfo> task_info;
   bool ret = hccl::HcclAdapter::GetInstance().GenTask(anf_node, data_type, &task_info);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h
index 7cf960dcad1..493ca33fc8e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h
@@ -51,6 +51,8 @@ class HcclKernel : public AscendKernelMod {
   uint64_t hccl_count_;
   HcclReduceOp op_type_;
   uint32_t root_id_;
+  uint32_t src_rank_;
+  uint32_t dest_rank_;
   mutable std::vector<size_t> input_size_list_;
   mutable std::vector<size_t> output_size_list_;
   mutable std::vector<size_t> workspace_size_list_;
@@ -71,7 +73,7 @@ class HcclKernelFactory {
   static std::shared_ptr<HcclKernel> Get(const string &name);
 
  private:
-  std::map<string, HcclKernelCreater> hcclKernelMap_;
+  std::map<string, HcclKernelCreater> hccl_kernel_map_;
 };
 
 class _HcclKernelRegister {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc
index d9a46984163..7b94ca5e659 100755
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc
@@ -30,7 +30,9 @@ namespace {
 constexpr size_t N_nchw = 0;
 constexpr size_t C_nchw = 1;
 std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) {
-  const std::set<std::string> kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0};
+  static const std::set<std::string> kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04,
+                                                              kOpFormat_C1HWNCoC0};
+  MS_EXCEPTION_IF_NULL(kernel_node);
   auto op_name = AnfAlgo::GetCNodeName(kernel_node);
   auto parallel_context_instance = parallel::ParallelContext::GetInstance();
   MS_EXCEPTION_IF_NULL(parallel_context_instance);
@@ -61,8 +63,8 @@ std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) {
 }
 }  // namespace
 void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<KernelBuildInfo>> *kernel_info_list) {
-  const std::vector<TypeId> kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16,
-                                                 kNumberTypeFloat32, kNumberTypeInt16};
+  static const std::vector<TypeId> kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16,
+                                                        kNumberTypeFloat32, kNumberTypeInt16};
   MS_EXCEPTION_IF_NULL(kernel_info_list);
   MS_EXCEPTION_IF_NULL(kernel_node);
   std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
@@ -76,7 +78,7 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector<std::shared_ptr<K
     if (!HcomUtil::GetHcomReceiveType(kernel_node, &recv_type)) {
       MS_LOG(EXCEPTION) << "GetHcomReceiveType fail!";
     }
-    auto res = find(kHcclSupportTypes.begin(), kHcclSupportTypes.end(), recv_type);
+    auto res = std::find(kHcclSupportTypes.begin(), kHcclSupportTypes.end(), recv_type);
     if (res == kHcclSupportTypes.end()) {
       MS_LOG(EXCEPTION) << "HcclReceive cannot support data type: " << TypeIdToType(recv_type);
     }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc
index 409f2bc8133..c0cf6c5816b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_broadcast.cc
@@ -23,6 +23,7 @@ namespace mindspore {
 namespace kernel {
 bool HcomAllBroadCastKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                     const std::vector<AddressPtr> &, void *stream_ptr) {
+  MS_LOG(DEBUG) << "HcomAllBroadCast launch";
   if (inputs.empty() || hccl_data_type_list_.empty()) {
     MS_LOG(ERROR) << "BroadCast param is empty";
     return false;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc
index 99c57736b52..014323de5c4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc
@@ -16,13 +16,27 @@
 
 #include "backend/kernel_compiler/hccl/hcom_all_gather.h"
 #include <memory>
-#include "utils/ms_context.h"
+#include "runtime/hccl_adapter/hccl_adapter.h"
 
 namespace mindspore {
 namespace kernel {
-bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
-                                 const std::vector<AddressPtr> &, void *) {
-  MS_LOG(INFO) << "HcomAllGather launch";
+bool HcomAllGatherKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+                                 const std::vector<AddressPtr> &outputs, void *stream_ptr) {
+  MS_LOG(DEBUG) << "HcomAllGather launch";
+  if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) {
+    MS_LOG(ERROR) << "Invalid AllGather input, output or data type size(" << inputs.size() << ", " << outputs.size()
+                  << ", " << hccl_data_type_list_.size() << ").";
+    return false;
+  }
+  MS_EXCEPTION_IF_NULL(inputs[0]);
+  MS_EXCEPTION_IF_NULL(outputs[0]);
+  MS_EXCEPTION_IF_NULL(stream_ptr);
+  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllGather(inputs[0]->addr, outputs[0]->addr, hccl_count_,
+                                                                    hccl_data_type_list_[0], stream_ptr, group_);
+  if (hccl_result != HCCL_SUCCESS) {
+    MS_LOG(ERROR) << "HcclAllGather faled, ret:" << hccl_result;
+    return false;
+  }
   return true;
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h
index 36a11d70c42..c729de94e6b 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h
@@ -19,7 +19,6 @@
 
 #include <vector>
 #include <memory>
-#include "hccl/hcom.h"
 #include "backend/kernel_compiler/hccl/hccl_kernel.h"
 
 namespace mindspore {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc
index fbf95b00fb2..dcafbb0e1de 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc
@@ -22,16 +22,17 @@ namespace mindspore {
 namespace kernel {
 bool HcomAllReduceKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
                                  const std::vector<AddressPtr> &outputs, void *stream_ptr) {
-  MS_LOG(INFO) << "HcclAllReduce launch";
-  if (inputs.empty() || outputs.empty()) {
-    MS_LOG(ERROR) << "Invalid AllReduce input output size(" << inputs.size() << ", " << outputs.size() << ").";
+  MS_LOG(DEBUG) << "HcclAllReduce launch";
+  if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) {
+    MS_LOG(ERROR) << "Invalid AllReduce input, output or data type size (" << inputs.size() << ", " << outputs.size()
+                  << ", " << hccl_data_type_list_.size() << ").";
     return false;
   }
   MS_EXCEPTION_IF_NULL(inputs[0]);
   MS_EXCEPTION_IF_NULL(outputs[0]);
   MS_EXCEPTION_IF_NULL(stream_ptr);
-  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce(inputs[0]->addr, outputs[0]->addr, hccl_count_,
-                                                                    hccl_data_type_list_[0], op_type_, stream_ptr);
+  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce(
+    inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_);
   if (hccl_result != HCCL_SUCCESS) {
     MS_LOG(ERROR) << "HcclAllReduce faled, ret:" << hccl_result;
     return false;
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc
index 557022bebd7..ad45b54046e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc
@@ -16,13 +16,27 @@
 
 #include "backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h"
 #include <memory>
-#include "utils/ms_context.h"
+#include "runtime/hccl_adapter/hccl_adapter.h"
 
 namespace mindspore {
 namespace kernel {
-bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
-                                        const std::vector<AddressPtr> &, void *) {
-  MS_LOG(INFO) << "HcomAllReduceScatter launch";
+bool HcomAllReduceScatterKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+                                        const std::vector<AddressPtr> &outputs, void *stream_ptr) {
+  MS_LOG(DEBUG) << "HcomAllReduceScatter launch";
+  if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) {
+    MS_LOG(ERROR) << "Invalid AllReduceScatter input, output or data type size(" << inputs.size() << ", "
+                  << outputs.size() << ", " << hccl_data_type_list_.size() << ").";
+    return false;
+  }
+  MS_EXCEPTION_IF_NULL(inputs[0]);
+  MS_EXCEPTION_IF_NULL(outputs[0]);
+  MS_EXCEPTION_IF_NULL(stream_ptr);
+  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclReduceScatter(
+    inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_);
+  if (hccl_result != HCCL_SUCCESS) {
+    MS_LOG(ERROR) << "HcclReduceScatter faled, ret:" << hccl_result;
+    return false;
+  }
   return true;
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h
index 987982a73c8..fcddfa34e8d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h
@@ -19,7 +19,6 @@
 
 #include <vector>
 #include <memory>
-#include "hccl/hcom.h"
 #include "backend/kernel_compiler/hccl/hccl_kernel.h"
 
 namespace mindspore {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc
index 2b49199b7ef..1f0fbda2101 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc
@@ -16,12 +16,26 @@
 
 #include "backend/kernel_compiler/hccl/hcom_receive.h"
 #include <memory>
-#include "utils/ms_context.h"
+#include "runtime/hccl_adapter/hccl_adapter.h"
+
 namespace mindspore {
 namespace kernel {
 bool HcomReceiveKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
-                               const std::vector<AddressPtr> &, void *) {
-  MS_LOG(INFO) << "HcomReceive launch";
+                               const std::vector<AddressPtr> &outputs, void *stream_ptr) {
+  MS_LOG(DEBUG) << "HcomReceive launch";
+  if (outputs.empty() || hccl_data_type_list_.empty()) {
+    MS_LOG(ERROR) << "Invalid HcomReceive outputs size or data type size (" << outputs.size() << ", "
+                  << hccl_data_type_list_.size() << ").";
+    return false;
+  }
+  MS_EXCEPTION_IF_NULL(outputs[0]);
+  MS_EXCEPTION_IF_NULL(stream_ptr);
+  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclRecv(outputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
+                                                               src_rank_, stream_ptr, group_);
+  if (hccl_result != HCCL_SUCCESS) {
+    MS_LOG(ERROR) << "HcomReceive failed, ret:" << hccl_result;
+    return false;
+  }
   return true;
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc
index 9951cdeb61f..2349e363323 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc
@@ -16,13 +16,26 @@
 
 #include "backend/kernel_compiler/hccl/hcom_send.h"
 #include <memory>
-#include "utils/ms_context.h"
+#include "runtime/hccl_adapter/hccl_adapter.h"
 
 namespace mindspore {
 namespace kernel {
-bool HcomSendKernel::Launch(const std::vector<AddressPtr> &, const std::vector<AddressPtr> &,
-                            const std::vector<AddressPtr> &, void *) {
-  MS_LOG(INFO) << "HcomSend launch";
+bool HcomSendKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+                            const std::vector<AddressPtr> &, void *stream_ptr) {
+  MS_LOG(DEBUG) << "HcomSend launch";
+  if (inputs.empty() || hccl_data_type_list_.empty()) {
+    MS_LOG(ERROR) << "Invalid HcomSend input size or data type size (" << inputs.size() << ", "
+                  << hccl_data_type_list_.size() << ").";
+    return false;
+  }
+  MS_EXCEPTION_IF_NULL(inputs[0]);
+  MS_EXCEPTION_IF_NULL(stream_ptr);
+  auto hccl_result = hccl::HcclAdapter::GetInstance().HcclSend(inputs[0]->addr, hccl_count_, hccl_data_type_list_[0],
+                                                               dest_rank_, stream_ptr, group_);
+  if (hccl_result != HCCL_SUCCESS) {
+    MS_LOG(ERROR) << "HcomSend faled, ret:" << hccl_result;
+    return false;
+  }
   return true;
 }
 }  // namespace kernel
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc
index d5814fcbfe6..033f20ee234 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc
@@ -22,11 +22,13 @@
 #include "utils/utils.h"
 
 namespace mindspore {
+namespace {
 bool IsPyNativeMode() {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   return ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode;
 }
+}  // namespace
 
 bool HcomUtil::GetKernelInputShape(const AnfNodePtr &anf_node, vector<vector<size_t>> *hccl_kernel_intput_shape_list) {
   MS_EXCEPTION_IF_NULL(anf_node);
@@ -67,8 +69,8 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector<HcclDataType>
     } else {
       type_ptr = AnfAlgo::GetInputDeviceDataType(anf_node, i);
     }
-    auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type_ptr);
-    if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) {
+    auto iter = kConstOpHcomDataTypeMap.find(type_ptr);
+    if (iter == kConstOpHcomDataTypeMap.end()) {
       MS_LOG(EXCEPTION) << "HcomDataType can't support Current Ascend Data Type : " << type_ptr;
     }
     data_type_list->emplace_back(iter->second);
@@ -102,8 +104,8 @@ bool HcomUtil::GetHcclOpSize(const HcclDataType &data_type, const vector<size_t>
 
 bool HcomUtil::GetHcomTypeSize(const HcclDataType &data_type, uint32_t *size) {
   MS_EXCEPTION_IF_NULL(size);
-  auto iter = CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.find(data_type);
-  if (iter == CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.end()) {
+  auto iter = kConstOpHcomDataTypeSizeMap.find(data_type);
+  if (iter == kConstOpHcomDataTypeSizeMap.end()) {
     MS_LOG(ERROR) << "HcomUtil::HcomDataTypeSize, No DataTypeSize!";
     return false;
   }
@@ -123,6 +125,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataTyp
   uint32_t type_size = 4;
   size_t size = AnfAlgo::GetInputTensorNum(anf_node);
   auto cnode = anf_node->cast<CNodePtr>();
+  MS_EXCEPTION_IF_NULL(cnode);
   if (AnfAlgo::GetCNodeName(anf_node) == kReceiveOpName) {
     size = AnfAlgo::GetOutputTensorNum(anf_node);
   }
@@ -140,8 +143,8 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vector<HcclDataTyp
       int64_t rank_size;
       auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
       MS_EXCEPTION_IF_NULL(primitive);
-      if (primitive->GetAttr("rank_size") != nullptr) {
-        rank_size = GetValue<int64_t>(primitive->GetAttr("rank_size"));
+      if (primitive->GetAttr(kAttrRankSize) != nullptr) {
+        rank_size = GetValue<int64_t>(primitive->GetAttr(kAttrRankSize));
       } else {
         MS_LOG(ERROR) << "Get rank size failed";
         return false;
@@ -181,11 +184,11 @@ bool HcomUtil::GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op
   MS_EXCEPTION_IF_NULL(op_type);
   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
   MS_EXCEPTION_IF_NULL(primitive);
-  if (primitive->GetAttr("op") == nullptr) {
+  if (primitive->GetAttr(kAttrOp) == nullptr) {
     MS_LOG(ERROR) << "Get HCOM_ATTR_REDUCE_TYPE fail, not support!";
     return false;
   }
-  auto hcom_op_type = GetValue<std::string>(primitive->GetAttr("op"));
+  auto hcom_op_type = GetValue<std::string>(primitive->GetAttr(kAttrOp));
   if (hcom_op_type == "min") {
     *op_type = HCCL_REDUCE_MIN;
   } else if (hcom_op_type == "max") {
@@ -206,8 +209,8 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) {
   MS_EXCEPTION_IF_NULL(root_id);
   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
   MS_EXCEPTION_IF_NULL(primitive);
-  if (primitive->GetAttr("root_rank") != nullptr) {
-    *root_id = (uint32_t)GetValue<int64_t>(primitive->GetAttr("root_rank"));
+  if (primitive->GetAttr(kAttrRootRank) != nullptr) {
+    *root_id = (uint32_t)GetValue<int64_t>(primitive->GetAttr(kAttrRootRank));
   } else {
     MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX fail, not support!";
     return false;
@@ -215,6 +218,34 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) {
   return true;
 }
 
+bool HcomUtil::GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(src_rank);
+  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
+  MS_EXCEPTION_IF_NULL(primitive);
+  if (primitive->GetAttr("src_rank") != nullptr) {
+    *src_rank = static_cast<uint32_t>(GetValue<int64_t>(primitive->GetAttr("src_rank")));
+  } else {
+    MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_SRC_RANK fail, not support!";
+    return false;
+  }
+  return true;
+}
+
+bool HcomUtil::GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank) {
+  MS_EXCEPTION_IF_NULL(anf_node);
+  MS_EXCEPTION_IF_NULL(dest_rank);
+  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
+  MS_EXCEPTION_IF_NULL(primitive);
+  if (primitive->GetAttr("dest_rank") != nullptr) {
+    *dest_rank = static_cast<uint32_t>(GetValue<int64_t>(primitive->GetAttr("dest_rank")));
+  } else {
+    MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_DEST_RANK fail, not support!";
+    return false;
+  }
+  return true;
+}
+
 bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type) {
   MS_EXCEPTION_IF_NULL(anf_node);
   MS_EXCEPTION_IF_NULL(receive_type);
@@ -232,7 +263,7 @@ bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_ty
 void HcomUtil::GetHcomGroup(NotNull<const AnfNodePtr &> anf_node, NotNull<std::string *> group) {
   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
   MS_EXCEPTION_IF_NULL(primitive);
-  auto attr = primitive->GetAttr("group");
+  auto attr = primitive->GetAttr(kAttrGroup);
   if (attr != nullptr) {
     *group = GetValue<std::string>(attr);
   } else {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h
index 13427e852b7..c08c6762386 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h
@@ -40,7 +40,7 @@ constexpr auto kReduceScatter = "ReduceScatter";
 constexpr auto kAllToAllv = "AllToAllv";
 
 /* Correspondence between data_type and hcom data type in Ascend */
-static map<int64_t, HcclDataType> CONST_OP_HCOM_DATA_TYPE_MAP = {
+static map<int64_t, HcclDataType> kConstOpHcomDataTypeMap = {
   {TypeId::kNumberTypeFloat32, HCCL_DATA_TYPE_FP32},
   {TypeId::kNumberTypeFloat16, HCCL_DATA_TYPE_FP16},
   {TypeId::kNumberTypeInt8, HCCL_DATA_TYPE_INT8},
@@ -48,7 +48,7 @@ static map<int64_t, HcclDataType> CONST_OP_HCOM_DATA_TYPE_MAP = {
 };
 
 /* Correspondence between data_type and occupied byte size in hcom */
-static map<HcclDataType, uint32_t> CONST_OP_HCOM_DATA_TYPE_SIZE_MAP = {
+static map<HcclDataType, uint32_t> kConstOpHcomDataTypeSizeMap = {
   {HCCL_DATA_TYPE_FP32, sizeof(float)},
   {HCCL_DATA_TYPE_FP16, sizeof(float) / 2},
   {HCCL_DATA_TYPE_INT8, sizeof(int8_t)},
@@ -66,6 +66,8 @@ class HcomUtil {
                            const vector<vector<size_t>> &shape_list, uint64_t *total_count);
   static bool GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op_type);
   static bool GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id);
+  static bool GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank);
+  static bool GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank);
   static void GetHcomGroup(NotNull<const AnfNodePtr &> anf_node, NotNull<std::string *> group);
   static bool GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type);
 };
diff --git a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc
index 9651eea3e69..88442c17511 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc
@@ -118,6 +118,16 @@ bool KernelPack::ReadFromJsonFile(const std::string &json_f, const std::string &
     if (!CheckHash(json_f, bin_f, js)) {
       return false;
     }
+
+    // cuda json file may have workspace information
+    if (js.find("workspace") != js.end()) {
+      auto workspace = js.at("workspace");
+      std::vector<size_t> sizes = workspace.at("size");
+      for (auto size : sizes) {
+        kernel_json_info_.workspaces.push_back(size);
+      }
+    }
+
     return true;
   }
 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc
index 533d8660685..dee08117266 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc
@@ -112,6 +112,12 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector<std::shared_ptr<kernel
   if (IsPrimitiveCNode(kernel_node, kPrimProdForceSeA)) {
     kernel_type = KernelType::AKG_KERNEL;
   }
+
+  const PrimitivePtr kPrimLoadIm2Col = std::make_shared<Primitive>("LoadIm2Col");
+  if (IsPrimitiveCNode(kernel_node, kPrimLoadIm2Col)) {
+    kernel_type = KernelType::AKG_KERNEL;
+  }  // use LoadIm2Col only for THOR optimizer
+
   switch (kernel_type) {
     case KernelType::AKG_KERNEL:
       AkgMetadataInfo(kernel_node, kernel_info_list);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc
index 44902348473..27861c773c9 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc
@@ -372,7 +372,7 @@ std::shared_ptr<OpInfo> TbeDynamicShapeUtil::FindOp(const std::string &op_name,
 RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, size_t index,
                                                     const std::string &def_format) {
   MS_EXCEPTION_IF_NULL(anf_node);
-  auto kernel_info = static_cast<device::KernelInfo *>(anf_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(anf_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto format =
     kernel_info->select_kernel_build_info() == nullptr ? def_format : AnfAlgo::GetInputFormat(anf_node, index);
@@ -396,7 +396,7 @@ RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node,
 RangePair TbeDynamicShapeUtil::GetOutputDynamicRange(const AnfNodePtr &anf_node, size_t index,
                                                      const std::string &def_format) {
   MS_EXCEPTION_IF_NULL(anf_node);
-  auto kernel_info = static_cast<device::KernelInfo *>(anf_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(anf_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto format =
     kernel_info->select_kernel_build_info() == nullptr ? def_format : AnfAlgo::GetOutputFormat(anf_node, index);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc
index 6d230e078b8..f194b8f2a81 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc
@@ -192,7 +192,7 @@ bool TbeJsonCreator::GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json *
 
 void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string prefix) {
   json_name_.clear();
-  size_t hash_id = GenJsonHash((*kernel_json));
+  json_hash_ = GenJsonHash((*kernel_json));
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
   json_name_ = std::move(prefix);
@@ -203,7 +203,7 @@ void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string pr
       json_name_.append("_");
     }
   }
-  json_name_ = json_name_ + std::to_string(hash_id) + "_" + std::to_string(device_id);
+  json_name_ = json_name_ + std::to_string(json_hash_) + "_" + std::to_string(device_id);
   MS_LOG(DEBUG) << "Generate Json name: " << json_name_;
   (*kernel_json)[kJFusionOpName] = json_name_;
 }
@@ -231,7 +231,7 @@ size_t TbeJsonCreator::GenJsonHash(nlohmann::json tbe_json) {
       DeleteDescName(&op.at(kJInputDesc));
     }
   }
-  return std::hash<std::string>()(tbe_json.dump());
+  return std::hash<std::string>()(op_lists.dump());
 }
 
 void TbeJsonCreator::AddOpNameForComputeNode(nlohmann::json *kernel_json) {
diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h
index 83c3bfdc90f..e71838dfa0e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h
@@ -48,6 +48,7 @@ class TbeJsonCreator {
   virtual bool GenJson(const AnfNodePtr &anf_node, nlohmann::json *kernel_json) { return false; }
   virtual bool GenJson(const FusionScopeInfo &fusion_scope_info, nlohmann::json *fusion_json) { return false; }
   std::string GetJsonName() { return json_name_; }
+  size_t GetJsonHash() { return json_hash_; }
 
  protected:
   bool GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json *compute_json);
@@ -72,6 +73,7 @@ class TbeJsonCreator {
 
  private:
   std::string json_name_;
+  size_t json_hash_;
 };
 
 }  // namespace mindspore::kernel
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc
index 09aa7ea04f9..e7538b6fc04 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc
@@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) {
   auto type_ptr = node->Type();
   auto shape_ptr = node->Shape();
   size_t output_num = AnfAlgo::GetOutputTensorNum(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc
index fd66dd7ecc3..07957ee3334 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc
@@ -27,7 +27,7 @@ namespace opt {
 namespace {
 // insert tensormove for some cnode even if not a Ref cnode
 const std::set<std::string> kNeedInsertTensorMoveOpSet = {kLambNextMVOpName, kLambNextMVWithDecayOpName,
-                                                          kLambUpdateWithLROpName};
+                                                          kLambUpdateWithLROpName, kGetNextOpName};
 
 bool IsParameterOrValueNode(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
@@ -41,15 +41,18 @@ bool IsParameterOrValueNode(const AnfNodePtr &node) {
 }
 
 // NodeUsersMap, for node B input i use node A, it will be one item in map with key: A, and value: (B, i)
-bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users) {
+bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users, const CNodePtr &known_user,
+                                       size_t known_index) {
   if (node_users.size() == 1) {
     MS_LOG(INFO) << "This node only used once, no need to insert tensormove node.";
     return false;
   }
   for (const auto &node_pair : node_users) {
-    auto node = node_pair.first;
-    if (AnfAlgo::IsRealKernel(node) && !AnfAlgo::IsCommunicationOp(node)) {
-      MS_LOG(INFO) << "This node only used other real kernel: " << node->fullname_with_scope();
+    auto &node = node_pair.first;
+    size_t idx = IntToSize(node_pair.second);
+    if (AnfAlgo::IsRealKernel(node) && !(known_user == node && known_index == idx)) {
+      MS_LOG(INFO) << "User " << node->DebugString() << " idx " << idx << " is real kernel and diff with known "
+                   << known_user->DebugString() << " idx " << known_index;
       return true;
     }
   }
@@ -58,11 +61,13 @@ bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users) {
 }
 }  // namespace
 
-bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input,
-                                                     const CNodePtr &cur_node) const {
+bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node,
+                                                     size_t input_idx) const {
   MS_EXCEPTION_IF_NULL(graph);
-  MS_EXCEPTION_IF_NULL(input);
   MS_EXCEPTION_IF_NULL(cur_node);
+  auto input = cur_node->input(input_idx);
+  MS_EXCEPTION_IF_NULL(input);
+
   if (IsPrimitiveCNode(cur_node, prim::kPrimReceive)) {
     return false;
   }
@@ -81,9 +86,10 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph,
     if (kernel_query_->IsTbeRef(input)) {
       return true;
     }
-
+    auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(input, 0, true);
+    auto real_node = kernel_with_index.first;
     // when input is some special cnodes
-    if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(input)) != kNeedInsertTensorMoveOpSet.end()) {
+    if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(real_node)) != kNeedInsertTensorMoveOpSet.end()) {
       return true;
     }
 
@@ -93,7 +99,7 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph,
       MS_LOG(EXCEPTION) << "node has no output in manager"
                         << " trace: " << trace::DumpSourceLines(input);
     }
-    if (IsNodeOutPutUsedByOtherRealKernel(iter->second)) {
+    if (IsNodeOutPutUsedByOtherRealKernel(iter->second, cur_node, input_idx)) {
       return true;
     }
   }
@@ -107,7 +113,7 @@ void InsertTensorMoveForHcclOp::InsertTensorMove(const FuncGraphPtr &graph, cons
   std::vector<AnfNodePtr> new_inputs = {hccl_node->input(0)};
   for (size_t i = 1; i < hccl_node->size(); ++i) {
     auto input = hccl_node->input(i);
-    if (NeedInsertTensorMove(graph, input, hccl_node)) {
+    if (NeedInsertTensorMove(graph, hccl_node, i)) {
       auto tensor_move = CreateTensorMoveOp(graph, input);
       if (tensor_move == nullptr) {
         MS_LOG(EXCEPTION) << "Create tensor_move op failed.";
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h
index 98856375c24..7f3c146b339 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h
+++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h
@@ -32,7 +32,7 @@ class InsertTensorMoveForHcclOp : public PatternProcessPass {
 
  private:
   void InsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &hccl_node) const;
-  bool NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input, const CNodePtr &cur_node) const;
+  bool NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node, size_t input_idx) const;
   KernelQueryPtr kernel_query_;
 };
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc
index 30d899f3916..8e240b308fd 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc
@@ -53,6 +53,15 @@ void SafeCheckFunction(const CNodePtr &cnode, const std::vector<int64_t> &reduce
   }
 }
 
+void DynamicAttrUpdate(const AnfNodePtr &node) {
+  MS_EXCEPTION_IF_NULL(node);
+  auto primitive = AnfAlgo::GetCNodePrimitive(node);
+  MS_EXCEPTION_IF_NULL(primitive);
+  auto axis_attr = primitive->GetAttr(kAttrAxis);
+  AnfAlgo::SetNodeAttr(kAttrAxes, axis_attr, node);
+  AnfAlgo::EraseNodeAttr(kAttrAxis, node);
+}
+
 void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) {
   auto axis = kernel::GetReduceAttrAxis(cnode);
   std::vector<int64_t> convert_axis;
@@ -95,9 +104,15 @@ const AnfNodePtr ChangeAxisOfReduceKernel::Process(const FuncGraphPtr &, const A
   }
   auto convert_map = kReduceConvertMap.find(AnfAlgo::GetInputFormat(node, 0));
   if (convert_map == kReduceConvertMap.end()) {
+    if (AnfAlgo::IsDynamicShape(node)) {
+      DynamicAttrUpdate(node);
+    }
     return nullptr;
   }
   convert_map->second(node->cast<CNodePtr>());
+  if (AnfAlgo::IsDynamicShape(node)) {
+    DynamicAttrUpdate(node);
+  }
   return nullptr;
 }
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc
index 08cf18be42c..6b2d57b9a18 100644
--- a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc
+++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc
@@ -69,7 +69,7 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all)
   if (SizeToLong(shape.size()) <= split_dim) {
     MS_LOG(EXCEPTION) << "Invalid split dim " << split_dim << " is over the shape size " << shape.size();
   }
-  if (shape[LongToSize(split_dim)] % split_count != 0) {
+  if (split_count == 0 || shape[LongToSize(split_dim)] % split_count != 0) {
     MS_LOG(EXCEPTION) << "Invalid split count " << split_count << " cannot be divisible by shape[" << split_dim
                       << "] = " << shape[LongToSize(split_dim)];
   }
diff --git a/mindspore/ccsrc/backend/optimizer/common/helper.cc b/mindspore/ccsrc/backend/optimizer/common/helper.cc
index f39ed59a594..a59499da83d 100644
--- a/mindspore/ccsrc/backend/optimizer/common/helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/common/helper.cc
@@ -484,6 +484,7 @@ bool IsNotRealUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node) {
 }
 
 CNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx) {
+  MS_EXCEPTION_IF_NULL(func_graph);
   auto idx = NewValueNode(SizeToLong(output_idx));
   MS_EXCEPTION_IF_NULL(idx);
   auto imm = std::make_shared<Int64Imm>(SizeToLong(output_idx));
@@ -713,8 +714,17 @@ AbstractBasePtrList RectifyAbstractFromRegAttr(const PrimitivePtr &primitive,
   if (!opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(primitive->name(), &reg)) {
     return input_abstract;
   }
-  if (AnfAlgo::HasDynamicShapeFlag(primitive) ||
-      DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
+  if (AnfAlgo::HasDynamicShapeFlag(primitive)) {
+    return input_abstract;
+  }
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
+  if (device == kGPUDevice) {
+    if (DynamicShapeConstInputToAttrGPU.find(primitive->name()) != DynamicShapeConstInputToAttrGPU.end()) {
+      return input_abstract;
+    }
+  } else if (DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) {
     return input_abstract;
   }
   auto convert_input_list = reg.GetConstInputAttrInfo();
diff --git a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
index 7859345fbb7..0ed7c6ca663 100644
--- a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
+++ b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc
@@ -20,6 +20,7 @@
 #include <string>
 #include <vector>
 #include <utility>
+#include "backend/optimizer/common/helper.h"
 #include "backend/kernel_compiler/kernel_build_info.h"
 #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
 #include "backend/session/anf_runtime_algorithm.h"
@@ -89,6 +90,34 @@ void InsertCast(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
     }
   }
 }
+
+void InsertCastForGraphOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &func_output) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  size_t output_num = AnfAlgo::GetOutputTensorNum(cnode);
+  for (size_t i = 0; i < output_num; i++) {
+    auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, i);
+    auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, i);
+    const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, i);
+    if (infer_type != device_type) {
+      auto used_node_list = GetRealNodeUsedListByOutputIdx(func_graph, cnode, i);
+      for (size_t j = 0; j < used_node_list->size(); j++) {
+        auto used_node = used_node_list->at(j).first;
+        if (used_node != func_output) {
+          continue;
+        }
+        auto used_node_index = static_cast<size_t>(used_node_list->at(j).second - 1);
+        auto cur_input = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(used_node), used_node_index);
+        const std::vector<size_t> origin_shape =
+          AnfAlgo::GetPrevNodeOutputInferShape(utils::cast<CNodePtr>(used_node), i);
+        auto cast =
+          AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, device_type, infer_type, origin_shape, infer_type);
+        MS_EXCEPTION_IF_NULL(cast);
+        cast->set_scope(used_node->scope());
+        utils::cast<CNodePtr>(used_node)->set_input(used_node_index + 1, cast);
+      }
+    }
+  }
+}
 }  // namespace
 
 bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) {
@@ -100,6 +129,15 @@ bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) {
       InsertCast(func_graph, cnode);
     }
   }
+  AnfNodePtrList outputs;
+  kernel::GetFuncGraphOutputNodes(func_graph, &outputs);
+  auto func_output = func_graph->output();
+  for (auto node : outputs) {
+    if (node != nullptr && node->isa<CNode>() && AnfAlgo::IsRealKernel(node)) {
+      auto cnode = node->cast<CNodePtr>();
+      InsertCastForGraphOutput(func_graph, cnode, func_output);
+    }
+  }
   return true;
 }
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
index 0f105b9090b..f7ea32119aa 100644
--- a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc
@@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) {
   auto type_ptr = node->Type();
   auto shape_ptr = node->Shape();
   size_t output_num = AnfAlgo::GetOutputTensorNum(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
diff --git a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
index 95733a9b4f6..0aff8d507b9 100644
--- a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
+++ b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc
@@ -100,9 +100,13 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co
   MS_EXCEPTION_IF_NULL(transpose_op);
   // 3.Set the output info of transpose.
   auto transpose_type = {AnfAlgo::GetPrevNodeOutputInferDataType(used_node, used_node_index)};
-  auto transpose_shape = {AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index)};
-  AnfAlgo::SetOutputInferTypeAndShape(transpose_type, transpose_shape, transpose_op.get());
-  if (!is_fake) {
+  auto transpose_shape = AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index);
+  AnfAlgo::SetOutputInferTypeAndShape(transpose_type, {transpose_shape}, transpose_op.get());
+  if (is_fake) {
+    std::vector<int64_t> shape;
+    std::transform(transpose_shape.begin(), transpose_shape.end(), std::back_inserter(shape), SizeToLong);
+    AnfAlgo::SetNodeAttr("shape", MakeValue(shape), transpose_op);
+  } else {
     AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op);
   }
   // 4. Set the new edge of transpose op.
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
index 8d99cbb24b6..2cbe882d595 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc
@@ -230,7 +230,7 @@ bool AtomicAddCheckerAscend::SuitableForAtomicAdd(const AnfNodePtr &node) {
 
 void AtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) {
   // Change kernel build info.
-  auto kernel_info = static_cast<device::KernelInfo *>(composite_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(composite_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo();
   auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats();
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc
index 740e97f0b45..ab181401fdb 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc
@@ -41,7 +41,7 @@ namespace mindspore {
 namespace opt {
 void StitchAtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) {
   // Change kernel build info.
-  auto kernel_info = static_cast<device::KernelInfo *>(composite_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(composite_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo();
   auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats();
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc
index e4c2e59ec2f..4ea6b813056 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc
@@ -643,29 +643,33 @@ bool ArithmeticSimplify::Run(const FuncGraphPtr &func_graph) {
   expressions_map_ = GetExpressions();
   for (auto node : func_graph->GetOrderedCnodes()) {
     if (AnfAlgo::IsGraphKernel(node)) {
-      auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node);
-      graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph);
-      bool find_pattern = true;
-      bool change_anf_graph = false;
-      while (find_pattern) {
-        find_pattern = false;
-        find_pattern = DoArithmeticTrans(lg) || find_pattern;
-        find_pattern = DoConstantFold(lg) || find_pattern;
-        change_anf_graph = change_anf_graph || find_pattern;
+      try {
+        auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node);
+        graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph);
+        bool find_pattern = true;
+        bool change_anf_graph = false;
+        while (find_pattern) {
+          find_pattern = false;
+          find_pattern = DoArithmeticTrans(lg) || find_pattern;
+          find_pattern = DoConstantFold(lg) || find_pattern;
+          change_anf_graph = change_anf_graph || find_pattern;
+        }
+        if (!change_anf_graph) continue;
+        ReorganizeEmptyGraph(lg);
+        AnfNodePtrList outputs;
+        auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs);
+        new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL));
+        auto cnode = node->cast<CNodePtr>();
+        AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end());
+        EliminateRedundantParameters(new_funcgraph, &inputs);
+        auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs);
+        SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs);
+        mng->Replace(node, new_node);
+        mng->AddFuncGraph(new_funcgraph);
+        do_simplify = true;
+      } catch (const graphkernel::GKException &e) {
+        MS_LOG(WARNING) << e.what() << ", so we undo airthmetic simplify for this graph";
       }
-      if (!change_anf_graph) continue;
-      ReorganizeEmptyGraph(lg);
-      AnfNodePtrList outputs;
-      auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs);
-      new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL));
-      auto cnode = node->cast<CNodePtr>();
-      AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end());
-      EliminateRedundantParameters(new_funcgraph, &inputs);
-      auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs);
-      SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs);
-      mng->Replace(node, new_node);
-      mng->AddFuncGraph(new_funcgraph);
-      do_simplify = true;
     }
   }
   return do_simplify;
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc
index 01c004a06f8..12ffe5ee75c 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc
@@ -78,8 +78,8 @@ bool GraphKernelBackendCSE::CheckEqualKernelBuildInfo(const AnfNodePtr &main, co
     return BackendCSE::CheckEqualKernelBuildInfo(main, node);
   }
 
-  auto main_kernel_info = static_cast<device::KernelInfo *>(main->kernel_info());
-  auto node_kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto main_kernel_info = dynamic_cast<device::KernelInfo *>(main->kernel_info());
+  auto node_kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   if (main_kernel_info == nullptr && node_kernel_info == nullptr) {
     return true;
   }
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
index bf25889d1e5..d5a16a15b51 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc
@@ -613,7 +613,7 @@ void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) {
 }
 
 std::string GetFormat(const AnfNodePtr &node) {
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto kernel_build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(kernel_build_info);
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc
index 9ace0cb9a6b..30e160ee01d 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc
@@ -42,8 +42,11 @@
 #include "backend/optimizer/graph_kernel/reorder_ops.h"
 #include "backend/optimizer/graph_kernel/update_state_formatter.h"
 #include "backend/optimizer/graph_kernel/axis_normalizer.h"
+#include "backend/optimizer/graph_kernel/decrease_compute_precision.h"
+#include "backend/optimizer/graph_kernel/decrease_transfer_precision.h"
 #include "backend/optimizer/pass/getitem_tuple.h"
 #include "backend/optimizer/graph_kernel/graph_kernel_pass_manager.h"
+#include "backend/optimizer/graph_kernel/rewrite_output_shape.h"
 
 namespace mindspore {
 namespace opt {
@@ -60,6 +63,9 @@ PassManagerPtr GraphKernelOptimizer::PreProcess() const {
   // Do cse before all passes of graphkernel
   pm->AddPass(std::make_shared<CommonSubexpressionElimination>("cse1"), OptLevel_1);
 
+  // Save the original output info
+  pm->AddPass(std::make_shared<SaveOutputShape>(), OptLevel_1);
+
   // Change Assign(p, a, U) to Assign(Depend(p, U), a)
   pm->AddPass(std::make_shared<SplitAssign>(), OptLevel_1, is_gpu);
 
@@ -152,6 +158,10 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const {
   auto level = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_stitch_fusion);
   pm->AddPass(std::make_shared<StitchAtomicCleanInsertter>(), level, is_gpu);
 
+  // Enable low precision
+  auto level_low_precision = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_low_precision);
+  pm->AddPass(std::make_shared<DecreaseTransferPrecision>(), level_low_precision);
+  pm->AddPass(std::make_shared<DecreaseComputePrecision>(), level_low_precision, is_ascend);
   return pm;
 }
 
@@ -166,11 +176,15 @@ PassManagerPtr GraphKernelOptimizer::Combine() const {
 
 PassManagerPtr GraphKernelOptimizer::PostProcess() const {
   auto pm = std::make_shared<GraphKernelPassManager>(6, "postprocess");
-  // Add the new tensors to the kernel_graph
-  pm->AddPass(std::make_shared<BindValueToGraph>(), OptLevel_1);
-
   // Make Tuple for the inputs of UpdateState. (the reverse of SpreadUpdateState)
   pm->AddPass(std::make_shared<ShrinkUpdateState>(), OptLevel_1);
+
+  // Recover the original output info
+  pm->AddPass(std::make_shared<GetitemTuple>(), OptLevel_1);
+  pm->AddPass(std::make_shared<RewriteOutputShape>(), OptLevel_1);
+
+  // Add the new tensors to the kernel_graph
+  pm->AddPass(std::make_shared<BindValueToGraph>(), OptLevel_1);
   return pm;
 }
 
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc
index eacbff0907d..f8917fa0092 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc
@@ -425,7 +425,12 @@ class AreaGraph {
         AnfNodePtrList getitem_inputs = {NewValueNode(prim::kPrimTupleGetItem), main_cnodes[input_area], idx};
         TraceGuard g_sub(std::make_shared<TraceOpt>(main_cnodes[input_area]->debug_info()));
         auto getitem_node = main_func_graph->NewCNode(getitem_inputs);
-        getitem_node->set_abstract(main_cnodes[input_area]->abstract());
+        auto abs_tuple = dyn_cast<abstract::AbstractTuple>(main_cnodes[input_area]->abstract());
+        if (idx_val < SizeToLong(abs_tuple->size())) {
+          getitem_node->set_abstract(abs_tuple->elements()[idx_val]);
+        } else {
+          getitem_node->set_abstract(main_cnodes[input_area]->abstract());
+        }
         main_cnode_inputs.emplace_back(getitem_node);
       } else {
         main_cnode_inputs.emplace_back(main_cnodes[input_area]);
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc
index 6a0f9168c03..d113064a337 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc
@@ -27,6 +27,7 @@
 
 #include "backend/optimizer/graph_kernel/model/node.h"
 #include "backend/optimizer/graph_kernel/model/op_node.h"
+#include "backend/optimizer/graph_kernel/model/op_register.h"
 
 namespace mindspore {
 namespace opt {
@@ -107,36 +108,15 @@ NodePtr LiteGraph::GraphBuilder::Emit(const std::string &op, const NodePtrList &
 
 NodePtr LiteGraph::GraphBuilder::Op(const std::string &op, const NodeBase &baseinfo, const NodePtrList &inputs,
                                     const DAttrs &attrs, std::string node_name) {
-  auto op_ptr = Emit(op, inputs, attrs, node_name);
+  PrimOpPtr op_ptr = CreateOp(op, node_name);
+  op_ptr->SetInputs(inputs);
+  op_ptr->SetAttrs(attrs);
   op_ptr->SetBaseInfo(baseinfo);
-  return op_ptr;
+  return graph_->Add(op_ptr);
 }
 
 PrimOpPtr LiteGraph::GraphBuilder::CreateOp(const std::string &op, const std::string &node_name) {
-  static std::map<std::string, std::function<PrimOpPtr(const std::string &, const std::string &)>> creators;
-  if (creators.empty()) {
-    creators = {{"Add", Elemwise},
-                {"Sub", Elemwise},
-                {"RealDiv", Elemwise},
-                {"Mul", Elemwise},
-                {"Log", Elemwise},
-                {"Exp", Elemwise},
-                {"Pow", Elemwise},
-                {"Sqrt", Elemwise},
-                {"Rsqrt", Elemwise},
-                {"Neg", Elemwise},
-                {"Reciprocal", Elemwise},
-                {"Abs", Elemwise},
-                {"BroadcastTo", BroadcastTo},
-                {"Reshape", Reshape},
-                {"ReduceSum", Reduce},
-                {"ReduceMax", Reduce},
-                {"ReduceMin", Reduce},
-                {"Conv2D", Conv2d}};
-  }
-  auto iter = creators.find(op);
-  auto creator = (iter == creators.end() ? Opaque : iter->second);
-  return creator(op, node_name);
+  return OpRegistry::Instance().NewOp(op, node_name);
 }
 }  // namespace graphkernel
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h
index 439a172fc58..fc1cb42475e 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h
@@ -81,28 +81,6 @@ class LiteGraph::GraphBuilder {
   LiteGraphPtr Get() { return graph_; }
 
  private:
-  static PrimOpPtr Elemwise(const std::string &op, const std::string &name) {
-    return std::make_shared<ElemwiseOp>(op, name);
-  }
-
-  static PrimOpPtr BroadcastTo(const std::string &op, const std::string &name) {
-    return std::make_shared<BroadcastToOp>(op, name);
-  }
-
-  static PrimOpPtr Reshape(const std::string &op, const std::string &name) {
-    return std::make_shared<ReshapeOp>(op, name);
-  }
-
-  static PrimOpPtr Reduce(const std::string &op, const std::string &name) {
-    return std::make_shared<ReduceOp>(op, name);
-  }
-  static PrimOpPtr Opaque(const std::string &op, const std::string &name) {
-    return std::make_shared<OpaqueOp>(op, name);
-  }
-  static PrimOpPtr Conv2d(const std::string &op, const std::string &name) {
-    return std::make_shared<Conv2dOp>(op, name);
-  }
-
   PrimOpPtr CreateOp(const std::string &id, const std::string &name);
   std::string NewName(std::string prefix = "output_") { return prefix + std::to_string(graph_->name_id_++); }
 
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h
index 50dd34fb5e4..7c34218f14e 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h
@@ -26,6 +26,7 @@
 #include <iostream>
 #include <utility>
 #include <string>
+#include <stdexcept>
 
 #include "mindspore/core/ir/dtype/type_id.h"
 #include "mindspore/core/ir/value.h"
@@ -85,6 +86,8 @@ class Node : public NodeBase {
   void SetInput(size_t i, const NodePtr &new_input);
   void SetInputs(const NodePtrList &inputs);
   void ReplaceWith(const NodePtr &other_node);
+  void SetAttrs(const DAttrs &attrs) { attrs_ = attrs; }
+  void SetAttr(const std::string &key, const ValuePtr &value) { attrs_[key] = value; }
 
   template <typename T>
   T *As() {
@@ -146,6 +149,15 @@ class OutputNode : public Node {
   void Dump(std::ostringstream &os) const override { ; }
   NType NodeType() override { return NType::Output; }
 };
+
+class GKException : public std::exception {
+ public:
+  explicit GKException(const std::string &message) : msg_(message) {}
+  const char *what() const noexcept override { return msg_.c_str(); }
+
+ protected:
+  std::string msg_;
+};
 }  // namespace graphkernel
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc
index 3a03f3cf4b5..0d8a073c0a5 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc
@@ -31,7 +31,58 @@
 namespace mindspore {
 namespace opt {
 namespace graphkernel {
+std::vector<int64_t> GetListInt(const ValuePtr &attr_value) {
+  bool is_int64 = true;
+  auto get_int_value = [&is_int64](const ValuePtr &value) -> int64_t {
+    if (value->isa<Int64Imm>()) {
+      return GetValue<int64_t>(value);
+    }
+    is_int64 = false;
+    return static_cast<int64_t>(GetValue<int>(value));
+  };
+  std::vector<int64_t> list_int;
+  const auto &vals = attr_value->cast<ValueSequeuePtr>()->value();
+  (void)std::transform(vals.begin(), vals.end(), std::back_inserter(list_int), get_int_value);
+  if (!is_int64) {
+    MS_LOG(WARNING) << "Vector type should be 'int64_t' but got 'int'";
+  }
+  return list_int;
+}
+
+void PrimOp::Check(const NodePtrList &inputs, const DAttrs &attrs) {
+  CheckShape(inputs, attrs);
+  CheckType(inputs, attrs);
+  CheckFormat(inputs, attrs);
+}
+
+// check all type to be identical
+void PrimOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) {
+  TypeId tid = inputs[0]->type;
+  for (size_t i = 1; i < inputs.size(); i++) {
+    if (inputs[i]->type != tid) {
+      MS_LOG(EXCEPTION) << "Incompatible dtype between input " << 0 << "and" << i;
+    }
+  }
+}
+
+// check all formats are compatible, only DefaultForant is compatible with others
+void PrimOp::CheckFormat(const NodePtrList &inputs, const DAttrs &attrs) {
+  DFormat res = inputs[0]->format;
+  size_t i = 0;
+  for (size_t j = 1; j < inputs.size(); j++) {
+    if (inputs[j]->format != res) {
+      if (inputs[j]->format != kOpFormat_DEFAULT && res != kOpFormat_DEFAULT) {
+        MS_LOG(EXCEPTION) << "Incompatible format between input " << i << "and" << (j + 1);
+      }
+      if (res == kOpFormat_DEFAULT) {
+        res = inputs[j]->format;
+        i = j + 1;
+      }
+    }
+  }
+}
 void PrimOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) {
+  Check(inputs, attrs);
   this->shape = InferShape(inputs, attrs);
   this->type = InferType(inputs, attrs);
   this->format = InferFormat(inputs, attrs);
@@ -146,6 +197,88 @@ NodePtr PrimOp::InferValue(const NodePtrList &inputs, const DAttrs &attrs, const
   return res == nullptr ? nullptr : std::make_shared<ConstTensorNode>(res);
 }
 
+// default format shape to fractal_Nz format shape
+DShape ToNz(const DShape &default_shape) {
+  if (default_shape.size() != 1 && default_shape.size() != 2) {
+    throw GKException("shape is too long");
+  }
+  DShape output_shape;
+  if (default_shape.size() == 1 || (default_shape.size() == 2 && default_shape[0] == 1)) {
+    output_shape = {default_shape[default_shape.size() - 1] / 16, 1, 1, 16};
+    if (default_shape[default_shape.size() - 1] % 16 != 0) {
+      throw GKException("should be multiplies of 16");
+    }
+
+  } else if (default_shape.size() == 2 || default_shape[1] == 1) {
+    output_shape = {1, default_shape[0] / 16, 16, 1};
+    if (default_shape[0] % 16 != 0) {
+      throw GKException("should be multiplies of 16");
+    }
+
+  } else {
+    output_shape = {default_shape[1] / 16, default_shape[0] / 16, 16, 16};
+    if (default_shape[0] % 16 != 0 || default_shape[1] % 16 != 0) {
+      throw GKException("should be multiplies of 16");
+    }
+  }
+  return output_shape;
+}
+
+DShape BroadcastShape(const NodePtrList &inputs, bool to_nz = false) {
+  std::vector<std::vector<int64_t>> shapes;
+  for (auto &input : inputs) {
+    if (to_nz && input->format != kOpFormat_FRAC_NZ) {
+      shapes.emplace_back(ToNz(input->shape));
+    } else {
+      shapes.emplace_back(input->shape);
+    }
+  }
+  auto max_dim_input =
+    std::max_element(shapes.begin(), shapes.end(),
+                     [](const std::vector<int64_t> &a, const std::vector<int64_t> &b) { return a.size() < b.size(); });
+  auto max_dim = max_dim_input->size();
+  std::vector<std::vector<int64_t>> align_shapes;
+  for (auto &s : shapes) {
+    std::vector<int64_t> cur(max_dim - s.size(), 1);
+    cur.insert(cur.end(), s.begin(), s.end());
+    align_shapes.emplace_back(cur);
+  }
+  std::vector<int64_t> output_shape(max_dim, 1);
+  for (size_t i = 0; i < max_dim; i++) {
+    for (auto &align_shape : align_shapes) {
+      if (align_shape[i] > 1) {
+        if (output_shape[i] == 1) {
+          output_shape[i] = align_shape[i];
+        }
+        if (output_shape[i] != align_shape[i]) {
+          throw GKException("shape broadcast failed");
+        }
+      }
+    }
+  }
+  return output_shape;
+}
+
+DShape ElemwiseOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) {
+        return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC || input->format == kOpFormat_NCHW;
+      })) {
+    return BroadcastShape(inputs, false);
+  }
+  if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) {
+        return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC ||
+               input->format == kOpFormat_NCHW || input->format == kOpFormat_FRAC_NZ;
+      })) {
+    return BroadcastShape(inputs, true);
+  }
+  throw GKException("Only support default and fractal_nz");
+}
+
+DFormat ElemwiseOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) {
+  auto it = std::find_if(inputs.begin(), inputs.end(), [](const NodePtr &i) { return i->format != kOpFormat_DEFAULT; });
+  return it == inputs.end() ? kOpFormat_DEFAULT : (*it)->format;
+}
+
 void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) {
   PrimOp::Infer(inputs, attrs);
   auto IsBroadcast = [this](const NodePtrList &inputs) -> bool {
@@ -160,26 +293,64 @@ void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) {
   compute_type_ = IsBroadcast(inputs) ? BROADCAST : ELEMWISE;
 }
 
-DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
-  return GetValue<std::vector<int64_t>>(attrs.find("shape")->second);
+TypeId CastOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) {
+  CHECK_ATTR(attrs, "dst_type");
+  auto dst_type = attrs.find("dst_type")->second;
+  if (dst_type->isa<Type>()) {
+    return dst_type->cast<TypePtr>()->type_id();
+  }
+  return kernel::DtypeToTypeId(GetValue<std::string>(dst_type));
+}
+
+void SelectOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (inputs[0]->type != TypeId::kNumberTypeBool) {
+    MS_LOG(EXCEPTION) << "Select's input[0] should be bool type";
+  }
+  if (inputs[1]->type != inputs[2]->type) {
+    MS_LOG(EXCEPTION) << "Select's input[1] and input[2]'s type doesn't match";
+  }
 }
 
 DShape ReshapeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
-  auto new_shape = GetValue<std::vector<int64_t>>(attrs.find("shape")->second);
+  CHECK_ATTR(attrs, "shape");
+  auto new_shape = GetListInt(attrs.find("shape")->second);
   auto origin_shape = inputs[0]->shape;
+  auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies<int64_t>());
+  auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies<int64_t>());
   for (size_t i = 0; i < new_shape.size(); i++) {
     if (new_shape[i] == -1) {
-      auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies<int64_t>());
-      auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies<int64_t>());
       new_shape[i] = origin_product / new_product * (-1);
-      break;
+      return new_shape;
     }
   }
+  if (origin_product != new_product) {
+    MS_LOG(EXCEPTION) << "The shape product before and after reshaping should be equal";
+  }
   return new_shape;
 }
 
+DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  CHECK_ATTR(attrs, "shape");
+  return GetListInt(attrs.find("shape")->second);
+}
+
+// check rudece axis in range [-size,size)
+void ReduceOp::Check(const NodePtrList &inputs, const DAttrs &attrs) {
+  PrimOp::Check(inputs, attrs);
+  CHECK_ATTR(attrs, "axis");
+  auto axis = GetListInt(attrs.find("axis")->second);
+  int64_t size = static_cast<int64_t>(inputs[0]->shape.size());
+  auto it = std::find_if(axis.begin(), axis.end(), [&size](const int64_t &i) { return (i >= size || i < (-size)); });
+  if (it != axis.end()) {
+    MS_LOG(EXCEPTION) << "reduce_axis should be in range [" << (-size) << "," << size << ")"
+                      << ",but got " << (*it);
+  }
+}
+
 DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
-  auto axis = GetValue<std::vector<int64_t>>(attrs.find("axis")->second);
+  CHECK_ATTR(attrs, "axis");
+  CHECK_ATTR(attrs, "keep_dims");
+  auto axis = GetListInt(attrs.find("axis")->second);
   auto keepdims = GetValue<bool>(attrs.find("keep_dims")->second);
   if (keepdims) {
     DShape new_shape = inputs[0]->shape;
@@ -200,6 +371,171 @@ DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
   }
   return new_shape;
 }
+
+void CheckNd(const std::vector<int64_t> &shape, size_t n) {
+  if (shape.size() != n) {
+    std::ostringstream info;
+    info << "input dimension should be " << n << ", but got  " << shape.size();
+    throw GKException(info.str());
+  }
+}
+
+DShape Conv2dOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  auto shape0 = inputs[0]->shape;
+  auto shape1 = inputs[1]->shape;
+  CheckNd(shape0, 4);
+  CheckNd(shape1, 4);
+  if (inputs[0]->format != kOpFormat_NHWC && inputs[1]->format != kOpFormat_NHWC &&
+      GetValue<std::string>(attrs.find("format")->second) != kOpFormat_NHWC) {
+    throw GKException("check NHWC format failed");
+  }
+  auto n = shape0[0];
+  auto h = shape0[1];
+  auto w = shape0[2];
+  auto out_channel = shape1[0];
+  CHECK_ATTR(attrs, "pad_list");
+  CHECK_ATTR(attrs, "pad_mode");
+  CHECK_ATTR(attrs, "kernel_size");
+  CHECK_ATTR(attrs, "stride");
+  CHECK_ATTR(attrs, "dilation");
+  auto pad_list = GetListInt(attrs.find("pad_list")->second);
+  auto pad_mode = GetValue<std::string>(attrs.find("pad_mode")->second);
+  auto kernel_size = GetListInt(attrs.find("kernel_size")->second);
+  auto stride = GetListInt(attrs.find("stride")->second);
+  auto dilation = GetListInt(attrs.find("dilation")->second);
+  CheckNd(pad_list, 4);
+  CheckNd(kernel_size, 2);
+  CheckNd(stride, 4);
+  CheckNd(dilation, 4);
+  bool has_pad = false;
+  if (pad_list[0] != pad_list[1] || pad_list[2] != pad_list[3]) {
+    has_pad = true;
+  } else {
+    if (pad_mode == "VALID" || pad_mode == "valid") {
+      if (std::any_of(pad_list.begin(), pad_list.end(), [](int i) { return i == 0; })) {
+        has_pad = true;
+      }
+    }
+  }
+  if (!has_pad) {
+    pad_list = {0, 0, 0, 0};
+  }
+  auto k_h = (kernel_size[0] - 1) * dilation[2] + 1;
+  auto k_w = (kernel_size[1] - 1) * dilation[3] + 1;
+  auto out_h = (h + pad_list[0] + pad_list[1] - k_h) / stride[2] + 1;
+  auto out_w = (w + pad_list[2] + pad_list[3] - k_w) / stride[3] + 1;
+  std::vector<int64_t> output = {n, out_h, out_w, out_channel};
+  return output;
+}
+
+TypeId Conv2dOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type;
+  auto dst_type = attrs.find("dst_type")->second;
+  if (dst_type->isa<Type>()) {
+    return dst_type->cast<TypePtr>()->type_id();
+  }
+  return kernel::DtypeToTypeId(GetValue<std::string>(dst_type));
+}
+
+DShape TransposeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  CHECK_ATTR(attrs, "perm");
+  auto perm = GetListInt(attrs.find("perm")->second);
+  auto &old_shape = inputs[0]->shape;
+  DShape new_shape;
+  if (perm.size() != old_shape.size()) {
+    MS_LOG(EXCEPTION) << "perm.size() != old_shape.size(). " << perm.size() << " vs " << old_shape.size();
+  }
+  std::transform(perm.begin(), perm.end(), std::back_inserter(new_shape),
+                 [&old_shape](int64_t p) { return old_shape[p]; });
+  return new_shape;
+}
+
+DFormat TransposeOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (inputs[0]->shape.size() != 4) return kOpFormat_DEFAULT;
+  CHECK_ATTR(attrs, "perm");
+  auto perm = GetListInt(attrs.find("perm")->second);
+  const auto &ori_format = inputs[0]->format;
+  if (ori_format == kOpFormat_DEFAULT || ori_format == kOpFormat_NCHW) {
+    std::vector<int64_t> nchw2nhwc = {0, 2, 3, 1};
+    if (perm == nchw2nhwc) return kOpFormat_NHWC;
+  } else if (ori_format == kOpFormat_NHWC) {
+    std::vector<int64_t> nhwc2nchw = {0, 3, 1, 2};
+    if (perm == nhwc2nchw) return kOpFormat_DEFAULT;
+  }
+  std::ostringstream info;
+  info << "Unsupported Transpose. ori_format = " << ori_format << ", perm = " << attrs.find("perm")->second->ToString();
+  throw GKException(info.str());
+}
+
+DShape MatMulOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  std::vector<int64_t> shape0 = inputs[0]->shape;
+  std::vector<int64_t> shape1 = inputs[1]->shape;
+  if (shape0.size() != 2 || shape1.size() != 2) {
+    std::ostringstream info;
+    info << "MatMul's input's dimension must be 2, but got " << shape0.size() << " and " << shape1.size();
+    throw GKException(info.str());
+  }
+  auto transpose_a = GetValue<bool>(attrs.find("transpose_a")->second);
+  auto transpose_b = GetValue<bool>(attrs.find("transpose_b")->second);
+  int64_t m = transpose_a ? shape0[1] : shape0[0];
+  int64_t k1 = transpose_a ? shape0[0] : shape0[1];
+  int64_t k2 = transpose_b ? shape1[1] : shape1[0];
+  int64_t n = transpose_b ? shape1[0] : shape1[1];
+  if (k1 != k2) {
+    MS_LOG(EXCEPTION) << "MatMul's inputs have different k value " << k1 << " vs " << k2;
+  }
+  std::vector<int64_t> output = {m, n};
+  return output;
+}
+
+TypeId MatMulOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type;
+  auto dst_type = attrs.find("dst_type")->second;
+  if (dst_type->isa<Type>()) {
+    return dst_type->cast<TypePtr>()->type_id();
+  }
+  return kernel::DtypeToTypeId(GetValue<std::string>(dst_type));
+}
+
+DShape PadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  std::vector<int64_t> shape0 = inputs[0]->shape;
+  size_t n = shape0.size();
+  std::vector<int64_t> pad_before = GetListInt(attrs.find("head")->second);
+  std::vector<int64_t> pad_after = GetListInt(attrs.find("tail")->second);
+  if (pad_before.size() != n || pad_after.size() != n) {
+    MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << pad_before.size() << " vs "
+                      << pad_after.size();
+  }
+  std::vector<int64_t> output;
+  for (size_t i = 0; i < n; i++) {
+    output.emplace_back(shape0[i] + pad_before[i] + pad_after[i]);
+  }
+  return output;
+}
+
+DShape UnPadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) {
+  std::vector<int64_t> shape0 = inputs[0]->shape;
+  size_t n = shape0.size();
+  std::vector<int64_t> unpad_after = GetListInt(attrs.find("tail")->second);
+  if (unpad_after.size() != n) {
+    MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << unpad_after.size();
+  }
+  std::vector<int64_t> output;
+  for (size_t i = 0; i < n; i++) {
+    output.emplace_back(shape0[i] - unpad_after[i]);
+  }
+  return output;
+}
+
+void ComplexOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) {
+  if (inputs[0]->type != TypeId::kNumberTypeFloat32) {
+    throw GKException("Complex's input[0] should be float32");
+  }
+  if (inputs[0]->type != inputs[1]->type) {
+    MS_LOG(EXCEPTION) << "Complex's input[0] and inputs[1]'s type mismatch";
+  }
+}
+
 }  // namespace graphkernel
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h
index c477bd08488..fd59c677ce8 100644
--- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h
+++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h
@@ -20,12 +20,23 @@
 #include <algorithm>
 #include <sstream>
 #include <string>
+#include <unordered_map>
+#include <functional>
 
 #include "backend/optimizer/graph_kernel/model/node.h"
+#include "backend/kernel_compiler/common_utils.h"
+#include "ir/dtype/type.h"
 
 namespace mindspore {
 namespace opt {
 namespace graphkernel {
+#define CHECK_ATTR(attrs, attr_name)                                                              \
+  do {                                                                                            \
+    if (attrs.count(attr_name) == 0) {                                                            \
+      MS_LOG(EXCEPTION) << "The attr [" << attr_name << "] does not exist in [" << #attrs << "]"; \
+    }                                                                                             \
+  } while (0)
+
 class PrimOp : public Node {
  public:
   enum ComputeType {
@@ -39,43 +50,109 @@ class PrimOp : public Node {
   PrimOp(const std::string &op, const std::string &node_name, ComputeType compute)
       : Node({{}, TypeId::kNumberTypeBegin, kOpFormat_DEFAULT}, node_name), op_(op), compute_type_(compute) {}
 
+  virtual void Check(const NodePtrList &inputs, const DAttrs &attrs);
+  virtual void CheckShape(const NodePtrList &inputs, const DAttrs &attrs) {}
+  virtual void CheckType(const NodePtrList &inputs, const DAttrs &attrs);
+  virtual void CheckFormat(const NodePtrList &inputs, const DAttrs &attrs);
+
   virtual void Infer(const NodePtrList &inputs, const DAttrs &attrs);
+  virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op);
+  virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; }
+  virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; }
+  virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; }
+
   void Dump(std::ostringstream &os) const override;
   NType NodeType() override { return NType::Primitive; }
 
   const std::string &op() const { return op_; }
   ComputeType compute_type() const { return compute_type_; }
-  virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op);
 
  protected:
   std::string op_;
   ComputeType compute_type_;
-  virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; }
-  virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; }
-  virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; }
 };
 using PrimOpPtr = std::shared_ptr<PrimOp>;
 
 class ElemwiseOp : public PrimOp {
  public:
   ElemwiseOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, ELEMWISE) {}
+
   void Infer(const NodePtrList &inputs, const DAttrs &attrs) override;
-  // TODO(dayschan) rewrite InferShape/InferFormat
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class CastOp : public ElemwiseOp {
+ public:
+  CastOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Cast", node_name) {}
+
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class InplaceAssignOp : public ElemwiseOp {
+ public:
+  InplaceAssignOp(const std::string &op, const std::string &node_name) : ElemwiseOp("InplaceAssign", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->shape; }
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->type; }
+  DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->format; }
+};
+
+class SelectOp : public ElemwiseOp {
+ public:
+  SelectOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Select", node_name) {}
+
+  void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override;
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[1]->type; }
+};
+
+class CompareOp : public ElemwiseOp {
+ public:
+  CompareOp(const std::string &op, const std::string &node_name) : ElemwiseOp(op, node_name) {}
+
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeBool; }
+};
+
+class LessOp : public CompareOp {
+ public:
+  LessOp(const std::string &op, const std::string &node_name) : CompareOp("Less", node_name) {}
+};
+
+class EqualOp : public CompareOp {
+ public:
+  EqualOp(const std::string &op, const std::string &node_name) : CompareOp("Equal", node_name) {}
+};
+
+class LessEqualOp : public CompareOp {
+ public:
+  LessEqualOp(const std::string &op, const std::string &node_name) : CompareOp("LessEqual", node_name) {}
+};
+
+class GreaterOp : public CompareOp {
+ public:
+  GreaterOp(const std::string &op, const std::string &node_name) : CompareOp("Greater", node_name) {}
+};
+
+class GreaterEqualOp : public CompareOp {
+ public:
+  GreaterEqualOp(const std::string &op, const std::string &node_name) : CompareOp("GreaterEqual", node_name) {}
 };
 
 class ReshapeOp : public PrimOp {
  public:
   ReshapeOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, RESHAPE) {}
 
- protected:
   DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override {
+    return attrs.find("format") == attrs.end() ? kOpFormat_DEFAULT
+                                               : GetValue<std::string>(attrs.find("format")->second);
+  }
 };
 
 class BroadcastToOp : public PrimOp {
  public:
   BroadcastToOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, BROADCAST) {}
 
- protected:
   DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
 };
 
@@ -83,8 +160,10 @@ class ReduceOp : public PrimOp {
  public:
   ReduceOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, REDUCE) {}
 
- protected:
+  void Check(const NodePtrList &inputs, const DAttrs &attrs) override;
+
   DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return kOpFormat_DEFAULT; };
 };
 
 class OpaqueOp : public PrimOp {
@@ -95,6 +174,74 @@ class OpaqueOp : public PrimOp {
 class Conv2dOp : public OpaqueOp {
  public:
   Conv2dOp(const std::string &op, const std::string &node_name) : OpaqueOp("Conv2D", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class TransposeOp : public OpaqueOp {
+ public:
+  TransposeOp(const std::string &op, const std::string &node_name) : OpaqueOp("Transpose", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class MatMulOp : public OpaqueOp {
+ public:
+  MatMulOp(const std::string &op, const std::string &node_name) : OpaqueOp("MatMul", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class PadAkgOp : public OpaqueOp {
+ public:
+  PadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("PadAkg", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class UnPadAkgOp : public OpaqueOp {
+ public:
+  UnPadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("UnPadAkg", node_name) {}
+
+  DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override;
+};
+
+class CImagOp : public ElemwiseOp {
+ public:
+  CImagOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CImag", node_name) {}
+
+  void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override {
+    if (inputs[0]->type != TypeId::kNumberTypeComplex64) {
+      throw GKException("CImag's input[0] should be complex64");
+    }
+  };
+
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; }
+};
+
+class CRealOp : public ElemwiseOp {
+ public:
+  CRealOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CReal", node_name) {}
+
+  void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override {
+    if (inputs[0]->type != TypeId::kNumberTypeComplex64) {
+      throw GKException("CReal's input[0] should be complex64");
+    }
+  };
+
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; }
+};
+
+class ComplexOp : public ElemwiseOp {
+ public:
+  ComplexOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Complex", node_name) {}
+
+  void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override;
+
+  TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeComplex64; }
 };
 }  // namespace graphkernel
 }  // namespace opt
diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc
index 47b1d74de2a..8551e59f098 100644
--- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc
+++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc
@@ -47,14 +47,17 @@ std::vector<DeviceMemPtr> DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t
   }
   std::lock_guard<std::mutex> locker(mutex_);
   // Remove the pre-alloc memory.
-  auto mem_block = FindMemBlock(device_addr);
+  const auto &mem_block = FindMemBlock(device_addr);
   MS_EXCEPTION_IF_NULL(mem_block);
-  auto iter = mem_block->block_all_mem_buf_map_.find(device_addr);
+  const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr);
   if (iter == mem_block->block_all_mem_buf_map_.end()) {
     MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "].";
   }
   auto mem_buf = iter->second;
   MS_EXCEPTION_IF_NULL(mem_buf);
+  if (mem_buf->size_ < total_size) {
+    MS_LOG(EXCEPTION) << "The size of membuf is less than total_size.";
+  }
   auto rest_size = mem_buf->size_ - total_size;
   (void)mem_block->block_all_mem_buf_map_.erase(iter);
   // Split the pre-alloc memory into continuous memory by the size list.
@@ -79,7 +82,7 @@ size_t DynamicMemPoolBestFit::AlignMemorySize(size_t size) const {
 }
 
 DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) {
-  auto iter = global_idle_mem_buf_map_.lower_bound(size);
+  const auto &iter = global_idle_mem_buf_map_.lower_bound(size);
   if (iter != global_idle_mem_buf_map_.end()) {
     auto mem_buf = iter->second;
     MS_EXCEPTION_IF_NULL(mem_buf);
@@ -120,7 +123,8 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) {
   mem_alloc_unit_size_ = DYNAMIC_MEM_ALLOC_UNIT_SIZE;
   auto mem_block = std::make_shared<DynamicMemBlock>(device_addr, real_alloc_size);
   MS_EXCEPTION_IF_NULL(mem_block);
-  auto iter = std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock);
+  const auto &iter =
+    std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock);
   (void)global_mem_block_list_.insert(iter, mem_block);
   // Add new memory buf
   auto mem_buf = std::make_shared<DynamicMemBuf>(device_addr, kMemBufUsed, real_alloc_size);
@@ -163,9 +167,12 @@ bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) co
 
 void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &mem_buf) {
   MS_EXCEPTION_IF_NULL(mem_buf);
-  auto mem_block = FindMemBlock(mem_buf->device_addr_);
+  const auto &mem_block = FindMemBlock(mem_buf->device_addr_);
   MS_EXCEPTION_IF_NULL(mem_block);
   // Divide new memory buf
+  if (mem_buf->size_ < size) {
+    MS_LOG(EXCEPTION) << "The size of membuf is less than size.";
+  }
   size_t newbuf_size = mem_buf->size_ - size;
   mem_buf->size_ = size;
   DeviceMemPtr newbuf_addr = AddressOffset(mem_buf->device_addr_, size);
@@ -184,7 +191,8 @@ bool DynamicMemPoolBestFit::CmpMemBlock(const DeviceMemPtr &device_addr, const D
 
 DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &device_addr) {
   MS_EXCEPTION_IF_NULL(device_addr);
-  auto iter = std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock);
+  auto &&iter =
+    std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock);
   if (iter != global_mem_block_list_.begin()) {
     return *(--iter);
   }
@@ -194,7 +202,7 @@ DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &devic
 void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) {
   MS_EXCEPTION_IF_NULL(device_addr);
   std::lock_guard<std::mutex> locker(mutex_);
-  auto mem_block = FindMemBlock(device_addr);
+  const auto &mem_block = FindMemBlock(device_addr);
   if (mem_block == nullptr) {
     // May be destroy the memory pool first, then destroy the address, so this is normal case.
     MS_LOG(DEBUG) << "Can't find the mem_block of the device address[" << device_addr << "].";
@@ -206,7 +214,7 @@ void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) {
 void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, const DeviceMemPtr &device_addr) {
   MS_EXCEPTION_IF_NULL(mem_block);
   MS_EXCEPTION_IF_NULL(device_addr);
-  auto iter = mem_block->block_all_mem_buf_map_.find(device_addr);
+  const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr);
   if (iter == mem_block->block_all_mem_buf_map_.end()) {
     MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "].";
   }
@@ -216,6 +224,9 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c
     MS_LOG(EXCEPTION) << "Find the mem_buf is not used, mem_buf_address[" << mem_buf->device_addr_ << "].";
   }
   mem_buf->status_ = kMemBufIdle;
+  if (total_used_mem_statistics_ < mem_buf->size_) {
+    MS_LOG(EXCEPTION) << "The total used mem size is less than the size of membuf.";
+  }
   total_used_mem_statistics_ -= mem_buf->size_;
   // Combine backward(combine the next_mem_buf to mem_buf)
   auto next_iter = iter;
@@ -254,7 +265,7 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c
 
 void DynamicMemPoolBestFit::EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr) {
   MS_EXCEPTION_IF_NULL(device_addr);
-  auto iter = global_idle_mem_buf_map_.equal_range(size);
+  auto &&iter = global_idle_mem_buf_map_.equal_range(size);
   while (iter.first != iter.second) {
     MS_EXCEPTION_IF_NULL(iter.first->second);
     // Remove map of the idle memory buf by size and device address
@@ -272,7 +283,7 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() {
   MS_LOG(INFO) << "The dynamic memory pool total size is " << total_mem_statistics_ << ", total used size is "
                << total_used_mem_statistics_ << ", used peak size is " << used_mem_peak_statistics_ << ".";
   for (auto iter = global_mem_block_list_.begin(); iter != global_mem_block_list_.end(); ++iter) {
-    auto device_addr = (*iter)->device_addr();
+    auto &device_addr = (*iter)->device_addr_base_;
     if (device_addr != nullptr) {
       if (!FreeDeviceMem(device_addr)) {
         MS_LOG(EXCEPTION) << "Free device memory[" << device_addr << "] error.";
diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h
index 6141a9a2711..a90429f9f30 100644
--- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h
+++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h
@@ -65,10 +65,13 @@ class DynamicMemBlock {
   ~DynamicMemBlock() { block_all_mem_buf_map_.clear(); }
   const DeviceMemPtr &device_addr() const { return device_addr_base_; }
   size_t size() const { return mem_block_size_; }
+
+ private:
+  friend class DynamicMemPoolBestFit;
+
   // The map of all memory buf in this memory block by device address.
   DeviceAddrMapMemBuf block_all_mem_buf_map_;
 
- private:
   DeviceMemPtr device_addr_base_{nullptr};
   size_t mem_block_size_{0};
 };
diff --git a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc
index c4befc7e8f9..06b7edafb68 100644
--- a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc
+++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc
@@ -391,7 +391,7 @@ bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const Commu
       MS_EXCEPTION_IF_NULL(communication_op_node_item);
       tuple_getitem->set_abstract(communication_op_node_item->abstract());
       if (kernel_graph->IsInternalOutput(communication_op_node_item, 0)) {
-        kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, offset);
+        kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, LongToSize(offset));
       }
       if (!manager->Replace(communication_op_node_item, tuple_getitem)) {
         MS_LOG(EXCEPTION) << "manager replace node failed";
diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc
index 02316be0e11..b6b48703573 100644
--- a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc
+++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc
@@ -46,17 +46,26 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An
       return nullptr;
     }
   }
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
   if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimGatherD->name()) {
-    auto ms_context = MsContext::GetInstance();
-    MS_EXCEPTION_IF_NULL(ms_context);
-    if (ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET) != kGPUDevice) {
+    if (device != kGPUDevice) {
       return nullptr;
     }
   }
-  if (AnfAlgo::IsDynamicShape(cnode) &&
-      DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) {
-    MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope();
-    return nullptr;
+  if (AnfAlgo::IsDynamicShape(cnode)) {
+    if (device == kGPUDevice) {
+      if (DynamicShapeConstInputToAttrGPU.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttrGPU.end()) {
+        MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope();
+        return nullptr;
+      }
+    } else {
+      if (DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) {
+        MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope();
+        return nullptr;
+      }
+    }
   }
   ConstInputToAttr(cnode, reg.GetConstInputAttrInfo());
 
diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc
index 94ec9ed5ca0..c86db4644ce 100644
--- a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc
+++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc
@@ -50,7 +50,7 @@ int64_t SplitTupleInputs(const FuncGraphPtr &graph, const AnfNodePtr &tuple_inpu
   }
   for (size_t index = 0; index < input_size; ++index) {
     auto dynamic_input_node = CreatTupleGetItemNode(graph, tuple_input, index);
-    plant_inputs->emplace_back(dynamic_input_node);
+    (void)plant_inputs->emplace_back(dynamic_input_node);
   }
   return input_size;
 }
diff --git a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc
index e8f311c04d3..4b04d4de543 100644
--- a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc
+++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc
@@ -191,6 +191,9 @@ const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, con
   std::vector<AnfNodePtr> new_inputs = cnode->inputs();
   bool inputs_changed = false;
   for (auto index : candidate_inputs) {
+    if (index >= new_inputs.size()) {
+      MS_LOG(EXCEPTION) << "Index is out of the size of cnode inputs.";
+    }
     auto replace_node = GetConvertNode(func_graph, cnode, index);
     if (replace_node != nullptr) {
       new_inputs[index] = replace_node;
diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
index 8f71663b0a4..25d38c9bd0d 100644
--- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
+++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc
@@ -620,7 +620,7 @@ std::vector<std::string> AnfRuntimeAlgorithm::GetAllOutputFormats(const AnfNodeP
                       << "#node [" << node->DebugString() << "]"
                       << " trace: " << trace::DumpSourceLines(node);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -635,7 +635,7 @@ std::vector<std::string> AnfRuntimeAlgorithm::GetAllInputFormats(const AnfNodePt
                       << "#node [" << node->DebugString() << "]"
                       << " trace: " << trace::DumpSourceLines(node);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -650,7 +650,7 @@ std::vector<TypeId> AnfRuntimeAlgorithm::GetAllInputDeviceTypes(const AnfNodePtr
                       << "#node [" << node->DebugString() << "]"
                       << " trace: " << trace::DumpSourceLines(node);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -665,7 +665,7 @@ std::vector<TypeId> AnfRuntimeAlgorithm::GetAllOutputDeviceTypes(const AnfNodePt
                       << "#node [" << node->DebugString() << "]"
                       << " trace: " << trace::DumpSourceLines(node);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -680,7 +680,7 @@ std::string AnfRuntimeAlgorithm::GetOriginDataFormat(const AnfNodePtr &node) {
                       << "#node [" << node->DebugString() << "]"
                       << " trace: " << trace::DumpSourceLines(node);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -699,7 +699,7 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t
   if (!AnfAlgo::IsRealKernel(node)) {
     return AnfAlgo::GetPrevNodeOutputFormat(node, output_idx);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -723,7 +723,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i
   if (!IsRealKernel(node)) {
     return GetPrevNodeOutputFormat(node, input_idx);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -869,7 +869,7 @@ std::string AnfRuntimeAlgorithm::GetInputReshapeType(const AnfNodePtr &node, siz
   if (!IsRealKernel(node)) {
     return GetPrevNodeOutputReshapeType(node, input_idx);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -889,7 +889,7 @@ std::string AnfRuntimeAlgorithm::GetOutputReshapeType(const AnfNodePtr &node, si
   if (!IsRealKernel(node)) {
     return GetPrevNodeOutputReshapeType(node, output_idx);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -943,7 +943,7 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size
   if (!IsRealKernel(node)) {
     return GetPrevNodeOutputDeviceDataType(node, output_idx);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -966,7 +966,7 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_
   if (!IsRealKernel(node)) {
     return GetPrevNodeOutputDeviceDataType(node, 0);
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -998,7 +998,7 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node,
                         << " trace: " << trace::DumpSourceLines(node);
     }
   }
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetOutputAddr(output_idx);
   if (addr == nullptr) {
@@ -1023,7 +1023,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod
     }
   }
   // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice`
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetMutableOutputAddr(output_idx);
   if (addr == nullptr) {
@@ -1046,7 +1046,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_
     return false;
   }
   // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice`
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->OutputAddrExist(output_idx);
 }
@@ -1054,7 +1054,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_
 bool AnfRuntimeAlgorithm::WorkspaceAddrExist(const AnfNodePtr &node, size_t output_idx) {
   MS_EXCEPTION_IF_NULL(node);
   // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice`
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->WorkspaceAddrExist(output_idx);
 }
@@ -1074,7 +1074,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNode
 // set output device addr of anf_node
 void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   if (!kernel_info->SetOutputAddr(addr, output_idx)) {
     MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail."
@@ -1085,7 +1085,7 @@ void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t out
 // set workspace device addr of anf_node
 void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   if (!kernel_info->SetWorkspaceAddr(addr, output_idx)) {
     MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail。"
@@ -1096,7 +1096,7 @@ void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t
 // get workspace device addr of anf_node
 DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetWorkspaceAddr(output_idx);
   if (addr == nullptr) {
@@ -1110,7 +1110,7 @@ DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, siz
 // get workspace device mutable addr of anf_node
 DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr &node, size_t index) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetMutableWorkspaceAddr(index);
   if (addr == nullptr) {
@@ -1248,7 +1248,7 @@ void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_
 
 kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   // select_kernel_build_info() has checked whether return pointer is null
   auto build_info = kernel_info->select_kernel_build_info();
@@ -1259,7 +1259,7 @@ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) {
 // get KernelBuildType of node, such as ATT,RT,FWK and so on
 KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   // select_kernel_build_info() has checked whether return pointer is null
   auto build_info = kernel_info->select_kernel_build_info();
@@ -1287,7 +1287,7 @@ void AnfRuntimeAlgorithm::SetOutputDataDesc(const AnfNodePtr &node, const std::v
 
 std::vector<nlohmann::json> AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   if (kernel_info == nullptr) {
     return {};
   }
@@ -1300,7 +1300,7 @@ std::vector<nlohmann::json> AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNode
 
 kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(build_info);
@@ -1309,7 +1309,7 @@ kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) {
 
 kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->select_kernel_build_info();
   if (build_info == nullptr) {
@@ -1321,7 +1321,7 @@ kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) {
 // set select kernel_build_info
 void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->set_select_kernel_build_info(select_kernel_build_info);
 }
@@ -1329,7 +1329,7 @@ void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &sel
 // get select kernel_build_info
 KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->GetMutableSelectKernelBuildInfo();
 }
@@ -1337,7 +1337,7 @@ KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePt
 // get kernelMode
 KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->MutableKernelMod();
 }
@@ -1345,7 +1345,7 @@ KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) {
 // set kernel mod
 void AnfRuntimeAlgorithm::SetKernelMod(const KernelModPtr &kernel_mod, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   kernel_info->set_kernel_mod(kernel_mod);
 }
@@ -1441,42 +1441,42 @@ bool AnfRuntimeAlgorithm::IsLabelIndexInNode(const AnfNodePtr &node, size_t labe
 
 void AnfRuntimeAlgorithm::SetStreamId(uint32_t stream_id, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   kernel_info->set_stream_id(stream_id);
 }
 
 uint32_t AnfRuntimeAlgorithm::GetStreamId(const AnfNodePtr &node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->stream_id();
 }
 
 void AnfRuntimeAlgorithm::SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   kernel_info->set_stream_distinction_label(stream_label);
 }
 
 uint32_t AnfRuntimeAlgorithm::GetStreamDistinctionLabel(const AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<const device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<const device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->stream_distinction_label();
 }
 
 void AnfRuntimeAlgorithm::SetGraphId(uint32_t graph_id, AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   kernel_info->set_graph_id(graph_id);
 }
 
 uint32_t AnfRuntimeAlgorithm::GetGraphId(const AnfNode *node) {
   MS_EXCEPTION_IF_NULL(node);
-  auto kernel_info = static_cast<const device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<const device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->graph_id();
 }
@@ -1510,7 +1510,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) {
   if (IsPrimitiveCNode(node, prim::kPrimLoad)) {
     return IsFeatureMapOutput(node->cast<CNodePtr>()->input(1));
   }
-  auto kernel_info = static_cast<const device::KernelInfo *>(node->kernel_info());
+  auto kernel_info = dynamic_cast<const device::KernelInfo *>(node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   return kernel_info->is_feature_map();
 }
@@ -1575,16 +1575,15 @@ bool AnfRuntimeAlgorithm::IsInplaceNode(const mindspore::AnfNodePtr &kernel, con
 }
 
 bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) {
+  static const std::set<std::string> kCommunicationOpNames = {kAllReduceOpName,     kAllGatherOpName, kBroadcastOpName,
+                                                              kReduceScatterOpName, kHcomSendOpName,  kReceiveOpName,
+                                                              kAllToAllVOpName};
   MS_EXCEPTION_IF_NULL(node);
   if (!node->isa<CNode>()) {
     return false;
   }
   auto kernel_name = AnfAlgo::GetCNodeName(node);
-  if (kernel_name == kAllReduceOpName || kernel_name == kAllGatherOpName || kernel_name == kBroadcastOpName ||
-      kernel_name == kReduceScatterOpName || kernel_name == kHcomSendOpName || kernel_name == kReceiveOpName) {
-    return true;
-  }
-  return false;
+  return (kCommunicationOpNames.find(kernel_name) != kCommunicationOpNames.end());
 }
 
 bool AnfRuntimeAlgorithm::IsFusedCommunicationOp(const AnfNodePtr &node) {
@@ -2125,6 +2124,8 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
   for (size_t i = 0; i < input_size; ++i) {
     auto input_with_index = AnfAlgo::GetPrevNodeOutput(node, i);
     auto real_input = input_with_index.first;
+    auto cnode_input = node->input(i + 1);
+    MS_EXCEPTION_IF_NULL(cnode_input);
     MS_EXCEPTION_IF_NULL(real_input);
     if (depend_tensors != nullptr) {
       auto iter_tensor = depend_tensors->find(i);
@@ -2133,24 +2134,29 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::map<uint32_t, te
         MS_EXCEPTION_IF_NULL(tensor_ptr);
         // sync data from device to host
         tensor_ptr->data_sync();
-        real_input->abstract()->set_value(tensor_ptr);
+        auto real_abs = real_input->abstract();
+        if (real_abs->isa<abstract::AbstractTensor>()) {
+          real_input->abstract()->set_value(tensor_ptr);
+        } else if (real_abs->isa<abstract::AbstractTuple>()) {
+          auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
+          auto abstract_tuple = real_abs->cast<abstract::AbstractTuplePtr>();
+          MS_EXCEPTION_IF_NULL(abstract_tuple);
+          auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index];
+          tuple_elements->set_value(tensor_ptr);
+        }
       }
     }
-    auto cnode_input = node->input(i + 1);
-    MS_EXCEPTION_IF_NULL(cnode_input);
     if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) {
       auto base_shape = real_input->Shape();
       if (!base_shape->isa<abstract::TupleShape>()) {
         MS_LOG(EXCEPTION) << "Node:" << node->DebugString()
                           << " input is a tuple_get_item but real input node shape is not a TupleShape";
       }
-      auto tuple_ptr = base_shape->cast<abstract::TupleShapePtr>();
-      MS_EXCEPTION_IF_NULL(tuple_ptr);
-      auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
-      auto real_shape = tuple_ptr->shape().at(tuple_get_item_index);
-      auto abstract_tensor = cnode_input->abstract()->cast<abstract::AbstractTensorPtr>();
-      MS_EXCEPTION_IF_NULL(abstract_tensor);
-      args_spec_list.emplace_back(std::make_shared<abstract::AbstractTensor>(abstract_tensor->element(), real_shape));
+      auto abs = real_input->abstract()->cast<abstract::AbstractTuplePtr>();
+      MS_EXCEPTION_IF_NULL(abs);
+      auto tuple_get_item_indexk = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast<CNodePtr>());
+      auto abs_i = abs->elements()[tuple_get_item_indexk];
+      args_spec_list.emplace_back(abs_i);
     } else if (cnode_input->isa<CNode>() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) {
       args_spec_list.emplace_back(cnode_input->abstract());
     } else {
diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc
index eae542e2164..5504bd5537a 100644
--- a/mindspore/ccsrc/backend/session/ascend_session.cc
+++ b/mindspore/ccsrc/backend/session/ascend_session.cc
@@ -1173,8 +1173,12 @@ void AscendSession::DumpSetup(const std::shared_ptr<KernelGraph> &kernel_graph)
 void AscendSession::Dump(const std::shared_ptr<KernelGraph> &kernel_graph) const {
   MS_LOG(DEBUG) << "Start!";
   MS_EXCEPTION_IF_NULL(kernel_graph);
-  E2eDump::DumpData(kernel_graph.get(), rank_id_);
-  MS_LOG(DEBUG) << "Finish!";
+  bool finish = E2eDump::DumpData(kernel_graph.get(), rank_id_);
+  if (finish) {
+    MS_LOG(DEBUG) << "Finish!";
+  } else {
+    MS_LOG(ERROR) << "Dump Data failed!";
+  }
 }
 
 void AscendSession::DumpAllGraphs(const std::vector<KernelGraphPtr> &all_graphs) {
diff --git a/mindspore/ccsrc/backend/session/executor.cc b/mindspore/ccsrc/backend/session/executor.cc
index 94ba3e605e8..ebe54e57dd6 100644
--- a/mindspore/ccsrc/backend/session/executor.cc
+++ b/mindspore/ccsrc/backend/session/executor.cc
@@ -380,8 +380,8 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id,
   session->CreateOutputTensors(graph_id, inputs, outputs, &task->tensor_to_node_);
   // maintain a copy of output vector
   task->outputs_ = *outputs;
-  // sync run graph without output tensor(int dataset graph)
-  if (!TensorInVector(outputs) && !graph->HasPostGraph()) {
+  // sync run graph without output tensor(int dataset graph) or the graph require gil.
+  if ((!TensorInVector(outputs) && !graph->HasPostGraph()) || graph->is_need_gil()) {
     task->sync_run_ = true;
     RunTask(task, true, true);
     return;
diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc
index abf3879ded2..6f4c60987c4 100644
--- a/mindspore/ccsrc/backend/session/gpu_session.cc
+++ b/mindspore/ccsrc/backend/session/gpu_session.cc
@@ -175,6 +175,7 @@ void GPUSession::Optimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
 }
 
 void GPUSession::HardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   auto optimizer = std::make_shared<opt::GraphOptimizer>();
   auto pm = std::make_shared<opt::PassManager>();
   pm->AddPass(std::make_shared<opt::BatchNormReluFusion>());
@@ -212,6 +213,7 @@ void GPUSession::RunOpOptimize(const std::shared_ptr<KernelGraph> &kernel_graph)
 }
 
 void GPUSession::RunOpHardwareOptimize(const std::shared_ptr<KernelGraph> &kernel_graph) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   auto optimizer = std::make_shared<opt::GraphOptimizer>();
   auto pm = std::make_shared<opt::PassManager>();
   pm->AddPass(std::make_shared<opt::ReducePrecisionFusion>("reduce_precision"));
@@ -334,6 +336,7 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
 #endif
       auto pk_node = input_node->cast<ParameterPtr>();
       auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0);
+      MS_EXCEPTION_IF_NULL(device_address);
       auto tensor_address = std::dynamic_pointer_cast<device::DeviceAddress>(tensor->device_address());
       bool need_sync = false;
       if (ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_INFER)) {
@@ -354,7 +357,6 @@ void GPUSession::LoadInputData(const std::shared_ptr<KernelGraph> &kernel_graph,
             ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) {
           tensor->set_device_address(device_address);
         }
-        MS_EXCEPTION_IF_NULL(device_address);
         auto size = UpdateGraphInputAbstract(input_node, tensor);
         if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), size, tensor->data_type(),
                                               tensor->data_c())) {
@@ -381,7 +383,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
   auto root_graph = ConstructKernelGraph(func_graph, &all_graphs);
   MS_EXCEPTION_IF_NULL(root_graph);
   if (all_graphs.size() != 1) {
-    MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule. graph num" << all_graphs.size();
+    MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule, graph num is " << all_graphs.size();
   }
   // Insert maketuple graph output in case of multi-outputs.
   // The ConvertTupleOutputToMaketuple pass will insert TupleGetItem.
@@ -391,6 +393,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull<FuncGraphPtr> func_graph) {
 }
 
 GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) {
+  MS_EXCEPTION_IF_NULL(graph);
   // Prepare ms context info for dump .pb graph
   auto context_ptr = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context_ptr);
@@ -627,6 +630,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info,
   EraseValueNodeTensor(tensors_mask, input_tensors);
   // wait for allreduce
   for (auto &tensor : *input_tensors) {
+    MS_EXCEPTION_IF_NULL(tensor);
     if (tensor->NeedWaitDevice()) {
       tensor->WaitDevice();
     }
diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc
index 77fbdddc7f1..ee2dcab041f 100644
--- a/mindspore/ccsrc/backend/session/kernel_graph.cc
+++ b/mindspore/ccsrc/backend/session/kernel_graph.cc
@@ -470,7 +470,7 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) {
   }
 }
 
-void KernelGraph::ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const {
+void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const {
   if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(cnode)) == kOpAssignKernelNameList.end()) {
     MS_LOG(EXCEPTION) << "Only supported to change the node [Assign , AssignSub, AssignAdd] node's input feature map "
                          "flag but got the node :"
@@ -482,7 +482,7 @@ void KernelGraph::ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const {
     return;
   }
   if (!AnfAlgo::IsFeatureMapOutput(input_node) && AnfAlgo::IsFeatureMapOutput(assign_value_node)) {
-    auto kernel_info = static_cast<device::KernelInfo *>(input_node->kernel_info());
+    auto kernel_info = dynamic_cast<device::KernelInfo *>(input_node->kernel_info());
     kernel_info->set_feature_map_flag(true);
   }
 }
@@ -493,7 +493,7 @@ void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const {
   node->set_kernel_info(kernel_info);
   if (node->isa<CNode>()) {
     if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(node)) != kOpAssignKernelNameList.end()) {
-      ResetAssignInputFeaatureMapFlag(node->cast<CNodePtr>());
+      ResetAssignInputFeatureMapFlag(node->cast<CNodePtr>());
     }
 #if defined(__APPLE__)
     std::vector<int> feature_map_input_indexs;
@@ -1347,6 +1347,9 @@ void KernelGraph::SetOptimizerFlag() {
   for (const auto &cnode : execution_order_) {
     MS_EXCEPTION_IF_NULL(cnode);
     auto node_name = AnfAlgo::GetCNodeName(cnode);
+    if (AnfAlgo::HasNodeAttr(kAttrAsync, cnode) && AnfAlgo::GetNodeAttr<bool>(cnode, kAttrAsync)) {
+      continue;
+    }
     if (kOptOperatorSet.find(node_name) != kOptOperatorSet.end()) {
       has_optimizer_ = true;
     } else if (node_name.find("Assign") == string::npos) {
diff --git a/mindspore/ccsrc/backend/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h
index 0bd1a75f8cd..bc9e2c4de0d 100644
--- a/mindspore/ccsrc/backend/session/kernel_graph.h
+++ b/mindspore/ccsrc/backend/session/kernel_graph.h
@@ -111,7 +111,7 @@ class KernelGraph : public FuncGraph {
   CNodePtr NewCNodeWithInfos(const std::vector<AnfNodePtr> &inputs, const CNodePtr &ori_cnode = nullptr);
   void CreateKernelInfoFromNewParameter(const CNodePtr &cnode);
   CNodePtr NewCNode(const CNodePtr &cnode);
-  void ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const;
+  void ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const;
   ParameterPtr NewParameter(const ParameterPtr &parameter = nullptr);
   ParameterPtr NewParameter(const abstract::AbstractBasePtr &abstract);
   ValueNodePtr NewValueNode(const AbstractBasePtr &abstract, const ValuePtr &value);
@@ -341,6 +341,10 @@ class KernelGraph : public FuncGraph {
   void set_is_all_nop_node(bool is_all_nop_node) { is_all_nop_node_ = is_all_nop_node; }
   std::map<AnfWithOutIndex, AnfWithOutIndex> graph_output_map() { return graph_output_to_front_node_map_; }
 
+  // The interface to set/get the graph GIL flag.
+  void set_is_need_gil(bool flag) { is_need_gil_ = flag; }
+  bool is_need_gil() { return is_need_gil_; }
+
  private:
   // remove value node form graph
   bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
@@ -446,6 +450,9 @@ class KernelGraph : public FuncGraph {
 
   // If all the nodes of graph is the nop node.
   bool is_all_nop_node_{false};
+
+  // Indicate whether the kernels in the graphs acquire Python GIL.
+  bool is_need_gil_{false};
 };
 }  // namespace session
 using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc
index a470715accc..a204f11c6f4 100644
--- a/mindspore/ccsrc/backend/session/session_basic.cc
+++ b/mindspore/ccsrc/backend/session/session_basic.cc
@@ -182,7 +182,7 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair,
                                const std::vector<tensor::TensorPtr> &input_tensors,
                                std::map<tensor::TensorPtr, session::KernelWithIndex> *tensor_to_node) {
   auto &node = node_output_pair.first;
-  int output_index = SizeToInt(node_output_pair.second);
+  size_t output_index = node_output_pair.second;
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(graph);
   auto tensor_from_input = GetNodeOutputTensorFromInputs(node_output_pair, graph, input_tensors);
@@ -435,6 +435,17 @@ void CheckInputTensorShape(const TensorPtr &tensor, const CNodePtr &kernel, size
     }
   }
 }
+
+void UpdateGraphAquireGilAttr(const NotNull<KernelGraphPtr> &root_graph) {
+  for (const auto &cnode : root_graph->execution_order()) {
+    if (AnfAlgo::CheckPrimitiveType(cnode, prim::kPyFunc)) {
+      MS_LOG(INFO) << "The Graph require GIL. Graph id: " << root_graph->graph_id();
+      root_graph->set_is_need_gil(true);
+      return;
+    }
+  }
+  return;
+}
 }  // namespace
 
 GraphId SessionBasic::graph_sum_ = 0;
@@ -1103,6 +1114,7 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con
   UnifyMindIR(graph);
   // Update Graph Dynamic Shape Attr
   UpdateGraphDynamicShapeAttr(NOT_NULL(graph));
+  UpdateGraphAquireGilAttr(NOT_NULL(graph));
   opt::BackendCommonOptimization(graph);
   graph->SetInputNodes();
   SetInputNodeUsage(graph, manager);
@@ -1566,8 +1578,8 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr<KernelGraph> &kernel_grap
     if (AnfAlgo::IsDynamicShape(node)) {
       const auto &updated_shape = AnfAlgo::GetOutputInferShape(node, output_index);
       ShapeVector int_shape;
-      std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
-      tensor->set_shape(int_shape);
+      (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt);
+      (void)tensor->set_shape(int_shape);
     }
     if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) != kPynativeMode) {
       tensor->data_sync(false);
@@ -1596,8 +1608,18 @@ std::vector<tensor::TensorPtr> SessionBasic::GetInputNeedLockTensors(const Graph
   if (!graph->has_optimizer()) {
     return {};
   }
+  auto input_nodes = graph->inputs();
+  bool check_monad = false;
+  if (input_nodes.size() == inputs.size()) {
+    check_monad = true;
+  }
   std::vector<tensor::TensorPtr> result;
-  for (auto &tensor : inputs) {
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    if (check_monad && HasAbstractMonad(input_nodes[i])) {
+      continue;
+    }
+    auto &tensor = inputs[i];
+    MS_EXCEPTION_IF_NULL(tensor);
     if (!tensor->IsGraphOutput()) {
       result.emplace_back(tensor);
     }
@@ -1868,8 +1890,7 @@ AnfNodePtr GetSupportedInternalNode(const AnfNodePtr &front_node) {
 
 constexpr auto kMixTarget = "MixTarget";
 constexpr auto kNoTarget = "NoTarget";
-std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager,
-                                                  const AnfNodePtr &partial_node) {
+std::string SessionBasic::AddPartialParametersMap(const AnfNodePtr &partial_node) {
   MS_EXCEPTION_IF_NULL(partial_node);
   auto iter = partial_target_map_.find(partial_node);
   if (iter != partial_target_map_.end()) {
@@ -1881,11 +1902,12 @@ std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &fro
   MS_EXCEPTION_IF_NULL(partial_graph);
   auto parameters = partial_graph->parameters();
   auto partial_inputs = partial_cnode->inputs();
-  if (parameters.size() + 2 != partial_inputs.size()) {
+  const size_t kNonParameterNum = 2;
+  if (parameters.size() + kNonParameterNum != partial_inputs.size()) {
     return kMixTarget;
   }
   for (size_t i = 0; i < parameters.size(); ++i) {
-    partial_parameters_map_[parameters[i]] = partial_inputs[2 + i];
+    partial_parameters_map_[parameters[i]] = partial_inputs[kNonParameterNum + i];
   }
   auto graph_nodes = TopoSort(partial_graph->get_return());
   std::string graph_target = kNoTarget;
@@ -1905,7 +1927,7 @@ std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &fro
       break;
     }
   }
-  (void)partial_target_map_.insert({partial_node, graph_target});
+  (void)partial_target_map_.emplace(std::pair<AnfNodePtr, std::string>(partial_node, graph_target));
   return graph_target;
 }
 
@@ -1937,7 +1959,7 @@ void SessionBasic::HandleInternalOutput(const AnfNodePtr &input_front_node, cons
     auto users = ExtendNodeUsers(front_func_graph_manager, front_node);
     for (auto &user : users) {
       if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice) {
-        auto partial_target = AddPartialParametersMap(front_func_graph_manager, user);
+        auto partial_target = AddPartialParametersMap(user);
         if (partial_target != kNoTarget && partial_target != kernel_target) {
           unique_target = false;
         }
@@ -2098,9 +2120,6 @@ KernelGraphPtr SessionBasic::NewKernelGraph() {
 AnfNodePtr SessionBasic::FindPullNode(const AnfNodePtr &push_node, const std::vector<AnfNodePtr> &node_list) {
   MS_EXCEPTION_IF_NULL(push_node);
   for (auto &node : node_list) {
-    if (IsPrimitiveCNode(node, prim::kPrimUpdateState)) {
-      continue;
-    }
     if (node != nullptr && node->isa<CNode>()) {
       for (auto input : node->cast<CNodePtr>()->inputs()) {
         if (push_node == AnfAlgo::VisitKernel(input, 0).first) {
diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h
index e20cd762351..ef3b137626e 100644
--- a/mindspore/ccsrc/backend/session/session_basic.h
+++ b/mindspore/ccsrc/backend/session/session_basic.h
@@ -176,8 +176,7 @@ class SessionBasic : public std::enable_shared_from_this<SessionBasic> {
   void HandleInternalOutput(const AnfNodePtr &input_front_node, const AnfNodePtr &backend_node,
                             const FuncGraphManagerPtr &front_func_graph_manager,
                             const std::shared_ptr<KernelGraph> &backend_graph);
-  std::string AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager,
-                                      const AnfNodePtr &partial_node);
+  std::string AddPartialParametersMap(const AnfNodePtr &partial_node);
   void GetParameterIndex(const KernelGraph *graph, const std::vector<tensor::TensorPtr> &inputs,
                          std::map<AnfNodePtr, size_t> *parameter_index);
   void CreateOutputPlaceholder(const KernelGraphPtr &kernel_graph, const std::vector<tensor::TensorPtr> &input_tensors,
diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc
index bed7d3ca87b..59b95d4a0a2 100644
--- a/mindspore/ccsrc/common/trans.cc
+++ b/mindspore/ccsrc/common/trans.cc
@@ -1013,7 +1013,7 @@ bool NchwTo4D(const FormatArgs &args, void *result) {
       for (size_t hi = 0; hi < h; hi++) {
         for (size_t wi = 0; wi < w; wi++) {
           auto src_idx = ni * c * h * w + ci * h * w + hi * w + wi;
-          auto dst_idx = 0;
+          size_t dst_idx = 0;
           if (args.device_format == kOpFormat_NHWC) {
             dst_idx = ni * h * w * c + hi * w * c + wi * c + ci;
           } else if (args.device_format == kOpFormat_HWCN) {
@@ -1045,7 +1045,7 @@ bool ToNchw(const FormatArgs &args, void *result) {
       for (size_t hi = 0; hi < h; hi++) {
         for (size_t wi = 0; wi < w; wi++) {
           auto dst_idx = ni * c * h * w + ci * h * w + hi * w + wi;
-          auto src_idx = 0;
+          size_t src_idx = 0;
           if (args.device_format == kOpFormat_NHWC) {
             src_idx = ni * h * w * c + hi * w * c + wi * c + ci;
           } else if (args.device_format == kOpFormat_HWCN) {
@@ -1801,7 +1801,7 @@ bool NchwFracZTransWithGroups(const FormatArgs &args, void *result, bool to_devi
   auto c_dim = args.host_shape[kC];
   auto h_dim = args.host_shape[kH];
   auto w_dim = args.host_shape[kW];
-  size_t d_dim = 1;
+  const size_t d_dim = 1;
   size_t group_size = LongToSize(groups);
   auto cin_ori = c_dim;
   auto cout_ori = n_dim / group_size;
diff --git a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc
index d7ba761091d..d41370a996e 100644
--- a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc
+++ b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc
@@ -17,6 +17,7 @@
 #include "include/api/context.h"
 #include "cxx_api/model/acl/model_converter.h"
 #include "utils/log_adapter.h"
+#include "mindspore/core/utils/convert_utils_base.h"
 
 namespace mindspore {
 API_FACTORY_REG(GraphCell::GraphImpl, Ascend310, AclGraphImpl);
@@ -33,7 +34,7 @@ AclGraphImpl::~AclGraphImpl() { (void)FinalizeEnv(); }
 
 Status AclGraphImpl::Run(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
   MS_EXCEPTION_IF_NULL(outputs);
-  Status ret = Load(device_id_);
+  Status ret = Load(IntToUint(device_id_));
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "Prepare model resource failed.";
     return ret;
@@ -43,7 +44,7 @@ Status AclGraphImpl::Run(const std::vector<MSTensor> &inputs, std::vector<MSTens
 }
 
 std::vector<MSTensor> AclGraphImpl::GetInputs() {
-  Status ret = Load(device_id_);
+  Status ret = Load(IntToUint(device_id_));
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "Prepare model resource failed.";
     return {};
@@ -53,7 +54,7 @@ std::vector<MSTensor> AclGraphImpl::GetInputs() {
 }
 
 std::vector<MSTensor> AclGraphImpl::GetOutputs() {
-  Status ret = Load(device_id_);
+  Status ret = Load(IntToUint(device_id_));
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "Prepare model resource failed.";
     return {};
@@ -176,7 +177,7 @@ Status AclGraphImpl::Load(uint32_t device_id) {
   auto om_data = graph_data->GetOMData();
 
   // init
-  device_id_ = device_id;
+  device_id_ = UintToInt(device_id);
   Status ret = InitEnv();
   if (ret != kSuccess) {
     MS_LOG(ERROR) << "InitEnv failed.";
diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc
index 2f724fccd34..3f246dacf26 100644
--- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc
+++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc
@@ -19,6 +19,7 @@
 #include <algorithm>
 #include <map>
 #include "utils/utils.h"
+#include "mindspore/core/utils/convert_utils_base.h"
 
 namespace mindspore {
 static DataType TransToApiType(aclDataType data_type) {
@@ -157,13 +158,14 @@ Status ModelProcess::InitInputsBuffer() {
     if (ret != ACL_ERROR_NONE) {
       MS_LOG(ERROR) << "Get input shape failed";
       if (!is_run_on_device_) {
-        aclrtFree(data_mem_buffer);
+        (void)aclrtFree(data_mem_buffer);
       }
       return kMCDeviceError;
     }
     aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
     std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
-    std::string input_name = aclmdlGetInputNameByIndex(model_desc_, i);
+    const char *input_name_char = aclmdlGetInputNameByIndex(model_desc_, i);
+    std::string input_name = (input_name_char == nullptr) ? input_name_char : std::string();
     if (input_name.empty()) {
       MS_LOG(WARNING) << "Get name of input " << i << " failed.";
     }
@@ -175,7 +177,7 @@ Status ModelProcess::InitInputsBuffer() {
   return kSuccess;
 }
 
-Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) {
+Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const {
   MS_EXCEPTION_IF_NULL(data_mem_buffer);
   aclError ret;
   auto free_data_buffer = [this](void *dataMemBuffer) {
@@ -246,7 +248,8 @@ Status ModelProcess::InitOutputsBuffer() {
     }
     aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
     std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
-    std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i);
+    const char *output_name_char = aclmdlGetOutputNameByIndex(model_desc_, i);
+    std::string output_name = (output_name_char == nullptr) ? output_name_char : std::string();
     if (output_name.empty()) {
       MS_LOG(WARNING) << "Get name of output " << i << " failed.";
     }
@@ -344,7 +347,7 @@ Status ModelProcess::SetBatchSize(const std::vector<MSTensor> &inputs) {
   }
   auto *p = reinterpret_cast<const float *>(inputs[inputs.size() - 1].Data().get());
   MS_EXCEPTION_IF_NULL(p);
-  auto dynamicBatchSize = p[0];
+  size_t dynamicBatchSize = FloatToSize(p[0]);
   ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
   if (ret != ACL_ERROR_NONE) {
     MS_LOG(ERROR) << "get index failed";
@@ -442,7 +445,7 @@ Status ModelProcess::ResetOutputSize() {
   aclError ret;
   size_t output_size = aclmdlGetNumOutputs(model_desc_);
   for (size_t index = 0; index < output_size; index++) {
-    size_t dims = 1;
+    int64_t dims = 1;
     struct aclmdlIODims output_dims;
     ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims);
     if (ret != ACL_ERROR_NONE) {
@@ -453,7 +456,7 @@ Status ModelProcess::ResetOutputSize() {
       dims *= output_dims.dims[i];
     }
     output_type = aclmdlGetOutputDataType(model_desc_, index);
-    output_infos_[index].buffer_size = dims * aclDataTypeSize(output_type);
+    output_infos_[index].buffer_size = LongToSize(dims) * aclDataTypeSize(output_type);
   }
   return kSuccess;
 }
diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h
index 342170ecbdd..8da78cb1069 100644
--- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h
+++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h
@@ -60,7 +60,7 @@ class ModelProcess {
   uint32_t model_id() const { return model_id_; }
 
  private:
-  Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
+  Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const;
   Status CheckAndInitInput(const std::vector<MSTensor> &inputs);
   Status ConstructTensors(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<MSTensor> *tensor_list);
   Status BuildOutputs(std::vector<MSTensor> *outputs);
diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc
index 517bb9446cb..e25809bfe3f 100644
--- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc
+++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc
@@ -164,7 +164,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) {
     }
     // receive convert model result from child
     CreateBufferCall call = [&buffer_ret](size_t msg_len) -> uint8_t * {
-      buffer_ret.ResizeData(msg_len);
+      (void)buffer_ret.ResizeData(msg_len);
       return reinterpret_cast<uint8_t *>(buffer_ret.MutableData());
     };
     status = multi_process->ReceiveMsg(call);
@@ -179,7 +179,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) {
     // receive original model from parent
     Buffer model;
     CreateBufferCall call = [&model](size_t msg_len) -> uint8_t * {
-      model.ResizeData(msg_len);
+      (void)model.ResizeData(msg_len);
       return reinterpret_cast<uint8_t *>(model.MutableData());
     };
     auto status = multi_process->ReceiveMsg(call);
diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h
index f75d7a14054..e9652a10665 100644
--- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h
+++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h
@@ -31,6 +31,7 @@ namespace mindspore {
 class ModelConverter {
  public:
   ModelConverter() : options_(nullptr) {}
+  ~ModelConverter() = default;
 
   Buffer LoadMindIR(const FuncGraphPtr &func_graph);
 
@@ -40,9 +41,9 @@ class ModelConverter {
   transform::DfGraphPtr ConvertFuncGraphToAIR(const FuncGraphPtr &anf_graph);
   Buffer BuildAirModel(const transform::DfGraphPtr &graph, const std::map<std::string, std::string> &init_options,
                        const std::map<std::string, std::string> &build_options);
-  AclModelOptions *options_;
-
   Buffer LoadAscendIRInner(const Buffer &model_data);
+
+  AclModelOptions *options_;
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_CXXAPI_SESSION_ACL_MODEL_CONVERTER_H
diff --git a/mindspore/ccsrc/cxx_api/model/model.cc b/mindspore/ccsrc/cxx_api/model/model.cc
index 699d68a1126..f6282fa5177 100644
--- a/mindspore/ccsrc/cxx_api/model/model.cc
+++ b/mindspore/ccsrc/cxx_api/model/model.cc
@@ -65,14 +65,14 @@ Status Model::Build(GraphCell graph_cell, const std::shared_ptr<Context> &model_
   return impl_->Build();
 }
 
-Status Model::Build(const void *model_data, size_t data_size, ModelType model_type,
-                    const std::shared_ptr<Context> &model_context, const Key &dec_key, const std::string &dec_mode) {
+Status Model::Build(const void *, size_t, ModelType, const std::shared_ptr<Context> &, const Key &,
+                    const std::string &) {
   MS_LOG(ERROR) << "Unsupported Feature.";
   return kMCFailed;
 }
 
-Status Model::Build(const std::string &model_path, ModelType model_type, const std::shared_ptr<Context> &model_context,
-                    const Key &dec_key, const std::string &dec_mode) {
+Status Model::Build(const std::string &, ModelType, const std::shared_ptr<Context> &, const Key &,
+                    const std::string &) {
   MS_LOG(ERROR) << "Unsupported Feature.";
   return kMCFailed;
 }
diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc
index 60b6056dca4..909524e4004 100644
--- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc
+++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc
@@ -25,14 +25,14 @@
 
 namespace mindspore {
 namespace {
-uint64_t kSharedMemorySize = 100ull << 20;  // 100 MB
+constexpr uint64_t kSharedMemorySize = 100ull << 20;  // 100 MB
 }
 
 MultiProcess::MultiProcess() = default;
 
 MultiProcess::~MultiProcess() = default;
 
-Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_process) {
+Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process) {
   MS_EXCEPTION_IF_NULL(parent_process);
   MS_EXCEPTION_IF_NULL(child_process);
   Status ret;
@@ -61,7 +61,8 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall
   }
   constexpr size_t kMsgStructNum = 2;
   shmat_data_addr_ = shmat_addr_ + sizeof(MessageFlag) * kMsgStructNum;
-  shmat_data_max_size_ = memory_size_ - (shmat_data_addr_ - shmat_addr_);
+  shmat_data_max_size_ =
+    memory_size_ - (reinterpret_cast<uintptr_t>(shmat_data_addr_) - reinterpret_cast<uintptr_t>(shmat_addr_));
   MS_LOG_INFO << "Shm addr " << (uint64_t)shmat_addr_;
   if (pid == 0) {
     ChildProcess(child_process);
@@ -85,7 +86,7 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall
         child_exited = true;
         break;
       }
-      sleep(1);
+      (void)sleep(1);
     }
     if (!child_exited) {
       MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed.";
@@ -95,7 +96,7 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall
   return ret;
 }
 
-Status MultiProcess::ParentProcess(ProcessFuncCall parent_process) {
+Status MultiProcess::ParentProcess(const ProcessFuncCall &parent_process) {
   auto parent_msg = reinterpret_cast<MessageFlag *>(shmat_addr_);
   auto child_msg = reinterpret_cast<MessageFlag *>(shmat_addr_ + sizeof(MessageFlag));
   send_msg_ = parent_msg;
@@ -112,12 +113,12 @@ Status MultiProcess::ParentProcess(ProcessFuncCall parent_process) {
     ret = kMEFailed;
   }
   stopped_ = true;
-  send_msg_->stop = true;
+  send_msg_->stop = 1;
   heartbeat_thread.join();
   return ret;
 }
 
-void MultiProcess::ChildProcess(ProcessFuncCall child_process) {
+void MultiProcess::ChildProcess(const ProcessFuncCall &child_process) {
   auto parent_msg = reinterpret_cast<MessageFlag *>(shmat_addr_);
   auto child_msg = reinterpret_cast<MessageFlag *>(shmat_addr_ + sizeof(MessageFlag));
   send_msg_ = child_msg;
@@ -138,26 +139,30 @@ void MultiProcess::ChildProcess(ProcessFuncCall child_process) {
 }
 
 Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) {
+  MS_EXCEPTION_IF_NULL(buffer);
   MS_LOG_INFO << "Start to send message to peer process, msg len " << msg_len;
   send_msg_->msg_total_len = msg_len;
   uint64_t cur_offset = 0;
   while (msg_len > cur_offset) {
     uint64_t sub_msg_len = std::min(msg_len - cur_offset, shmat_data_max_size_);
-
+    if (sub_msg_len == 0) {
+      MS_LOG(ERROR) << "Invalid message len " << sub_msg_len;
+      return kMEFailed;
+    }
     auto ret =
       memcpy_s(shmat_data_addr_, shmat_data_max_size_, static_cast<const uint8_t *>(buffer) + cur_offset, sub_msg_len);
     if (ret != EOK) {
-      MS_LOG(INFO) << "memcpy_s failed, ret = " << ret;
+      MS_LOG(ERROR) << "memcpy_s failed, ret = " << ret;
       return kMEFailed;
     }
     cur_offset += sub_msg_len;
 
     send_msg_->msg_len = sub_msg_len;
-    send_msg_->read_finish_flag = false;
-    send_msg_->read_ready_flag = true;
+    send_msg_->read_finish_flag = 0;
+    send_msg_->read_ready_flag = 1;
     MS_LOG_INFO << "Send start " << cur_offset << ", msg len " << sub_msg_len << ", total len " << msg_len;
     while (!send_msg_->read_finish_flag && !peer_stopped_) {
-      usleep(1000);  // 1ms
+      (void)usleep(1000);  // 1ms
     }
     if (peer_stopped_) {
       if (!send_msg_->read_finish_flag) {
@@ -171,14 +176,14 @@ Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) {
   return kSuccess;
 }
 
-Status MultiProcess::ReceiveMsg(CreateBufferCall create_buffer_call) {
+Status MultiProcess::ReceiveMsg(const CreateBufferCall &create_buffer_call) {
   uint64_t cur_offset = 0;
   uint8_t *msg_buffer = nullptr;
   uint64_t msg_len = 0;
   do {
     MS_LOG_INFO << "Receive start from " << cur_offset;
     while (!receive_msg_->read_ready_flag && !peer_stopped_) {
-      usleep(1000);  // 1ms
+      (void)usleep(1000);  // 1ms
     }
     if (peer_stopped_) {
       return kMEFailed;
@@ -193,8 +198,8 @@ Status MultiProcess::ReceiveMsg(CreateBufferCall create_buffer_call) {
       return kMEFailed;
     }
     cur_offset += receive_msg_->msg_len;
-    receive_msg_->read_ready_flag = false;
-    receive_msg_->read_finish_flag = true;
+    receive_msg_->read_ready_flag = 0;
+    receive_msg_->read_finish_flag = 1;
     MS_LOG_INFO << "Receive end, current length " << cur_offset << ", total length " << msg_len << std::endl;
   } while (msg_len > cur_offset);
   return kSuccess;
@@ -225,7 +230,7 @@ void MultiProcess::HeartbeatThreadFuncInner() {
       }
     }
     send_msg_->heartbeat += 1;
-    usleep(100000);  // sleep 100 ms
+    (void)usleep(100000);  // sleep 100 ms
   }
 }
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h
index 8958c13e625..e120fa021b3 100644
--- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h
+++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h
@@ -39,9 +39,9 @@ class MultiProcess {
   MultiProcess();
   ~MultiProcess();
 
-  Status MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_process);
+  Status MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process);
   Status SendMsg(const void *buffer, uint64_t msg_len);
-  Status ReceiveMsg(CreateBufferCall create_buffer_call);
+  Status ReceiveMsg(const CreateBufferCall &create_buffer_call);
 
  private:
   uint8_t *shmat_addr_ = nullptr;
@@ -56,8 +56,8 @@ class MultiProcess {
 
   static void HeartbeatThreadFunc(MultiProcess *multi_process);
   void HeartbeatThreadFuncInner();
-  Status ParentProcess(ProcessFuncCall parent_process);
-  void ChildProcess(ProcessFuncCall child_process);
+  Status ParentProcess(const ProcessFuncCall &parent_process);
+  void ChildProcess(const ProcessFuncCall &child_process);
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_CXXAPI_MULTI_PROCESS_H
diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h
index 5200a2d26d6..e49d3167f21 100644
--- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h
+++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h
@@ -26,9 +26,11 @@ class SharedMemory {
   Status Attach();
   void Detach();
   void Destroy();
-  uint8_t *GetSharedMemoryAddr() { return shmat_addr_; }
 
  private:
+  friend class MultiProcess;
+  uint8_t *GetSharedMemoryAddr() { return shmat_addr_; }
+
   int shm_id_ = -1;
   uint8_t *shmat_addr_ = nullptr;
 };
diff --git a/mindspore/ccsrc/cxx_api/types.cc b/mindspore/ccsrc/cxx_api/types.cc
index 0f4a25dd2c2..5448de2d999 100644
--- a/mindspore/ccsrc/cxx_api/types.cc
+++ b/mindspore/ccsrc/cxx_api/types.cc
@@ -360,25 +360,25 @@ bool MSTensor::IsDevice() const {
   return impl_->IsDevice();
 }
 
-void MSTensor::SetShape(const std::vector<int64_t> &shape) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetShape(const std::vector<int64_t> &) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetDataType(enum DataType data_type) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetDataType(enum DataType) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetTensorName(const std::string &name) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetTensorName(const std::string &) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetAllocator(std::shared_ptr<Allocator>) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
 std::shared_ptr<Allocator> MSTensor::allocator() const { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetFormat(mindspore::Format format) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
 mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetData(void *data) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
 std::vector<QuantParam> MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; }
 
-void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) { MS_LOG_EXCEPTION << "Invalid implement."; }
+void MSTensor::SetQuantParams(std::vector<QuantParam>) { MS_LOG_EXCEPTION << "Invalid implement."; }
 
 Buffer::Buffer() : impl_(std::make_shared<Impl>()) {}
 Buffer::Buffer(const void *data, size_t data_len) : impl_(std::make_shared<Impl>(data, data_len)) {}
diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc
index 502af18e916..4248dc7f5f4 100644
--- a/mindspore/ccsrc/debug/anf_ir_dump.cc
+++ b/mindspore/ccsrc/debug/anf_ir_dump.cc
@@ -596,7 +596,8 @@ void DumpIR(const std::string &filename, const FuncGraphPtr &graph, bool dump_fu
   std::ofstream fout(realpath.value());
   std::ostringstream buffer;
   if (!fout.is_open()) {
-    MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!";
+    MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
 
@@ -638,7 +639,8 @@ void DumpIRForRDR(const std::string &filename, const FuncGraphPtr &graph, bool d
   std::ofstream fout(realpath.value());
   std::ostringstream buffer;
   if (!fout.is_open()) {
-    MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!";
+    MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
 
diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc
index 7130fbc7b83..aba493689ed 100644
--- a/mindspore/ccsrc/debug/anf_ir_utils.cc
+++ b/mindspore/ccsrc/debug/anf_ir_utils.cc
@@ -606,7 +606,8 @@ void AnfExporter::ExportFuncGraph(const std::string &filename, const FuncGraphPt
 
   std::ofstream ofs(filename);
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << filename << "' failed!";
+    MS_LOG(ERROR) << "Open file '" << filename << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
 
diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc
index 3758b8787a5..876eb32ecf9 100644
--- a/mindspore/ccsrc/debug/common.cc
+++ b/mindspore/ccsrc/debug/common.cc
@@ -28,7 +28,8 @@
 namespace mindspore {
 std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
   if (input_path.length() >= PATH_MAX) {
-    MS_LOG(EXCEPTION) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX;
+    MS_LOG(ERROR) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX;
+    return std::nullopt;
   }
   auto path_split_pos = input_path.find_last_of('/');
   if (path_split_pos == std::string::npos) {
@@ -46,7 +47,8 @@ std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
     }
 #if defined(SYSTEM_ENV_POSIX)
     if (file_name.length() > NAME_MAX) {
-      MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
+      MS_LOG(ERROR) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX;
+      return std::nullopt;
     }
     if (realpath(common::SafeCStr(prefix_path), real_path) == nullptr) {
       MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist.";
@@ -63,7 +65,8 @@ std::optional<std::string> Common::GetRealPath(const std::string &input_path) {
   // input_path is only file_name
 #if defined(SYSTEM_ENV_POSIX)
   if (input_path.length() > NAME_MAX) {
-    MS_LOG(EXCEPTION) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX;
+    MS_LOG(ERROR) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX;
+    return std::nullopt;
   }
   if (realpath(common::SafeCStr(input_path), real_path) == nullptr) {
     MS_LOG(INFO) << "The file " << input_path << " does not exist, it will be created.";
@@ -145,8 +148,8 @@ std::optional<std::string> Common::GetConfigFile(const std::string &env) {
 bool Common::IsStrLengthValid(const std::string &str, size_t length_limit, const std::string &error_message) {
   auto len_str = str.length();
   if (len_str > length_limit) {
-    MS_LOG(WARNING) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit
-                    << ".";
+    MS_LOG(ERROR) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit
+                  << ".";
     return false;
   }
   return true;
@@ -198,14 +201,16 @@ bool Common::IsPathValid(const std::string &path, size_t length_limit, const std
     return false;
   }
 
-  if (!std::all_of(path.begin(), path.end(),
-                   [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) {
-    MS_LOG(WARNING) << err_msg << "The path only supports alphabets, digit or {'-', '_', '/'}, but got:" << path << ".";
+  if (!std::all_of(path.begin(), path.end(), [](char c) {
+        return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.' || c == '/';
+      })) {
+    MS_LOG(ERROR) << err_msg << "The path only supports alphabets, digit or {'-', '_', '.', '/'}, but got:" << path
+                  << ".";
     return false;
   }
 
   if (path[0] != '/') {
-    MS_LOG(WARNING) << err_msg << "The path only supports absolute path and should start with '/'.";
+    MS_LOG(ERROR) << err_msg << "The path only supports absolute path and should start with '/'.";
     return false;
   }
 
@@ -229,11 +234,10 @@ bool Common::IsFilenameValid(const std::string &filename, size_t length_limit, c
   if (!IsStrLengthValid(filename, length_limit, err_msg)) {
     return false;
   }
-
-  if (!std::all_of(filename.begin(), filename.end(),
-                   [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; })) {
-    MS_LOG(WARNING) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename
-                    << ".";
+  auto func = [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; };
+  if (!std::all_of(filename.begin(), filename.end(), func)) {
+    MS_LOG(ERROR) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename
+                  << ".";
     return false;
   }
   return true;
@@ -274,7 +278,8 @@ bool Common::SaveStringToFile(const std::string filename, const std::string stri
   ofs.open(real_path.value());
 
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!";
+    MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return false;
   }
   ofs << string_info << std::endl;
@@ -300,16 +305,19 @@ struct GlogLogDirRegister {
       std::string log_dir_str = std::string(log_dir);
 
       auto real_log_dir_str = Common::GetRealPath(log_dir_str);
-      // While 'GLOG_logtostderr' = 0, logs output to files.
-      // 'GLOG_log_dir' must be specified as the path of log files.
+      // While 'GLOG_logtostderr' = 0, logs output to files. 'GLOG_log_dir' must be specified as the path of log files.
+      // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed.
       if (logtostderr_str == "0" && real_log_dir_str.has_value()) {
         if (!Common::IsPathValid(real_log_dir_str.value(), MAX_DIRECTORY_LENGTH, "")) {
-          MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid";
+          MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid";
+          exit(EXIT_FAILURE);
         } else if (!Common::CreateNotExistDirs(real_log_dir_str.value())) {
-          MS_LOG(EXCEPTION) << "Create the path of log files, set by 'GLOG_log_dir', failed.";
+          MS_LOG(ERROR) << "Create the path of log files, which set by 'GLOG_log_dir', failed.";
+          exit(EXIT_FAILURE);
         }
       } else if (logtostderr_str == "0") {
-        MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid.";
+        MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid.";
+        exit(EXIT_FAILURE);
       }
     }
   }
diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
index c62716b4a9d..59f3864dc4c 100644
--- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
+++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -60,8 +60,8 @@ std::string GetIfstreamString(const std::ifstream &ifstream) {
 }
 
 bool DumpJsonParser::IsDumpEnabled() {
-  auto config_path = std::getenv(kMindsporeDumpConfig);
-  if (config_path == nullptr) {
+  auto config_path = common::GetEnv(kMindsporeDumpConfig);
+  if (config_path.empty()) {
     return false;
   }
   MS_LOG(INFO) << "Dump config path is " << config_path;
@@ -90,9 +90,14 @@ void DumpJsonParser::Parse() {
     MS_LOG(EXCEPTION) << "Get dump config file failed";
   }
 
-  std::ifstream json_file(dump_config_file.value());
+  auto dump_file_realpath = Common::GetRealPath(dump_config_file.value());
+  if (!dump_file_realpath.has_value()) {
+    MS_LOG(EXCEPTION) << "Get real path failed in Parse.";
+  }
+  std::ifstream json_file(dump_file_realpath.value());
   if (!json_file.is_open()) {
-    MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed.";
+    MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."
+                      << " Errno:" << errno << " ErrInfo:" << strerror(errno);
   }
 
   nlohmann::json j;
@@ -100,6 +105,7 @@ void DumpJsonParser::Parse() {
     json_file >> j;
   } catch (nlohmann::json::parse_error &e) {
     MS_LOG(ERROR) << "Dump json contents:" << GetIfstreamString(json_file);
+    json_file.close();
     MS_LOG(EXCEPTION) << "Parse dump json failed, error:" << e.what();
   }
 
@@ -107,6 +113,7 @@ void DumpJsonParser::Parse() {
   std::stringstream ss;
   ss << j;
   std::string cfg = ss.str();
+  json_file.close();
   MS_LOG(INFO) << "Dump json:" << cfg;
 
   ParseE2eDumpSetting(j);
@@ -128,13 +135,14 @@ void DumpJsonParser::CopyJsonToDir(uint32_t rank_id) {
     auto realpath = Common::GetRealPath(path_ + "/rank_" + std::to_string(rank_id) + "/.dump_metadata/data_dump.json");
     if (!realpath.has_value()) {
       MS_LOG(ERROR) << "Get real path failed in CopyJsonDir.";
+    } else {
+      const std::string file_path = realpath.value();
+      ChangeFileMode(file_path, S_IWUSR);
+      std::ofstream json_copy(file_path);
+      json_copy << json_file.rdbuf();
+      json_copy.close();
+      ChangeFileMode(file_path, S_IRUSR);
     }
-    const std::string file_path = realpath.value();
-    ChangeFileMode(file_path, S_IWUSR);
-    std::ofstream json_copy(file_path);
-    json_copy << json_file.rdbuf();
-    json_copy.close();
-    ChangeFileMode(file_path, S_IRUSR);
   }
 }
 
@@ -176,7 +184,7 @@ void DumpJsonParser::CopyMSCfgJsonToDir(uint32_t rank_id) {
     auto context = MsContext::GetInstance();
     MS_EXCEPTION_IF_NULL(context);
     ms_info["device_target"] = context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
-    ms_info["ms_version"] = "1.3.0";
+    ms_info["ms_version"] = "1.4.0";
     const std::string file_path = realpath.value();
     ChangeFileMode(file_path, S_IWUSR);
     std::ofstream json_create(file_path);
@@ -204,7 +212,8 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s
   ChangeFileMode(file_path, S_IWUSR);
   std::ofstream fd(file_path, std::ios::out | std::ios::trunc | std::ios::binary);
   if (!fd.is_open()) {
-    MS_LOG(ERROR) << "Open file " << file_path << " failed.";
+    MS_LOG(ERROR) << "Open file " << file_path << " failed."
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return false;
   }
   std::string npy_header = GenerateNpyHeader(shape, type);
diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc
index 5ed077f4ff1..1f543f86376 100644
--- a/mindspore/ccsrc/debug/debug_services.cc
+++ b/mindspore/ccsrc/debug/debug_services.cc
@@ -39,6 +39,10 @@ namespace mindspore {
 DebugServices::DebugServices() { tensor_loader_ = std::make_shared<TensorLoader>(); }
 
 DebugServices::DebugServices(const DebugServices &other) {
+  wp_id_cache = other.wp_id_cache;
+  net_name = other.net_name;
+  dump_dir = other.dump_dir;
+  is_sync_mode = other.is_sync_mode;
   tensor_loader_ = other.tensor_loader_;
   watchpoint_table = other.watchpoint_table;
 }
@@ -313,14 +317,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
   MS_LOG(INFO) << "tensor list size: " << tensor_list_size;
   if (tensor_list_size == 0) return;
   // default value for number of threads
-  int max_thread_num = 32;
-  auto thread_num = getenv("MS_dbg_num_thread");
-  if (thread_num != nullptr) {
-    max_thread_num = std::stoi(thread_num);
-  }
-  if (max_thread_num > tensor_list_size) {
-    max_thread_num = tensor_list_size;
-  }
+  const int max_thread_num = 32;
   MS_LOG(INFO) << "Number of threads used for checkwatchpoint: " << max_thread_num;
   int chunk_size = tensor_list_size / max_thread_num;
   int remainder = tensor_list_size % max_thread_num;
@@ -355,8 +352,7 @@ void DebugServices::CheckWatchpoints(std::vector<std::string> *const name, std::
     tensor_future_vec[i].wait();
     tensor_future_vec[i].get();
     for (unsigned int j = 0; j < chunk_exec_orders[i].size(); j++) {
-      std::vector<int>::iterator iter;
-      iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]);
+      std::vector<int>::iterator iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]);
       // if the execution order is repeated,inserts the new one before the others with same execution order.
       int position = iter - exec_order.begin();
       exec_order.insert(iter, chunk_exec_orders[i][j]);
@@ -399,7 +395,8 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string
   MS_LOG(INFO) << "Reading in file: " << file_path;
   infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in);
   if (!infile.is_open()) {
-    MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path;
+    MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path << " Errno:" << errno
+                  << " ErrInfo:" << strerror(errno);
     return;
   }
   uint64_t file_size = infile.tellg();
@@ -409,11 +406,18 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string
     MS_LOG(ERROR) << "Failed to read file (In ReadTensorFromNpy) " << file_path;
     return;
   }
-  constexpr int header_len_offset = 8;
+  const int substr_len = 2;
+  const int header_len_offset = 8;
+  const int header_offset = 9;
+  const int type_offset = 10;
   uint16_t header_len = *reinterpret_cast<uint16_t *>(buffer->data() + header_len_offset);
-  std::string header(buffer->data() + header_len_offset + 1, header_len);
-  std::size_t type_i = header.find("descr") + 10;
-  *tensor_type = header.substr(type_i, 2);
+  std::string header(buffer->data() + header_offset, header_len);
+  std::size_t type_i = header.find("descr") + type_offset;
+  if (header.length() < type_i + substr_len) {
+    MS_LOG(ERROR) << "Cannot get tensor_type, header length is " << header.length();
+    return;
+  }
+  *tensor_type = header.substr(type_i, substr_len);
   std::size_t shape_i_open = header.find("(");
   std::size_t shape_i_close = header.find(")");
   std::string shape_str = header.substr(shape_i_open + 1, shape_i_close - shape_i_open - 1);
@@ -426,7 +430,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string
   std::size_t word_size = std::stoul(std::string(1, (*tensor_type)[1]));
   std::size_t data_len = std::accumulate(shape->begin(), shape->end(), 1, std::multiplies<uint64_t>());
   std::size_t data_size = data_len * word_size;
-  infile.seekg(header_len + 10);
+  infile.seekg(header_len + type_offset);
   *data_buffer = new std::vector<char>(data_size);
   if (data_buffer == nullptr || !infile.read((*data_buffer)->data(), data_size)) {
     MS_LOG(ERROR) << "Unable to get tensor data from npy";
@@ -479,25 +483,29 @@ void DebugServices::ConvertToHostFormat(const std::map<std::string, std::vector<
         MS_LOG(EXCEPTION) << "Can't find package mindspore.offline_debug.convert_async";
       }
 
-      DIR *d_handle = opendir(dump_key.c_str());
-      if (d_handle != nullptr) {
-        struct dirent *dir = nullptr;
-        while ((dir = readdir(d_handle)) != NULL) {
-          if (dir->d_type == DT_REG) {
-            std::string candidate = dir->d_name;
-            for (const std::string &file_to_find : files_to_convert_in_dir) {
-              std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1);
-              if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) {
-                // we found a converted file for this op
-                std::string found_file = dump_key + "/" + candidate;
-                if (std::find(result_list->begin(), result_list->end(), found_file) == result_list->end()) {
-                  result_list->push_back(found_file);
-                }
+      std::string abspath = RealPath(dump_key);
+      DIR *d_handle = opendir(abspath.c_str());
+      if (d_handle == nullptr) {
+        MS_LOG(ERROR) << "Directory does not exit in ConvertToHostFormat.";
+        return;
+      }
+      struct dirent *dir = nullptr;
+      while ((dir = readdir(d_handle)) != NULL) {
+        if (dir->d_type == DT_REG) {
+          std::string candidate = dir->d_name;
+          for (const std::string &file_to_find : files_to_convert_in_dir) {
+            std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1);
+            if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) {
+              // we found a converted file for this op
+              std::string found_file = dump_key + "/" + candidate;
+              if (std::find(result_list->begin(), result_list->end(), found_file) == result_list->end()) {
+                result_list->push_back(found_file);
               }
             }
           }
         }
       }
+      closedir(d_handle);
     }
   }
 }
@@ -552,9 +560,12 @@ void DebugServices::ConvertReadTensors(std::vector<std::string> backend_name, st
                                     std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]);
 
     // search files in dir for the one that meets the filename prefix and read the file into memory
-    DIR *d;
-    d = opendir(specific_dump_dir.c_str());
-    if (d != nullptr) {
+    std::string abspath = RealPath(specific_dump_dir);
+    DIR *d = opendir(abspath.c_str());
+    if (d == nullptr) {
+      MS_LOG(ERROR) << "Directory does not exist in ConvertReadTensors.";
+      return;
+    } else {
       struct dirent *dir = nullptr;
       while ((dir = readdir(d)) != NULL) {
         if (dir->d_type == DT_REG) {
@@ -575,8 +586,8 @@ void DebugServices::ConvertReadTensors(std::vector<std::string> backend_name, st
           }
         }
       }
+      closedir(d);
     }
-    closedir(d);
   }
   ConvertToHostFormat(dir_to_files_map, result_list);
 }
@@ -590,9 +601,12 @@ void DebugServices::ConvertWatchPointNodes(const std::vector<std::tuple<std::str
     std::string dump_name = std::get<1>(node);
     dump_name = dump_name.substr(0, dump_name.rfind("."));
     // search files in dir for the one that meets the filename prefix and read the file into memory
-    DIR *d;
-    d = opendir(specific_dump_dir.c_str());
-    if (d != nullptr) {
+    std::string abspath = RealPath(specific_dump_dir);
+    DIR *d = opendir(abspath.c_str());
+    if (d == nullptr) {
+      MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ConvertWatchPointNodes.";
+      return;
+    } else {
       struct dirent *dir = nullptr;
       while ((dir = readdir(d)) != NULL) {
         if (dir->d_type == DT_REG) {
@@ -613,8 +627,8 @@ void DebugServices::ConvertWatchPointNodes(const std::vector<std::tuple<std::str
           }
         }
       }
+      closedir(d);
     }
-    closedir(d);
   }
   ConvertToHostFormat(dir_to_files_map, result_list);
 }
@@ -748,11 +762,13 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
     std::vector<int64_t> shape;
     uint64_t data_size = 0;
     if (is_sync_mode) {
-      DIR *d;
-      d = opendir(specific_dump_dir.c_str());
+      std::string abspath = RealPath(specific_dump_dir);
+      DIR *d = opendir(abspath.c_str());
       bool found_file = false;
       std::vector<std::string> matched_paths;
-      if (d != nullptr) {
+      if (d == nullptr) {
+        MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!";
+      } else {
         struct dirent *dir = nullptr;
         while ((dir = readdir(d)) != NULL) {
           if (dir->d_type == DT_REG) {
@@ -770,9 +786,8 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
             matched_paths.push_back(full_path);
             found_file = true;
           }
+          closedir(d);
         }
-      } else {
-        MS_LOG(INFO) << "Directory " << specific_dump_dir << " does not exist!";
       }
 
       if (found_file) {
@@ -786,7 +801,6 @@ void DebugServices::ReadDumpedTensor(std::vector<std::string> backend_name, std:
                         type_name, shape, buffer, result_list);
         MS_LOG(INFO) << "Target tensor has not been found.";
       }
-      closedir(d);
     } else {
       bool found = false;
       std::vector<std::string> matched_paths;
@@ -895,9 +909,11 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(
     }
     if (is_sync_mode) {
       // search files in dir for the one that meets the filename prefix and read the file into memory
-      DIR *d;
-      d = opendir(specific_dump_dir.c_str());
-      if (d != nullptr) {
+      std::string abspath = RealPath(specific_dump_dir);
+      DIR *d = opendir(abspath.c_str());
+      if (d == nullptr) {
+        MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ReadNeededDumpedTensors.";
+      } else {
         struct dirent *dir = nullptr;
         while ((dir = readdir(d)) != NULL) {
           if (dir->d_type == DT_REG) {
@@ -924,6 +940,7 @@ std::vector<std::shared_ptr<TensorData>> DebugServices::ReadNeededDumpedTensors(
             }
           }
         }
+        closedir(d);
       }
     } else {
       GetTensorDataInfoAsync(proto_to_dump, specific_dump_dir, iteration, device_id, root_graph_id, *async_file_pool,
@@ -985,7 +1002,7 @@ bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr
 }
 
 bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const {
-  if (kernel) {
+  if (kernel && w_name.length() > 0) {
     auto input_size = AnfAlgo::GetInputTensorNum(kernel);
     for (size_t j = 0; j < input_size; ++j) {
       auto input_kernel = kernel->input(j + 1);
@@ -1095,8 +1112,11 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
 
     MS_LOG(INFO) << "Processing bin file path " << overflow_bin_path;
 
-    DIR *d = opendir(overflow_bin_path.c_str());
-    if (d != nullptr) {
+    std::string abspath = RealPath(overflow_bin_path);
+    DIR *d = opendir(abspath.c_str());
+    if (d == nullptr) {
+      MS_LOG(ERROR) << "OverFlow bin directory does not exist!";
+    } else {
       struct dirent *dir = nullptr;
       while ((dir = readdir(d)) != nullptr) {
         if (dir->d_type == DT_REG) {
@@ -1108,8 +1128,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
           std::ifstream infile;
           infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in);
           if (!infile.is_open()) {
-            MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name;
-            MS_LOG(ERROR) << "Error: " << strerror(errno);
+            MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name << " Errno:" << errno
+                          << " ErrInfo:" << strerror(errno);
             continue;
           }
 
@@ -1149,10 +1169,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int
           infile.close();
         }
       }
-    } else {
-      MS_LOG(INFO) << "OverFlow bin directory does not exist!";
+      closedir(d);
     }
-    closedir(d);
 
     // find the op_names with an overflow hit
     for (auto &task_stream : task_stream_hit) {
diff --git a/mindspore/ccsrc/debug/debugger/debug_grpc.proto b/mindspore/ccsrc/debug/debugger/debug_grpc.proto
index e34dce3b2ed..2d3870cc6e4 100644
--- a/mindspore/ccsrc/debug/debugger/debug_grpc.proto
+++ b/mindspore/ccsrc/debug/debugger/debug_grpc.proto
@@ -27,6 +27,7 @@ service EventListener {
   rpc SendTensors (stream TensorProto) returns (EventReply) {};
   rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {};
   rpc SendMultiGraphs (stream Chunk) returns (EventReply) {};
+  rpc SendHeartbeat (Heartbeat) returns (EventReply) {};
 }
 
 message Metadata {
@@ -136,3 +137,8 @@ message WatchpointHit {
   int32 id = 3;
   int32 error_code = 4;
 }
+
+message Heartbeat {
+  string message = 1;
+  int32 period = 2;
+}
diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc
index 9b509f4e729..bddc3c5a2ce 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.cc
+++ b/mindspore/ccsrc/debug/debugger/debugger.cc
@@ -59,12 +59,14 @@ using debugger::WatchpointHit;
 namespace mindspore {
 
 static constexpr auto g_chunk_size = 1024 * 1024 * 3;
+static constexpr int32_t heartbeat_period_second = 30;
 DebuggerPtr Debugger::debugger_ = nullptr;
 std::mutex Debugger::instance_lock_;
 
 Debugger::Debugger()
     : grpc_client_(nullptr),
       debug_services_(nullptr),
+      heartbeat_thread_(nullptr),
       device_id_(0),
       device_target_(""),
       num_step_(0),
@@ -113,7 +115,7 @@ void Debugger::Init(const uint32_t device_id, const std::string device_target) {
   device_id_ = device_id;
   MS_LOG(INFO) << "Debugger got device_target: " << device_target;
   device_target_ = device_target;
-  version_ = "1.3.0";
+  version_ = "1.4.0";
 }
 
 bool IsTypeDebuggerSupported(TypeId type) {
@@ -132,6 +134,7 @@ void Debugger::EnableDebugger() {
   partial_memory_ = false;
   grpc_client_ = nullptr;
   debug_services_ = nullptr;
+  heartbeat_thread_ = nullptr;
 
   // see if dump using debugger backend is enabled
   bool dump_enabled = CheckDebuggerDumpEnabled();
@@ -147,8 +150,22 @@ void Debugger::EnableDebugger() {
   }
 
   if (debugger_enabled_) {
-    std::string host = "localhost";
-
+    // configure grpc host
+    std::string env_host_str = common::GetEnv("MS_DEBUGGER_HOST");
+    std::string host;
+    if (!env_host_str.empty()) {
+      if (CheckIp(env_host_str)) {
+        MS_LOG(INFO) << "Getenv MS_DEBUGGER_HOST: " << env_host_str;
+        host = env_host_str;
+      } else {
+        debugger_enabled_ = false;
+        MS_EXCEPTION(ValueError) << "Environment variable MS_DEBUGGER_HOST isn't a valid IP address. "
+                                    "Please set environment variable MS_DEBUGGER_HOST=x.x.x.x to a valid IP";
+      }
+    } else {
+      MS_LOG(INFO) << "Environment variable MS_DEBUGGER_HOST doesn't exist. Using default debugger host: localhost";
+      host = "localhost";
+    }
     // configure grpc port
     std::string env_port_str = common::GetEnv("MS_DEBUGGER_PORT");
     std::string port;
@@ -170,6 +187,8 @@ void Debugger::EnableDebugger() {
     }
     // initialize grpc client
     grpc_client_ = std::make_unique<GrpcClient>(host, port);
+    // initialize sending heartbeat
+    heartbeat_thread_ = std::make_unique<std::thread>([&]() { SendHeartbeat(heartbeat_period_second); });
   }
   debug_services_ = std::make_unique<DebugServices>();
 }
@@ -561,6 +580,38 @@ GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const {
   ModelProto model = GetDebuggerFuncGraphProto(graph_ptr);
   return model.graph();
 }
+
+void Debugger::SendHeartbeat(int32_t period) {
+  bool heartbeat_enabled_ = true;
+  int num_heartbeat_fail = 0;
+  const int max_num_heartbeat_fail = 5;
+  const int retry_period = 500;
+
+  Heartbeat heartbeat;
+  heartbeat.set_message("Debugger is alive");
+  heartbeat.set_period(heartbeat_period_second);
+
+  bool run_ = CheckDebuggerEnabled() && heartbeat_enabled_;
+  while (run_) {
+    EventReply reply = grpc_client_->SendHeartbeat(heartbeat);
+
+    if (reply.status() != reply.OK) {
+      MS_LOG(ERROR) << "Error: SendHeartbeat failed";
+      num_heartbeat_fail++;
+      if (num_heartbeat_fail >= max_num_heartbeat_fail) {
+        MS_LOG(ERROR) << "Maximum number of failure for SendHeartbeat reached : exiting training session.";
+        Exit();
+        run_ = false;
+      } else {
+        MS_LOG(ERROR) << "Number of consecutive SendHeartbeat fail:" << num_heartbeat_fail;
+        std::this_thread::sleep_for(std::chrono::milliseconds(retry_period));
+      }
+    } else {
+      std::this_thread::sleep_for(std::chrono::milliseconds(period * 1000));
+    }
+  }
+}
+
 void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) {
   if (SendMetadata(true)) {
     // send graph to Mindinsight server
@@ -1120,6 +1171,17 @@ bool Debugger::CheckPort(const std::string &port) const {
   return true;
 }
 
+bool Debugger::CheckIp(const std::string &host) const {
+  std::regex reg_ip(
+    "(25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])"
+    "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])"
+    "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])"
+    "[.](25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])");
+  std::smatch smat;
+  std::string host_str = host;
+  return std::regex_match(host_str, smat, reg_ip);
+}
+
 uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); }
 
 void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index) {
diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h
index 49e103ea082..9446f96b61d 100644
--- a/mindspore/ccsrc/debug/debugger/debugger.h
+++ b/mindspore/ccsrc/debug/debugger/debugger.h
@@ -195,6 +195,9 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
   // serialize graph and get proto
   GraphProto GetGraphProto(const KernelGraphPtr &graph_ptr) const;
 
+  // send heartbeat message to UI once per 30 second by default
+  void SendHeartbeat(int32_t period);
+
   // send graph and enter command wait loop
   void SendGraphAndSuspend(const GraphProto &graph_proto);
 
@@ -235,12 +238,16 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
   // Check if the port is valid
   bool CheckPort(const std::string &port) const;
 
+  // Check if the IP is valid
+  bool CheckIp(const std::string &host) const;
+
   void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index);
 
   // class members
 
   std::unique_ptr<GrpcClient> grpc_client_;
   std::unique_ptr<DebugServices> debug_services_;
+  std::unique_ptr<std::thread> heartbeat_thread_;
   KernelGraphPtr graph_ptr_;
   uint32_t device_id_;
   std::string device_target_;
diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.cc b/mindspore/ccsrc/debug/debugger/grpc_client.cc
index 9f1607bc5e5..d0bbc51c87a 100644
--- a/mindspore/ccsrc/debug/debugger/grpc_client.cc
+++ b/mindspore/ccsrc/debug/debugger/grpc_client.cc
@@ -24,6 +24,7 @@ using debugger::EventListener;
 using debugger::EventReply;
 using debugger::EventReply_Status_FAILED;
 using debugger::GraphProto;
+using debugger::Heartbeat;
 using debugger::Metadata;
 using debugger::TensorProto;
 using debugger::WatchpointHit;
@@ -185,4 +186,18 @@ EventReply GrpcClient::SendWatchpointHits(const std::list<WatchpointHit> &watchp
   }
   return reply;
 }
+
+EventReply GrpcClient::SendHeartbeat(const Heartbeat &heartbeat) {
+  EventReply reply;
+  grpc::ClientContext context;
+
+  grpc::Status status = stub_->SendHeartbeat(&context, heartbeat, &reply);
+
+  if (!status.ok()) {
+    MS_LOG(ERROR) << "RPC failed: SendHeartbeat";
+    MS_LOG(ERROR) << status.error_code() << ": " << status.error_message();
+    reply.set_status(EventReply_Status_FAILED);
+  }
+  return reply;
+}
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.h b/mindspore/ccsrc/debug/debugger/grpc_client.h
index 34f3b4badb5..36479edba50 100644
--- a/mindspore/ccsrc/debug/debugger/grpc_client.h
+++ b/mindspore/ccsrc/debug/debugger/grpc_client.h
@@ -27,6 +27,7 @@ using debugger::Chunk;
 using debugger::EventListener;
 using debugger::EventReply;
 using debugger::GraphProto;
+using debugger::Heartbeat;
 using debugger::Metadata;
 using debugger::TensorProto;
 using debugger::WatchpointHit;
@@ -60,6 +61,8 @@ class GrpcClient {
 
   std::vector<std::string> ChunkString(std::string str, int graph_size);
 
+  EventReply SendHeartbeat(const Heartbeat &heartbeat);
+
  private:
   std::unique_ptr<EventListener::Stub> stub_;
 };
diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc
index f4fd451bec5..eec6addc0fd 100644
--- a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc
+++ b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc
@@ -48,7 +48,7 @@ DbgServices::~DbgServices() {
 
 std::string DbgServices::GetVersion() {
   MS_LOG(INFO) << "get version is called";
-  return "1.3.0";
+  return "1.4.0";
 }
 
 int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode) {
diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h
index 3b02b06ead8..7edd0cf016c 100644
--- a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h
+++ b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h
@@ -18,6 +18,8 @@
 
 #include <iostream>
 
+#define PATH_MAX 4096
+
 #define MS_LOG(level) MS_LOG_##level
 
 #define MS_LOG_INFO static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::INFO) < std::cout
@@ -28,8 +30,7 @@
 
 #define MS_LOG_WARNING MS_LOG_INFO
 
-#define MS_LOG_EXCEPTION \
-  static_cast<void>(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout
+#define MS_LOG_EXCEPTION static_cast<void>(0), DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout
 
 enum DbgLoggerLvl : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION };
 
@@ -38,17 +39,20 @@ class DbgLogger {
   explicit DbgLogger(DbgLoggerLvl lvl) : lvl_(lvl) {}
   ~DbgLogger() = default;
   void operator<(std::ostream &os) const {
-    char *dbg_log_path = getenv("OFFLINE_DBG_LOG");
-    if (dbg_log_path != NULL) {
-      FILE *fp;
-      fp = freopen(dbg_log_path, "a", stdout);
+    char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG");
+    if (dbg_log_path != nullptr) {
+      char abspath[PATH_MAX];
+      if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) {
+        return;
+      }
+      FILE *fp = freopen(abspath, "a", stdout);
       if (fp == nullptr) {
         std::cout << "ERROR: DbgLogger could not redirect all stdout to a file";
       }
     }
     os << std::endl;
     if (lvl_ == DbgLoggerLvl::EXCEPTION) {
-      throw;
+      throw lvl_;
     }
   }
   static bool verbose;
diff --git a/mindspore/ccsrc/debug/debugger/proto_exporter.cc b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
index 90ba50569df..3db363edcab 100644
--- a/mindspore/ccsrc/debug/debugger/proto_exporter.cc
+++ b/mindspore/ccsrc/debug/debugger/proto_exporter.cc
@@ -573,7 +573,8 @@ void DumpIRProtoWithSrcInfo(const FuncGraphPtr &func_graph, const std::string &s
   // write to pb file
   std::ofstream ofs(realpath.value());
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!";
+    MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
   ofs << graph_proto;
diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc
index 180f952dfd4..ec6a67ed872 100644
--- a/mindspore/ccsrc/debug/dump_proto.cc
+++ b/mindspore/ccsrc/debug/dump_proto.cc
@@ -555,7 +555,8 @@ void DumpIRProto(const FuncGraphPtr &func_graph, const std::string &suffix) {
   // write to pb file
   std::ofstream ofs(file_path);
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << file_path << "' failed!";
+    MS_LOG(ERROR) << "Open file '" << file_path << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
   ofs << GetFuncGraphProtoString(func_graph);
diff --git a/mindspore/ccsrc/debug/env_config_parser.cc b/mindspore/ccsrc/debug/env_config_parser.cc
index 58f39ed9aad..3a43fcc1238 100644
--- a/mindspore/ccsrc/debug/env_config_parser.cc
+++ b/mindspore/ccsrc/debug/env_config_parser.cc
@@ -122,7 +122,8 @@ void EnvConfigParser::ParseFromFile() {
   std::ifstream json_file(config_file_);
   if (!json_file.is_open()) {
     MS_LOG(WARNING) << "Env config file:" << config_file_ << " open failed."
-                    << " Please check the config file '" << config_file_ << "' set by 'env_config_path' in context.";
+                    << " Please check the config file '" << config_file_ << "' set by 'env_config_path' in context."
+                    << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return;
   }
 
diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h
index e6d5acd8218..0a85b10ede6 100644
--- a/mindspore/ccsrc/debug/tensor_data.h
+++ b/mindspore/ccsrc/debug/tensor_data.h
@@ -171,6 +171,9 @@ class TensorData {
     this->shape = obj.shape;
     this->iteration = obj.iteration;
     this->device_id = obj.device_id;
+    this->data_ptr = obj.data_ptr;
+    this->root_graph_id = obj.root_graph_id;
+    this->is_output = obj.is_output;
 #ifdef ONLINE_DBG_MODE
     this->tensor_ptr = obj.tensor_ptr;
 #endif
@@ -194,39 +197,39 @@ class TensorData {
 
   void SetSlot(size_t slot) { this->slot = slot; }
 
-  char *GetDataPtr() { return data_ptr; }
+  char *GetDataPtr() const { return this->data_ptr; }
 
   void SetDataPtr(char *data_ptr) { this->data_ptr = data_ptr; }
 
   uint32_t GetNumElements() { return size / data_type_size; }
 
-  uint64_t GetByteSize() { return size; }
+  uint64_t GetByteSize() const { return this->size; }
 
   void SetByteSize(uint64_t size) { this->size = size; }
 
-  std::vector<int64_t> GetShape() { return shape; }
+  std::vector<int64_t> GetShape() const { return this->shape; }
 
   void SetShape(std::vector<int64_t> shape) { this->shape = shape; }
 
-  unsigned int GetIteration() { return iteration; }
+  unsigned int GetIteration() const { return this->iteration; }
 
   void SetIteration(unsigned int iteration) { this->iteration = iteration; }
 
-  unsigned int GetDeviceId() { return device_id; }
+  unsigned int GetDeviceId() const { return this->device_id; }
 
   void SetDeviceId(unsigned int device_id) { this->device_id = device_id; }
 
-  unsigned int GetRootGraphId() { return root_graph_id; }
+  unsigned int GetRootGraphId() const { return this->root_graph_id; }
 
   void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id = root_graph_id; }
 
-  DbgDataType GetType() { return data_type; }
+  DbgDataType GetType() const { return this->data_type; }
 
   void SetType(unsigned int type) { ConvertMsToDbgType(type); }
 
   void SetType(std::string type_name) { ConvertStringToDbgType(type_name); }
 
-  bool GetIsOutput() { return is_output; }
+  bool GetIsOutput() const { return this->is_output; }
 
   void SetIsOutput(bool is_output) { this->is_output = is_output; }
 
diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc
index 1b0e9399448..72d2de8f796 100644
--- a/mindspore/ccsrc/debug/trace.cc
+++ b/mindspore/ccsrc/debug/trace.cc
@@ -138,7 +138,7 @@ class AnalyzeFailExporter : public AnfExporter {
                    std::map<AnfNodePtr, int> *const apply_map) override;
   std::string GetNodeType(const AnfNodePtr &nd) override;
   AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd);
-  AnfNodeConfigPtr GetFordwardConfig(const AnfNodeConfigPtr &cfg);
+  AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg);
   void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment);
   void OutputStatementComment(std::ofstream &ofs, const CNodePtr &node);
   std::unordered_map<FuncGraphPtr, TaggedNodeMap> CreateTaggedNodeMap(
@@ -157,7 +157,7 @@ std::unordered_map<FuncGraphPtr, TaggedNodeMap> AnalyzeFailExporter::CreateTagge
     MS_EXCEPTION_IF_NULL(node_config);
 
     // Record new config in set.
-    auto new_config = GetFordwardConfig(node_config);
+    auto new_config = GetForwardConfig(node_config);
     if (new_config != node_config) {
       MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString()
                     << ", new_config: " << new_config->ToString();
@@ -218,7 +218,7 @@ AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) {
   return nullptr;
 }
 
-AnfNodeConfigPtr AnalyzeFailExporter::GetFordwardConfig(const AnfNodeConfigPtr &cfg) {
+AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) {
   MS_EXCEPTION_IF_NULL(cfg);
   MS_EXCEPTION_IF_NULL(engine_);
   AnfNodeConfigPtr cur_cfg = cfg;
@@ -242,7 +242,7 @@ void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string
   try {
     FuncGraphPtr dummy_call_func_graph = nullptr;
     auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
-    cfg = GetFordwardConfig(cfg);
+    cfg = GetForwardConfig(cfg);
     cnode = dyn_cast<CNode>(cfg->node());
   } catch (const std::exception &e) {
     MS_LOG(INFO) << "Exception: " << e.what();
@@ -346,9 +346,16 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra
     MS_LOG(DEBUG) << "Node configs is empty";
     return false;
   }
-  std::ofstream ofs(filename);
+  auto real_filepath = Common::GetRealPath(filename);
+  if (!real_filepath.has_value()) {
+    MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal.";
+    return false;
+  }
+  ChangeFileMode(real_filepath.value(), S_IWUSR);
+  std::ofstream ofs(real_filepath.value());
   if (!ofs.is_open()) {
-    MS_LOG(ERROR) << "Open file '" << filename << "' failed!";
+    MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!"
+                  << " Errno:" << errno << " ErrInfo:" << strerror(errno);
     return false;
   }
 
@@ -389,6 +396,7 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra
         << " internal frames).\n";
   }
   ofs.close();
+  ChangeFileMode(real_filepath.value(), S_IRUSR);
   return true;
 }
 
diff --git a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
index db3a35087db..1d170e3873d 100644
--- a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
+++ b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc
@@ -38,6 +38,8 @@ bool ConsistentHashRing::Erase(uint32_t rank) {
   for (auto iterator = ring_.begin(); iterator != ring_.end();) {
     if (iterator->second == rank) {
       (void)ring_.erase(iterator++);
+    } else {
+      iterator++;
     }
   }
   return true;
diff --git a/mindspore/ccsrc/fl/server/distributed_count_service.cc b/mindspore/ccsrc/fl/server/distributed_count_service.cc
index e3ceb8ae7a9..c28c76b856c 100644
--- a/mindspore/ccsrc/fl/server/distributed_count_service.cc
+++ b/mindspore/ccsrc/fl/server/distributed_count_service.cc
@@ -103,6 +103,7 @@ bool DistributedCountService::Count(const std::string &name, const std::string &
       return false;
     }
 
+    MS_ERROR_IF_NULL_W_RET_VAL(report_cnt_rsp_msg, false);
     CountResponse count_rsp;
     (void)count_rsp.ParseFromArray(report_cnt_rsp_msg->data(), SizeToInt(report_cnt_rsp_msg->size()));
     if (!count_rsp.result()) {
diff --git a/mindspore/ccsrc/fl/server/executor.cc b/mindspore/ccsrc/fl/server/executor.cc
index 460b8dba502..cf87a3513eb 100644
--- a/mindspore/ccsrc/fl/server/executor.cc
+++ b/mindspore/ccsrc/fl/server/executor.cc
@@ -231,6 +231,9 @@ bool Executor::IsWeightAggrDone(const std::vector<std::string> &param_names) {
     std::unique_lock<std::mutex> lock(mtx);
     auto &param_aggr = param_aggrs_[name];
     MS_ERROR_IF_NULL_W_RET_VAL(param_aggr, false);
+    if (!param_aggr->requires_aggr()) {
+      continue;
+    }
     if (!param_aggr->IsAggregationDone()) {
       MS_LOG(DEBUG) << "Update model for " << name << " is not done yet.";
       return false;
@@ -265,6 +268,8 @@ std::map<std::string, AddressPtr> Executor::GetModel() {
   return model;
 }
 
+const std::vector<std::string> &Executor::param_names() const { return param_names_; }
+
 bool Executor::Unmask() {
 #ifdef ENABLE_ARMOUR
   auto model = GetModel();
@@ -274,7 +279,17 @@ bool Executor::Unmask() {
 #endif
 }
 
-const std::vector<std::string> &Executor::param_names() const { return param_names_; }
+void Executor::set_unmasked(bool unmasked) { unmasked_ = unmasked; }
+
+bool Executor::unmasked() const {
+  std::string encrypt_type = ps::PSContext::instance()->encrypt_type();
+  if (encrypt_type == ps::kPWEncryptType) {
+    return unmasked_.load();
+  } else {
+    // If the algorithm of pairwise encrypt is not enabled, consider_ unmasked flag as true.
+    return true;
+  }
+}
 
 std::string Executor::GetTrainableParamName(const CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(cnode);
diff --git a/mindspore/ccsrc/fl/server/executor.h b/mindspore/ccsrc/fl/server/executor.h
index 1ba82d9a852..bc0963cb519 100644
--- a/mindspore/ccsrc/fl/server/executor.h
+++ b/mindspore/ccsrc/fl/server/executor.h
@@ -93,10 +93,16 @@ class Executor {
   bool initialized() const;
 
   const std::vector<std::string> &param_names() const;
+
+  // The unmasking method for pairwise encrypt algorithm.
   bool Unmask();
 
+  // The setter and getter for unmasked flag to judge whether the unmasking is completed.
+  void set_unmasked(bool unmasked);
+  bool unmasked() const;
+
  private:
-  Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}) {}
+  Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}), unmasked_(false) {}
   ~Executor() = default;
   Executor(const Executor &) = delete;
   Executor &operator=(const Executor &) = delete;
@@ -123,9 +129,13 @@ class Executor {
   // Because ParameterAggregator is not threadsafe, we have to create mutex for each ParameterAggregator so we can
   // acquire lock before calling its method.
   std::map<std::string, std::mutex> parameter_mutex_;
+
 #ifdef ENABLE_ARMOUR
   armour::CipherUnmask cipher_unmask_;
 #endif
+
+  // The flag represents the unmasking status.
+  std::atomic<bool> unmasked_;
 };
 }  // namespace server
 }  // namespace fl
diff --git a/mindspore/ccsrc/fl/server/iteration_timer.cc b/mindspore/ccsrc/fl/server/iteration_timer.cc
index 27a98c4191a..780c2ff2f16 100644
--- a/mindspore/ccsrc/fl/server/iteration_timer.cc
+++ b/mindspore/ccsrc/fl/server/iteration_timer.cc
@@ -40,7 +40,9 @@ void IterationTimer::Start(const std::chrono::milliseconds &duration) {
 
 void IterationTimer::Stop() {
   running_ = false;
-  monitor_thread_.join();
+  if (monitor_thread_.joinable()) {
+    monitor_thread_.join();
+  }
 }
 
 void IterationTimer::SetTimeOutCallBack(const TimeOutCb &timeout_cb) {
diff --git a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h
index 90368f5c9f8..eb3b5fd3bb8 100644
--- a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h
+++ b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h
@@ -60,6 +60,8 @@ class DenseGradAccumKernel : public AggregationKernel {
       MS_LOG(ERROR) << "The inputs number of DenseGradAccumKernel should be 2, but got " << inputs.size();
       return false;
     }
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false);
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false);
     MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false);
     MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false);
 
diff --git a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h
index fa7b4abc172..b201fa83d92 100644
--- a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h
+++ b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h
@@ -97,6 +97,10 @@ class FedAvgKernel : public AggregationKernel {
         MS_LOG(ERROR) << "Federated average allreduce failed.";
         return;
       }
+      if (data_size_addr[0] == 0) {
+        MS_LOG(ERROR) << "After AllReduce, the data size is 0.";
+        return;
+      }
       LocalMetaStore::GetInstance().put_value(kCtxFedAvgTotalDataSize, data_size_addr[0]);
       for (size_t i = 0; i < weight_size / sizeof(T); i++) {
         weight_addr[i] /= data_size_addr[0];
@@ -115,6 +119,10 @@ class FedAvgKernel : public AggregationKernel {
       MS_LOG(ERROR) << "The inputs number of FedAvgKernel should be 4, but got " << inputs.size();
       return false;
     }
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false);
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false);
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[2], false);
+    MS_ERROR_IF_NULL_W_RET_VAL(inputs[3], false);
     MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false);
     MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false);
     MS_ERROR_IF_NULL_W_RET_VAL(inputs[2]->addr, false);
diff --git a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h
index f744df961f9..98c41ee2f49 100644
--- a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h
+++ b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h
@@ -76,7 +76,7 @@ class OptimizerKernel : public CPUKernel {
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     for (size_t output_index = 0; output_index < output_num; ++output_index) {
-      std::vector<size_t> shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, output_index);
+      std::vector<size_t> shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index);
       size_t tensor_size =
         shape.empty() ? type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies<size_t>());
       output_size_list_.emplace_back(tensor_size);
diff --git a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc
index 9cb6799489c..07ce238d926 100644
--- a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc
+++ b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc
@@ -90,7 +90,7 @@ void PullWeightKernel::PullWeight(const std::shared_ptr<FBBuilder> &fbb,
   for (size_t i = 0; i < weights_names_fbs->size(); i++) {
     weight_names.push_back(weights_names_fbs->Get(i)->str());
   }
-  if (!executor_->IsWeightAggrDone(weight_names)) {
+  if (!executor_->IsWeightAggrDone(weight_names) || !executor_->unmasked()) {
     ++retry_count_;
     std::string reason = "The aggregation for the weights is not done yet.";
     BuildPullWeightRsp(fbb, schema::ResponseCode_SucNotReady, reason, current_iter, feature_maps);
diff --git a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc
index f93a6cbfd99..f851b8cf702 100644
--- a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc
+++ b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc
@@ -123,7 +123,7 @@ std::map<std::string, Address> PushWeightKernel::ParseFeatureMap(const schema::R
   MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, {});
   std::map<std::string, Address> upload_feature_map;
   auto fbs_feature_map = push_weight_req->feature_map();
-  MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, upload_feature_map);
+  MS_ERROR_IF_NULL_W_RET_VAL(fbs_feature_map, upload_feature_map);
   for (size_t i = 0; i < fbs_feature_map->size(); i++) {
     std::string weight_full_name = fbs_feature_map->Get(i)->weight_fullname()->str();
     float *weight_data = const_cast<float *>(fbs_feature_map->Get(i)->data()->data());
diff --git a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc
index da1d4dc1f08..3cc0e91695c 100644
--- a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc
+++ b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc
@@ -35,9 +35,11 @@ void ReconstructSecretsKernel::InitKernel(size_t required_cnt) {
     return;
   }
   auto last_cnt_handler = [&](std::shared_ptr<ps::core::MessageHandler>) {
-    MS_LOG(INFO) << "start FinishIteration";
-    FinishIteration();
-    MS_LOG(INFO) << "end FinishIteration";
+    if (ps::PSContext::instance()->resetter_round() == ps::ResetterRound::kReconstructSeccrets) {
+      MS_LOG(INFO) << "start FinishIteration";
+      FinishIteration();
+      MS_LOG(INFO) << "end FinishIteration";
+    }
     return;
   };
   auto first_cnt_handler = [&](std::shared_ptr<ps::core::MessageHandler>) { return; };
@@ -146,6 +148,7 @@ void ReconstructSecretsKernel::OnLastCountEvent(const std::shared_ptr<ps::core::
       std::this_thread::sleep_for(std::chrono::milliseconds(5));
     }
     MS_LOG(INFO) << "end unmask";
+    Executor::GetInstance().set_unmasked(true);
     std::string worker_id = std::to_string(DistributedCountService::GetInstance().local_rank());
     DistributedCountService::GetInstance().Count(name_unmask_, worker_id);
   }
@@ -157,6 +160,7 @@ bool ReconstructSecretsKernel::Reset() {
   DistributedCountService::GetInstance().ResetCounter(name_);
   DistributedCountService::GetInstance().ResetCounter(name_unmask_);
   StopTimer();
+  Executor::GetInstance().set_unmasked(false);
   cipher_reconstruct_.ClearReconstructSecrets();
   return true;
 }
diff --git a/mindspore/ccsrc/fl/server/model_store.cc b/mindspore/ccsrc/fl/server/model_store.cc
index 8cbab89a9cc..8444798a614 100644
--- a/mindspore/ccsrc/fl/server/model_store.cc
+++ b/mindspore/ccsrc/fl/server/model_store.cc
@@ -129,10 +129,6 @@ std::shared_ptr<MemoryRegister> ModelStore::AssignNewModelMemory() {
     MS_ERROR_IF_NULL_W_RET_VAL(weight_data, nullptr);
     MS_ERROR_IF_NULL_W_RET_VAL(weight.second, nullptr);
     MS_ERROR_IF_NULL_W_RET_VAL(weight.second->addr, nullptr);
-    if (weight_data == nullptr) {
-      MS_LOG(EXCEPTION) << "Assign memory for weight failed.";
-      return nullptr;
-    }
 
     auto src_data_size = weight_size;
     auto dst_data_size = weight_size;
diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.cc b/mindspore/ccsrc/fl/server/parameter_aggregator.cc
index cb93808ad24..9a5cf531821 100644
--- a/mindspore/ccsrc/fl/server/parameter_aggregator.cc
+++ b/mindspore/ccsrc/fl/server/parameter_aggregator.cc
@@ -174,8 +174,14 @@ bool ParameterAggregator::IsOptimizingDone() const { return optimizing_done_; }
 
 bool ParameterAggregator::IsPullingDone() const { return pulling_done_; }
 
+bool ParameterAggregator::requires_aggr() const { return requires_aggr_; }
+
 bool ParameterAggregator::InitAggregationKernels(const CNodePtr &cnode) {
   MS_EXCEPTION_IF_NULL(cnode);
+  if (!JudgeRequiresAggr(cnode)) {
+    MS_LOG(WARNING) << "Aggregation for weight for kernel " << AnfAlgo::GetCNodeName(cnode) << " is not required.";
+  }
+
   std::vector<std::string> aggr_kernel_names = SelectAggregationAlgorithm(cnode);
   for (const std::string &name : aggr_kernel_names) {
     auto aggr_kernel = kernel::AggregationKernelFactory::GetInstance().Create(name, cnode);
@@ -333,13 +339,36 @@ std::vector<std::string> ParameterAggregator::SelectAggregationAlgorithm(const C
   } else if (ps::PSContext::instance()->server_mode() == ps::kServerModePS) {
     (void)aggregation_algorithm.emplace_back("DenseGradAccum");
   } else {
-    MS_LOG(ERROR) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode();
+    MS_LOG(EXCEPTION) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode();
+    return aggregation_algorithm;
   }
 
   MS_LOG(INFO) << "Aggregation algorithm selection result: " << aggregation_algorithm;
   return aggregation_algorithm;
 }
 
+bool ParameterAggregator::JudgeRequiresAggr(const CNodePtr &cnode) {
+  MS_EXCEPTION_IF_NULL(cnode);
+  std::string cnode_name = AnfAlgo::GetCNodeName(cnode);
+  if (kNameToIdxMap.count(cnode_name) == 0 || kNameToIdxMap.at(cnode_name).count("inputs") == 0 ||
+      kNameToIdxMap.at(cnode_name).at("inputs").count("weight") == 0) {
+    MS_LOG(EXCEPTION) << "Can't find index info of weight for kernel " << cnode_name;
+    return false;
+  }
+  size_t cnode_weight_idx = kNameToIdxMap.at(cnode_name).at("inputs").at("weight");
+  auto weight_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, cnode_weight_idx), 0).first;
+  MS_EXCEPTION_IF_NULL(weight_node);
+
+  if (!weight_node->isa<Parameter>()) {
+    MS_LOG(EXCEPTION) << weight_node->fullname_with_scope() << " is not a parameter node.";
+    return false;
+  }
+  auto param_info = weight_node->cast<ParameterPtr>()->param_info();
+  MS_EXCEPTION_IF_NULL(param_info);
+  requires_aggr_ = param_info->requires_aggr();
+  return requires_aggr_;
+}
+
 template bool ParameterAggregator::AssignMemory(std::shared_ptr<kernel::OptimizerKernel> server_kernel,
                                                 const CNodePtr &cnode,
                                                 const ReuseKernelNodeInfo &reuse_kernel_node_inputs_info,
diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.h b/mindspore/ccsrc/fl/server/parameter_aggregator.h
index f7f02f7ea07..4fc3fe60f0c 100644
--- a/mindspore/ccsrc/fl/server/parameter_aggregator.h
+++ b/mindspore/ccsrc/fl/server/parameter_aggregator.h
@@ -57,7 +57,8 @@ class ParameterAggregator {
         aggregation_done_(false),
         optimizing_done_(false),
         pulling_done_(true),
-        memory_register_(nullptr) {}
+        memory_register_(nullptr),
+        requires_aggr_(true) {}
   ~ParameterAggregator() = default;
 
   // Initialize ParameterAggregator with a cnode. This cnode is normally a optimizer kernel for now.
@@ -94,6 +95,9 @@ class ParameterAggregator {
   bool IsOptimizingDone() const;
   bool IsPullingDone() const;
 
+  // Return whether this parameter requires aggragation.
+  bool requires_aggr() const;
+
  private:
   // Initializing aggregation/optimizer kenerls based on the cnode. The reason of this is described in the file
   // kernel/kernel_factory.h.
@@ -118,6 +122,9 @@ class ParameterAggregator {
   // configuration, etc.
   std::vector<std::string> SelectAggregationAlgorithm(const CNodePtr &cnode);
 
+  // Judge whether the parameter needs to be aggregated.
+  bool JudgeRequiresAggr(const CNodePtr &cnode);
+
   ServerMode server_mode_;
   size_t required_push_count_;
   size_t required_pull_count_;
@@ -135,6 +142,9 @@ class ParameterAggregator {
   // Here stores multiple pairs of server kernels to parameters of their Launch function.
   std::vector<std::pair<std::shared_ptr<kernel::AggregationKernel>, KernelParams>> aggregation_kernel_parameters_;
   std::vector<std::pair<std::shared_ptr<kernel::OptimizerKernel>, KernelParams>> optimizer_kernel_parameters_;
+
+  // Whether this parameter needs to be aggregated.
+  bool requires_aggr_;
 };
 }  // namespace server
 }  // namespace fl
diff --git a/mindspore/ccsrc/fl/server/round.cc b/mindspore/ccsrc/fl/server/round.cc
index 2805d27a880..0b578814b29 100644
--- a/mindspore/ccsrc/fl/server/round.cc
+++ b/mindspore/ccsrc/fl/server/round.cc
@@ -169,12 +169,11 @@ bool Round::check_timeout() const { return check_timeout_; }
 size_t Round::time_window() const { return time_window_; }
 
 void Round::OnFirstCountEvent(const std::shared_ptr<ps::core::MessageHandler> &message) {
-  MS_ERROR_IF_NULL_WO_RET_VAL(message);
   MS_ERROR_IF_NULL_WO_RET_VAL(kernel_);
-  MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_);
   MS_LOG(INFO) << "Round " << name_ << " first count event is triggered.";
   // The timer starts only after the first count event is triggered by DistributedCountService.
   if (check_timeout_) {
+    MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_);
     iter_timer_->Start(std::chrono::milliseconds(time_window_));
   }
 
@@ -184,12 +183,11 @@ void Round::OnFirstCountEvent(const std::shared_ptr<ps::core::MessageHandler> &m
 }
 
 void Round::OnLastCountEvent(const std::shared_ptr<ps::core::MessageHandler> &message) {
-  MS_ERROR_IF_NULL_WO_RET_VAL(message);
   MS_ERROR_IF_NULL_WO_RET_VAL(kernel_);
-  MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_);
   MS_LOG(INFO) << "Round " << name_ << " last count event is triggered.";
   // Same as the first count event, the timer must be stopped by DistributedCountService.
   if (check_timeout_) {
+    MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_);
     iter_timer_->Stop();
   }
 
diff --git a/mindspore/ccsrc/fl/server/server.h b/mindspore/ccsrc/fl/server/server.h
index 8566d4f6f2d..bd0a3c6aa68 100644
--- a/mindspore/ccsrc/fl/server/server.h
+++ b/mindspore/ccsrc/fl/server/server.h
@@ -72,7 +72,15 @@ class Server {
         scheduler_ip_(""),
         scheduler_port_(0),
         server_num_(0),
-        worker_num_(0) {}
+        worker_num_(0),
+        fl_server_port_(0),
+        cipher_initial_client_cnt_(0),
+        cipher_exchange_secrets_cnt_(0),
+        cipher_share_secrets_cnt_(0),
+        cipher_get_clientlist_cnt_(0),
+        cipher_reconstruct_secrets_up_cnt_(0),
+        cipher_reconstruct_secrets_down_cnt_(0),
+        cipher_time_window_(0) {}
   ~Server() = default;
   Server(const Server &) = delete;
   Server &operator=(const Server &) = delete;
diff --git a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc
index 86d0bf78cc0..5f35bc96558 100644
--- a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc
+++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc
@@ -909,9 +909,9 @@ CNodePtr GetPrimalUser(const CNodePtr &j_user, const std::map<FuncGraphPtr, std:
   return primal_user;
 }
 
-static std::vector<std::pair<CNodePtr, CNodePtr>> FindPrimalJPair(const FuncGraphManagerPtr &manager,
-                                                                  const FuncGraphPtr &primal_graph) {
-  std::vector<std::pair<CNodePtr, CNodePtr>> primal_j_pair;
+static std::unordered_map<CNodePtr, std::vector<CNodePtr>> FindPrimalJPair(const FuncGraphManagerPtr &manager,
+                                                                           const FuncGraphPtr &primal_graph) {
+  std::vector<CNodePtr> j_users;
   std::map<FuncGraphPtr, std::vector<CNodePtr>> primal_map;
   const auto &node_user_map = manager->node_users();
   // Search primal graph user cnodes.
@@ -930,20 +930,22 @@ static std::vector<std::pair<CNodePtr, CNodePtr>> FindPrimalJPair(const FuncGrap
       primal_map[fg] = {cnode};
     } else if (IsPrimitive(cnode->inputs().at(0), prim::kPrimJ)) {
       // To find J user.
-      auto j_user = GetJUser(node_user_map, cnode, index);
-      (void)primal_j_pair.emplace_back(std::pair<CNodePtr, CNodePtr>(nullptr, j_user));
+      j_users.emplace_back(GetJUser(node_user_map, cnode, index));
     }
   }
 
-  for (auto &[primal_user, j_user] : primal_j_pair) {
+  std::unordered_map<CNodePtr, std::vector<CNodePtr>> primal_user_to_j_users;
+  for (const auto &j_user : j_users) {
+    MS_EXCEPTION_IF_NULL(j_user);
     auto primal = GetPrimalUser(j_user, primal_map);
-    if (primal != nullptr) {
-      MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString()
-                    << " and J user is: " << j_user->DebugString();
-      primal_user = primal;
+    if (primal == nullptr) {
+      continue;
     }
+    MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString()
+                  << " and J user is: " << j_user->DebugString();
+    primal_user_to_j_users[primal].emplace_back(j_user);
   }
-  return primal_j_pair;
+  return primal_user_to_j_users;
 }
 
 static void RemovePrimalUpdateStates(const FuncGraphManagerPtr &manager, const CNodePtr &primal_call) {
@@ -1007,26 +1009,32 @@ void DFunctor::EliminatePrimalGraph() {
   // Find primal user and paired J user cnodes.
   auto manager = primal_graph_->manager();
   MS_EXCEPTION_IF_NULL(manager);
-  auto prim_j_pair = FindPrimalJPair(manager, primal_graph_);
-  for (auto &[primal_user, j_user] : prim_j_pair) {
-    if (primal_user == nullptr || j_user == nullptr) {
-      // Skip if one of them not found.
-      return;
+  auto primal_user_to_j_users = FindPrimalJPair(manager, primal_graph_);
+  for (const auto &iter : primal_user_to_j_users) {
+    auto primal_user = iter.first;
+    auto &j_users = iter.second;
+    MS_EXCEPTION_IF_NULL(primal_user);
+    if (j_users.size() == 1) {
+      // If both inputs are same except monads, we copy primal monad args to k graph
+      // so that they can be combined in CSE (common subexpression elimination) pass.
+      // Only do this when the size of j_users is 1 in order to keep the execution order.
+      const bool has_monad = CopyMonadArguments(primal_user, j_users[0]);
+      // Remove the UpdateState nodes after primal_user if need.
+      if (has_monad) {
+        RemovePrimalUpdateStates(manager, primal_user);
+      }
+    } else {
+      MS_LOG(INFO) << "There are multiple j users with the same primal user " << primal_user->DebugString();
     }
 
     // Replace primal graph with k graph.
     auto k_vnode = NewValueNode(k_graph_);
     primal_user->set_input(0, k_vnode);
-    primal_user->set_abstract(j_user->abstract());
-
-    // If both inputs are same except monads, we copy primal monad args to k graph
-    // so that they can be combined in CSE (common subexpression elimination) pass.
-    const bool has_monad = CopyMonadArguments(primal_user, j_user);
-    // Remove the UpdateState nodes after primal_user if need.
-    if (has_monad) {
-      RemovePrimalUpdateStates(manager, primal_user);
+    if (j_users.empty()) {
+      MS_LOG(EXCEPTION) << "The J nodes for primal graph " << primal_graph_->ToString()
+                        << " should be used by at least one other node.";
     }
-
+    primal_user->set_abstract(j_users[0]->abstract());
     // Insert tuple_getitem after primal user cnode.
     auto construct_wrapper = primal_user->func_graph();
     auto tuple_getitem = NewValueNode(prim::kPrimTupleGetItem);
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.cc b/mindspore/ccsrc/frontend/optimizer/irpass.cc
index 1dcc6593bc4..478afa46d86 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass.cc
+++ b/mindspore/ccsrc/frontend/optimizer/irpass.cc
@@ -186,8 +186,19 @@ OptimizeIRPassLib::OptimizeIRPassLib() {
     MakeSubstitution(std::make_shared<SpecializeOnGraphArguments>(), "specialize_transform", IsCNodeGraph);
 
   // UpdateState eliminate
-  updatestate_eliminater_ =
-    MakeSubstitution(std::make_shared<UpdatestateEliminater>(), "updatestate_eliminater", prim::kPrimUpdateState);
+  updatestate_only_used_node_eliminater_ =
+    MakeSubstitution(std::make_shared<UpdatestateOnlyUsedNodeEliminater>(), "updatestate_only_used_node_eliminater",
+                     prim::kPrimUpdateState);
+  updatestate_pure_node_eliminater_ = MakeSubstitution(std::make_shared<UpdatestatePureNodeEliminater>(),
+                                                       "updatestate_pure_node_eliminater", prim::kPrimUpdateState);
+  updatestate_depend_eliminater_ = MakeSubstitution(std::make_shared<UpdatestateDependEliminater>(),
+                                                    "updatestate_depend_eliminater", prim::kPrimUpdateState);
+  updatestate_assign_eliminater_ = MakeSubstitution(std::make_shared<UpdatestateAssignEliminater>(),
+                                                    "updatestate_assign_eliminater", prim::kPrimUpdateState);
+  updatestate_maketuple_eliminater_ = MakeSubstitution(std::make_shared<UpdatestateMakeTupleEliminater>(),
+                                                       "updatestate_maketuple_eliminater", prim::kPrimUpdateState);
+  updatestate_loads_eliminater_ = MakeSubstitution(std::make_shared<UpdatestateLoadsEliminater>(),
+                                                   "updatestate_loads_eliminater", prim::kPrimUpdateState);
   switch_call_monad_eliminater_ = MakeSubstitution(std::make_shared<SwitchCallMonadParameterEliminater>(),
                                                    "switch_call_monad_eliminater", IsCNodeDup);
 
@@ -261,13 +272,9 @@ OptimizeIRPassLib::OptimizeIRPassLib() {
 }
 
 ResolveIRPassLib::ResolveIRPassLib() {
-  resolver_resolve_and_getattr_ =
-    MakeSubstitution(std::make_shared<ResolverResolveAndGetAttr>(), "resolver_resolve_and_getattr",
-                     {prim::kPrimGetAttr, prim::kPrimResolve});
-  resolver_resolve_ = MakeSubstitution(std::make_shared<ResolverResolve>(), "resolver_resolve", prim::kPrimResolve);
-  resolver_getattr_ = MakeSubstitution(std::make_shared<ResolverGetAttr>(), "resolver_getattr", prim::kPrimGetAttr);
-  resolver_getattr_resolve_ =
-    MakeSubstitution(std::make_shared<ResolverGetAttrResolve>(), "resolver_getattr_resolve", prim::kPrimGetAttr);
+  // In resolver_getattr_resolve_, some patterns have priority over others.
+  resolver_getattr_resolve_ = MakeSubstitution(std::make_shared<ResolverGetAttrResolve>(), "getattr_resolve",
+                                               {prim::kPrimGetAttr, prim::kPrimResolve}, opt::CHECK_RENORM, true);
 }
 
 InferenceOptPrepareLib::InferenceOptPrepareLib() {
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.h b/mindspore/ccsrc/frontend/optimizer/irpass.h
index 5d0d2d36e89..6db60d397b2 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass.h
+++ b/mindspore/ccsrc/frontend/optimizer/irpass.h
@@ -108,7 +108,12 @@ class OptimizeIRPassLib {
   SubstitutionPtr specialize_transform_;
 
   // Auto-monad related eliminaters.
-  SubstitutionPtr updatestate_eliminater_;
+  SubstitutionPtr updatestate_only_used_node_eliminater_;
+  SubstitutionPtr updatestate_pure_node_eliminater_;
+  SubstitutionPtr updatestate_depend_eliminater_;
+  SubstitutionPtr updatestate_assign_eliminater_;
+  SubstitutionPtr updatestate_maketuple_eliminater_;
+  SubstitutionPtr updatestate_loads_eliminater_;
   SubstitutionPtr switch_call_monad_eliminater_;
   SubstitutionPtr stopgrad_eliminater_;
   SubstitutionPtr load_eliminater_;
@@ -166,10 +171,6 @@ class ResolveIRPassLib {
  public:
   ResolveIRPassLib();
   ~ResolveIRPassLib() = default;
-
-  SubstitutionPtr resolver_resolve_and_getattr_;
-  SubstitutionPtr resolver_resolve_;
-  SubstitutionPtr resolver_getattr_;
   SubstitutionPtr resolver_getattr_resolve_;
 };
 
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h
index 68545b213b3..53aa13c93b0 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h
+++ b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h
@@ -34,117 +34,17 @@
 namespace mindspore {
 namespace opt {
 namespace irpass {
-const char PARSE_SUPER_NAME[] = "namespace";
-
-// {prim::kPrimResolve, Ns, Sym}
-class ResolverResolve : public AnfVisitor {
- public:
-  AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override {
-    Reset();
-    AnfVisitor::Match(prim::kPrimResolve, {IsVNode, IsVNode})(node);
-    if (sym_ != nullptr) {
-      return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node);
-    }
-    return nullptr;
-  }
-
-  void Visit(const ValueNodePtr &vnode) override {
-    if (IsValueNode<parse::NameSpace>(vnode)) {
-      ns_ = GetValueNode<parse::NameSpacePtr>(vnode);
-    } else if (ns_ != nullptr && IsValueNode<parse::Symbol>(vnode)) {
-      sym_ = GetValueNode<parse::SymbolPtr>(vnode);
-    }
-  }
-
-  void Reset() {
-    ns_ = nullptr;
-    sym_ = nullptr;
-  }
-
- private:
-  parse::NameSpacePtr ns_{nullptr};
-  parse::SymbolPtr sym_{nullptr};
-};
-
-// {prim::kPrimGetAttr, Ns, Str}
-class ResolverGetAttr : public AnfVisitor {
- public:
-  AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override {
-    Reset();
-    AnfVisitor::Match(prim::kPrimGetAttr, {IsVNode, IsVNode})(node);
-    if (sym_ != nullptr) {
-      return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node);
-    }
-    return nullptr;
-  }
-
-  void Visit(const AnfNodePtr &node) override {
-    if (IsValueNode<parse::NameSpace>(node)) {
-      ns_ = GetValueNode<parse::NameSpacePtr>(node);
-    } else if (ns_ != nullptr && IsValueNode<StringImm>(node)) {
-      auto str = GetValue<std::string>(GetValueNode(node));
-      sym_ = std::make_shared<parse::Symbol>(str);
-    }
-  }
-
-  void Reset() {
-    ns_ = nullptr;
-    sym_ = nullptr;
-  }
-
- private:
-  parse::NameSpacePtr ns_{nullptr};
-  parse::SymbolPtr sym_{nullptr};
-};
-
-// {prim::kPrimGetAttr, {prim::kPrimResolve, ns_node, sym_node}, attr_node}
+// Put GetAttr pattern and Resolve pattern together to ensure that GetAttr pattern always takes precedence over Resolve
+// pattern. After matching GetAttr pattern, there may be new nodes that can match GetAttr pattern and Resolve pattern.
+// The same is true for matching Resolve pattern.
+//
+// {prim::kPrimGetAttr, {prim::kPrimResolve, namespace, symbol}, attr}
+// {prim::kPrimGetAttr, namespace, attr}
+// {prim::kPrimGetAttr, bool, attr}
+// {prim::kPrimResolve, namespace, symbol}
 class ResolverGetAttrResolve : public OptimizerCaller {
  public:
-  AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override {
-    PatternNode<AnfNodePtr> ns_node, sym_node, attr_node;
-    auto ResolveAttrLambda = [&node, &ns_node, &sym_node, &attr_node, &optimizer]() -> AnfNodePtr {
-      auto node_to_getattr = node->cast<CNodePtr>()->input(1);
-      std::string attr_as_string = GetValueNode<StringImmPtr>(attr_node.GetNode(node))->value();
-
-      auto ns_ = GetValueNode<parse::NameSpacePtr>(ns_node.GetNode(node));
-      auto sym_ = GetValueNode<parse::SymbolPtr>(sym_node.GetNode(node));
-      if (ns_->module() == parse::RESOLVE_NAMESPACE_NAME_CLASS_MEMBER && sym_->symbol() != PARSE_SUPER_NAME) {
-        // deal with the case of getting attr from a class member
-        // and avoid the case of getting attr from self (the result of ParseSuper)
-        auto result = parse::ResolveCellwithAttr(optimizer->manager(), ns_, sym_, node_to_getattr, attr_as_string);
-        return result;
-      }
-      return nullptr;
-    };
-    MATCH_REPLACE_LAMBDA_IF(
-      node, PPrimitive(prim::kPrimGetAttr, PPrimitive(prim::kPrimResolve, ns_node, sym_node), attr_node),
-      ResolveAttrLambda, attr_node.CheckFunc(IsValueNode<StringImm>, node));
-
-    return nullptr;
-  }
-};
-
-class ResolverResolveAndGetAttr : public OptimizerCaller {
- public:
-  ResolverResolveAndGetAttr() {
-    resolver_optimizers_ = {std::make_shared<ResolverGetAttrResolve>(), std::make_shared<ResolverResolve>(),
-                            std::make_shared<ResolverGetAttr>()};
-  }
-  virtual ~ResolverResolveAndGetAttr() = default;
-
-  AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override {
-    AnfNodePtr new_node;
-    for (const auto &resolver_opt : resolver_optimizers_) {
-      new_node = (*resolver_opt)(optimizer, node);
-      if (new_node != nullptr) {
-        return new_node;
-      }
-    }
-    return nullptr;
-  }
-
- private:
-  std::vector<OptimizerCallerPtr> resolver_optimizers_{};
+  AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override;
 };
 }  // namespace irpass
 }  // namespace opt
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc
index a4d9137bc09..02eede35af8 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc
+++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc
@@ -22,6 +22,10 @@
 #include <vector>
 
 #include "frontend/operator/ops.h"
+#include "frontend/optimizer/irpass.h"
+#include "frontend/optimizer/optimizer_caller.h"
+#include "frontend/optimizer/anf_visitor.h"
+#include "ir/pattern_matcher.h"
 
 namespace mindspore::opt::irpass {
 namespace {
@@ -81,35 +85,7 @@ bool OnlyUsedByTwoNode(const AnfNodePtr &be_used_node, const AnfNodePtr &first_n
          (first_user == second_node && second_user == first_node);
 }
 
-// Eliminate useless node that only used by associated update_state.
-// Convert:
-//   x1 = node(x, u)
-//   u1 = update_state(u, x1) # update_state is the only user of node
-//   user(u1)
-// To:
-//   user(u)
-AnfNodePtr EliminateUpdateStateOnlyUsedNode(const CNodePtr &update_state, const AnfNodePtr &node) {
-  if (!OnlyUsedByOneNode(node, update_state)) {
-    // Skip if UpdateState is not the only user of cnode.
-    return nullptr;
-  }
-  // Replace UpdateState with the input monad.
-  return update_state->input(kInputIndex);
-}
-
-// Eliminate UpdateState that attaches a pure (no-side-effect) node.
-// Convert:
-//   x = pure_node(args) # no side effect
-//   u1 = update_state(u, x)
-//   user(u1)
-// To:
-//   x = pure_node(args)
-//   user(u)
 AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const AnfNodePtr &attach) {
-  if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem)) {
-    // Skip tuple_getitem.
-    return nullptr;
-  }
   auto cnode = dyn_cast<CNode>(attach);
   if (cnode == nullptr) {
     // Skip value node or parameter.
@@ -122,26 +98,11 @@ AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const A
       return nullptr;
     }
   }
-  // Skip Call/Switch/SwitchLayer.
-  auto first_input_node = cnode->input(kFirstInputIndex);
-  if (IsPrimitiveCNode(first_input_node, prim::kPrimCall) || IsPrimitiveCNode(first_input_node, prim::kPrimSwitch) ||
-      IsPrimitiveCNode(first_input_node, prim::kPrimSwitchLayer)) {
-    return nullptr;
-  }
 
   // Remove UpdateState by replace it with its input monad.
   return update_state->input(kInputIndex);
 }
 
-// Eliminate redundant UpdateState/Depend pair nodes caused by inline.
-// Convert:
-//    x1 = Depend(x, u)
-//    u1 = UpdateState(u, x1)
-//    out = x_user(x1)
-//    u2 = u_user(u1)
-// To:
-//    out = x_user(x)
-//    u2 = u_user(u)
 AnfNodePtr EliminateUpdateStateWithDepend(const CNodePtr &update_state, const CNodePtr &depend) {
   auto input_monad = depend->inputs().back();
   if (!HasAbstractMonad(input_monad)) {
@@ -638,28 +599,86 @@ AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const CNodePtr &update_sta
   }
   return nullptr;
 }
-
 }  // namespace
 
-AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+// Eliminate useless node that only used by associated update_state.
+// {prim::kPrimUpdateState, u, {prim::kPrimLoad, m, u}} -> u
+// {prim::kPrimUpdateState, u, {prim::kPrimPartial, m, u}} -> u
+// Convert:
+//   x1 = node(x, u)
+//   u1 = update_state(u, x1) # update_state is the only user of x1.
+//   user(u1)
+// To:
+//   user(u)
+AnfNodePtr UpdatestateOnlyUsedNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
   auto update_state_node = dyn_cast<CNode>(node);
   if (update_state_node == nullptr || update_state_node->inputs().empty()) {
     MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString();
     return nullptr;
   }
   auto &attach = update_state_node->input(kAttachIndex);
+  if (IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimLoad)) {
+    // Replace UpdateState with the input monad.
+    if (OnlyUsedByOneNode(attach, update_state_node)) {
+      return update_state_node->input(kInputIndex);
+    }
+  }
+  return nullptr;
+}
 
-  // Handle UpdateState(u, Depend(...)).
+// Eliminate UpdateState that attaches a pure (no-side-effect) node.
+// Convert:
+//   x = pure_node(args) # no side effect
+//   u1 = update_state(u, x)
+//   user(u1)
+// To:
+//   x = pure_node(args)
+//   user(u)
+AnfNodePtr UpdatestatePureNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+  auto update_state_node = dyn_cast<CNode>(node);
+  if (update_state_node == nullptr || update_state_node->inputs().empty()) {
+    MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString();
+    return nullptr;
+  }
+  auto &attach = update_state_node->input(kAttachIndex);
+  if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem) || IsPrimitiveCNode(attach, prim::kPrimDepend) ||
+      IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) {
+    return nullptr;
+  }
+  return EliminateUpdateStateForPureNode(update_state_node, attach);
+}
+
+// Eliminate redundant UpdateState/Depend pair nodes caused by inline.
+// Convert:
+//    x1 = Depend(x, u)
+//    u1 = UpdateState(u, x1)
+//    out = x_user(x1)
+//    u2 = u_user(u1)
+// To:
+//    out = x_user(x)
+//    u2 = u_user(u)
+AnfNodePtr UpdatestateDependEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+  auto update_state_node = dyn_cast<CNode>(node);
+  if (update_state_node == nullptr || update_state_node->inputs().empty()) {
+    MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString();
+    return nullptr;
+  }
+  auto &attach = update_state_node->input(kAttachIndex);
   if (IsPrimitiveCNode(attach, prim::kPrimDepend)) {
     return EliminateUpdateStateWithDepend(update_state_node, attach->cast<CNodePtr>());
   }
+  return nullptr;
+}
 
-  // Handle UpdateState(u, Partial(...)).
-  if (IsPrimitiveCNode(attach, prim::kPrimPartial)) {
-    return EliminateUpdateStateOnlyUsedNode(update_state_node, attach);
+// Eliminate UpdateStates between Assign nodes.
+// Eliminate UpdateStates between Assign and MakeTuple.
+AnfNodePtr UpdatestateAssignEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+  auto update_state_node = dyn_cast<CNode>(node);
+  if (update_state_node == nullptr || update_state_node->inputs().empty()) {
+    MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString();
+    return nullptr;
   }
-
-  // Handle UpdateState(u, Assign(...)).
+  auto &attach = update_state_node->input(kAttachIndex);
   if (IsPrimitiveCNode(attach, prim::kPrimAssign)) {
     auto new_node = EliminateUpdateStateBetweenAssigns(update_state_node, attach);
     if (new_node != nullptr) {
@@ -667,20 +686,15 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode
     }
     return EliminateUpdateStateBetweenMakeTupleAssign(update_state_node, attach);
   }
+  return nullptr;
+}
 
-  // Handle UpdateState(u, Load(...)).
-  const bool attach_is_load = IsPrimitiveCNode(attach, prim::kPrimLoad);
-  if (attach_is_load) {
-    auto new_node = EliminateUpdateStateOnlyUsedNode(update_state_node, attach);
-    if (new_node != nullptr) {
-      return new_node;
-    }
-  }
-
-  // Handle UpdateState(u, MakeTuple(...)).
-  const bool attach_is_tuple = IsPrimitiveCNode(attach, prim::kPrimMakeTuple);
-  if (attach_is_tuple) {
-    auto make_tuple = attach->cast<CNodePtr>();
+// Eliminate UpdateStates which the second input is MakeTuple.
+AnfNodePtr UpdatestateMakeTupleEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+  PatternNode<AnfNodePtr> u, attach;
+  auto MakeTupleLambda = [&node, &u, &attach]() -> AnfNodePtr {
+    auto update_state_node = node->cast<CNodePtr>();
+    auto make_tuple = attach.GetNode(node)->cast<CNodePtr>();
     auto new_node = EliminateMakeTupleWithDeadNode(update_state_node, make_tuple);
     if (new_node != nullptr) {
       return new_node;
@@ -689,23 +703,31 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode
     if (new_node != nullptr) {
       return new_node;
     }
-    new_node = EliminateUpdateStateBetweenAssignMakeTuple(update_state_node, make_tuple);
-    if (new_node != nullptr) {
-      return new_node;
-    }
+    return EliminateUpdateStateBetweenAssignMakeTuple(update_state_node, make_tuple);
+  };
+
+  MATCH_REPLACE_LAMBDA_IF(node, PPrimitive(prim::kPrimUpdateState, u, attach), MakeTupleLambda,
+                          IsPrimitiveCNode(attach.GetNode(node), prim::kPrimMakeTuple));
+  return nullptr;
+}
+
+// Eliminate UpdateStates for consecutive Loads.
+AnfNodePtr UpdatestateLoadsEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
+  auto update_state_node = dyn_cast<CNode>(node);
+  if (update_state_node == nullptr || update_state_node->inputs().empty()) {
+    MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString();
+    return nullptr;
   }
-  // Merge UpdateStates for Loads.
-  if (attach_is_load || attach_is_tuple) {
+  auto &attach = update_state_node->input(kAttachIndex);
+  if (IsPrimitiveCNode(attach, prim::kPrimLoad) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) {
     std::vector<CNodePtr> update_states;
     std::vector<CNodePtr> loads;
     GetLoadsFromUpdateState(update_state_node, &update_states, &loads);
     if (update_states.size() > 1 && loads.size() > 1) {
       return EliminateUpdateStateForLoads(update_state_node, update_states, loads);
     }
-    return nullptr;
   }
-  // Eliminate UpdateStates that attaches a no-side-effect node.
-  return EliminateUpdateStateForPureNode(update_state_node, attach);
+  return nullptr;
 }
 
 // Eliminate Monad parameter for switch call.
@@ -725,7 +747,7 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode
 //     g2 = Partial(..., u)
 //     s = switch(cond, g1, g2)
 //     res = s()
-AnfNodePtr EliminateMonadParameterForSwitchCall(const AnfNodePtr &node) {
+AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
   const CNodePtr &switch_call = dyn_cast<CNode>(node);
   if (switch_call == nullptr) {
     return nullptr;
@@ -777,8 +799,4 @@ AnfNodePtr EliminateMonadParameterForSwitchCall(const AnfNodePtr &node) {
   auto new_switch_call = fg->NewCNode({new_switch_cnode});
   return new_switch_call;
 }
-
-AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) {
-  return EliminateMonadParameterForSwitchCall(node);
-}
 }  // namespace mindspore::opt::irpass
diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h
index 1e61459cc72..60fe63e0d9d 100644
--- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h
+++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h
@@ -21,17 +21,44 @@
 #include "frontend/optimizer/anf_visitor.h"
 
 namespace mindspore::opt::irpass {
-//
-// UpdatestateEliminater eliminates redundant UpdateState related nodes.
-//
-class UpdatestateEliminater : public AnfVisitor {
+// Eliminate useless node that only used by associated update_state.
+class UpdatestateOnlyUsedNodeEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
+};
+
+// Eliminate UpdateStates that attaches a no-side-effect node.
+class UpdatestatePureNodeEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
+};
+
+// Eliminate redundant UpdateState/Depend pair nodes caused by inline.
+class UpdatestateDependEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
+};
+
+// Eliminate UpdateStates between Assign nodes.
+// Eliminate UpdateStates between Assign and MakeTuple.
+class UpdatestateAssignEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
+};
+
+// Eliminate UpdateStates which the second input is MakeTuple.
+class UpdatestateMakeTupleEliminater : public AnfVisitor {
+ public:
+  AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
+};
+
+// Eliminate UpdateStates for consecutive Loads.
+class UpdatestateLoadsEliminater : public AnfVisitor {
  public:
   AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
 };
 
-//
 // SwitchCallMonadParameterEliminater eliminates Monad parameter in switch call.
-//
 class SwitchCallMonadParameterEliminater : public AnfVisitor {
  public:
   AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override;
diff --git a/mindspore/ccsrc/frontend/optimizer/opt.cc b/mindspore/ccsrc/frontend/optimizer/opt.cc
index 1b0bbf80415..30ec46304b3 100644
--- a/mindspore/ccsrc/frontend/optimizer/opt.cc
+++ b/mindspore/ccsrc/frontend/optimizer/opt.cc
@@ -30,13 +30,14 @@ namespace mindspore {
 /* namespace to support opt */
 namespace opt {
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim,
-                                 const RenormAction &renorm_action) {
+                                 const RenormAction &renorm_action, bool has_priority_pattern) {
   auto fn = [prim](const AnfNodePtr &node) -> bool { return IsPrimitiveCNode(node, prim); };
-  return std::make_shared<Substitution>(transform, name, fn, renorm_action);
+  return std::make_shared<Substitution>(transform, name, fn, renorm_action, has_priority_pattern);
 }
 
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name,
-                                 const std::vector<PrimitivePtr> &prims, const RenormAction &renorm_action) {
+                                 const std::vector<PrimitivePtr> &prims, const RenormAction &renorm_action,
+                                 bool has_priority_pattern) {
   auto fn = [prims](const AnfNodePtr &node) -> bool {
     if (!node->isa<CNode>()) {
       return false;
@@ -59,12 +60,13 @@ SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std:
     return false;
   };
 
-  return std::make_shared<Substitution>(transform, name, fn, renorm_action);
+  return std::make_shared<Substitution>(transform, name, fn, renorm_action, has_priority_pattern);
 }
 
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name,
-                                 const PredicateFuncType &predicate, const RenormAction &renorm_action) {
-  return std::make_shared<Substitution>(transform, name, predicate, renorm_action);
+                                 const PredicateFuncType &predicate, const RenormAction &renorm_action,
+                                 bool has_priority_pattern) {
+  return std::make_shared<Substitution>(transform, name, predicate, renorm_action, has_priority_pattern);
 }
 
 AnfNodePtr Substitution::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) {
@@ -126,16 +128,41 @@ static AnfNodePtr DoTransform(const OptimizerPtr &optimizer, const AnfNodePtr &n
   return nullptr;
 }
 
-static void UpdateTransformingList(const OptimizerPtr &optimizer, const AnfNodePtr &node, std::deque<AnfNodePtr> *todo,
-                                   bool change, size_t seen) {
+static void UpdateTransformingListForSubstitutions(const AnfNodePtr &node, std::deque<AnfNodePtr> *todo, bool change) {
   if (IsValueNode<FuncGraph>(node)) {
     (*todo).emplace_back(GetValueNode<FuncGraphPtr>(node)->output());
   }
-  if (node->isa<CNode>()) {
-    auto &inputs = node->cast<CNodePtr>()->inputs();
-    (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo));
+
+  if (change) {
+    (*todo).emplace_back(node);
+  } else {
+    if (node->isa<CNode>()) {
+      auto &inputs = node->cast<CNodePtr>()->inputs();
+      (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo));
+    }
+  }
+}
+
+static void UpdateTransformingListForIR(const AnfNodePtr &node, std::deque<AnfNodePtr> *todo, bool change,
+                                        const SubstitutionPtr &substitution) {
+  if (IsValueNode<FuncGraph>(node)) {
+    (*todo).emplace_back(GetValueNode<FuncGraphPtr>(node)->output());
   }
 
+  // If there is a priority pattern in substitution, don't transform the new node,
+  // otherwise some nodes may match the wrong patterns.
+  if (change && substitution != nullptr && !substitution->has_priority_pattern_) {
+    (*todo).emplace_back(node);
+  } else {
+    if (node->isa<CNode>()) {
+      auto &inputs = node->cast<CNodePtr>()->inputs();
+      (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo));
+    }
+  }
+}
+
+static void UpdateTransformingListWithUserNodes(const OptimizerPtr &optimizer, const AnfNodePtr &node,
+                                                std::deque<AnfNodePtr> *todo, bool change, size_t seen) {
   if (!change) {
     return;
   }
@@ -185,11 +212,11 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con
         change = true;
         changes = true;
         node = res;
-        todo.emplace_back(res);
         break;
       }
     }
-    UpdateTransformingList(optimizer, node, &todo, change, seen);
+    UpdateTransformingListForSubstitutions(node, &todo, change);
+    UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen);
   }
 #ifdef ENABLE_PROFILE
   MsProfile::StatTime("opt.transforms." + optimizer->name(), GetTime() - start);
@@ -197,7 +224,7 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con
   return changes;
 }
 
-bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &root_node,
+bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph,
                                              const SubstitutionPtr &substitution) const {
 #ifdef ENABLE_PROFILE
   double start = GetTime();
@@ -205,7 +232,7 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons
   FuncGraphManagerPtr manager = optimizer->manager();
   auto seen = NewSeenGeneration();
   std::deque<AnfNodePtr> todo;
-  todo.emplace_back(root_node);
+  todo.emplace_back(func_graph->output());
   bool changes = false;
 
   auto &all_nodes = manager->all_nodes();
@@ -225,7 +252,8 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons
       changes = true;
       node = res;
     }
-    UpdateTransformingList(optimizer, node, &todo, change, seen);
+    UpdateTransformingListForIR(node, &todo, change, substitution);
+    UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen);
   }
 
 #ifdef ENABLE_PROFILE
@@ -268,7 +296,7 @@ bool SubstitutionList::ApplySubstitutionsToIR(const OptimizerPtr &optimizer, con
     loop = false;
     for (size_t i = 0; i < list_.size(); i++) {
       const auto &substitution = list_[i];
-      bool change = ApplySubstitutionToIR(optimizer, func_graph->output(), substitution);
+      bool change = ApplySubstitutionToIR(optimizer, func_graph, substitution);
       changes = changes || change;
       loop = loop || change;
 
diff --git a/mindspore/ccsrc/frontend/optimizer/opt.h b/mindspore/ccsrc/frontend/optimizer/opt.h
index 74711b4583a..3370f1cebcd 100644
--- a/mindspore/ccsrc/frontend/optimizer/opt.h
+++ b/mindspore/ccsrc/frontend/optimizer/opt.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_
 #define MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_
 
+#include <deque>
 #include <memory>
 #include <string>
 #include <vector>
@@ -41,11 +42,18 @@ class Substitution {
   OptimizerCallerPtr transform_;
   std::string name_;
   PredicateFuncType predicate_{nullptr};
-  // an enum to mark this Substitution relation to renormalize pass
+  // An enum to mark this Substitution relation to renormalize pass
   RenormAction renorm_action_;
+  // Determine whether it is a priority substitution, that is, some patterns need to be matched prior to others.
+  bool has_priority_pattern_{false};
+
   Substitution(const OptimizerCallerPtr &transform, const std::string &name, const PredicateFuncType &predicate,
-               const RenormAction &renorm_action)
-      : transform_(transform), name_(name), predicate_(predicate), renorm_action_(renorm_action) {}
+               const RenormAction &renorm_action, bool has_priority_pattern)
+      : transform_(transform),
+        name_(name),
+        predicate_(predicate),
+        renorm_action_(renorm_action),
+        has_priority_pattern_(has_priority_pattern) {}
   ~Substitution() = default;
   AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node);
 };
@@ -53,12 +61,13 @@ class Substitution {
 using SubstitutionPtr = std::shared_ptr<Substitution>;
 
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim,
-                                 const RenormAction &action_renorm = CHECK_RENORM);
+                                 const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false);
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name,
                                  const std::vector<PrimitivePtr> &prims,
-                                 const RenormAction &action_renorm = CHECK_RENORM);
+                                 const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false);
 SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name,
-                                 const PredicateFuncType &predicate, const RenormAction &action_renorm = CHECK_RENORM);
+                                 const PredicateFuncType &predicate, const RenormAction &action_renorm = CHECK_RENORM,
+                                 bool has_priority_pattern = false);
 
 enum OptTraverseSubstitutionsMode { kOptTraverseFromIRToSubstitutions = 0, kOptTraverseFromSubstitutionsToIR };
 
@@ -73,15 +82,16 @@ class SubstitutionList {
 
  private:
   bool ApplyIRToSubstitutions(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const;
-  bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &node, const SubstitutionPtr &sub) const;
+  bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph,
+                             const SubstitutionPtr &sub) const;
   bool ApplySubstitutionsToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const;
   void DisplayStatusOfSubstitution(const std::unordered_map<std::string, std::vector<bool>> &status,
                                    const OptimizerPtr &optimizer, size_t space) const;
 
   std::vector<SubstitutionPtr> list_;
   // a flag to mark this list of Substitution can only be executed only once
-  bool is_once_;
-  bool global_sensitive_;
+  bool is_once_{false};
+  bool global_sensitive_{false};
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
index f57913c4b9f..81d1168667a 100644
--- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
+++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc
@@ -29,6 +29,7 @@
 
 namespace mindspore {
 namespace parallel {
+
 void GenerateStrategy(const std::shared_ptr<Graph> &graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                       const std::shared_ptr<std::vector<std::vector<size_t>>> &eli_list,
                       const std::vector<std::vector<std::string>> &input_tensor_names,
@@ -37,6 +38,7 @@ void GenerateStrategy(const std::shared_ptr<Graph> &graph, const std::vector<std
   MS_EXCEPTION_IF_NULL(eli_list);
   MS_EXCEPTION_IF_NULL(index_list);
   GeneratePartitionedOperatorStrategy(graph, ops, index_list);
+
   std::shared_ptr<std::vector<size_t>> no_stra_op_list(new std::vector<size_t>);
   for (size_t i = 0; i < eli_list->size(); i++) {
     no_stra_op_list->push_back(eli_list->at(i)[0]);
@@ -488,6 +490,44 @@ Strategys MakeDataParallelStrategy(const std::shared_ptr<Graph> &graph,
   return strategies;
 }
 
+Strategys MakeFullBatchStrategy(const std::shared_ptr<Graph> &graph,
+                                const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_graph,
+                                const size_t iter_ops) {
+  if (ops.empty()) {
+    MS_LOG(EXCEPTION) << "Failure: Operators is empty.";
+  }
+  if (iter_ops >= ops.size()) {
+    MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range.";
+  }
+
+  StrategyPtr origin_strategy = ops[iter_ops]->strategy();
+  Strategys strategies;
+  for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) {
+    if (iter_op_inputs >= origin_strategy->GetInputDim().size()) {
+      MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range.";
+    }
+    Dimensions s;
+    size_t input_size = origin_strategy->GetInputDim()[iter_op_inputs].size();
+    for (size_t dim = 0; dim < input_size; dim++) {
+      if (input_size >= 1 && input_size <= 4) {
+        s.push_back(1);
+      } else if (input_size == 0) {
+        s = {};
+      } else {
+        MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor shape " << input_size << " is unexpected.";
+      }
+    }
+    strategies.push_back(s);
+  }
+  // Update the output strategy of Rec Graph
+  graph->nodes[iter_graph].tensor_parm.tensor_str.str_n = 1.0;
+  graph->nodes[iter_graph].tensor_parm.tensor_str.str_c = 1.0;
+  graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0;
+  graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0;
+
+  return strategies;
+}
+
 void SetBackToRawStrategy(const std::shared_ptr<OperatorInfo> &op) {
   StrategyPtr origin_strategy = op->strategy();
   Strategys strategies;
@@ -528,9 +568,14 @@ Strategys PrepareStrategy(const std::shared_ptr<Graph> &graph, const std::vector
     return PrepareOneHot(graph, ops, iter_graph, iter_ops);
   } else if ((type == SOFTMAX) || (type == LAYER_NORM)) {
     return PrepareAxisRelatedStrategy(graph, ops, iter_graph, iter_ops);
-  } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "_VirtualDataset") || (type == "Dropout") ||
-             (type == BATCH_MATMUL)) {
+  } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "Dropout") || (type == BATCH_MATMUL)) {
     return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops);
+  } else if (type == "_VirtualDataset") {
+    if (ParallelContext::GetInstance()->full_batch()) {
+      return MakeFullBatchStrategy(graph, ops, iter_graph, iter_ops);
+    } else {
+      return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops);
+    }
   } else {
     return MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops);
   }
diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h
index cee86413c2c..cc7c86a2285 100644
--- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h
+++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h
@@ -55,6 +55,9 @@ Strategys CheckDivisible(const std::vector<std::shared_ptr<OperatorInfo>> &ops,
 Strategys MakeDataParallelStrategy(const std::shared_ptr<Graph> &graph,
                                    const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_graph,
                                    const size_t iter_ops);
+Strategys MakeFullBatchStrategy(const std::shared_ptr<Graph> &graph,
+                                const std::vector<std::shared_ptr<OperatorInfo>> &ops, const size_t iter_graph,
+                                const size_t iter_ops);
 void SetBackToRawStrategy(const std::shared_ptr<OperatorInfo> &op);
 Strategys PrepareStrategy(const std::shared_ptr<Graph> &graph, const std::vector<std::shared_ptr<OperatorInfo>> &ops,
                           const size_t iter_graph, const size_t iter_ops);
diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc
index 113227e56e3..2658c3042a2 100644
--- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc
+++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc
@@ -100,7 +100,7 @@ AnfNodePtr CreatInt64Imm(int64_t value) {
   return ValuePtrToAnfNodePtr(value_ptr);
 }
 
-AnfNodePtr CreatTuple(const std::vector<int64_t> &tuple) {
+AnfNodePtr CreateTuple(const std::vector<int64_t> &tuple) {
   std::vector<ValuePtr> value_list;
   std::transform(tuple.begin(), tuple.end(), std::back_inserter(value_list),
                  [](const int64_t value) { return MakeValue(value); });
diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h
index 55801c0af5f..12c0c6bc157 100644
--- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h
+++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h
@@ -41,7 +41,7 @@ AnfNodePtr CreatTypeInt(int64_t value);
 AnfNodePtr CreatInt64Imm(int64_t value);
 AnfNodePtr CreateInt32Tensor(int64_t value);
 AnfNodePtr ValuePtrToAnfNodePtr(const ValuePtr &value_ptr);
-AnfNodePtr CreatTuple(const std::vector<int64_t> &tuple);
+AnfNodePtr CreateTuple(const std::vector<int64_t> &tuple);
 std::string HashInstanceName(const std::string &name);
 
 class GenerateGraph {
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc
index 8fc52daed14..39d998aa2aa 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc
@@ -148,6 +148,9 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) {
     return FAILED;
   }
 
+  int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy;
+  int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy;
+
   if (pad_mode_ == 0) {  // 'pad' mode
     MS_LOG(ERROR) << name_ << ": The 'pad' mode do not support to split H or W";
     return FAILED;
@@ -160,8 +163,6 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) {
     }
 
     if (kernel_size_[0] <= stride_[2] || kernel_size_[1] <= stride_[3]) {
-      int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy;
-      int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy;
       if (h_slice_shape % stride_[2] != 0 || w_slice_shape % stride_[3] != 0) {
         MS_LOG(ERROR) << name_
                       << ": The 'same' mode do not support to split H or W when kernel_size <= stride but slice shape "
@@ -177,24 +178,18 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) {
       return FAILED;
     }
 
-    if (kernel_size_[0] <= stride_[2]) {
-      int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy;
-      if (h_slice_shape % stride_[2] != 0) {
-        MS_LOG(ERROR) << name_
-                      << ": The 'valid' mode do not support to split H when kernel_size <= stride but slice shape is "
-                         "not divisible by stride ";
-        return FAILED;
-      }
+    if (kernel_size_[0] <= stride_[2] && h_slice_shape % stride_[2] != 0) {
+      MS_LOG(ERROR) << name_
+                    << ": The 'valid' mode do not support to split H when kernel_size <= stride but slice shape is "
+                       "not divisible by stride ";
+      return FAILED;
     }
 
-    if (kernel_size_[1] <= stride_[3]) {
-      int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy;
-      if (w_slice_shape % stride_[3] != 0) {
-        MS_LOG(ERROR) << name_
-                      << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is "
-                         "not divisible by stride ";
-        return FAILED;
-      }
+    if (kernel_size_[1] <= stride_[3] && w_slice_shape % stride_[3] != 0) {
+      MS_LOG(ERROR) << name_
+                    << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is "
+                       "not divisible by stride ";
+      return FAILED;
     }
   }
 
@@ -234,6 +229,7 @@ Status Conv2DInfo::CheckStrategyBase(const StrategyPtr &strategy) {
     new_out_channel_ = out_channel_ / weight_strategy[0];
   } else {
     out_channel_shard_ = false;
+    new_out_channel_ = out_channel_;
   }
 
   return SUCCESS;
@@ -527,7 +523,19 @@ void Conv2DInfo::InferOverlapShapes() {
     right_recv_shape[3] = overlap_right_size_;
     recv_shapes_.push_back(right_recv_shape);
   }
-  MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_;
+
+  if (left_need_send_) {
+    Shape left_send_shape = input_slice_shape_;
+    left_send_shape[3] = left_rank_overlap_right_size_;
+    send_shapes_.push_back(left_send_shape);
+  }
+
+  if (right_need_send_) {
+    Shape right_send_shape = input_slice_shape_;
+    right_send_shape[3] = right_rank_overlap_left_size_;
+    send_shapes_.push_back(right_send_shape);
+  }
+  MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_ << ", the send shapes is " << send_shapes_;
 }
 
 void Conv2DInfo::InferStridedSliceAttrs() {
@@ -536,9 +544,6 @@ void Conv2DInfo::InferStridedSliceAttrs() {
     left_strided_slice_end_ = input_slice_shape_;
     left_strided_slice_end_[3] = left_rank_overlap_right_size_;
     left_strided_slice_strides_ = {1, 1, 1, 1};
-    Shape left_send_shape = input_slice_shape_;
-    left_send_shape[3] = left_rank_overlap_right_size_;
-    send_shapes_.push_back(left_send_shape);
     MS_LOG(INFO) << name_ << ": The left strided slice begin is " << left_strided_slice_begin_ << ", end is "
                  << left_strided_slice_end_;
   }
@@ -548,9 +553,6 @@ void Conv2DInfo::InferStridedSliceAttrs() {
     right_strided_slice_begin_[3] = input_slice_shape_[3] - right_rank_overlap_left_size_;
     right_strided_slice_end_ = input_slice_shape_;
     right_strided_slice_strides_ = {1, 1, 1, 1};
-    Shape right_send_shape = input_slice_shape_;
-    right_send_shape[3] = right_rank_overlap_left_size_;
-    send_shapes_.push_back(right_send_shape);
     MS_LOG(INFO) << name_ << ": The right strided slice begin is " << right_strided_slice_begin_ << ", end is "
                  << right_strided_slice_end_;
   }
@@ -566,7 +568,7 @@ void Conv2DInfo::InferNewOperatorAttrs() {
   InferStridedSliceAttrs();
 }
 
-OperatorAttrs Conv2DInfo::CreatNeighborExchangeAttrs(const CNodePtr &cnode) {
+OperatorAttrs Conv2DInfo::CreateNeighborExchangeAttrs(const CNodePtr &cnode) {
   auto type = cnode->Type();
   MS_EXCEPTION_IF_NULL(type);
   auto tensor_type = type->cast<mindspore::TensorTypePtr>();
@@ -582,7 +584,7 @@ OperatorAttrs Conv2DInfo::CreatNeighborExchangeAttrs(const CNodePtr &cnode) {
   return attrs;
 }
 
-OperatorAttrs Conv2DInfo::CreatConv2DAttrs() {
+OperatorAttrs Conv2DInfo::CreateConv2DAttrs() {
   Attr out_channel = {OUT_CHANNEL, MakeValue(new_out_channel_)};
   Attr kernel_size = {KERNEL_SIZE, MakeValue(kernel_size_)};
   Attr mode = {MODE, MakeValue(mode_)};
@@ -592,65 +594,130 @@ OperatorAttrs Conv2DInfo::CreatConv2DAttrs() {
   Attr dilation = {DILATION, MakeValue(dilation_)};
   Attr group = {GROUP, MakeValue(group_)};
   Attr data_format = {DATA_FORMAT, MakeValue(format_)};
-  OperatorAttrs attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format};
+
+  OperatorAttrs attrs;
+  if (name_.find(CONV2D_INFO) != std::string::npos) {
+    attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format};
+  } else {  // Conv2DTranspose
+    attrs = {out_channel, kernel_size, pad_mode, pad, pad, mode, stride, dilation, group, data_format};
+  }
+
   return attrs;
 }
 
+std::string Conv2DInfo::ReplaceNodeName() {
+  if (name_.find(CONV2D_INFO) != std::string::npos) {
+    return CONV2D;
+  }
+
+  if (name_.find(CONV2D_BACK_PROP_INPUT_INFO) != std::string::npos) {
+    return CONV2D_BACK_PROP_INPUT;
+  }
+
+  if (name_.find(CONV2D_TRANSPOSE_INFO) != std::string::npos) {
+    return CONV2D_TRANSPOSE;
+  }
+
+  MS_LOG(EXCEPTION) << "Invalid name: " << name_;
+}
+
+AnfNodePtr Conv2DInfo::GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode) {
+  auto conv2d_attrs = CreateConv2DAttrs();
+  auto node_name = ReplaceNodeName();
+
+  // conv2d
+  if (name_.find(CONV2D_INFO) != std::string::npos) {
+    if (cnode->size() < 3) {
+      MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size();
+    }
+    return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2)});
+  }
+
+  // conv2dtranspose
+  if (cnode->size() < 4) {
+    MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size();
+  }
+  return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2), cnode->input(3)});
+}
+
 Status Conv2DInfo::ComputeReplaceGraph(const CNodePtr &cnode) {
   auto graph = cnode->func_graph();
   MS_EXCEPTION_IF_NULL(graph);
-  GenerateGraph gen_g = GenerateGraph(attrs_);
-  if (gen_g.Init(cnode) != SUCCESS) {
-    MS_LOG(ERROR) << "GenerateGraph Init failed";
-    return FAILED;
+
+  if (gen_g_.Init(cnode) != SUCCESS) {
+    MS_LOG(EXCEPTION) << "GenerateGraph Init failed";
   }
+
+  if (!left_need_send_ && !right_need_send_) {
+    MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to send and right no need to send";
+  }
+
+  if (!left_need_recv_ && !right_need_recv_) {
+    MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to recv and right no need to recv";
+  }
+
   std::vector<std::pair<AnfNodePtr, int64_t>> input_nodes;
   std::vector<AnfNodePtr> make_tuple_a_inputs = {NewValueNode(prim::kPrimMakeTuple)};
   if (left_need_send_) {
-    auto slice_left_begin = CreatTuple(left_strided_slice_begin_);
-    auto slice_left_end = CreatTuple(left_strided_slice_end_);
-    auto slice_left_strided = CreatTuple(left_strided_slice_strides_);
-    auto slice_left = gen_g.PushBack(
-      {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), slice_left_begin, slice_left_end, slice_left_strided});
+    auto slice_left_begin = CreateTuple(left_strided_slice_begin_);
+    auto slice_left_end = CreateTuple(left_strided_slice_end_);
+    auto slice_left_strided = CreateTuple(left_strided_slice_strides_);
+    auto slice_left = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_left_begin,
+                                       slice_left_end, slice_left_strided});
     make_tuple_a_inputs.push_back(slice_left);
+    input_nodes.push_back(std::make_pair(slice_left, 1));
   }
   if (right_need_send_) {
-    auto slice_right_begin = CreatTuple(right_strided_slice_begin_);
-    auto slice_right_end = CreatTuple(right_strided_slice_end_);
-    auto slice_right_strided = CreatTuple(right_strided_slice_strides_);
-    auto slice_right = gen_g.PushBack(
-      {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), slice_right_begin, slice_right_end, slice_right_strided});
+    auto slice_right_begin = CreateTuple(right_strided_slice_begin_);
+    auto slice_right_end = CreateTuple(right_strided_slice_end_);
+    auto slice_right_strided = CreateTuple(right_strided_slice_strides_);
+    auto slice_right = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_right_begin,
+                                        slice_right_end, slice_right_strided});
     make_tuple_a_inputs.push_back(slice_right);
+    input_nodes.push_back(std::make_pair(slice_right, 1));
   }
+
   auto make_tuple_a = graph->NewCNode(make_tuple_a_inputs);
-  auto alltoall_attrs = CreatNeighborExchangeAttrs(cnode);
-  auto alltoall_v = gen_g.PushBack({gen_g.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a});
-  std::vector<AnfNodePtr> make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)};
+  auto alltoall_attrs = CreateNeighborExchangeAttrs(cnode);
+  auto alltoall_v = gen_g_.PushBack({gen_g_.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a});
+
+  AnfNodePtr conv2d;
+  Attr concat_axis = {AXIS, MakeValue(-1)};
+  OperatorAttrs concat_attrs = {concat_axis};
+
   if (left_need_recv_) {
     std::vector<AnfNodePtr> tuple_getitem_l_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v,
                                                       CreatInt64Imm(0)};
     auto tuple_getitem_l = graph->NewCNode(tuple_getitem_l_inputs);
-    std::vector<AnfNodePtr> make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), cnode->input(1),
-                                                   tuple_getitem_l};
+    std::vector<AnfNodePtr> make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), tuple_getitem_l,
+                                                   cnode->input(1)};
     auto make_tuple_l = graph->NewCNode(make_tuple_l_inputs);
-    auto concat_l = gen_g.PushBack({gen_g.NewOpInst(CONCAT), make_tuple_l});
-    make_tuple_inputs.push_back(concat_l);
+    auto concat_l = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_l});
+
+    if (right_need_recv_) {
+      std::vector<AnfNodePtr> tuple_getitem_r_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v,
+                                                        CreatInt64Imm(1)};
+      auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs);
+      std::vector<AnfNodePtr> make_tuple_r_inputs = {NewValueNode(prim::kPrimMakeTuple), concat_l, tuple_getitem_r};
+      auto make_tuple_r = graph->NewCNode(make_tuple_r_inputs);
+      auto concat_r = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r});
+      conv2d = GenerateConv2DNode(concat_r, cnode);
+    } else {
+      conv2d = GenerateConv2DNode(concat_l, cnode);
+    }
+  } else {  // left no need recv, and right need recv
+    std::vector<AnfNodePtr> tuple_getitem_r_inputs_1 = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v,
+                                                        CreatInt64Imm(0)};
+    auto tuple_getitem_r_1 = graph->NewCNode(tuple_getitem_r_inputs_1);
+    std::vector<AnfNodePtr> make_tuple_r_inputs_1 = {NewValueNode(prim::kPrimMakeTuple), gen_g_.virtual_input_node(),
+                                                     tuple_getitem_r_1};
+    auto make_tuple_r_1 = graph->NewCNode(make_tuple_r_inputs_1);
+    input_nodes.push_back(std::make_pair(make_tuple_r_1, 1));
+
+    auto concat_r_1 = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r_1});
+    conv2d = GenerateConv2DNode(concat_r_1, cnode);
   }
-  if (right_need_recv_) {
-    std::vector<AnfNodePtr> tuple_getitem_r_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v,
-                                                      CreatInt64Imm(0)};
-    auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs);
-    make_tuple_inputs.push_back(tuple_getitem_r);
-  } else {
-    make_tuple_inputs.push_back(cnode->input(1));
-  }
-  auto make_tuple = graph->NewCNode(make_tuple_inputs);
-  Attr concat_axis = {AXIS, MakeValue(-1)};
-  OperatorAttrs concat_attrs = {concat_axis};
-  std::vector<AnfNodePtr> concat_inputs = {gen_g.NewOpInst(CONCAT, concat_attrs), make_tuple};
-  auto concat = graph->NewCNode(concat_inputs);
-  auto conv2d_attrs = CreatConv2DAttrs();
-  auto conv2d = gen_g.PushBack({gen_g.NewOpInst(CONV2D, conv2d_attrs), concat, cnode->input(2)});
+
   replace_graph_ = std::make_shared<std::pair<std::vector<std::pair<AnfNodePtr, int64_t>>, AnfNodePtr>>(
     std::make_pair(input_nodes, conv2d));
   return SUCCESS;
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h
index 1ae1e4a752a..3786dc5f826 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "ir/value.h"
+#include "frontend/parallel/graph_util/generate_graph.h"
 #include "frontend/parallel/auto_parallel/operator_costmodel.h"
 #include "frontend/parallel/ops_info/operator_info.h"
 #include "frontend/parallel/strategy.h"
@@ -57,9 +58,11 @@ class Conv2DInfo : public OperatorInfo {
   void InferSendRecvFlag();
   void InferOverlapShapes();
   void InferStridedSliceAttrs();
+  std::string ReplaceNodeName();
+  AnfNodePtr GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode);
   ReplaceGraphPtr replace_graph(const CNodePtr &cnode) override;
-  OperatorAttrs CreatNeighborExchangeAttrs(const CNodePtr &cnode);
-  OperatorAttrs CreatConv2DAttrs();
+  OperatorAttrs CreateNeighborExchangeAttrs(const CNodePtr &cnode);
+  OperatorAttrs CreateConv2DAttrs();
   Status ComputeReplaceGraph(const CNodePtr &cnode);
 
   int64_t out_channel_ = 1;
@@ -106,6 +109,8 @@ class Conv2DInfo : public OperatorInfo {
   Shapes send_shapes_;
   Shapes recv_shapes_;
 
+  GenerateGraph gen_g_ = GenerateGraph(attrs_);
+
   virtual Status CheckHWStrategy(int64_t h_strategy, int64_t w_strategy);
   virtual void InferNewPadList();
   virtual int64_t ComputeOverlapLeftSizeByRankBias(int64_t rank_bias);
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc
index 35cd2405c03..64a2a0b3b83 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc
@@ -172,6 +172,22 @@ Status GatherDInfo::InferMirrorOps() {
   return SUCCESS;
 }
 
+void GatherDInfo::ReComputeBatchSplitFlagList() {
+  if (InferAttrs() != SUCCESS) {
+    MS_LOG(EXCEPTION) << name_ << ": Infer attrs failed";
+  }
+
+  if (dim_ == 0) {
+    MS_LOG(EXCEPTION)
+      << name_
+      << ": Can not generate batch data parallel strategy since the dim is 0, please set others strategy for it";
+  }
+
+  for (size_t i = 0; i < inputs_shape_.size(); ++i) {
+    split_flag_list_[i] = true;
+  }
+}
+
 Status GatherDInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return SetCostUnderStrategyBase(strategy); }
 
 std::vector<StrategyPtr> GatherDInfo::GenerateOpStrategies(int64_t stage_id) {
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h
index 8288fe11ae1..1d8a2fe24d2 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h
@@ -40,6 +40,7 @@ class GatherDInfo : public OperatorInfo {
   Status InitForCostModel(const StrategyPtr &strategy) override;
   std::vector<StrategyPtr> GenerateOpStrategies(int64_t) override;
   Status SetCostUnderStrategy(const StrategyPtr &) override;
+  void ReComputeBatchSplitFlagList() override;
 
  protected:
   Status GetAttrs() override;
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
index dd6a3237da5..c0c89beb245 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h
@@ -283,6 +283,9 @@ constexpr char ARGMINWITHVALUE[] = "ArgMinWithValue";
 constexpr char CONV2D[] = "Conv2D";
 constexpr char CONV2D_BACK_PROP_INPUT[] = "Conv2DBackpropInput";
 constexpr char CONV2D_TRANSPOSE[] = "Conv2DTranspose";
+constexpr char CONV2D_INFO[] = "Conv2DInfo";
+constexpr char CONV2D_BACK_PROP_INPUT_INFO[] = "Conv2DBackpropInputInfo";
+constexpr char CONV2D_TRANSPOSE_INFO[] = "Conv2DTransposeInfo";
 constexpr char FUSE_BATCH_NORM[] = "FusedBatchNorm";
 constexpr char FUSE_BATCH_NORM_EX[] = "FusedBatchNormEx";
 constexpr char BATCH_NORM[] = "BatchNorm";
diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc
index ae6411f8f35..712d44e509e 100644
--- a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc
+++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc
@@ -64,8 +64,14 @@ Status VirtualOutputInfo::GenerateStrategies(int64_t stage_id) {
   }
   for (auto &shape : inputs_shape_) {
     Shape temp;
-    temp.emplace_back(SizeToLong(total_dev_num));
-    (void)temp.insert(temp.end(), shape.size() - 1, 1);
+    if (!shape.empty()) {
+      if (shape[0] % total_dev_num == 0) {
+        temp.emplace_back(SizeToLong(total_dev_num));
+      } else {
+        temp.emplace_back(1);
+      }
+      (void)temp.insert(temp.end(), shape.size() - 1, 1);
+    }
     strategy.push_back(temp);
   }
   sp = std::make_shared<Strategy>(stage_id, strategy);
diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
index 357b115a871..043f8dd9833 100644
--- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc
+++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc
@@ -2038,7 +2038,12 @@ void SetVirtualDatasetStrategy(const CNodePtr &node) {
       if (shape_list[0][i].empty()) {
         MS_LOG(EXCEPTION) << "shape_list[ " << i << " ].size() is zero";
       }
-      Dimensions input_strategy = {dev_num};
+      Dimensions input_strategy;
+      if (!shape_list[0][i].empty() && shape_list[0][i][0] % dev_num == 0) {
+        input_strategy.push_back(dev_num);
+      } else if (!shape_list[0][i].empty()) {
+        input_strategy.push_back(1);
+      }
       for (size_t j = 1; j < shape_list[0][i].size(); j++) {
         input_strategy.push_back(1);
       }
@@ -3222,12 +3227,9 @@ void MarkForwardCNode(const FuncGraphPtr &root) {
   }
 }
 
-Status ParallelInit() {
-  MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());
+CommInfo GetCommInfo() {
   int64_t device_num = ParallelContext::GetInstance()->device_num();
   int64_t global_rank = ParallelContext::GetInstance()->global_rank();
-  int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num();
-  std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode();
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   std::string backend = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
@@ -3240,15 +3242,8 @@ Status ParallelInit() {
     world_group = NCCL_WORLD_GROUP;
     communication_backend = NCCL_BACKEND;
   } else {
-    MS_LOG(ERROR) << "Invalid communication backend: " << backend;
-    return FAILED;
+    MS_LOG(EXCEPTION) << "Invalid communication backend: " << backend;
   }
-
-  if (split_stage_num <= 0) {
-    MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number";
-    return FAILED;
-  }
-
   uint32_t world_rank_size = 0;
   if (!ParallelContext::GetInstance()->device_num_is_set()) {
     if (!CommManager::GetInstance().GetRankSize(world_group, &world_rank_size)) {
@@ -3266,7 +3261,21 @@ Status ParallelInit() {
     global_rank = UintToInt(rank_id);
     MS_LOG(INFO) << "Get global rank from communication model, the global rank is  " << global_rank;
   }
+  CommInfo comm_info{device_num, global_rank, world_group, communication_backend};
+  return comm_info;
+}
 
+Status ParallelInit() {
+  MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());
+  int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num();
+  std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode();
+  if (split_stage_num <= 0) {
+    MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number";
+    return FAILED;
+  }
+  auto comm_info = GetCommInfo();
+  int64_t device_num = comm_info.device_num;
+  int64_t global_rank = comm_info.global_rank;
   if ((device_num <= 0) || (device_num > MAX_DEVICE_NUM)) {
     MS_LOG(ERROR) << "Invalid device num " << device_num;
     return FAILED;
@@ -3293,13 +3302,14 @@ Status ParallelInit() {
     return FAILED;
   }
 
-  if (!InitDevice(device_num, global_rank, communication_backend, stages)) {
+  if (!InitDevice(device_num, global_rank, comm_info.communication_backend, stages)) {
     MS_LOG(ERROR) << "Init device failed";
     return FAILED;
   }
 
   MS_LOG(INFO) << "The parallel context: dev num: " << device_num << ", global rank: " << global_rank
-               << ", backend: " << backend << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean()
+               << ", communication_backend: " << comm_info.communication_backend
+               << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean()
                << ", gradient_fp32_sync: " << ParallelContext::GetInstance()->gradient_fp32_sync();
 
   return SUCCESS;
@@ -3714,7 +3724,13 @@ void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr
 
 bool IsInsertVirtualOutput(const FuncGraphPtr &root) {
   MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance());
-  return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty());
+  auto comm_info = GetCommInfo();
+  int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num();
+  int32_t per_stage_device_num = comm_info.device_num / split_stage_num;
+  int32_t current_stage = comm_info.global_rank / per_stage_device_num;
+  MS_LOG(INFO) << "The current stage is: " << current_stage;
+  return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty() &&
+          current_stage == split_stage_num - 1);
 }
 
 bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) {
diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h
index 71c69705080..996cc11ba33 100644
--- a/mindspore/ccsrc/frontend/parallel/step_parallel.h
+++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h
@@ -47,6 +47,13 @@ struct LossNodeInfo {
   CNodePtr loss_node = nullptr;
 };
 
+struct CommInfo {
+  int64_t device_num = 1;
+  int64_t global_rank = 0;
+  std::string world_group;
+  std::string communication_backend;
+};
+
 struct ParameterSliceInfo {
   Shape slice_shape;
   RankList group_ranks;
@@ -178,6 +185,8 @@ void InsertVirtualOutput(const FuncGraphPtr &root, const std::vector<AnfNodePtr>
 
 std::string MirrorOpName();
 
+CommInfo GetCommInfo();
+
 void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr &manager, int64_t pipeline_stages);
 }  // namespace parallel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt
index 6107952a89a..454d33ebb6d 100644
--- a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt
@@ -93,6 +93,7 @@ add_dependencies(engine-gnn core)
 add_dependencies(engine core)
 add_dependencies(callback core)
 add_dependencies(audio-kernels core)
+add_dependencies(audio-ir core)
 add_dependencies(audio-ir-kernels core)
 add_dependencies(text core)
 add_dependencies(text-kernels core)
@@ -156,6 +157,7 @@ set(submodules
         $<TARGET_OBJECTS:engine-cache-client>
         $<TARGET_OBJECTS:engine>
         $<TARGET_OBJECTS:audio-kernels>
+        $<TARGET_OBJECTS:audio-ir>
         $<TARGET_OBJECTS:audio-ir-kernels>
         $<TARGET_OBJECTS:text>
         $<TARGET_OBJECTS:text-kernels>
diff --git a/mindspore/ccsrc/minddata/dataset/api/audio.cc b/mindspore/ccsrc/minddata/dataset/api/audio.cc
index 5a9a6498abd..eb4f8c20c1d 100644
--- a/mindspore/ccsrc/minddata/dataset/api/audio.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc
@@ -16,12 +16,56 @@
 
 #include "minddata/dataset/include/dataset/audio.h"
 
+#include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
+#include "minddata/dataset/audio/ir/kernels/angle_ir.h"
 #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
 
 namespace mindspore {
 namespace dataset {
 
 namespace audio {
+// AllpassBiquad Transform Operation.
+struct AllpassBiquad::Data {
+  Data(int32_t sample_rate, float central_freq, float Q)
+      : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
+  int32_t sample_rate_;
+  float central_freq_;
+  float Q_;
+};
+
+AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q)
+    : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
+
+std::shared_ptr<TensorOperation> AllpassBiquad::Parse() {
+  return std::make_shared<AllpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
+}
+
+// AmplitudeToDB Operation.
+struct AmplitudeToDB::Data {
+  Data(ScaleType stype, float ref_value, float amin, float top_db)
+      : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {}
+  ScaleType stype_;
+  float ref_value_;
+  float amin_;
+  float top_db_;
+};
+
+AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db)
+    : data_(std::make_shared<Data>(stype, ref_value, amin, top_db)) {}
+
+std::shared_ptr<TensorOperation> AmplitudeToDB::Parse() {
+  return std::make_shared<AmplitudeToDBOperation>(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_);
+}
+
+// Angle Transform Operation.
+Angle::Angle() {}
+
+std::shared_ptr<TensorOperation> Angle::Parse() { return std::make_shared<AngleOperation>(); }
 // BandBiquad Transform Operation.
 struct BandBiquad::Data {
   Data(int32_t sample_rate, float central_freq, float Q, bool noise)
@@ -38,6 +82,74 @@ BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool no
 std::shared_ptr<TensorOperation> BandBiquad::Parse() {
   return std::make_shared<BandBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_);
 }
+
+// BandpassBiquad Transform Operation.
+struct BandpassBiquad::Data {
+  Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
+      : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {}
+  int32_t sample_rate_;
+  float central_freq_;
+  float Q_;
+  bool const_skirt_gain_;
+};
+
+BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
+    : data_(std::make_shared<Data>(sample_rate, central_freq, Q, const_skirt_gain)) {}
+
+std::shared_ptr<TensorOperation> BandpassBiquad::Parse() {
+  return std::make_shared<BandpassBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_,
+                                                   data_->const_skirt_gain_);
+}
+
+// BandrejectBiquad Transform Operation.
+struct BandrejectBiquad::Data {
+  Data(int32_t sample_rate, float central_freq, float Q)
+      : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
+  int32_t sample_rate_;
+  float central_freq_;
+  float Q_;
+};
+
+BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q)
+    : data_(std::make_shared<Data>(sample_rate, central_freq, Q)) {}
+
+std::shared_ptr<TensorOperation> BandrejectBiquad::Parse() {
+  return std::make_shared<BandrejectBiquadOperation>(data_->sample_rate_, data_->central_freq_, data_->Q_);
+}
+
+// BassBiquad Transform Operation.
+struct BassBiquad::Data {
+  Data(int32_t sample_rate, float gain, float central_freq, float Q)
+      : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {}
+  int32_t sample_rate_;
+  float gain_;
+  float central_freq_;
+  float Q_;
+};
+
+BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q)
+    : data_(std::make_shared<Data>(sample_rate, gain, central_freq, Q)) {}
+
+std::shared_ptr<TensorOperation> BassBiquad::Parse() {
+  return std::make_shared<BassBiquadOperation>(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_);
+}
+
+// TimeStretch Operation.
+struct TimeStretch::Data {
+  explicit Data(float hop_length, int n_freq, float fixed_rate)
+      : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {}
+  float hop_length_;
+  int n_freq_;
+  float fixed_rate_;
+};
+
+TimeStretch::TimeStretch(float hop_length, int n_freq, float fixed_rate)
+    : data_(std::make_shared<Data>(hop_length, n_freq, fixed_rate)) {}
+
+std::shared_ptr<TensorOperation> TimeStretch::Parse() {
+  return std::make_shared<TimeStretchOperation>(data_->hop_length_, data_->n_freq_, data_->fixed_rate_);
+}
+
 }  // namespace audio
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
index bb1d65bc2ca..cf4898f9766 100644
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -85,7 +85,7 @@
 // IR leaf nodes
 #include "minddata/dataset/engine/ir/datasetops/source/album_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
-#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h"
+#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h"
 
 // IR leaf nodes disabled for android
 #ifndef ENABLE_ANDROID
@@ -95,6 +95,7 @@
 #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
+#include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
@@ -928,6 +929,32 @@ CSVDataset::CSVDataset(const std::vector<std::vector<char>> &dataset_files, char
   ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
 
+FlickrDataset::FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file,
+                             bool decode, const std::shared_ptr<Sampler> &sampler,
+                             const std::shared_ptr<DatasetCache> &cache) {
+  auto sampler_obj = sampler ? sampler->Parse() : nullptr;
+  auto ds =
+    std::make_shared<FlickrNode>(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache);
+  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
+}
+
+FlickrDataset::FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file,
+                             bool decode, const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache) {
+  auto sampler_obj = sampler ? sampler->Parse() : nullptr;
+  auto ds =
+    std::make_shared<FlickrNode>(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache);
+  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
+}
+
+FlickrDataset::FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file,
+                             bool decode, const std::reference_wrapper<Sampler> sampler,
+                             const std::shared_ptr<DatasetCache> &cache) {
+  auto sampler_obj = sampler.get().Parse();
+  auto ds =
+    std::make_shared<FlickrNode>(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache);
+  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
+}
+
 ImageFolderDataset::ImageFolderDataset(const std::vector<char> &dataset_dir, bool decode,
                                        const std::shared_ptr<Sampler> &sampler,
                                        const std::set<std::vector<char>> &extensions,
@@ -1110,29 +1137,27 @@ MnistDataset::MnistDataset(const std::vector<char> &dataset_dir, const std::vect
   ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
 
-
-LibriSpeechDataset::LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+CmuArcticDataset::CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
                            const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache) {
   auto sampler_obj = sampler ? sampler->Parse() : nullptr;
-  auto ds = std::make_shared<LibriSpeechNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
+  auto ds = std::make_shared<CmuArcticNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
   ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
 
-LibriSpeechDataset::LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
+CmuArcticDataset::CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
                            const std::shared_ptr<DatasetCache> &cache) {
   auto sampler_obj = sampler ? sampler->Parse() : nullptr;
-  auto ds = std::make_shared<LibriSpeechNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
+  auto ds = std::make_shared<CmuArcticNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
   ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
 
-LibriSpeechDataset::LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+CmuArcticDataset::CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
                            const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache) {
   auto sampler_obj = sampler.get().Parse();
-  auto ds = std::make_shared<LibriSpeechNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
+  auto ds = std::make_shared<CmuArcticNode>(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache);
   ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
 }
 
-
 #ifndef ENABLE_ANDROID
 TextFileDataset::TextFileDataset(const std::vector<std::vector<char>> &dataset_files, int64_t num_samples,
                                  ShuffleMode shuffle, int32_t num_shards, int32_t shard_id,
diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
index e0c85d69f60..8f4c63469cb 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc
@@ -17,12 +17,58 @@
 
 #include "minddata/dataset/api/python/pybind_conversion.h"
 #include "minddata/dataset/api/python/pybind_register.h"
+#include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
+#include "minddata/dataset/audio/ir/kernels/angle_ir.h"
 #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
+#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
 #include "minddata/dataset/include/dataset/transforms.h"
 
 namespace mindspore {
 namespace dataset {
 
+PYBIND_REGISTER(
+  AllpassBiquadOperation, 1, ([](const py::module *m) {
+    (void)py::class_<audio::AllpassBiquadOperation, TensorOperation, std::shared_ptr<audio::AllpassBiquadOperation>>(
+      *m, "AllpassBiquadOperation")
+      .def(py::init([](int32_t sample_rate, float central_freq, float Q) {
+        auto allpass_biquad = std::make_shared<audio::AllpassBiquadOperation>(sample_rate, central_freq, Q);
+        THROW_IF_ERROR(allpass_biquad->ValidateParams());
+        return allpass_biquad;
+      }));
+  }));
+
+PYBIND_REGISTER(
+  AmplitudeToDBOperation, 1, ([](const py::module *m) {
+    (void)py::class_<audio::AmplitudeToDBOperation, TensorOperation, std::shared_ptr<audio::AmplitudeToDBOperation>>(
+      *m, "AmplitudeToDBOperation")
+      .def(py::init([](ScaleType stype, float ref_value, float amin, float top_db) {
+        auto amplitude_to_db = std::make_shared<audio::AmplitudeToDBOperation>(stype, ref_value, amin, top_db);
+        THROW_IF_ERROR(amplitude_to_db->ValidateParams());
+        return amplitude_to_db;
+      }));
+  }));
+
+PYBIND_REGISTER(ScaleType, 0, ([](const py::module *m) {
+                  (void)py::enum_<ScaleType>(*m, "ScaleType", py::arithmetic())
+                    .value("DE_SCALETYPE_MAGNITUDE", ScaleType::kMagnitude)
+                    .value("DE_SCALETYPE_POWER", ScaleType::kPower)
+                    .export_values();
+                }));
+
+PYBIND_REGISTER(AngleOperation, 1, ([](const py::module *m) {
+                  (void)py::class_<audio::AngleOperation, TensorOperation, std::shared_ptr<audio::AngleOperation>>(
+                    *m, "AngleOperation")
+                    .def(py::init([]() {
+                      auto angle = std::make_shared<audio::AngleOperation>();
+                      THROW_IF_ERROR(angle->ValidateParams());
+                      return angle;
+                    }));
+                }));
+
 PYBIND_REGISTER(
   BandBiquadOperation, 1, ([](const py::module *m) {
     (void)py::class_<audio::BandBiquadOperation, TensorOperation, std::shared_ptr<audio::BandBiquadOperation>>(
@@ -34,5 +80,49 @@ PYBIND_REGISTER(
       }));
   }));
 
+PYBIND_REGISTER(
+  BandpassBiquadOperation, 1, ([](const py::module *m) {
+    (void)py::class_<audio::BandpassBiquadOperation, TensorOperation, std::shared_ptr<audio::BandpassBiquadOperation>>(
+      *m, "BandpassBiquadOperation")
+      .def(py::init([](int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) {
+        auto bandpass_biquad =
+          std::make_shared<audio::BandpassBiquadOperation>(sample_rate, central_freq, Q, const_skirt_gain);
+        THROW_IF_ERROR(bandpass_biquad->ValidateParams());
+        return bandpass_biquad;
+      }));
+  }));
+
+PYBIND_REGISTER(BandrejectBiquadOperation, 1, ([](const py::module *m) {
+                  (void)py::class_<audio::BandrejectBiquadOperation, TensorOperation,
+                                   std::shared_ptr<audio::BandrejectBiquadOperation>>(*m, "BandrejectBiquadOperation")
+                    .def(py::init([](int32_t sample_rate, float central_freq, float Q) {
+                      auto bandreject_biquad =
+                        std::make_shared<audio::BandrejectBiquadOperation>(sample_rate, central_freq, Q);
+                      THROW_IF_ERROR(bandreject_biquad->ValidateParams());
+                      return bandreject_biquad;
+                    }));
+                }));
+
+PYBIND_REGISTER(
+  BassBiquadOperation, 1, ([](const py::module *m) {
+    (void)py::class_<audio::BassBiquadOperation, TensorOperation, std::shared_ptr<audio::BassBiquadOperation>>(
+      *m, "BassBiquadOperation")
+      .def(py::init([](int32_t sample_rate, float gain, float central_freq, float Q) {
+        auto bass_biquad = std::make_shared<audio::BassBiquadOperation>(sample_rate, gain, central_freq, Q);
+        THROW_IF_ERROR(bass_biquad->ValidateParams());
+        return bass_biquad;
+      }));
+  }));
+
+PYBIND_REGISTER(
+  TimeStretchOperation, 1, ([](const py::module *m) {
+    (void)py::class_<audio::TimeStretchOperation, TensorOperation, std::shared_ptr<audio::TimeStretchOperation>>(
+      *m, "TimeStretchOperation")
+      .def(py::init([](float hop_length, int n_freq, float fixed_rate) {
+        auto timestretch = std::make_shared<audio::TimeStretchOperation>(hop_length, n_freq, fixed_rate);
+        THROW_IF_ERROR(timestretch->ValidateParams());
+        return timestretch;
+      }));
+  }));
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc
index 7ef87c941d5..cc486b10336 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc
@@ -70,11 +70,23 @@ PYBIND_REGISTER(DatasetNode, 1, ([](const py::module *m) {
                         return zip;
                       },
                       py::arg("datasets"))
-                    .def("to_json", [](std::shared_ptr<DatasetNode> self, const std::string &json_filepath) {
-                      nlohmann::json args;
-                      auto serdas = std::make_shared<Serdes>();
-                      THROW_IF_ERROR(serdas->SaveToJSON(self, json_filepath, &args));
-                      return args.dump();
+                    .def("to_json",
+                         [](std::shared_ptr<DatasetNode> self, const std::string &json_filepath) {
+                           nlohmann::json args;
+                           THROW_IF_ERROR(Serdes::SaveToJSON(self, json_filepath, &args));
+                           return args.dump();
+                         })
+                    .def_static("from_json_file",
+                                [](const std::string &json_filepath) {
+                                  std::shared_ptr<DatasetNode> output;
+                                  THROW_IF_ERROR(Serdes::Deserialize(json_filepath, &output));
+                                  return output;
+                                })
+                    .def_static("from_json_string", [](const std::string &json_string) {
+                      std::shared_ptr<DatasetNode> output;
+                      nlohmann::json json_obj = nlohmann::json::parse(json_string);
+                      THROW_IF_ERROR(Serdes::ConstructPipeline(json_obj, &output));
+                      return output;
                     });
                 }));
 
diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc
index 73422631b43..a6265bcf592 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc
@@ -34,7 +34,7 @@
 #include "minddata/dataset/engine/ir/datasetops/source/generator_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
-#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h"
+#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
 #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
 
@@ -211,13 +211,13 @@ PYBIND_REGISTER(MnistNode, 2, ([](const py::module *m) {
 
 
 
-PYBIND_REGISTER(LibriSpeechNode, 2, ([](const py::module *m) {
-                  (void)py::class_<LibriSpeechNode, DatasetNode, std::shared_ptr<LibriSpeechNode>>(*m, "LibriSpeechNode",
-                                                                                       "to create an LibriSpeechNode")
+PYBIND_REGISTER(CmuArcticNode, 2, ([](const py::module *m) {
+                  (void)py::class_<CmuArcticNode, DatasetNode, std::shared_ptr<CmuArcticNode>>(*m, "CmuArcticNode",
+                                                                                       "to create an CmuArcticNode")
                     .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) {
-                      auto librispeech = std::make_shared<LibriSpeechNode>(dataset_dir, usage, toSamplerObj(sampler), nullptr);
-                      THROW_IF_ERROR(librispeech->ValidateParams());
-                      return librispeech;
+                      auto cmuarctic = std::make_shared<CmuArcticNode>(dataset_dir, usage, toSamplerObj(sampler), nullptr);
+                      THROW_IF_ERROR(cmuarctic->ValidateParams());
+                      return cmuarctic;
                     }));
                 }));
 
diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc
index 50c427633fd..524b1cd432d 100644
--- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc
@@ -18,6 +18,7 @@
 #include "minddata/dataset/api/python/pybind_register.h"
 #include "minddata/dataset/include/dataset/transforms.h"
 
+#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h"
 #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h"
 #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h"
 #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h"
@@ -67,6 +68,17 @@
 namespace mindspore {
 namespace dataset {
 
+PYBIND_REGISTER(
+  AdjustGammaOperation, 1, ([](const py::module *m) {
+    (void)py::class_<vision::AdjustGammaOperation, TensorOperation, std::shared_ptr<vision::AdjustGammaOperation>>(
+      *m, "AdjustGammaOperation")
+      .def(py::init([](float gamma, float gain) {
+        auto ajust_gamma = std::make_shared<vision::AdjustGammaOperation>(gamma, gain);
+        THROW_IF_ERROR(ajust_gamma->ValidateParams());
+        return ajust_gamma;
+      }));
+  }));
+
 PYBIND_REGISTER(
   AutoContrastOperation, 1, ([](const py::module *m) {
     (void)py::class_<vision::AutoContrastOperation, TensorOperation, std::shared_ptr<vision::AutoContrastOperation>>(
diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc
index c451cd19cc5..e933e5fb44b 100644
--- a/mindspore/ccsrc/minddata/dataset/api/vision.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc
@@ -21,6 +21,7 @@
 #endif
 
 #include "minddata/dataset/include/dataset/transforms.h"
+#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h"
 #include "minddata/dataset/kernels/ir/vision/affine_ir.h"
 #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h"
 #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h"
@@ -118,6 +119,19 @@ std::shared_ptr<TensorOperation> Affine::Parse() {
 }
 
 #ifndef ENABLE_ANDROID
+// AdjustGamma Transform Operation.
+struct AdjustGamma::Data {
+  Data(float gamma, float gain) : gamma_(gamma), gain_(gain) {}
+  float gamma_;
+  float gain_;
+};
+
+AdjustGamma::AdjustGamma(float gamma, float gain) : data_(std::make_shared<Data>(gamma, gain)) {}
+
+std::shared_ptr<TensorOperation> AdjustGamma::Parse() {
+  return std::make_shared<AdjustGammaOperation>(data_->gamma_, data_->gain_);
+}
+
 // AutoContrast Transform Operation.
 struct AutoContrast::Data {
   Data(float cutoff, const std::vector<uint32_t> &ignore) : cutoff_(cutoff), ignore_(ignore) {}
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
index ceebec399c9..f6f6040e52a 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt
@@ -2,3 +2,5 @@ add_subdirectory(kernels)
 
 file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
+
+add_library(audio-ir OBJECT validators.cc)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
index a2bd0355c0f..0547fd3850b 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt
@@ -2,5 +2,12 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 
 add_library(audio-ir-kernels OBJECT
+        allpass_biquad_ir.cc
+        amplitude_to_db_ir.cc
+        angle_ir.cc
         band_biquad_ir.cc
+        bandpass_biquad_ir.cc
+        bandreject_biquad_ir.cc
+        bass_biquad_ir.cc
+        time_stretch_ir.cc
         )
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc
index b760aae4844..35cf10b83c6 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc
@@ -16,20 +16,20 @@
 
 #include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h"
 
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/allpass_biquad_op.h"
 
+#include "minddata/dataset/audio/ir/validators.h"
+
 namespace mindspore {
 namespace dataset {
 namespace audio {
-
 // AllpassBiquadOperation
 AllpassBiquadOperation::AllpassBiquadOperation(int32_t sample_rate, float central_freq, float Q)
     : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
 
 Status AllpassBiquadOperation::ValidateParams() {
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("AllpassBiquad", "sample_rate", sample_rate_));
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("AllpassBiquad", "central_freq", central_freq_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("AllpassBiquad", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("AllpassBiquad", "central_freq", central_freq_));
   RETURN_IF_NOT_OK(ValidateScalar("AllpassBiquad", "Q", Q_, {0, 1.0}, true, false));
   return Status::OK();
 }
@@ -38,7 +38,6 @@ std::shared_ptr<TensorOp> AllpassBiquadOperation::Build() {
   std::shared_ptr<AllpassBiquadOp> tensor_op = std::make_shared<AllpassBiquadOp>(sample_rate_, central_freq_, Q_);
   return tensor_op;
 }
-
 Status AllpassBiquadOperation::to_json(nlohmann::json *out_json) {
   nlohmann::json args;
   args["sample_rate"] = sample_rate_;
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h
index 398287db244..c8d2be832bf 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h
@@ -20,7 +20,6 @@
 #include <memory>
 #include <string>
 #include <vector>
-
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/include/dataset/transforms.h"
@@ -28,8 +27,9 @@
 
 namespace mindspore {
 namespace dataset {
-namespace audio {
 
+namespace audio {
+// Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kAllpassBiquadOperation[] = "AllpassBiquad";
 
 class AllpassBiquadOperation : public TensorOperation {
@@ -52,6 +52,7 @@ class AllpassBiquadOperation : public TensorOperation {
   float Q_;
 };
 }  // namespace audio
+
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_ALLPASS_BIQUAD_IR_H_
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc
index 61313e7fac1..80412b1c437 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc
@@ -15,15 +15,15 @@
  */
 
 #include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h"
+#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h"
 
 #include "minddata/dataset/audio/ir/validators.h"
-#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h"
 
 namespace mindspore {
 namespace dataset {
 namespace audio {
 
-// AmplitudeToDBOperation
+// AmplitudeToDB
 AmplitudeToDBOperation::AmplitudeToDBOperation(ScaleType stype, float ref_value, float amin, float top_db)
     : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {}
 
@@ -32,9 +32,9 @@ AmplitudeToDBOperation::~AmplitudeToDBOperation() = default;
 std::string AmplitudeToDBOperation::Name() const { return kAmplitudeToDBOperation; }
 
 Status AmplitudeToDBOperation::ValidateParams() {
-  RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("AmplitudeToDB", "top_db", top_db_));
-  RETURN_IF_NOT_OK(ValidateFloatScalarPositive("AmplitudeToDB", "amin", amin_));
-  RETURN_IF_NOT_OK(ValidateFloatScalarPositive("AmplitudeToDB", "ref_value", ref_value_));
+  RETURN_IF_NOT_OK(CheckFloatScalarNonNegative("AmplitudeToDB", "top_db", top_db_));
+  RETURN_IF_NOT_OK(CheckFloatScalarPositive("AmplitudeToDB", "amin", amin_));
+  RETURN_IF_NOT_OK(CheckFloatScalarPositive("AmplitudeToDB", "ref_value", ref_value_));
 
   return Status::OK();
 }
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc
index 131a440e279..53b1850e976 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc
@@ -16,12 +16,13 @@
 
 #include "minddata/dataset/audio/ir/kernels/angle_ir.h"
 
+// Kernel Audio headers
 #include "minddata/dataset/audio/kernels/angle_op.h"
 
 namespace mindspore {
 namespace dataset {
-namespace audio {
 
+namespace audio {
 // AngleOperation
 AngleOperation::AngleOperation() {}
 
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h
index 0c35ba075b0..e0f1ce2ff80 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h
@@ -29,8 +29,9 @@
 
 namespace mindspore {
 namespace dataset {
-namespace audio {
 
+namespace audio {
+// Char arrays storing name of corresponding classes
 constexpr char kAngleOperation[] = "Angle";
 
 class AngleOperation : public TensorOperation {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc
index 062cfd2a43a..a335f6500fd 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc
@@ -16,13 +16,13 @@
 
 #include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h"
 
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/bandpass_biquad_op.h"
 
+#include "minddata/dataset/audio/ir/validators.h"
+
 namespace mindspore {
 namespace dataset {
 namespace audio {
-
 // BandpassBiquadOperation
 BandpassBiquadOperation::BandpassBiquadOperation(int32_t sample_rate, float central_freq, float Q,
                                                  bool const_skirt_gain)
@@ -30,10 +30,9 @@ BandpassBiquadOperation::BandpassBiquadOperation(int32_t sample_rate, float cent
 
 Status BandpassBiquadOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateScalar("BandpassBiquad", "Q", Q_, {0, 1.0}, true, false));
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("BandpassBiquad", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("BandpassBiquad", "sample_rate", sample_rate_));
   return Status::OK();
 }
-
 std::shared_ptr<TensorOp> BandpassBiquadOperation::Build() {
   std::shared_ptr<BandpassBiquadOp> tensor_op =
     std::make_shared<BandpassBiquadOp>(sample_rate_, central_freq_, Q_, const_skirt_gain_);
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h
index 309d0453833..23cb220e9f1 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h
@@ -21,7 +21,6 @@
 #include <string>
 #include <utility>
 #include <vector>
-
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/include/dataset/transforms.h"
@@ -29,8 +28,9 @@
 
 namespace mindspore {
 namespace dataset {
-namespace audio {
 
+namespace audio {
+// Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kBandpassBiquadOperation[] = "BandpassBiquad";
 
 class BandpassBiquadOperation : public TensorOperation {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc
index f66c65030da..0688cb6b4d6 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc
@@ -15,21 +15,19 @@
  */
 
 #include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h"
-
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/bandreject_biquad_op.h"
+#include "minddata/dataset/audio/ir/validators.h"
 
 namespace mindspore {
 namespace dataset {
 namespace audio {
-
 // BandrejectBiquadOperation
 BandrejectBiquadOperation::BandrejectBiquadOperation(int32_t sample_rate, float central_freq, float Q)
     : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {}
 
 Status BandrejectBiquadOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateScalar("BandrejectBiquad", "Q", Q_, {0, 1.0}, true, false));
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("BandrejectBiquad", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("BandrejectBiquad", "sample_rate", sample_rate_));
   return Status::OK();
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h
index 28b75c60739..9a38185c4b8 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h
@@ -16,12 +16,10 @@
 
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_BANDREJECT_BIQUAD_IR_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_BANDREJECT_BIQUAD_IR_H_
-
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
-
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/include/dataset/transforms.h"
@@ -29,8 +27,10 @@
 
 namespace mindspore {
 namespace dataset {
+
 namespace audio {
 
+// Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kBandrejectBiquadOperation[] = "BandrejectBiquad";
 
 class BandrejectBiquadOperation : public TensorOperation {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc
index 83766e50a6a..f2f22aff0be 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc
@@ -16,9 +16,10 @@
 
 #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h"
 
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/bass_biquad_op.h"
 
+#include "minddata/dataset/audio/ir/validators.h"
+
 namespace mindspore {
 namespace dataset {
 namespace audio {
@@ -29,7 +30,7 @@ BassBiquadOperation::BassBiquadOperation(int32_t sample_rate, float gain, float
 
 Status BassBiquadOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateScalar("BassBiquad", "Q", Q_, {0, 1.0}, true, false));
-  RETURN_IF_NOT_OK(ValidateScalarNotZero("BassBiquad", "sample_rate", sample_rate_));
+  RETURN_IF_NOT_OK(CheckScalarNotZero("BassBiquad", "sample_rate", sample_rate_));
   return Status::OK();
 }
 
@@ -37,7 +38,6 @@ std::shared_ptr<TensorOp> BassBiquadOperation::Build() {
   std::shared_ptr<BassBiquadOp> tensor_op = std::make_shared<BassBiquadOp>(sample_rate_, gain_, central_freq_, Q_);
   return tensor_op;
 }
-
 Status BassBiquadOperation::to_json(nlohmann::json *out_json) {
   nlohmann::json args;
   args["sample_rate"] = sample_rate_;
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h
index 725000591a7..1fdd38b8a90 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h
@@ -31,6 +31,7 @@ namespace mindspore {
 namespace dataset {
 namespace audio {
 
+// Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kBassBiquadOperation[] = "BassBiquad";
 
 class BassBiquadOperation : public TensorOperation {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc
index 4a94c4c6693..a78c4523705 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc
@@ -14,15 +14,14 @@
  * limitations under the License.
  */
 #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h"
-
-#include "minddata/dataset/audio/ir/validators.h"
 #include "minddata/dataset/audio/kernels/time_stretch_op.h"
 
+#include "minddata/dataset/audio/ir/validators.h"
 namespace mindspore {
 namespace dataset {
 namespace audio {
 
-// TimeStretchOperation
+// TimeStretch
 TimeStretchOperation::TimeStretchOperation(float hop_length, int n_freq, float fixed_rate)
     : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {}
 
@@ -32,10 +31,10 @@ std::string TimeStretchOperation::Name() const { return kTimeStretchOperation; }
 
 Status TimeStretchOperation::ValidateParams() {
   //  param check
-  RETURN_IF_NOT_OK(ValidateFloatScalarPositive("TimeStretch", "hop_length", hop_length_));
-  RETURN_IF_NOT_OK(ValidateIntScalarPositive("TimeStretch", "n_freq", n_freq_));
-  RETURN_IF_NOT_OK(ValidateFloatScalarNotNan("TimeStretch", "fixed_rate", fixed_rate_));
-  RETURN_IF_NOT_OK(ValidateFloatScalarPositive("TimeStretch", "fixed_rate", fixed_rate_));
+  RETURN_IF_NOT_OK(CheckFloatScalarPositive("TimeStretch", "hop_length", hop_length_));
+  RETURN_IF_NOT_OK(CheckIntScalarPositive("TimeStretch", "n_freq", n_freq_));
+  RETURN_IF_NOT_OK(CheckFloatScalarNotNan("TimeStretch", "fixed_rate", fixed_rate_));
+  RETURN_IF_NOT_OK(CheckFloatScalarPositive("TimeStretch", "fixed_rate", fixed_rate_));
   return Status::OK();
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc
index e3f8c127b54..7700298c1a7 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc
@@ -17,20 +17,82 @@
 
 namespace mindspore {
 namespace dataset {
-
-Status ValidateIntScalarNonNegative(const std::string &op_name, const std::string &scalar_name, int32_t scalar) {
-  RETURN_IF_NOT_OK(ValidateScalar(op_name, scalar_name, scalar, {0}, false));
+/* ####################################### Validator Functions ############################################ */
+Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar) {
+  RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, true));
   return Status::OK();
 }
 
-Status ValidateFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar) {
+Status CheckFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar) {
   if (std::isnan(scalar)) {
-    std::string err_msg = op_name + ": " + scalar_name + " should be specified, got: Nan";
+    std::string err_msg = op_name + ":" + scalar_name + " should be specified, got: Nan.";
     MS_LOG(ERROR) << err_msg;
     return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
   }
   return Status::OK();
 }
 
+Status CheckFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar) {
+  RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, false));
+  return Status::OK();
+}
+
+Status CheckIntScalarPositive(const std::string &op_name, const std::string &scalar_name, int32_t scalar) {
+  RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, true));
+  return Status::OK();
+}
+
+Status CheckStringScalarInList(const std::string &op_name, const std::string &scalar_name, const std::string &scalar,
+                               const std::vector<std::string> &str_vec) {
+  auto ret = std::find(str_vec.begin(), str_vec.end(), scalar);
+  if (ret == str_vec.end()) {
+    std::string interval_description = "[";
+    for (int m = 0; m < str_vec.size(); m++) {
+      std::string word = str_vec[m];
+      interval_description = interval_description + word;
+      if (m != str_vec.size() - 1) interval_description = interval_description + ", ";
+    }
+    interval_description = interval_description + "]";
+
+    std::string err_msg = op_name + ": " + scalar_name + " must be one of " + interval_description + ", got: " + scalar;
+    MS_LOG(ERROR) << err_msg;
+    return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
+  }
+  return Status::OK();
+}
+
+template <typename T>
+Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const T scalar,
+                   const std::vector<T> &range, bool left_open_interval, bool right_open_interval) {
+  if (range.empty() || range.size() > 2) {
+    std::string err_msg = "Range check expecting size 1 or 2, but got: " + std::to_string(range.size());
+    MS_LOG(ERROR) << err_msg;
+    return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
+  }
+  if ((left_open_interval && scalar <= range[0]) || (!left_open_interval && scalar < range[0])) {
+    std::string interval_description = left_open_interval ? " greater than " : " greater than or equal to ";
+    std::string err_msg = op_name + ":" + scalar_name + " must be" + interval_description + std::to_string(range[0]) +
+                          ", got: " + std::to_string(scalar);
+    MS_LOG(ERROR) << err_msg;
+    return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
+  }
+  if (range.size() == 2) {
+    if ((right_open_interval && scalar >= range[1]) || (!right_open_interval && scalar > range[1])) {
+      std::string left_bracket = left_open_interval ? "(" : "[";
+      std::string right_bracket = right_open_interval ? ")" : "]";
+      std::string err_msg = op_name + ":" + scalar_name + " is out of range " + left_bracket +
+                            std::to_string(range[0]) + ", " + std::to_string(range[1]) + right_bracket +
+                            ", got: " + std::to_string(scalar);
+      MS_LOG(ERROR) << err_msg;
+      return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
+    }
+  }
+  return Status::OK();
+}
+template Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const float scalar,
+                            const std::vector<float> &range, bool left_open_interval, bool right_open_interval);
+
+template Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const int32_t scalar,
+                            const std::vector<int32_t> &range, bool left_open_interval, bool right_open_interval);
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
index 837c3f0a0f4..7cfa0bfa0be 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h
@@ -18,11 +18,25 @@
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_VALIDATORS_H_
 
 #include <string>
+#include <vector>
 #include "minddata/dataset/kernels/ir/validators.h"
 
+#include "minddata/dataset/core/tensor.h"
+#include "minddata/dataset/kernels/ir/tensor_operation.h"
+#include "minddata/dataset/util/status.h"
+
 namespace mindspore {
 namespace dataset {
 
+// Helper function to non-nan float scalar
+Status CheckFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar);
+
+// Helper function to positive float scalar
+Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar);
+
+// Helper function to positive int scalar
+Status CheckIntScalarPositive(const std::string &op_name, const std::string &scalar_name, int32_t scalar);
+
 template <typename T>
 // Helper function to check scalar is not equal to zero
 Status CheckScalarNotZero(const std::string &op_name, const std::string &scalar_name, const T scalar) {
@@ -34,6 +48,20 @@ Status CheckScalarNotZero(const std::string &op_name, const std::string &scalar_
   return Status::OK();
 }
 
+// Helper function to positive float scalar
+Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar);
+
+// Helper function to non-negative float scalar
+Status CheckFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar);
+
+// Helper function to check string scalar
+Status CheckStringScalarInList(const std::string &op_name, const std::string &scalar_name, const std::string &scalar,
+                               const std::vector<std::string> &str_vec);
+
+// Helper function to validate scalar
+template <typename T>
+Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const T scalar,
+                   const std::vector<T> &range, bool left_open_interval = false, bool right_open_interval = false);
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ADUIO_IR_VALIDATORS_H_
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
index f78a30fd232..c6517814031 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt
@@ -2,6 +2,13 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 
 add_library(audio-kernels OBJECT
+        allpass_biquad_op.cc
+        amplitude_to_db_op.cc
+        angle_op.cc
+        audio_utils.cc
         band_biquad_op.cc
+        bandpass_biquad_op.cc
+        bandreject_biquad_op.cc
+        bass_biquad_op.cc
+        time_stretch_op.cc
         )
-
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc
index b1b4625e066..da2f88964af 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc
@@ -20,15 +20,14 @@
 
 namespace mindspore {
 namespace dataset {
-
 Status AllpassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   TensorShape input_shape = input->shape();
-  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "AllpassBiquad: input tensor is not in shape of <..., time>.");
-  CHECK_FAIL_RETURN_UNEXPECTED(
-    input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) ||
-      input->type() == DataType(DataType::DE_FLOAT64),
-    "AllpassBiquad: input tensor type should be float, but got: " + input->type().ToString());
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "AllpassBiquad: input dimension should be greater than 0.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) ||
+                                 input->type() == DataType(DataType::DE_FLOAT16) ||
+                                 input->type() == DataType(DataType::DE_FLOAT64),
+                               "AllpassBiquad: input type should be float, but got " + input->type().ToString());
   double w0 = 2 * PI * central_freq_ / sample_rate_;
   double alpha = sin(w0) / 2 / Q_;
   double b0 = 1 - alpha;
@@ -37,16 +36,15 @@ Status AllpassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::share
   double a0 = b2;
   double a1 = -2 * cos(w0);
   double a2 = 1 - alpha;
-  if (input->type() == DataType(DataType::DE_FLOAT32)) {
+  if (input->type() == DataType(DataType::DE_FLOAT32))
     return Biquad(input, output, static_cast<float>(b0), static_cast<float>(b1), static_cast<float>(b2),
                   static_cast<float>(a0), static_cast<float>(a1), static_cast<float>(a2));
-  } else if (input->type() == DataType(DataType::DE_FLOAT64)) {
+  else if (input->type() == DataType(DataType::DE_FLOAT64))
     return Biquad(input, output, static_cast<double>(b0), static_cast<double>(b1), static_cast<double>(b2),
                   static_cast<double>(a0), static_cast<double>(a1), static_cast<double>(a2));
-  } else {
+  else
     return Biquad(input, output, static_cast<float16>(b0), static_cast<float16>(b1), static_cast<float16>(b2),
                   static_cast<float16>(a0), static_cast<float16>(a1), static_cast<float16>(a2));
-  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h
index 26c7b729f0a..d4e7e17b95a 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class AllpassBiquadOp : public TensorOp {
  public:
   AllpassBiquadOp(int32_t sample_rate, float central_freq, float Q)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc
index 8a202f497c4..dbebec42d39 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc
@@ -13,8 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h"
+#include <limits>
 
+#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h"
 #include "minddata/dataset/audio/kernels/audio_utils.h"
 #include "minddata/dataset/kernels/data/data_utils.h"
 #include "minddata/dataset/util/status.h"
@@ -25,7 +26,7 @@ namespace dataset {
 Status AmplitudeToDBOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   if (input->shape().Rank() < 2) {
-    std::string err_msg = "AmplitudeToDB: input tensor is not in shape of <..., freq, time>.";
+    std::string err_msg = "AmplitudeToDB: input tensor shape should be <..., freq, time>";
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_SYNTAX_ERROR(err_msg);
   }
@@ -39,12 +40,12 @@ Status AmplitudeToDBOp::Compute(const std::shared_ptr<Tensor> &input, std::share
 
   // typecast
   CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING,
-                               "AmplitudeToDB: input tensor type should be float, but got: string.");
+                               "AmplitudeToDB: input type should be float, but got string.");
   if (input->type() != DataType::DE_FLOAT64) {
-    CHECK_FAIL_RETURN_UNEXPECTED(
-      TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)),
-      "AmplitudeToDB: input tensor type should be float, but got: " + input->type().ToString());
+    CHECK_FAIL_RETURN_UNEXPECTED(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)),
+                                 "AmplitudeToDB: input type should be float, but got " + input->type().ToString());
     return AmplitudeToDB<float>(input_tensor, output, multiplier, amin, db_multiplier, top_db);
+
   } else {
     input_tensor = input;
     return AmplitudeToDB<double>(input_tensor, output, multiplier, amin, db_multiplier, top_db);
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h
index 9aa2672878e..bd84e888f9e 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h
@@ -29,7 +29,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class AmplitudeToDBOp : public TensorOp {
  public:
   AmplitudeToDBOp(ScaleType stype, float ref_value, float amin, float top_db)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc
index 9dc313f606a..54827c934ee 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc
@@ -25,10 +25,8 @@ namespace dataset {
 Status AngleOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   // if If the last dimension is not 2, then it's not a complex number
-  CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-1] == 2, "Angle: input tensor is not in shape of <..., complex=2>.");
-  CHECK_FAIL_RETURN_UNEXPECTED(
-    input->type().IsNumeric(),
-    "Angle: input tensor type should be int, float or double, but got: " + input->type().ToString());
+  CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-1] == 2, "Angle: The input is not several legal complex numbers");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type().IsNumeric(), "Angle: The input type should be numbers");
   if (input->type() == DataType(DataType::DE_FLOAT64)) {
     return Angle<double>(input, output);
   } else {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h
index 501981b2138..aff0ab44a4d 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class AngleOp : public TensorOp {
  public:
   // Convert complex numbers to angles
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
index d225eabd48b..701a4ca6dde 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc
@@ -16,27 +16,62 @@
 
 #include "minddata/dataset/audio/kernels/audio_utils.h"
 
-#include <complex>
-
-#include "mindspore/core/base/float16.h"
-#include "minddata/dataset/core/type_id.h"
-#include "minddata/dataset/kernels/data/data_utils.h"
-#include "minddata/dataset/util/random.h"
-#include "minddata/dataset/util/status.h"
-
 namespace mindspore {
 namespace dataset {
 
-/// \brief Generate linearly spaced vector.
+template <typename T>
+Status AmplitudeToDB(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T multiplier, T amin,
+                     T db_multiplier, T top_db) {
+  TensorShape input_shape = input->shape();
+  TensorShape to_shape = input_shape.Rank() == 2
+                           ? TensorShape({1, 1, input_shape[-2], input_shape[-1]})
+                           : TensorShape({input->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]),
+                                          input_shape[-3], input_shape[-2], input_shape[-1]});
+  RETURN_IF_NOT_OK(input->Reshape(to_shape));
+
+  std::vector<T> max_val;
+  int step = to_shape[-3] * input_shape[-2] * input_shape[-1];
+  int cnt = 0;
+  T temp_max = std::numeric_limits<T>::lowest();
+  for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
+    // do clamp
+    *itr = *itr < amin ? log10(amin) * multiplier : log10(*itr) * multiplier;
+    *itr -= multiplier * db_multiplier;
+    // calculate max by axis
+    cnt++;
+    if ((*itr) > temp_max) temp_max = *itr;
+    if (cnt % step == 0) {
+      max_val.push_back(temp_max);
+      temp_max = std::numeric_limits<T>::lowest();
+    }
+  }
+
+  if (!std::isnan(top_db)) {
+    int ind = 0;
+    for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++, ind++) {
+      float lower_bound = max_val[ind / step] - top_db;
+      *itr = std::max((*itr), static_cast<T>(lower_bound));
+    }
+  }
+  RETURN_IF_NOT_OK(input->Reshape(input_shape));
+  *output = input;
+  return Status::OK();
+}
+template Status AmplitudeToDB<float>(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
+                                     float multiplier, float amin, float db_multiplier, float top_db);
+template Status AmplitudeToDB<double>(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output,
+                                      double multiplier, double amin, double db_multiplier, double top_db);
+
+/// \brief Generate linearly spaced vector
 /// \param[in] start - Value of the startpoint.
 /// \param[in] end - Value of the endpoint.
 /// \param[in] n - N points in the output tensor.
 /// \param[out] output - Tensor has n points with linearly space. The spacing between the points is (end-start)/(n-1).
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
-Status Linspace(std::shared_ptr<Tensor> *output, T start, T end, int n) {
+Status Linespace(std::shared_ptr<Tensor> *output, T start, T end, int n) {
   if (start > end) {
-    std::string err = "Linspace: input param end must be greater than start.";
+    std::string err = "Linespace: input param end must be greater than start.";
     RETURN_STATUS_UNEXPECTED(err);
   }
   n = std::isnan(n) ? 100 : n;
@@ -54,10 +89,10 @@ Status Linspace(std::shared_ptr<Tensor> *output, T start, T end, int n) {
   return Status::OK();
 }
 
-/// \brief Calculate complex tensor angle.
+/// \brief Calculate complex tensor angle
 /// \param[in] input - Input tensor, must be complex, <channel, freq, time, complex=2>.
 /// \param[out] output - Complex tensor angle.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status ComplexAngle(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   // check complex
@@ -86,10 +121,10 @@ Status ComplexAngle(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
   return Status::OK();
 }
 
-/// \brief Calculate complex tensor abs.
+/// \brief Calculate complex tensor abs
 /// \param[in] input - Input tensor, must be complex, <channel, freq, time, complex=2>.
 /// \param[out] output - Complex tensor abs.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status ComplexAbs(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   // check complex
@@ -115,17 +150,17 @@ Status ComplexAbs(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor>
   return Status::OK();
 }
 
-/// \brief Reconstruct complex tensor from norm and angle.
+/// \brief Reconstruct complex tensor from norm and angle
 /// \param[in] abs - The absolute value of the complex tensor.
 /// \param[in] angle - The angle of the complex tensor.
 /// \param[out] output - Complex tensor, <channel, freq, time, complex=2>.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status Polar(const std::shared_ptr<Tensor> &abs, const std::shared_ptr<Tensor> &angle,
              std::shared_ptr<Tensor> *output) {
   // check shape
   if (abs->shape() != angle->shape()) {
-    std::string err_msg = "Polar: input tensor shape of abs and angle must be the same.";
+    std::string err_msg = "Polar: input shape of abs and angle must be same.";
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_SYNTAX_ERROR(err_msg);
   }
@@ -148,12 +183,12 @@ Status Polar(const std::shared_ptr<Tensor> &abs, const std::shared_ptr<Tensor> &
   return Status::OK();
 }
 
-/// \brief Pad complex tensor.
+/// \brief Pad complex tensor
 /// \param[in] input - The complex tensor.
 /// \param[in] length - The length of padding.
 /// \param[in] dim - The dim index for padding.
 /// \param[out] output - Complex tensor, <channel, freq, time, complex=2>.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status PadComplexTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int length, int dim) {
   TensorShape input_shape = input->shape();
@@ -181,13 +216,13 @@ Status PadComplexTensor(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
   return Status::OK();
 }
 
-/// \brief Calculate phase.
+/// \brief Calculate phase
 /// \param[in] angle_0 - The angle.
 /// \param[in] angle_1 - The angle.
 /// \param[in] phase_advance - The phase advance.
 /// \param[in] phase_time0 - The phase at time 0.
 /// \param[out] output - Phase tensor.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status Phase(const std::shared_ptr<Tensor> &angle_0, const std::shared_ptr<Tensor> &angle_1,
              const std::shared_ptr<Tensor> &phase_advance, const std::shared_ptr<Tensor> &phase_time0,
@@ -232,12 +267,12 @@ Status Phase(const std::shared_ptr<Tensor> &angle_0, const std::shared_ptr<Tenso
   return Status::OK();
 }
 
-/// \brief Calculate magnitude.
+/// \brief Calculate magnitude
 /// \param[in] alphas - The alphas.
 /// \param[in] abs_0 - The norm.
 /// \param[in] abs_1 - The norm.
 /// \param[out] output - Magnitude tensor.
-/// \return Status return code.
+/// \return Status return code
 template <typename T>
 Status Mag(const std::shared_ptr<Tensor> &abs_0, const std::shared_ptr<Tensor> &abs_1, std::shared_ptr<Tensor> *output,
            const std::vector<T> &alphas) {
@@ -332,178 +367,19 @@ Status TimeStretch(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *outpu
   std::shared_ptr<Tensor> phase_advance;
   switch (input->type().value()) {
     case DataType::DE_FLOAT32:
-      RETURN_IF_NOT_OK(Linspace<float>(&phase_advance, 0, PI * hop_length, n_freq));
+      RETURN_IF_NOT_OK(Linespace<float>(&phase_advance, 0, PI * hop_length, n_freq));
       RETURN_IF_NOT_OK(TimeStretch<float>(input, output, rate, phase_advance));
       break;
     case DataType::DE_FLOAT64:
-      RETURN_IF_NOT_OK(Linspace<double>(&phase_advance, 0, PI * hop_length, n_freq));
+      RETURN_IF_NOT_OK(Linespace<double>(&phase_advance, 0, PI * hop_length, n_freq));
       RETURN_IF_NOT_OK(TimeStretch<double>(input, output, rate, phase_advance));
       break;
     default:
-      RETURN_STATUS_UNEXPECTED("TimeStretch: input tensor type should be float or double, but got: " +
-                               input->type().ToString());
+      RETURN_STATUS_UNEXPECTED(
+        "TimeStretch: unsupported type, currently supported types include "
+        "[float, double].");
   }
   return Status::OK();
 }
-
-Status RandomMaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int64_t mask_param,
-                           double mask_value, int axis, std::mt19937 rnd) {
-  std::uniform_int_distribution<int64_t> mask_width_value(0, mask_param);
-  TensorShape input_shape = input->shape();
-  int64_t mask_dim_size = axis == 1 ? input_shape[-2] : input_shape[-1];
-  int64_t mask_width = mask_width_value(rnd);
-  std::uniform_int_distribution<int64_t> min_freq_value(0, mask_dim_size - mask_width);
-  int64_t mask_start = min_freq_value(rnd);
-
-  return MaskAlongAxis(input, output, mask_width, mask_start, mask_value, axis);
-}
-
-Status MaskAlongAxis(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, int64_t mask_width,
-                     int64_t mask_start, double mask_value, int axis) {
-  if (axis != 2 && axis != 1) {
-    RETURN_STATUS_UNEXPECTED("MaskAlongAxis: only support Time and Frequency masking, axis should be 1 or 2.");
-  }
-  TensorShape input_shape = input->shape();
-  // squeeze input
-  TensorShape squeeze_shape = TensorShape({-1, input_shape[-2], input_shape[-1]});
-  input->Reshape(squeeze_shape);
-
-  int check_dim_ind = (axis == 1) ? -2 : -1;
-  CHECK_FAIL_RETURN_UNEXPECTED(0 <= mask_start && mask_start <= input_shape[check_dim_ind],
-                               "MaskAlongAxis: mask_start should be less than the length of chosen dimension.");
-  CHECK_FAIL_RETURN_UNEXPECTED(mask_start + mask_width <= input_shape[check_dim_ind],
-                               "MaskAlongAxis: the sum of mask_start and mask_width is out of bounds.");
-
-  int64_t cell_size = input->type().SizeInBytes();
-
-  if (axis == 1) {
-    // freq
-    for (int ind = 0; ind < input->Size() / input_shape[-2] * mask_width; ind++) {
-      int block_num = ind / (mask_width * input_shape[-1]);
-      auto start_pos = ind % (mask_width * input_shape[-1]) + mask_start * input_shape[-1] +
-                       input_shape[-1] * input_shape[-2] * block_num;
-      auto start_mem_pos = const_cast<uchar *>(input->GetBuffer() + start_pos * cell_size);
-      if (input->type() != DataType::DE_FLOAT64) {
-        // tensor float 32
-        auto mask_val = static_cast<float>(mask_value);
-        CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0,
-                                     "MaskAlongAxis: mask failed, memory copy error.");
-      } else {
-        // tensor float 64
-        CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0,
-                                     "MaskAlongAxis: mask failed, memory copy error.");
-      }
-    }
-  } else {
-    // time
-    for (int ind = 0; ind < input->Size() / input_shape[-1] * mask_width; ind++) {
-      int row_num = ind / mask_width;
-      auto start_pos = ind % mask_width + mask_start + input_shape[-1] * row_num;
-      auto start_mem_pos = const_cast<uchar *>(input->GetBuffer() + start_pos * cell_size);
-      if (input->type() != DataType::DE_FLOAT64) {
-        // tensor float 32
-        auto mask_val = static_cast<float>(mask_value);
-        CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0,
-                                     "MaskAlongAxis: mask failed, memory copy error.");
-      } else {
-        // tensor float 64
-        CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0,
-                                     "MaskAlongAxis: mask failed, memory copy error.");
-      }
-    }
-  }
-  // unsqueeze input
-  input->Reshape(input_shape);
-  *output = input;
-  return Status::OK();
-}
-
-template <typename T>
-Status Norm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power) {
-  // calcutate total complex num
-  int32_t dim = input->shape().Size();
-  int32_t total_num = 1;
-  for (int32_t i = 0; i < (dim - 1); i++) {
-    total_num *= (input->shape()[i]);
-  }
-
-  // calculate the output dimension
-  auto input_size = input->shape().AsVector();
-  int32_t dim_back = input_size.back();
-  CHECK_FAIL_RETURN_UNEXPECTED(
-    dim_back == 2, "ComplexNorm: expect complex input of shape <..., 2>, but got: " + std::to_string(dim_back));
-  input_size.pop_back();
-  int32_t complex_num = input_size.back();
-  int32_t iter_num = total_num / complex_num;
-  // TensorShape out_put_shape{}
-  input_size.pop_back();
-  input_size.emplace_back(2);
-  TensorShape out_shape = TensorShape(input_size);
-  RETURN_IF_NOT_OK(Tensor::CreateEmpty(out_shape, input->type(), output));
-
-  // slice input into real tensor and imaginary tensor
-  std::shared_ptr<Tensor> re_tensor;
-  std::shared_ptr<Tensor> im_tensor;
-  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({total_num, 1}), input->type(), &re_tensor));
-  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({total_num, 1}), input->type(), &im_tensor));
-  std::vector<SliceOption> slice_re = {};
-  std::vector<SliceOption> slice_im = {};
-  for (int32_t i = 0; i < (dim - 1); i++) {
-    slice_re.emplace_back(SliceOption(true));
-    slice_im.emplace_back(SliceOption(true));
-  }
-  slice_re.emplace_back(SliceOption(std::vector<dsize_t>{0}));
-  slice_im.emplace_back(SliceOption(std::vector<dsize_t>{1}));
-  RETURN_IF_NOT_OK(input->Slice(&re_tensor, slice_re));
-  RETURN_IF_NOT_OK(input->Slice(&im_tensor, slice_im));
-
-  // calculate norm, using: .pow(2.).sum(-1).pow(0.5 * power)
-  auto itr_out = (*output)->begin<T>();
-  auto itr_re = re_tensor->begin<T>();
-  auto itr_im = im_tensor->begin<T>();
-  for (int32_t i = 0; i < iter_num; i++) {
-    double re = 0.0;
-    double im = 0.0;
-    for (int32_t j = complex_num * i; j < complex_num * (i + 1); j++) {
-      double a = static_cast<double>(*itr_re);
-      double b = static_cast<double>(*itr_im);
-      re = re + (pow(a, 2) - pow(b, 2));
-      im = im + (2 * a * b);
-      ++itr_re;
-      ++itr_im;
-    }
-    std::complex<double> comp(re, im);
-    comp = std::pow(comp, (0.5 * power));
-    *itr_out = static_cast<T>(comp.real());
-    ++itr_out;
-    *itr_out = static_cast<T>(comp.imag());
-    ++itr_out;
-  }
-  RETURN_IF_NOT_OK((*output)->Reshape(out_shape));
-  return Status::OK();
-}
-
-Status ComplexNorm(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, float power) {
-  try {
-    if (input->type().value() >= DataType::DE_INT8 && input->type().value() <= DataType::DE_FLOAT16) {
-      // convert the data type to float
-      std::shared_ptr<Tensor> input_tensor;
-      RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), &input_tensor));
-      RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)));
-
-      Norm<float>(input_tensor, output, power);
-    } else if (input->type().value() == DataType::DE_FLOAT32) {
-      Norm<float>(input, output, power);
-    } else if (input->type().value() == DataType::DE_FLOAT64) {
-      Norm<double>(input, output, power);
-    } else {
-      RETURN_STATUS_UNEXPECTED("ComplexNorm: input tensor type should be int, float or double, but got: " +
-                               input->type().ToString());
-    }
-    return Status::OK();
-  } catch (std::runtime_error &e) {
-    RETURN_STATUS_UNEXPECTED("ComplexNorm: " + std::string(e.what()));
-  }
-}
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
index 23e1e518219..d66340fbf76 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h
@@ -17,8 +17,11 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
 
+#include <algorithm>
 #include <cmath>
+#include <limits>
 #include <memory>
+#include <string>
 #include <vector>
 
 #include "minddata/dataset/core/tensor.h"
@@ -28,6 +31,42 @@
 constexpr double PI = 3.141592653589793;
 namespace mindspore {
 namespace dataset {
+/// \brief Turn a tensor from the power/amplitude scale to the decibel scale.
+/// \param input/output: Tensor of shape <...,freq,time>
+/// \param multiplier: power - 10, amplitude - 20
+/// \param amin: lower bound
+/// \param db_multiplier: multiplier for decibels
+/// \param top_db: the lower bound for decibels cut-off
+/// \return Status code
+template <typename T>
+Status AmplitudeToDB(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, T multiplier, T amin,
+                     T db_multiplier, T top_db);
+
+/// \brief Calculate the angles of the complex numbers
+/// \param input/output: Tensor of shape <...,time>
+template <typename T>
+Status Angle(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
+  TensorShape shape = input->shape();
+  std::vector output_shape = shape.AsVector();
+  output_shape.pop_back();
+  std::shared_ptr<Tensor> output_tensor;
+  std::vector<T> out;
+  T o;
+  T x;
+  T y;
+  for (auto itr = input->begin<T>(); itr != input->end<T>(); itr++) {
+    x = static_cast<T>(*itr);
+    itr++;
+    y = static_cast<T>(*itr);
+    o = std::atan2(y, x);
+    out.emplace_back(o);
+  }
+  // Generate multidimensional results corresponding to input
+  Tensor::CreateFromVector(out, TensorShape{output_shape}, &output_tensor);
+  *output = output_tensor;
+  return Status::OK();
+}
+
 /// \brief Perform a biquad filter of input tensor.
 /// \param input/output: Tensor of shape <...,time>
 /// \param a0: denominator coefficient of current output y[n], typically 1
@@ -138,6 +177,15 @@ Status LFilter(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
   return Status::OK();
 }
 
+/// \brief Stretch STFT in time at a given rate, without changing the pitch.
+/// \param[in] input - Tensor of shape <...,freq,time>.
+/// \param[in] rate - Stretch factor.
+/// \param[in] phase_advance - Expected phase advance in each bin.
+/// \param[out] output - Tensor after stretch in time domain.
+/// \return Status return code
+Status TimeStretch(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, float rate, float hop_length,
+                   float n_freq);
+
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc
index ab0fa546f3a..475485f0e1a 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc
@@ -24,12 +24,12 @@ namespace dataset {
 Status BandpassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   TensorShape input_shape = input->shape();
-  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandpassBiquad: input tensor is not in shape of <..., time>.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandpassBiquad: inpute dimension should be greater than 0.");
   // check input type, it should be DE_FLOAT32 or DE_FLOAT16 or DE_FLOAT64
-  CHECK_FAIL_RETURN_UNEXPECTED(
-    input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) ||
-      input->type() == DataType(DataType::DE_FLOAT64),
-    "BandpassBiquad: input tensor type should be float, but got: " + input->type().ToString());
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) ||
+                                 input->type() == DataType(DataType::DE_FLOAT16) ||
+                                 input->type() == DataType(DataType::DE_FLOAT64),
+                               "BandpassBiquad: input type should be float, but got " + input->type().ToString());
   float w0 = 2 * PI * central_freq_ / sample_rate_;
   float alpha = sin(w0) / 2 / Q_;
   float temp;
@@ -46,16 +46,15 @@ Status BandpassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shar
   float a1 = (-2) * cos(w0);
   float a2 = 1 - alpha;
 
-  if (input->type() == DataType(DataType::DE_FLOAT32)) {
+  if (input->type() == DataType(DataType::DE_FLOAT32))
     return Biquad(input, output, static_cast<float>(b0), static_cast<float>(b1), static_cast<float>(b2),
                   static_cast<float>(a0), static_cast<float>(a1), static_cast<float>(a2));
-  } else if (input->type() == DataType(DataType::DE_FLOAT64)) {
+  else if (input->type() == DataType(DataType::DE_FLOAT64))
     return Biquad(input, output, static_cast<double>(b0), static_cast<double>(b1), static_cast<double>(b2),
                   static_cast<double>(a0), static_cast<double>(a1), static_cast<double>(a2));
-  } else {
+  else
     return Biquad(input, output, static_cast<float16>(b0), static_cast<float16>(b1), static_cast<float16>(b2),
                   static_cast<float16>(a0), static_cast<float16>(a1), static_cast<float16>(a2));
-  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h
index dead035fbc4..0fb21441425 100755
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h
@@ -17,8 +17,8 @@
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_BANDPASS_BIQUAD_OP_H_
 
 #include <memory>
-#include <string>
 #include <vector>
+#include <string>
 
 #include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/kernels/tensor_op.h"
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class BandpassBiquadOp : public TensorOp {
  public:
   BandpassBiquadOp(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc
index 0e9244af2b1..d321cbf6d52 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc
@@ -20,17 +20,15 @@
 
 namespace mindspore {
 namespace dataset {
-
 Status BandrejectBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   // check input type and input shape
   TensorShape input_shape = input->shape();
-  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0,
-                               "BandrejectBiquad: input tensor is not in shape of <..., time>.");
-  CHECK_FAIL_RETURN_UNEXPECTED(
-    input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) ||
-      input->type() == DataType(DataType::DE_FLOAT64),
-    "BandrejectBiquad: input tensor type should be float, but got: " + input->type().ToString());
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandrejectBiquad: input dimension should be greater than 0.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) ||
+                                 input->type() == DataType(DataType::DE_FLOAT16) ||
+                                 input->type() == DataType(DataType::DE_FLOAT64),
+                               "BandrejectBiquad: input type should be float, but got " + input->type().ToString());
   double w0 = 2 * PI * central_freq_ / sample_rate_;
   double alpha = sin(w0) / 2 / Q_;
   double b0 = 1;
@@ -39,16 +37,15 @@ Status BandrejectBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::sh
   double a0 = 1 + alpha;
   double a1 = b1;
   double a2 = 1 - alpha;
-  if (input->type() == DataType(DataType::DE_FLOAT32)) {
+  if (input->type() == DataType(DataType::DE_FLOAT32))
     return Biquad(input, output, static_cast<float>(b0), static_cast<float>(b1), static_cast<float>(b2),
                   static_cast<float>(a0), static_cast<float>(a1), static_cast<float>(a2));
-  } else if (input->type() == DataType(DataType::DE_FLOAT64)) {
+  else if (input->type() == DataType(DataType::DE_FLOAT64))
     return Biquad(input, output, static_cast<double>(b0), static_cast<double>(b1), static_cast<double>(b2),
                   static_cast<double>(a0), static_cast<double>(a1), static_cast<double>(a2));
-  } else {
+  else
     return Biquad(input, output, static_cast<float16>(b0), static_cast<float16>(b1), static_cast<float16>(b2),
                   static_cast<float16>(a0), static_cast<float16>(a1), static_cast<float16>(a2));
-  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h
index e59d0cf3220..3b42a6ccb82 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h
@@ -26,7 +26,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class BandrejectBiquadOp : public TensorOp {
  public:
   BandrejectBiquadOp(int32_t sample_rate, float central_freq, float Q)
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc
index d05a7ff2471..71799b17852 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc
@@ -24,12 +24,12 @@ namespace dataset {
 Status BassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
   TensorShape input_shape = input->shape();
-  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BassBiquad: input tensor is not in shape of <..., time>.");
+  CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BassBiquad: input dimension should be greater than 0.");
   // check input type, it should be DE_FLOAT32 or DE_FLOAT16 or DE_FLOAT64
   CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) ||
                                  input->type() == DataType(DataType::DE_FLOAT16) ||
                                  input->type() == DataType(DataType::DE_FLOAT64),
-                               "BassBiquad: input tensor type should be float, but got: " + input->type().ToString());
+                               "BassBiquad: input type should be float, but got " + input->type().ToString());
 
   double w0 = 2 * PI * central_freq_ / sample_rate_;
   double alpha = sin(w0) / 2 / Q_;
@@ -45,18 +45,17 @@ Status BassBiquadOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_p
   double a0 = (A + 1) + temp2 + temp1;
   double a1 = -2 * ((A - 1) + temp3);
   double a2 = (A + 1) + temp2 - temp1;
-  if (input->type() == DataType(DataType::DE_FLOAT32)) {
+  if (input->type() == DataType(DataType::DE_FLOAT32))
     return Biquad(input, output, static_cast<float>(b0 / a0), static_cast<float>(b1 / a0), static_cast<float>(b2 / a0),
                   static_cast<float>(1.0), static_cast<float>(a1 / a0), static_cast<float>(a2 / a0));
-  } else if (input->type() == DataType(DataType::DE_FLOAT64)) {
+  else if (input->type() == DataType(DataType::DE_FLOAT64))
     return Biquad(input, output, static_cast<double>(b0 / a0), static_cast<double>(b1 / a0),
                   static_cast<double>(b2 / a0), static_cast<double>(1.0), static_cast<double>(a1 / a0),
                   static_cast<double>(a2 / a0));
-  } else {
+  else
     return Biquad(input, output, static_cast<float16>(b0 / a0), static_cast<float16>(b1 / a0),
                   static_cast<float16>(b2 / a0), static_cast<float16>(1.0), static_cast<float16>(a1 / a0),
                   static_cast<float16>(a2 / a0));
-  }
 }
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h
index 68552c1bb80..2aa31f2428c 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h
@@ -27,7 +27,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class BassBiquadOp : public TensorOp {
  public:
   BassBiquadOp(int32_t sample_rate, float gain, float central_freq, float Q)
@@ -36,7 +35,7 @@ class BassBiquadOp : public TensorOp {
   ~BassBiquadOp() override = default;
 
   void Print(std::ostream &out) const override {
-    out << Name() << ": sample_rate: " << sample_rate_ << ", gain: " << gain_ << ", central_freq: " << central_freq_
+    out << Name() << ": sample_rate: " << sample_rate_ << ", gain:" << gain_ << ", central_freq: " << central_freq_
         << ", Q: " << Q_ << std::endl;
   }
 
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc
index 05a14891b00..0f990348ff7 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc
@@ -33,8 +33,15 @@ Status TimeStretchOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_
   IO_CHECK(input, output);
 
   // check shape
-  if (input->shape().Rank() < 3 || !input->IsComplex()) {
-    std::string err_msg = "TimeStretch: input tensor is not in shape of <..., freq, num_frame, complex=2>.";
+  if (input->shape().Rank() < 3) {
+    std::string err_msg = "TimeStretch: input tensor shape is not <..., freq, num_frame, complex=2>.";
+    MS_LOG(ERROR) << err_msg;
+    RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+
+  // check complex
+  if (!input->IsComplex()) {
+    std::string err_msg = "TimeStretch: input tensor is not in shape of <..., 2>.";
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_SYNTAX_ERROR(err_msg);
   }
@@ -44,7 +51,7 @@ Status TimeStretchOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_
   float hop_length = std::isnan(hop_length_) ? (n_freq_ - 1) : hop_length_;
   // typecast
   CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING,
-                               "TimeStretch: input tensor type should be int, float or double, but got: string.");
+                               "TimeStretch: input tensor type should be [int, float, double], but got string.");
   if (input->type() != DataType::DE_FLOAT64) {
     RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)));
   } else {
diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h
index 5a7b1dd9d59..d1a119bf8c0 100644
--- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h
+++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h
@@ -27,7 +27,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 class TimeStretchOp : public TensorOp {
  public:
   /// Default value
@@ -44,6 +43,9 @@ class TimeStretchOp : public TensorOp {
 
   std::string Name() const override { return kTimeStretchOp; }
 
+  /// \param[in] inputs
+  /// \param[out] outputs
+  /// \return  Status code
   Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;
 
  private:
diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
index 85e1177906a..8838c06efd2 100644
--- a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
+++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc
@@ -61,27 +61,27 @@ Status PyDSCallback::ExecutePyfunc(py::function f, const CallbackParam &cb_param
   }
   return Status::OK();
 }
-void PyDSCallback::setBegin(py::function f) {
+void PyDSCallback::setBegin(const py::function &f) {
   begin_func_ = f;
   begin_needed_ = true;
 }
-void PyDSCallback::setEnd(py::function f) {
+void PyDSCallback::setEnd(const py::function &f) {
   end_func_ = f;
   end_needed_ = true;
 }
-void PyDSCallback::setEpochBegin(py::function f) {
+void PyDSCallback::setEpochBegin(const py::function &f) {
   epoch_begin_func_ = f;
   epoch_begin_needed_ = true;
 }
-void PyDSCallback::setEpochEnd(py::function f) {
+void PyDSCallback::setEpochEnd(const py::function &f) {
   epoch_end_func_ = f;
   epoch_end_needed_ = true;
 }
-void PyDSCallback::setStepBegin(py::function f) {
+void PyDSCallback::setStepBegin(const py::function &f) {
   step_begin_func_ = f;
   step_begin_needed_ = true;
 }
-void PyDSCallback::setStepEnd(py::function f) {
+void PyDSCallback::setStepEnd(const py::function &f) {
   step_end_func_ = f;
   step_end_needed_ = true;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
index dcc57415014..d3782d51542 100644
--- a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
+++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h
@@ -44,12 +44,12 @@ class PyDSCallback : public DSCallback {
 
   ~PyDSCallback() = default;
 
-  void setBegin(py::function f);
-  void setEnd(py::function f);
-  void setEpochBegin(py::function f);
-  void setEpochEnd(py::function f);
-  void setStepBegin(py::function f);
-  void setStepEnd(py::function f);
+  void setBegin(const py::function &f);
+  void setEnd(const py::function &f);
+  void setEpochBegin(const py::function &f);
+  void setEpochEnd(const py::function &f);
+  void setStepBegin(const py::function &f);
+  void setStepEnd(const py::function &f);
 
   /// \brief actual callback function for begin, needs to be overridden in the derived class
   /// \param cb_param, callback parameter passed in from DatasetOp when calling the callback
diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
index 0f33d499155..052a585eb1d 100644
--- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc
@@ -40,12 +40,21 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt
   return (*out)->MatInit((*out)->GetMutableBuffer(), (*out)->shape_, (*out)->type_, &(*out)->mat_);
 }
 
-Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
+Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) {
   TensorPtr out_tensor;
   cv::Mat mat_local = mat;
   // if the input Mat's memory is not continuous, copy it to one block of memory
-  if (!mat.isContinuous()) mat_local = mat.clone();
-  TensorShape shape(mat.size, mat_local.type());
+  if (!mat.isContinuous()) {
+    mat_local = mat.clone();
+  }
+  TensorShape shape({});
+  if (mat.dims == 2 && rank == 2) {
+    shape = TensorShape({mat.rows, mat.cols});
+  } else if (mat.dims == 2 && rank == 3) {
+    shape = TensorShape({mat.rows, mat.cols, mat.channels()});
+  } else {
+    RETURN_STATUS_UNEXPECTED("Error in creating CVTensor: Invalid input rank or cv::mat dimension.");
+  }
   DataType type = DataType::FromCVType(mat_local.type());
   RETURN_IF_NOT_OK(CreateFromMemory(shape, type, mat_local.data, &out_tensor));
   *out = AsCVTensor(out_tensor);
@@ -55,14 +64,13 @@ Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
   std::array<int, 2> size = {1, 1};
   if (shape.Rank() <= 2 || (shape.Rank() == 3 && shape[2] <= CV_CN_MAX)) {
-    uint8_t ch = 1;
+    uint16_t ch = 1;
     if (shape.Rank() == 3) {
-      ch = static_cast<uint8_t>(shape[2]);
+      ch = static_cast<uint16_t>(shape[2]);
     }
     if (shape.Rank() > 0) size[0] = static_cast<int>(shape[0]);
     if (shape.Rank() > 1) size[1] = static_cast<int>(shape[1]);
     if (type.AsCVType() == kCVInvalidType) return std::make_pair(size, -1);
-
     int cv_type = CV_MAKETYPE(type.AsCVType(), ch);
     return std::make_pair(size, cv_type);
   }
diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h
index 1c10a7066f6..80b125997f6 100644
--- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h
@@ -53,9 +53,10 @@ class CVTensor : public Tensor {
   /// Create CV tensor from cv::Mat
   /// \note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it.
   /// \param mat [in] cv::Mat to be copied into the new tensor.
+  /// \param shape [in] the rank of output CVTensor.
   /// \param out [out] Generated tensor
   /// \return Status code
-  static Status CreateFromMat(const cv::Mat &mat, CVTensorPtr *out);
+  static Status CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out);
 
   ~CVTensor() override = default;
 
diff --git a/mindspore/ccsrc/minddata/dataset/core/data_type.cc b/mindspore/ccsrc/minddata/dataset/core/data_type.cc
index 0e03a7d3270..71c519c2387 100644
--- a/mindspore/ccsrc/minddata/dataset/core/data_type.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/data_type.cc
@@ -61,7 +61,7 @@ uint8_t DataType::AsCVType() const {
   }
 
   return res;
-}  // namespace dataset
+}
 
 DataType DataType::FromCVType(int cv_type) {
   auto depth = static_cast<uchar>(cv_type) & static_cast<uchar>(CV_MAT_DEPTH_MASK);
diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
index ee5768917f7..957e4c763cd 100644
--- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc
@@ -76,7 +76,7 @@ size_t DETensor::DataSize() const {
   }
 #endif
   EXCEPTION_IF_NULL(tensor_impl_);
-  return static_cast<uint32_t>(tensor_impl_->SizeInBytes());
+  return static_cast<size_t>(tensor_impl_->SizeInBytes());
 }
 
 const std::vector<int64_t> &DETensor::Shape() const { return shape_; }
diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc
index c03c78ad8d2..315ce87ed84 100644
--- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc
+++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc
@@ -263,10 +263,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
   }
   std::ifstream fs;
   fs.open(path, std::ios::binary | std::ios::in);
-  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + path);
+  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path);
   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
   CHECK_FAIL_RETURN_UNEXPECTED(num_bytes <= kDeMaxDim, "Invalid file to allocate tensor memory, check path: " + path);
-  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file, check path: " + path);
+  CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
   int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
   CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(),
@@ -508,7 +508,9 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     offset_t length_temp = 0;
     RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
-    if (length != nullptr) *length = length_temp;
+    if (length != nullptr) {
+      *length = length_temp;
+    }
     return Status::OK();
   } else {
     std::string err = "data type not compatible";
@@ -626,15 +628,97 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
 
 Status Tensor::to_json(nlohmann::json *out_json) {
   nlohmann::json args;
-  args["shape"] = shape_.ToString();
+  args["shape"] = shape_.AsVector();
   args["type"] = type_.ToString();
-  std::stringstream ss;
-  this->PrintData(ss);
-  args["data"] = ss.str();
+  if (type_ == DataType::DE_BOOL) {
+    RETURN_IF_NOT_OK(to_json_convert<bool>(&args));
+  } else if (type_ == DataType::DE_INT8) {
+    RETURN_IF_NOT_OK(to_json_convert<int8_t>(&args));
+  } else if (type_ == DataType::DE_INT16) {
+    RETURN_IF_NOT_OK(to_json_convert<int16_t>(&args));
+  } else if (type_ == DataType::DE_INT32) {
+    RETURN_IF_NOT_OK(to_json_convert<int32_t>(&args));
+  } else if (type_ == DataType::DE_INT64) {
+    RETURN_IF_NOT_OK(to_json_convert<int64_t>(&args));
+  } else if (type_ == DataType::DE_UINT8) {
+    RETURN_IF_NOT_OK(to_json_convert<uint8_t>(&args));
+  } else if (type_ == DataType::DE_UINT16) {
+    RETURN_IF_NOT_OK(to_json_convert<uint16_t>(&args));
+  } else if (type_ == DataType::DE_UINT32) {
+    RETURN_IF_NOT_OK(to_json_convert<uint32_t>(&args));
+  } else if (type_ == DataType::DE_UINT64) {
+    RETURN_IF_NOT_OK(to_json_convert<uint64_t>(&args));
+  } else if (type_ == DataType::DE_FLOAT32) {
+    RETURN_IF_NOT_OK(to_json_convert<float>(&args));
+  } else if (type_ == DataType::DE_FLOAT64) {
+    RETURN_IF_NOT_OK(to_json_convert<double>(&args));
+  } else if (type_ == DataType::DE_STRING) {
+    std::vector<std::string> data_out;
+    for (auto it = this->begin<std::string_view>(); it != this->end<std::string_view>(); it++) {
+      data_out.emplace_back(*it);
+    }
+    args["data"] = data_out;
+  } else {
+    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
+  }
   *out_json = args;
   return Status::OK();
 }
 
+template <typename T>
+Status Tensor::to_json_convert(nlohmann::json *args) {
+  std::vector<T> data_out;
+  for (auto it = this->begin<T>(); it != this->end<T>(); it++) {
+    data_out.emplace_back(*it);
+  }
+  (*args)["data"] = data_out;
+  return Status::OK();
+}
+
+Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shape") != op_params.end(), "Failed to find shape");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("type") != op_params.end(), "Failed to find type");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data") != op_params.end(), "Failed to find data");
+  std::string type = op_params["type"];
+  std::vector<dsize_t> list = op_params["shape"];
+  TensorShape shape = TensorShape(list);
+  if (type == "bool") {
+    RETURN_IF_NOT_OK(from_json_convert<bool>(op_params["data"], shape, tensor));
+  } else if (type == "int8") {
+    RETURN_IF_NOT_OK(from_json_convert<int8_t>(op_params["data"], shape, tensor));
+  } else if (type == "int16") {
+    RETURN_IF_NOT_OK(from_json_convert<int16_t>(op_params["data"], shape, tensor));
+  } else if (type == "int32") {
+    RETURN_IF_NOT_OK(from_json_convert<int32_t>(op_params["data"], shape, tensor));
+  } else if (type == "int64") {
+    RETURN_IF_NOT_OK(from_json_convert<int64_t>(op_params["data"], shape, tensor));
+  } else if (type == "uint8") {
+    RETURN_IF_NOT_OK(from_json_convert<uint8_t>(op_params["data"], shape, tensor));
+  } else if (type == "uint16") {
+    RETURN_IF_NOT_OK(from_json_convert<uint16_t>(op_params["data"], shape, tensor));
+  } else if (type == "uint32") {
+    RETURN_IF_NOT_OK(from_json_convert<uint32_t>(op_params["data"], shape, tensor));
+  } else if (type == "uint64") {
+    RETURN_IF_NOT_OK(from_json_convert<uint64_t>(op_params["data"], shape, tensor));
+  } else if (type == "float32") {
+    RETURN_IF_NOT_OK(from_json_convert<float>(op_params["data"], shape, tensor));
+  } else if (type == "float64") {
+    RETURN_IF_NOT_OK(from_json_convert<double>(op_params["data"], shape, tensor));
+  } else if (type == "string") {
+    RETURN_IF_NOT_OK(from_json_convert<std::string>(op_params["data"], shape, tensor));
+  } else {
+    return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor");
+  }
+  return Status::OK();
+}
+
+template <typename T>
+Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor) {
+  std::vector<T> data = json_data;
+  RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor));
+  return Status::OK();
+}
+
 template <typename T>
 Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
   if (data_ == nullptr) {
diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h
index 50ed6f6a2e7..cc011232fde 100644
--- a/mindspore/ccsrc/minddata/dataset/core/tensor.h
+++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h
@@ -68,7 +68,7 @@ class Tensor {
   Tensor(const Tensor &other) = delete;
   Tensor &operator=(const Tensor &other) = delete;
 
-  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
+  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead.
   /// \note The shape and type information should be known and valid
   /// \note The constructor does not allocate data
   /// \param shape TensorShape
@@ -219,6 +219,14 @@ class Tensor {
 
   Status to_json(nlohmann::json *out_json);
 
+  template <typename T>
+  Status to_json_convert(nlohmann::json *args);
+
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<Tensor> *tensor);
+
+  template <typename T>
+  static Status from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr<Tensor> *tensor);
+
   /// Get item located at `index`, caller needs to provide the type.
   /// \tparam T
   /// \param[in] index vector<dsize_t>
@@ -306,6 +314,13 @@ class Tensor {
   /// \return bool - true if tensor is not empty
   bool HasData() const { return data_ != nullptr; }
 
+  /// Check if tensor is complex
+  /// \return bool - true if tensor is complex
+  bool IsComplex() const {
+    // check the last dim all be 2
+    return shape_[-1] == 2;
+  }
+
   /// Reshape the tensor. The given shape should have the same number of elements in the Tensor
   /// \param shape
   virtual Status Reshape(const TensorShape &shape);
diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc
index e1ce544f08c..428192bd785 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc
@@ -73,7 +73,7 @@ Status CacheClientGreeter::DoServiceStop() {
     void *tag;
     while (cq_.Next(&tag, &success)) {
       auto r = reinterpret_cast<CacheClientRequestTag *>(tag);
-      req_.erase(r->seqNo_);
+      (void)req_.erase(r->seqNo_);
     }
   }
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
index 33cfa15d334..c99ffdaf733 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc
@@ -251,7 +251,7 @@ Status SaveToDisk::Save() {
   auto mr_writer = std::make_unique<mindrecord::ShardWriter>();
   std::vector<std::string> blob_fields;
   if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) {
-    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter.");
+    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message.");
   }
 
   std::unordered_map<std::string, int32_t> column_name_id_map;
diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc
index 136c331db31..849b903cdb8 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc
@@ -408,7 +408,7 @@ void DataSchema::Print(std::ostream &out) const {
 // Adds a column descriptor to the schema
 Status DataSchema::AddColumn(const ColDescriptor &cd) {
   // Sanity check there's not a duplicate name before adding the column
-  for (int32_t i = 0; i < col_descs_.size(); ++i) {
+  for (auto i = 0; i < col_descs_.size(); ++i) {
     if (col_descs_[i].name() == cd.name()) {
       std::ostringstream ss;
       ss << "column name '" << cd.name() << "' already exists in schema.";
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc
index a2ec25124d9..1d45a0437fc 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc
@@ -15,6 +15,7 @@
  */
 #include "minddata/dataset/engine/datasetops/rename_op.h"
 
+#include <set>
 #include <vector>
 #include <unordered_map>
 
@@ -52,6 +53,7 @@ Status RenameOp::ComputeColMap() {
     std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
     // parameter for input check
     size_t found = 0;
+    std::set<std::string> new_col_name;
 
     // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
     // by doing it this way we recreate a new ColNameIdMap and allow for switching
@@ -67,12 +69,27 @@ Status RenameOp::ComputeColMap() {
         found += 1;
         int index = std::distance(in_columns_.begin(), it);
         MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
-
+        if (new_col_name.find(out_columns_[index]) != new_col_name.end()) {
+          std::string err_msg(
+            "rename operation does not support rename one column name into another already exist column name, existed"
+            " column name is: " +
+            out_columns_[index] + ".");
+          RETURN_STATUS_UNEXPECTED(err_msg);
+        }
         new_col_name_id_map[out_columns_[index]] = id;
+        new_col_name.insert(out_columns_[index]);
       } else {
         // not found
+        if (new_col_name.find(name) != new_col_name.end()) {
+          std::string err_msg(
+            "rename operation does not support rename one column name into another already exist column name, existed"
+            " column name is: " +
+            name + ".");
+          RETURN_STATUS_UNEXPECTED(err_msg);
+        }
         MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id.";
         new_col_name_id_map[name] = id;
+        new_col_name.insert(name);
       }
     }
     // only checks number of renamed columns have been found, this input check doesn't check everything
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt
index 7b882e83558..767cff8c4d2 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt
@@ -16,6 +16,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
     album_op.cc
     mappable_leaf_op.cc
     nonmappable_leaf_op.cc
+    flickr_op.cc
     )
 
 set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
index d11a5a7eb8f..3c8af4dd067 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc
@@ -118,7 +118,7 @@ bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) {
   return true;
 }
 
-Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorRow *row) {
   TensorPtr image;
   std::ifstream fs;
   fs.open(image_file_path, std::ios::binary | std::ios::in);
@@ -168,7 +168,7 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col
   return Status::OK();
 }
 
-Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   std::vector<std::string> data = json_obj;
 
   MS_LOG(INFO) << "String array label found: " << data << ".";
@@ -178,7 +178,7 @@ Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t c
   return Status::OK();
 }
 
-Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   std::string data = json_obj;
   // now we iterate over the elements in json
 
@@ -189,7 +189,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_nu
   return Status::OK();
 }
 
-Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr label;
   // consider templating this function to handle all ints
   if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
@@ -218,7 +218,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_
   return Status::OK();
 }
 
-Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr float_array;
   // consider templating this function to handle all ints
   if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
@@ -247,7 +247,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t co
   return Status::OK();
 }
 
-Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) {
   if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
     TensorPtr id;
     RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id));
@@ -263,7 +263,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRo
   return Status::OK();
 }
 
-Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) {
   // hack to get the file name without extension, the 1 is to get rid of the backslash character
   TensorPtr empty_tensor;
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor));
@@ -275,7 +275,7 @@ Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) {
 // So we actually have to check what type we want to fill the tensor with.
 // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to
 // only be float32, seems like a weird limitation to impose
-Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr float_tensor;
   if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
     double data = json_obj;
@@ -291,7 +291,7 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num
 }
 
 // Loads a tensor with int value, we have to cast the value to type specified in the schema.
-Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) {
+Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr int_tensor;
   if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
     int64_t data = json_obj;
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
index 8c8b3e9fd72..f069c7bdbcf 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h
@@ -88,62 +88,62 @@ class AlbumOp : public MappableLeafOp {
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row);
+  Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorRow *row);
 
   /// \brief Load vector of ints to tensor, append tensor to tensor row
   /// \param[in] json_obj Json object containing multi-dimensional label
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load vector of floatss to tensor, append tensor to tensor row
   /// \param[in] json_obj Json object containing array data
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load string array into a tensor, append tensor to tensor row
   /// \param[in] json_obj Json object containing string tensor
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load string into a tensor, append tensor to tensor row
   /// \param[in] json_obj Json object containing string tensor
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load float value to tensor row
   /// \param[in] json_obj Json object containing float
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load int value to tensor row
   /// \param[in] json_obj Json object containing int
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
+  Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row);
 
   /// \brief Load empty tensor to tensor row
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadEmptyTensor(uint32_t col_num, TensorRow *row);
+  Status LoadEmptyTensor(int32_t col_num, TensorRow *row);
 
   /// \brief Load id from file name to tensor row
   /// \param[in] file The file name to get ID from
   /// \param[in] col_num Column num in schema
   /// \param[in, out] row Tensor row to push to
   /// \return Status The status code returned
-  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row);
+  Status LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row);
 
   /// \brief Load a tensor row according to a json file
   /// \param[in] row_id_type row_id - id for this tensor row
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc
index 38dd454328e..6b865917ed4 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc
@@ -368,7 +368,7 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage,
 Status CifarOp::ComputeColMap() {
   // set the column name map (base class field)
   if (column_name_id_map_.empty()) {
-    for (uint32_t i = 0; i < data_schema_->NumColumns(); ++i) {
+    for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
       column_name_id_map_[data_schema_->column(i).name()] = i;
     }
   } else {
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc
new file mode 100644
index 00000000000..ee7f872b590
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc
@@ -0,0 +1,254 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h"
+
+#include <fstream>
+#include <iomanip>
+#include <set>
+#include "utils/ms_utils.h"
+#include "minddata/dataset/core/config_manager.h"
+#include "minddata/dataset/core/tensor_shape.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
+#include "minddata/dataset/engine/db_connector.h"
+#include "minddata/dataset/engine/execution_tree.h"
+
+namespace mindspore {
+namespace dataset {
+
+const size_t kWavHandSize=44;
+const size_t kReadbufferSize=20480;
+const std::string dataDirectory = "wav";
+const std::string labelDirectory = "etc";
+const std::string labelFileName = "txt.done.data";
+
+const std::string pre="cmu_us_";
+const std::string suf="_arctic";
+
+CmuArcticOp::CmuArcticOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
+             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
+    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
+      usage_(usage),
+      folder_path_(folder_path),
+      data_schema_(std::move(data_schema)) {
+  io_block_queues_.Init(num_workers, queue_size);
+}
+
+Status CmuArcticOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
+  CmuArcticLabelTuple audio_tuple = audio_label_tuple_[row_id];
+  std::shared_ptr <Tensor> waveform, rate, utterance, utterance_id;
+  RETURN_IF_NOT_OK(Tensor::CreateFromTensor(audio_tuple.waveform, &waveform));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.sample_rate, &rate));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance, &utterance));
+  RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance_id, &utterance_id));
+  (*trow) = TensorRow(row_id, {std::move(waveform), std::move(rate), std::move(utterance), std::move(utterance_id)});
+  trow->setPath({audio_names_[row_id].first});
+  return Status::OK();
+}
+
+void CmuArcticOp::Print(std::ostream &out, bool show_all) const {
+  if (!show_all) {
+    // Call the super class for displaying any common 1-liner info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal 1-liner info for this op
+    out << "\n";
+  }
+  else {
+    // Call the super class for displaying any common detailed info
+    ParallelOp::Print(out, show_all);
+    // Then show any custom derived-internal stuff
+    out << "\nNumber of rows:" << num_rows_ << "\nCmuArctic Directory: " << folder_path_ << "\n\n";
+  }
+}
+
+// Derived from RandomAccessOp
+Status CmuArcticOp::GetClassIds(std::map<std::string, std::vector<int64_t>> *cls_ids) const {
+  if (cls_ids == nullptr || !cls_ids->empty() || audio_label_tuple_.empty()) {
+    if (audio_label_tuple_.empty()) {
+      RETURN_STATUS_UNEXPECTED("No audio found in dataset, please check if Op read audios successfully or not.");
+    }
+    else {
+      RETURN_STATUS_UNEXPECTED(
+          "Map for storaging audio-index pair is nullptr or has been set in other place,"
+          "it must be empty before using GetClassIds.");
+    }
+  }
+  for (size_t i = 0; i < audio_label_tuple_.size(); ++i) {
+    (*cls_ids)[audio_label_tuple_[i].utterance_id].push_back(i);//
+  }
+  for (auto &pair : (*cls_ids)) {
+    pair.second.shrink_to_fit();
+  }
+  return Status::OK();
+}
+
+
+Status CmuArcticOp::CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count) {
+  *count = 0;
+  const int64_t num_samples = 0;
+  const int64_t start_index = 0;
+  auto sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
+  auto schema = std::make_unique<DataSchema>();
+
+  RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1)));
+  TensorShape scalar_rate = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+      schema->AddColumn(ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0,
+                      &scalar_rate)));
+  TensorShape scalar_utterance = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+      schema->AddColumn(ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0,
+                      &scalar_utterance)));
+  TensorShape scalar_utterance_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+      schema->AddColumn(ColDescriptor("utterance_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0,
+                      &scalar_utterance_id)));
+  std::shared_ptr <ConfigManager> cfg = GlobalContext::config_manager();
+
+  int32_t num_workers = cfg->num_parallel_workers();
+  int32_t op_connect_size = cfg->op_connector_size();
+  auto op = std::make_shared<CmuArcticOp>(usage, num_workers, dir, op_connect_size, std::move(schema),
+                      std::move(sampler));
+  RETURN_IF_NOT_OK(op->WalkAllFiles());
+  *count = op->audio_names_.size();
+  return Status::OK();
+}
+
+Status CmuArcticOp::ComputeColMap() {
+  // set the column name map (base class field)
+  if (column_name_id_map_.empty()) {
+    for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
+      column_name_id_map_[data_schema_->column(i).name()] = i;
+    }
+  }
+  else {
+    MS_LOG(WARNING) << "Column name map is already set!";
+  }
+  return Status::OK();
+}
+
+Status CmuArcticOp::ReadLabel() {
+  char buffer[1024];
+  for (std::string u:label_files_) {
+    std::ifstream in(u);
+    while (!in.eof()) {
+      in.getline(buffer, 1024);
+      if (buffer[0] != '(')
+        break;
+      int32_t blank[3] = {0};
+      int32_t cur = 0;
+      for (int32_t i = 0; cur < 2 && i < 1024; i++) {
+        if (buffer[i] == '"')
+          blank[cur++] = i;
+      }
+      if (cur != 2)
+        RETURN_STATUS_UNEXPECTED("Label file error!");
+      buffer[blank[0] - 1] = 0;
+      buffer[blank[1]] = 0;
+      label_pairs_.push_back({std::string(buffer + 2), std::string(buffer + blank[0] + 1)});
+    }
+  }
+  if (audio_names_.size() != label_pairs_.size())
+    RETURN_STATUS_UNEXPECTED("The number of files is different from the number of labels!");
+  std::sort(audio_names_.begin(), audio_names_.end());
+  std::sort(label_pairs_.begin(), label_pairs_.end());
+  return Status::OK();
+}
+
+Status CmuArcticOp::ReadAudio() {
+  char header[kWavHandSize];
+  short buff[kReadbufferSize];
+  const double mx = 32768.0;
+  std::vector<double> tempArr;
+  for (uint32_t i = 0; i < audio_names_.size(); i++) {
+    if (audio_names_[i].first != label_pairs_[i].first + ".wav") {
+      RETURN_STATUS_UNEXPECTED("An error occurred between the label and the file content!");
+    }
+    tempArr.clear();
+    auto item = audio_names_[i];
+    const char *dir = item.second.data();
+    FILE *fp = fopen(dir, "rb");
+    if (fp == NULL) {
+      MS_LOG(WARNING) << "File missing . dir:" << dir;
+      continue;
+    }
+    uint32_t s = fread(header, 1, kWavHandSize, fp);
+    if (s != kWavHandSize)
+      RETURN_STATUS_UNEXPECTED("Audio header error!");
+    uint32_t rate = *(uint32_t * )(header + 0x18);
+    uint32_t frame = *(uint32_t * )(header + 0x28) / 2;
+    uint32_t surplus = frame;
+    while (surplus) {
+      uint32_t len = fread(buff, 2, kReadbufferSize, fp);
+      for (uint32_t i = 0; i < len; i++) {
+        tempArr.push_back(buff[i] / mx);
+      }
+      surplus -= len;
+    }
+    fclose(fp);
+    std::shared_ptr <Tensor> audio;
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(tempArr, &audio));
+    audio_label_tuple_.push_back({audio, rate, label_pairs_[i].second, label_pairs_[i].first});
+  }
+  num_rows_ = audio_names_.size();
+  return Status::OK();
+}
+
+Status CmuArcticOp::WalkAllFiles() {
+  Path dir(folder_path_);
+  Path fullDir = (dir + pre + usage_ + suf) / dataDirectory;
+  Path label = (dir + pre + usage_ + suf) / labelDirectory / labelFileName;
+  label_files_.push_back(label.toString());
+  auto dirIt = Path::DirIterator::OpenDirectory(&fullDir);
+  if (dirIt != nullptr) {
+    while (dirIt->hasNext()) {
+      Path file = dirIt->next();
+      std::string fileName = file.toString();
+      auto pos = fileName.find_last_of('.');
+      std::string ext = fileName.substr(pos);
+      if (ext == ".wav") {
+        audio_names_.push_back({file.Basename(), file.toString()});
+      }
+      else {
+        MS_LOG(WARNING) << "File name format error :" << file.toString() << ".";
+      }
+    }
+  }
+  else {
+    MS_LOG(WARNING) << "Unable to open directory " << fullDir.toString() << ".";
+  }
+  return Status::OK();
+}
+
+Status CmuArcticOp::LaunchThreadsAndInitOp() {
+  if (tree_ == nullptr) {
+    RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set.");
+  }
+  RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
+  RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
+  RETURN_IF_NOT_OK(
+      tree_->LaunchWorkers(num_workers_, std::bind(&CmuArcticOp::WorkerEntry, this, std::placeholders::_1), "",
+                 id()));
+  TaskManager::FindMe()->Post();
+  RETURN_IF_NOT_OK(this->WalkAllFiles());
+  RETURN_IF_NOT_OK(this->ReadLabel());
+  RETURN_IF_NOT_OK(this->ReadAudio());
+  RETURN_IF_NOT_OK(this->InitSampler());  // handle shake with sampler
+  return Status::OK();
+}
+
+}  // namespace dataset
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h
new file mode 100644
index 00000000000..bb7ceff5a5a
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h
@@ -0,0 +1,126 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_
+
+#include <memory>
+#include <string>
+#include <algorithm>
+#include <map>
+#include <vector>
+#include <utility>
+
+#include "minddata/dataset/core/tensor.h"
+
+#include "minddata/dataset/engine/data_schema.h"
+#include "minddata/dataset/engine/datasetops/parallel_op.h"
+#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/util/queue.h"
+#include "minddata/dataset/util/status.h"
+#include "minddata/dataset/util/wait_post.h"
+
+namespace mindspore {
+namespace dataset {
+
+
+
+
+
+struct CmuArcticLabelTuple{
+  std::shared_ptr<Tensor> waveform;
+  uint32_t sample_rate;
+  std::string utterance;
+  std::string utterance_id;
+};
+
+
+class CmuArcticOp : public MappableLeafOp {
+ public:
+  // Constructor
+  // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' ,'valid'or 'all'
+  // @param int32_t num_workers - number of workers reading audios in parallel
+  // @param std::string folder_path - dir directory of mnist
+  // @param int32_t queue_size - connector queue size
+  // @param std::unique_ptr<DataSchema> data_schema - the schema of the mnist dataset
+  // @param td::unique_ptr<Sampler> sampler - sampler tells CmuArcticOp what to read
+  CmuArcticOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
+          std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
+
+  // Destructor.
+  ~CmuArcticOp() = default;
+
+  // Method derived from RandomAccess Op, enable Sampler to get all ids for each class
+  // @param (std::map<uint64_t, std::vector<uint64_t >> * map - key label, val all ids for this class
+  // @return Status The status code returned
+  Status GetClassIds(std::map<std::string, std::vector<int64_t>> *cls_ids) const ;
+
+  // A print method typically used for debugging
+  // @param out
+  // @param show_all
+  void Print(std::ostream &out, bool show_all) const override;
+
+  // Function to count the number of samples in the MNIST dataset
+  // @param dir path to the MNIST directory
+  // @param count output arg that will hold the minimum of the actual dataset size and numSamples
+  // @return
+
+ static Status CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count);
+
+  // Op name getter
+  // @return Name of the current Op
+ std::string Name() const override { return "CmuArcticOp"; }
+
+ private:
+  // Load a tensor row according to a pair
+  // @param row_id_type row_id - id for this tensor row
+  // @param ImageLabelPair pair - <audiofile,label>
+  // @param TensorRow row - audio & label read into this tensor row
+  // @return Status The status code returned
+  Status LoadTensorRow(row_id_type row_id, TensorRow *row) override;
+
+  Status ReadAudio();
+
+  Status ReadLabel();
+
+  // Read all files in the directory
+  // @return Status The status code returned
+  Status WalkAllFiles();
+
+  // Called first when function is called
+  // @return Status The status code returned
+  Status LaunchThreadsAndInitOp() override;
+
+  // Private function for computing the assignment of the column name map.
+  // @return - Status
+  Status ComputeColMap() override;
+
+
+  std::string folder_path_;  // directory of audio folder
+  const std::string usage_;  
+  std::unique_ptr<DataSchema> data_schema_;
+  std::vector<CmuArcticLabelTuple> audio_label_tuple_;
+  std::vector<std::pair<std::string,std::string>> audio_names_;
+  std::vector<std::pair<std::string,std::string>> label_pairs_;
+  std::vector<std::string> label_files_;
+};
+
+
+
+}  // namespace dataset
+}  // namespace mindspore
+#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc
index 6a3c17f39c2..65735a488f9 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc
@@ -17,10 +17,10 @@
 
 #include <algorithm>
 #include <fstream>
+#include <iomanip>
 #include <set>
 #include <utility>
 
-#include "debug/common.h"
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
@@ -94,13 +94,7 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const {
 }
 
 Status FlickrOp::ParseFlickrData() {
-  auto real_file_path = Common::GetRealPath(file_path_);
-  if (!real_file_path.has_value()) {
-    MS_LOG(ERROR) << "Get real path failed, path=" << file_path_;
-    RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + file_path_);
-  }
-
-  std::ifstream file_handle(real_file_path.value());
+  std::ifstream file_handle(file_path_);
   if (!file_handle.is_open()) {
     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_);
   }
@@ -135,11 +129,7 @@ Status FlickrOp::ParseFlickrData() {
       }
 
       bool valid = false;
-      Status type_check = CheckImageType(image_file_path, &valid);
-      if (type_check.IsError()) {
-        file_handle.close();
-        RETURN_IF_NOT_OK(type_check);
-      }
+      RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid));
       if (!valid) {
         continue;
       }
@@ -163,16 +153,10 @@ Status FlickrOp::ParseFlickrData() {
 // Optimization: Could take in a tensor
 // This function does not return status because we want to just skip bad input, not crash
 Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) {
-  auto real_file_name = Common::GetRealPath(file_name);
-  if (!real_file_name.has_value()) {
-    MS_LOG(ERROR) << "Get real path failed, path=" << file_name;
-    RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + file_name);
-  }
-
   std::ifstream file_handle;
   constexpr int read_num = 3;
   *valid = false;
-  file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in);
+  file_handle.open(file_name, std::ios::binary | std::ios::in);
   if (!file_handle.is_open()) {
     RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name);
   }
@@ -240,7 +224,7 @@ Status FlickrOp::ComputeColMap() {
   // Set the column name map (base class field)
   if (column_name_id_map_.empty()) {
     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-      column_name_id_map_[data_schema_->Column(i).Name()] = i;
+      column_name_id_map_[data_schema_->column(i).name()] = i;
     }
   } else {
     MS_LOG(WARNING) << "Column name map is already set!";
diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
index 48b8597be9b..91d7c14566c 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc
@@ -223,7 +223,7 @@ Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, i
 
 Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint8_t> &columns_blob,
                                    const mindrecord::json &columns_json, const mindrecord::TaskType task_type) {
-  for (uint32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) {
+  for (int32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) {
     auto column_name = columns_to_load_[i_col];
 
     // Initialize column parameters
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc
index e818089636d..55e13659c67 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc
@@ -31,10 +31,18 @@ Status DatasetCacheImpl::Build() {
 
   CacheClient::Builder builder;
   builder.SetSessionId(session_id_).SetCacheMemSz(cache_mem_sz_).SetSpill(spill_);
-  if (hostname_) builder.SetHostname(hostname_.value());
-  if (port_) builder.SetPort(port_.value());
-  if (num_connections_) builder.SetNumConnections(num_connections_.value());
-  if (prefetch_sz_) builder.SetPrefetchSize(prefetch_sz_.value());
+  if (hostname_) {
+    (void)builder.SetHostname(hostname_.value());
+  }
+  if (port_) {
+    (void)builder.SetPort(port_.value());
+  }
+  if (num_connections_) {
+    (void)builder.SetNumConnections(num_connections_.value());
+  }
+  if (prefetch_sz_) {
+    (void)builder.SetPrefetchSize(prefetch_sz_.value());
+  }
   return builder.Build(&cache_client_);
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
index bb3752d0505..a591484cc4b 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h
@@ -79,14 +79,15 @@ constexpr char kCelebANode[] = "CelebADataset";
 constexpr char kCifar100Node[] = "Cifar100Dataset";
 constexpr char kCifar10Node[] = "Cifar10Dataset";
 constexpr char kCLUENode[] = "CLUEDataset";
+constexpr char kCmuArcticNode[] = "CmuArcticDataset";
 constexpr char kCocoNode[] = "CocoDataset";
 constexpr char kCSVNode[] = "CSVDataset";
+constexpr char kFlickrNode[] = "FlickrDataset";
 constexpr char kGeneratorNode[] = "GeneratorDataset";
 constexpr char kImageFolderNode[] = "ImageFolderDataset";
 constexpr char kManifestNode[] = "ManifestDataset";
 constexpr char kMindDataNode[] = "MindDataDataset";
 constexpr char kMnistNode[] = "MnistDataset";
-constexpr char kLibriSpeechNode[] = "LibriSpeechDataset";
 constexpr char kRandomNode[] = "RandomDataset";
 constexpr char kTextFileNode[] = "TextFileDataset";
 constexpr char kTFRecordNode[] = "TFRecordDataset";
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
index 883f1673ac5..e41b475c694 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc
@@ -71,13 +71,13 @@ Status EpochCtrlNode::ValidateParams() {
 }
 
 // Visitor accepting method for IRNodePass
-Status EpochCtrlNode::Accept(IRNodePass *p, bool *const modified) {
+Status EpochCtrlNode::Accept(IRNodePass *const p, bool *const modified) {
   // Downcast shared pointer then call visitor
   return p->Visit(shared_from_base<EpochCtrlNode>(), modified);
 }
 
 // Visitor accepting method for IRNodePass
-Status EpochCtrlNode::AcceptAfter(IRNodePass *p, bool *const modified) {
+Status EpochCtrlNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
   // Downcast shared pointer then call visitor
   return p->VisitAfter(shared_from_base<EpochCtrlNode>(), modified);
 }
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
index 709f92afa43..867a3010674 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h
@@ -67,13 +67,13 @@ class EpochCtrlNode : public RepeatNode {
   /// \param[in] p The node to visit
   /// \param[out] modified Indicator if the node was modified
   /// \return Status of the node visit
-  Status Accept(IRNodePass *p, bool *const modified) override;
+  Status Accept(IRNodePass *const p, bool *const modified) override;
 
   /// \brief Base-class override for accepting IRNodePass visitor
   /// \param[in] p The node to visit
   /// \param[out] modified Indicator if the node was modified
   /// \return Status of the node visit
-  Status AcceptAfter(IRNodePass *p, bool *const modified) override;
+  Status AcceptAfter(IRNodePass *const p, bool *const modified) override;
 };
 
 }  // namespace dataset
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
index d33d89ffde9..4ca3d503641 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt
@@ -10,6 +10,7 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES
         clue_node.cc
         coco_node.cc
         csv_node.cc
+        flickr_node.cc
         image_folder_node.cc
         manifest_node.cc
         minddata_node.cc
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
index 2617c11fa03..54d191be18a 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc
@@ -83,7 +83,7 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
 }
 
 // Get the shard id of node
-Status AlbumNode::GetShardId(int32_t *shard_id) {
+Status AlbumNode::GetShardId(int32_t *const shard_id) {
   *shard_id = sampler_->ShardId();
 
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
index dc19c2c1ca5..23cd4519995 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h
@@ -59,7 +59,7 @@ class AlbumNode : public MappableSourceNode {
 
   /// \brief Get the shard id of node
   /// \return Status Status::OK() if get shard id successfully
-  Status GetShardId(int32_t *shard_id) override;
+  Status GetShardId(int32_t *const shard_id) override;
 
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc
new file mode 100644
index 00000000000..f86485a0168
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc
@@ -0,0 +1,107 @@
+#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h"
+
+#include "minddata/dataset/util/status.h"
+namespace mindspore {
+namespace dataset {
+    
+CmuArcticNode::CmuArcticNode(std::string dataset_dir, std::string usage, std::shared_ptr<SamplerObj> sampler,
+                     std::shared_ptr<DatasetCache> cache)  
+    : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {}
+    
+void CmuArcticNode::Print(std::ostream &out) const { out << Name(); }
+    
+std::shared_ptr<DatasetNode> CmuArcticNode::Copy() {
+  std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+  auto node = std::make_shared<CmuArcticNode>(dataset_dir_, usage_, sampler, cache_);
+  return node;
+}
+    
+Status CmuArcticNode::ValidateParams() {
+  RETURN_IF_NOT_OK(DatasetNode::ValidateParams());
+  RETURN_IF_NOT_OK(ValidateDatasetDirParam("CmuArcticNode", dataset_dir_));
+  RETURN_IF_NOT_OK(ValidateDatasetSampler("CmuArcticNode", sampler_));
+  RETURN_IF_NOT_OK(ValidateStringValue("CmuArcticNode", usage_, {"aew", "ahw", "aup", "awb", "axb", "bdl", "clb", "eey", "fem", "gka", "jmk", "ksp", "ljm", "lnh", "rms", "rxr", "slp" , "slt"}));
+  return Status::OK();
+}
+    
+Status CmuArcticNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
+  // Do internal Schema generation.
+  auto schema = std::make_unique<DataSchema>();
+  
+
+  RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1)));  
+  TensorShape scalar_rate = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_rate)));
+  TensorShape scalar_utterance = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar_utterance)));
+  TensorShape scalar_utterance_id = TensorShape::CreateScalar();
+  RETURN_IF_NOT_OK(
+    schema->AddColumn(ColDescriptor("utterance_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar_utterance_id)));
+
+
+
+  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
+  RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
+
+  auto op = std::make_shared<CmuArcticOp>(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema),std::move(sampler_rt));
+  op->set_total_repeats(GetTotalRepeats());
+  op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
+  node_ops->push_back(op);
+
+  return Status::OK();
+}
+    
+// Get the shard id of node
+Status CmuArcticNode::GetShardId(int32_t *shard_id) {
+  *shard_id = sampler_->ShardId();
+  return Status::OK();
+}
+
+    
+// Get Dataset size
+Status CmuArcticNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,int64_t *dataset_size) {
+  if (dataset_size_ > 0) {
+    *dataset_size = dataset_size_;
+    return Status::OK();
+  }
+  int64_t num_rows, sample_size;
+  RETURN_IF_NOT_OK(CmuArcticOp::CountTotalRows(dataset_dir_, usage_, &num_rows));
+  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
+  RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
+  sample_size = sampler_rt->CalculateNumSamples(num_rows);
+  if (sample_size == -1) {
+    RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size));
+  }
+  *dataset_size = sample_size;
+  dataset_size_ = *dataset_size;
+  return Status::OK();
+}
+
+    
+Status CmuArcticNode::to_json(nlohmann::json *out_json) {
+  nlohmann::json args, sampler_args;
+  RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args));
+  args["sampler"] = sampler_args;
+  args["num_parallel_workers"] = num_workers_;
+  args["dataset_dir"] = dataset_dir_;
+  args["usage"] = usage_;
+  if (cache_ != nullptr) {
+    nlohmann::json cache_args;
+    RETURN_IF_NOT_OK(cache_->to_json(&cache_args));
+    args["cache"] = cache_args;
+  }
+  *out_json = args;
+  return Status::OK();
+}
+   
+} // namespace dataset
+} // namespace mindspor
\ No newline at end of file
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h
new file mode 100644
index 00000000000..6b79dd07a31
--- /dev/null
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h
@@ -0,0 +1,76 @@
+#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CUMARCTIC_NODE_H_
+#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CUMARCTIC_NODE_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+    
+#include "minddata/dataset/engine/ir/datasetops/dataset_node.h"
+namespace mindspore {
+namespace dataset {
+class CmuArcticNode:public MappableSourceNode {
+public:
+
+CmuArcticNode(std::string dataset_dir, std::string usage, std::shared_ptr<SamplerObj> sampler, std::shared_ptr<DatasetCache> cache);   
+
+~ CmuArcticNode() = default;
+    
+/// \brief Node name getter
+/// \return Name of the current node
+std::string Name() const override { return "kCmuArcticNode"; } 
+
+/// \brief Print the description
+/// \param out - The output stream to write output to
+void Print(std::ostream &out) const override;
+    
+/// \brief Copy the node to a new object
+/// \return A shared pointer to the new copy
+std::shared_ptr<DatasetNode> Copy() override;
+    
+/// \brief a base class override function to create the required runtime dataset op objects for this class
+/// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
+/// \return Status Status::OK() if build successfully
+Status Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) override;
+    
+/// \brief Parameters validation
+/// \return Status Status::OK() if all the parameters are valid
+Status ValidateParams() override;
+    
+/// \brief Get the shard id of node    什么是shard id？？
+/// \return Status Status::OK() if get shard id successfully
+Status GetShardId(int32_t *shard_id) override;
+    
+/// \brief Base-class override for GetDatasetSize
+/// \param[in] size_getter Shared pointer to DatasetSizeGetter
+/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
+///     dataset size at the expense of accuracy.
+/// \param[out] dataset_size the size of the dataset
+/// \return Status of the function
+Status GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
+                        int64_t *dataset_size) override;
+
+/// \brief Getter functions
+const std::string &DatasetDir() const { return dataset_dir_; }
+const std::string &Usage() const { return usage_; }
+        
+/// \brief Get the arguments of node
+/// \param[out] out_json JSON string of all attributes
+/// \return Status of the function
+Status to_json(nlohmann::json *out_json) override;
+    
+/// \brief Sampler getter
+/// \return SamplerObj of the current node
+std::shared_ptr<SamplerObj> Sampler() override { return sampler_; }
+
+
+void SetSampler(std::shared_ptr<SamplerObj> sampler) override { sampler_ = sampler; }
+
+private:
+std::string dataset_dir_;
+std::string usage_;
+std::shared_ptr<SamplerObj> sampler_;
+};
+
+} // namespace dataset
+} // namespace mindspore
+#endif ///home/user06/zjm/act/mindspore/mindspore/ccsrc/minddata/dataset/api
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc
index e3fa2eca3aa..b14a803ae12 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc
@@ -70,7 +70,7 @@ Status MnistNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
 }
 
 // Get the shard id of node
-Status MnistNode::GetShardId(int32_t *shard_id) {
+Status MnistNode::GetShardId(int32_t *const shard_id) {
   *shard_id = sampler_->ShardId();
 
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h
index 6c1c37a91d1..183ef75cea5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h
@@ -58,7 +58,7 @@ class MnistNode : public MappableSourceNode {
 
   /// \brief Get the shard id of node
   /// \return Status Status::OK() if get shard id successfully
-  Status GetShardId(int32_t *shard_id) override;
+  Status GetShardId(int32_t *const shard_id) override;
 
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
index d92b9f5bd1a..e1183c49389 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc
@@ -118,7 +118,7 @@ Status RandomNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops
 }
 
 // Get the shard id of node
-Status RandomNode::GetShardId(int32_t *shard_id) {
+Status RandomNode::GetShardId(int32_t *const shard_id) {
   // RandomDataset doesn't support multiple shards
   *shard_id = 0;
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
index 0758fd2bd91..f099910e677 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h
@@ -80,7 +80,7 @@ class RandomNode : public NonMappableSourceNode {
 
   /// \brief Get the shard id of node
   /// \return Status Status::OK() if get shard id successfully
-  Status GetShardId(int32_t *shard_id) override;
+  Status GetShardId(int32_t *const shard_id) override;
 
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc
index 3a5e3e97e9f..b9bf8fec4d9 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc
@@ -156,7 +156,7 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
 }
 
 // Get the shard id of node
-Status TFRecordNode::GetShardId(int32_t *shard_id) {
+Status TFRecordNode::GetShardId(int32_t *const shard_id) {
   *shard_id = shard_id_;
 
   return Status::OK();
@@ -259,7 +259,7 @@ Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) {
 }
 
 // Visitor accepting method for IRNodePass
-Status TFRecordNode::AcceptAfter(IRNodePass *p, bool *const modified) {
+Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
   // Downcast shared pointer then call visitor
   return p->VisitAfter(shared_from_base<TFRecordNode>(), modified);
 }
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h
index c56f205b580..9a9ccfc0266 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h
@@ -95,7 +95,7 @@ class TFRecordNode : public NonMappableSourceNode {
 
   /// \brief Get the shard id of node
   /// \return Status Status::OK() if get shard id successfully
-  Status GetShardId(int32_t *shard_id) override;
+  Status GetShardId(int32_t *const shard_id) override;
 
   /// \brief Base-class override for GetDatasetSize
   /// \param[in] size_getter Shared pointer to DatasetSizeGetter
@@ -152,7 +152,7 @@ class TFRecordNode : public NonMappableSourceNode {
   /// \param[in] p The node to visit
   /// \param[out] modified Indicator if the node was modified
   /// \return Status of the node visit
-  Status AcceptAfter(IRNodePass *p, bool *const modified) override;
+  Status AcceptAfter(IRNodePass *const p, bool *const modified) override;
 
  private:
   std::vector<std::string> dataset_files_;
diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc
index 9fe9eab9b93..2d0bcc6d38d 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc
@@ -117,6 +117,9 @@ Status TransferNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
 
 Status TransferNode::to_json(nlohmann::json *out_json) {
   nlohmann::json args;
+  args["queue_name"] = queue_name_;
+  args["device_type"] = device_type_;
+  args["device_id"] = device_id_;
   args["send_epoch_end"] = send_epoch_end_;
   args["total_batch"] = total_batch_;
   args["create_data_info_queue"] = create_data_info_queue_;
diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc
index 8eb9b5599fa..778c1262b5d 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc
@@ -192,7 +192,7 @@ Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const mo
 }
 
 // Adds an operator to the cached operator stack save area
-void RepeatPass::AddToCachedNodeStack(std::shared_ptr<DatasetNode> node) { cached_node_stacks_.push(node); }
+void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) { cached_node_stacks_.push(node); }
 
 // Pops an operator from the cached operator stack save area
 std::shared_ptr<DatasetNode> RepeatPass::PopFromCachedNodeStack() {
diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h
index 6c9f257bd02..708b04ba9c5 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h
@@ -112,7 +112,7 @@ class RepeatPass : public IRNodePass {
   /// \brief Adds an operator to the cached stack save area
   /// \param node - The dataset node to add to cached stack
   /// \return Status The status code returned
-  void AddToCachedNodeStack(std::shared_ptr<DatasetNode> node);
+  void AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node);
 
   /// \brief Pops an operator from the cached stack save area
   /// \return shared_ptr to the popped dataset node
diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc
index b575ce8b27e..5d53483d75c 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc
@@ -84,7 +84,7 @@ Status Serdes::Deserialize(std::string json_filepath, std::shared_ptr<DatasetNod
 }
 
 Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Fail to find children");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Failed to find children");
   std::shared_ptr<DatasetNode> child_ds;
 
   if (json_obj["children"].size() == 0) {
@@ -98,7 +98,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<Datase
     RETURN_IF_NOT_OK(CreateNode(child_ds, json_obj, ds));
   } else {
     // if json object has more than 1 children, the operation must be zip.
-    CHECK_FAIL_RETURN_UNEXPECTED((json_obj["op_type"] == "Zip"), "Fail to find right op_type - zip");
+    CHECK_FAIL_RETURN_UNEXPECTED((json_obj["op_type"] == "Zip"), "Failed to find right op_type - zip");
     std::vector<std::shared_ptr<DatasetNode>> datasets;
     for (auto child_json_obj : json_obj["children"]) {
       RETURN_IF_NOT_OK(ConstructPipeline(child_json_obj, &child_ds));
@@ -112,7 +112,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<Datase
 
 Status Serdes::CreateNode(std::shared_ptr<DatasetNode> child_ds, nlohmann::json json_obj,
                           std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Fail to find op_type");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Failed to find op_type");
   std::string op_type = json_obj["op_type"];
   if (child_ds == nullptr) {
     // if dataset doesn't have any child, then create a source dataset IR. e.g., ImageFolderNode, CocoNode
@@ -125,11 +125,11 @@ Status Serdes::CreateNode(std::shared_ptr<DatasetNode> child_ds, nlohmann::json
 }
 
 Status Serdes::CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string usage = json_obj["usage"];
   std::shared_ptr<SamplerObj> sampler;
@@ -143,9 +143,9 @@ Status Serdes::CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr<
 }
 
 Status Serdes::CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string usage = json_obj["usage"];
   std::shared_ptr<SamplerObj> sampler;
@@ -157,9 +157,9 @@ Status Serdes::CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr
 }
 
 Status Serdes::CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string usage = json_obj["usage"];
   std::shared_ptr<SamplerObj> sampler;
@@ -171,13 +171,13 @@ Status Serdes::CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_pt
 }
 
 Status Serdes::CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
   std::vector<std::string> dataset_files = json_obj["dataset_dir"];
   std::string task = json_obj["task"];
   std::string usage = json_obj["usage"];
@@ -192,11 +192,11 @@ Status Serdes::CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr<Da
 }
 
 Status Serdes::CreateCocoDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Fail to find annotation_file");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Failed to find annotation_file");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string annotation_file = json_obj["annotation_file"];
   std::string task = json_obj["task"];
@@ -211,13 +211,13 @@ Status Serdes::CreateCocoDatasetNode(nlohmann::json json_obj, std::shared_ptr<Da
 }
 
 Status Serdes::CreateCSVDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Fail to find field_delim");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Fail to find column_names");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Failed to find field_delim");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
   std::vector<std::string> dataset_files = json_obj["dataset_files"];
   std::string field_delim = json_obj["field_delim"];
   std::vector<std::shared_ptr<CsvBase>> column_defaults = {};
@@ -234,11 +234,11 @@ Status Serdes::CreateCSVDatasetNode(nlohmann::json json_obj, std::shared_ptr<Dat
 }
 
 Status Serdes::CreateImageFolderDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing");
   std::string dataset_dir = json_obj["dataset_dir"];
   bool decode = json_obj["decode"];
   std::shared_ptr<SamplerObj> sampler;
@@ -260,11 +260,11 @@ Status Serdes::CreateImageFolderDatasetNode(nlohmann::json json_obj, std::shared
 }
 
 Status Serdes::CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Fail to find dataset_file");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Failed to find dataset_file");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
   std::string dataset_file = json_obj["dataset_file"];
   std::string usage = json_obj["usage"];
   std::shared_ptr<SamplerObj> sampler;
@@ -284,9 +284,9 @@ Status Serdes::CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_pt
 }
 
 Status Serdes::CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string usage = json_obj["usage"];
   std::shared_ptr<SamplerObj> sampler;
@@ -298,11 +298,11 @@ Status Serdes::CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr<D
 }
 
 Status Serdes::CreateTextFileDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
   std::vector<std::string> dataset_files = json_obj["dataset_files"];
   int64_t num_samples = json_obj["num_samples"];
   ShuffleMode shuffle = static_cast<ShuffleMode>(json_obj["shuffle"]);
@@ -315,14 +315,14 @@ Status Serdes::CreateTextFileDatasetNode(nlohmann::json json_obj, std::shared_pt
 }
 
 Status Serdes::CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Fail to find schema");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Fail to find columns_list");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Fail to find shard_equal_rows");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Failed to find schema");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Failed to find columns_list");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Failed to find shard_equal_rows");
   std::vector<std::string> dataset_files = json_obj["dataset_files"];
   std::string schema = json_obj["schema"];
   std::vector<std::string> columns_list = json_obj["columns_list"];
@@ -339,12 +339,12 @@ Status Serdes::CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_pt
 }
 
 Status Serdes::CreateVOCDatasetNode(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler");
   std::string dataset_dir = json_obj["dataset_dir"];
   std::string task = json_obj["task"];
   std::string usage = json_obj["usage"];
@@ -398,8 +398,8 @@ Status Serdes::CreateDatasetNode(nlohmann::json json_obj, std::string op_type, s
 
 Status Serdes::CreateBatchOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                         std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Fail to find batch_size");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Fail to find drop_remainder");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Failed to find batch_size");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Failed to find drop_remainder");
   int32_t batch_size = json_obj["batch_size"];
   bool drop_remainder = json_obj["drop_remainder"];
   *result = std::make_shared<BatchNode>(ds, batch_size, drop_remainder);
@@ -408,22 +408,25 @@ Status Serdes::CreateBatchOperationNode(std::shared_ptr<DatasetNode> ds, nlohman
 
 Status Serdes::CreateMapOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                       std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Fail to find output_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Fail to find project_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Fail to find operations");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(),
+                               "Failed to find num_parallel_workers");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Failed to find project_columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Failed to find operations");
   std::vector<std::string> input_columns = json_obj["input_columns"];
   std::vector<std::string> output_columns = json_obj["output_columns"];
   std::vector<std::string> project_columns = json_obj["project_columns"];
   std::vector<std::shared_ptr<TensorOperation>> operations;
   RETURN_IF_NOT_OK(ConstructTensorOps(json_obj["operations"], &operations));
   *result = std::make_shared<MapNode>(ds, operations, input_columns, output_columns, project_columns);
+  (*result)->SetNumWorkers(json_obj["num_parallel_workers"]);
   return Status::OK();
 }
 
 Status Serdes::CreateProjectOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                           std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Fail to find columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Failed to find columns");
   std::vector<std::string> columns = json_obj["columns"];
   *result = std::make_shared<ProjectNode>(ds, columns);
   return Status::OK();
@@ -431,8 +434,8 @@ Status Serdes::CreateProjectOperationNode(std::shared_ptr<DatasetNode> ds, nlohm
 
 Status Serdes::CreateRenameOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                          std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Fail to find output_columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns");
   std::vector<std::string> input_columns = json_obj["input_columns"];
   std::vector<std::string> output_columns = json_obj["output_columns"];
   *result = std::make_shared<RenameNode>(ds, input_columns, output_columns);
@@ -441,7 +444,7 @@ Status Serdes::CreateRenameOperationNode(std::shared_ptr<DatasetNode> ds, nlohma
 
 Status Serdes::CreateRepeatOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                          std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count");
   int32_t count = json_obj["count"];
   *result = std::make_shared<RepeatNode>(ds, count);
   return Status::OK();
@@ -449,9 +452,9 @@ Status Serdes::CreateRepeatOperationNode(std::shared_ptr<DatasetNode> ds, nlohma
 
 Status Serdes::CreateShuffleOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                           std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Fail to find buffer_size");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Failed to find buffer_size");
   CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reshuffle_each_epoch") != json_obj.end(),
-                               "Fail to find reshuffle_each_epoch");
+                               "Failed to find reshuffle_each_epoch");
   int32_t buffer_size = json_obj["buffer_size"];
   bool reset_every_epoch = json_obj["reshuffle_each_epoch"];
   *result = std::make_shared<ShuffleNode>(ds, buffer_size, reset_every_epoch);
@@ -460,15 +463,35 @@ Status Serdes::CreateShuffleOperationNode(std::shared_ptr<DatasetNode> ds, nlohm
 
 Status Serdes::CreateSkipOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                        std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count");
   int32_t count = json_obj["count"];
   *result = std::make_shared<SkipNode>(ds, count);
   return Status::OK();
 }
 
+Status Serdes::CreateTransferOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                           std::shared_ptr<DatasetNode> *result) {
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("queue_name") != json_obj.end(), "Failed to find queue_name");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_type") != json_obj.end(), "Failed to find device_type");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_id") != json_obj.end(), "Failed to find device_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("send_epoch_end") != json_obj.end(), "Failed to find send_epoch_end");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("total_batch") != json_obj.end(), "Failed to find total_batch");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("create_data_info_queue") != json_obj.end(),
+                               "Failed to find create_data_info_queue");
+  std::string queue_name = json_obj["queue_name"];
+  std::string device_type = json_obj["device_type"];
+  int32_t device_id = json_obj["device_id"];
+  bool send_epoch_end = json_obj["send_epoch_end"];
+  int32_t total_batch = json_obj["total_batch"];
+  bool create_data_info_queue = json_obj["create_data_info_queue"];
+  *result = std::make_shared<TransferNode>(ds, queue_name, device_type, device_id, send_epoch_end, total_batch,
+                                           create_data_info_queue);
+  return Status::OK();
+}
+
 Status Serdes::CreateTakeOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                        std::shared_ptr<DatasetNode> *result) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count");
   int32_t count = json_obj["count"];
   *result = std::make_shared<TakeNode>(ds, count);
   return Status::OK();
@@ -490,6 +513,8 @@ Status Serdes::CreateDatasetOperationNode(std::shared_ptr<DatasetNode> ds, nlohm
     RETURN_IF_NOT_OK(CreateShuffleOperationNode(ds, json_obj, result));
   } else if (op_type == kSkipNode) {
     RETURN_IF_NOT_OK(CreateSkipOperationNode(ds, json_obj, result));
+  } else if (op_type == kTransferNode) {
+    RETURN_IF_NOT_OK(CreateTransferOperationNode(ds, json_obj, result));
   } else if (op_type == kTakeNode) {
     RETURN_IF_NOT_OK(CreateTakeOperationNode(ds, json_obj, result));
   } else {
@@ -500,12 +525,12 @@ Status Serdes::CreateDatasetOperationNode(std::shared_ptr<DatasetNode> ds, nlohm
 
 Status Serdes::ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_samples,
                                            std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Fail to find seed");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Fail to find offset");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Fail to find even_dist");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Failed to find seed");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Failed to find offset");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Failed to find even_dist");
   int64_t num_shards = json_obj["num_shards"];
   int64_t shard_id = json_obj["shard_id"];
   bool shuffle = json_obj["shuffle"];
@@ -522,8 +547,8 @@ Status Serdes::ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_
 }
 
 Status Serdes::ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Fail to find num_val");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Failed to find num_val");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle");
   int64_t num_val = json_obj["num_val"];
   bool shuffle = json_obj["shuffle"];
   *sampler = std::make_shared<PKSamplerObj>(num_val, shuffle, num_samples);
@@ -536,7 +561,7 @@ Status Serdes::ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples,
 
 Status Serdes::ConstructRandomSampler(nlohmann::json json_obj, int64_t num_samples,
                                       std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement");
   bool replacement = json_obj["replacement"];
   *sampler = std::make_shared<RandomSamplerObj>(replacement, num_samples);
   if (json_obj.find("child_sampler") != json_obj.end()) {
@@ -548,7 +573,7 @@ Status Serdes::ConstructRandomSampler(nlohmann::json json_obj, int64_t num_sampl
 
 Status Serdes::ConstructSequentialSampler(nlohmann::json json_obj, int64_t num_samples,
                                           std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Fail to find start_index");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Failed to find start_index");
   int64_t start_index = json_obj["start_index"];
   *sampler = std::make_shared<SequentialSamplerObj>(start_index, num_samples);
   if (json_obj.find("child_sampler") != json_obj.end()) {
@@ -560,7 +585,7 @@ Status Serdes::ConstructSequentialSampler(nlohmann::json json_obj, int64_t num_s
 
 Status Serdes::ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num_samples,
                                             std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Fail to find indices");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices");
   std::vector<int64_t> indices = json_obj["indices"];
   *sampler = std::make_shared<SubsetRandomSamplerObj>(indices, num_samples);
   if (json_obj.find("child_sampler") != json_obj.end()) {
@@ -572,8 +597,8 @@ Status Serdes::ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num
 
 Status Serdes::ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t num_samples,
                                               std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Fail to find weights");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Failed to find weights");
   bool replacement = json_obj["replacement"];
   std::vector<double> weights = json_obj["weights"];
   *sampler = std::make_shared<WeightedRandomSamplerObj>(weights, num_samples, replacement);
@@ -585,8 +610,8 @@ Status Serdes::ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t n
 }
 
 Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples");
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Fail to find sampler_name");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Failed to find sampler_name");
   int64_t num_samples = json_obj["num_samples"];
   std::string sampler_name = json_obj["sampler_name"];
   if (sampler_name == "DistributedSampler") {
@@ -609,7 +634,7 @@ Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr<Sampler
 
 Status Serdes::ChildSamplerFromJson(nlohmann::json json_obj, std::shared_ptr<SamplerObj> parent_sampler,
                                     std::shared_ptr<SamplerObj> *sampler) {
-  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("child_sampler") != json_obj.end(), "Fail to find child_sampler");
+  CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("child_sampler") != json_obj.end(), "Failed to find child_sampler");
   for (nlohmann::json child : json_obj["child_sampler"]) {
     std::shared_ptr<SamplerObj> child_sampler;
     RETURN_IF_NOT_OK(ConstructSampler(child, &child_sampler));
@@ -619,8 +644,8 @@ Status Serdes::ChildSamplerFromJson(nlohmann::json json_obj, std::shared_ptr<Sam
 }
 
 Status Serdes::BoundingBoxAugmentFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Fail to find transform");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Failed to find transform");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio");
   std::vector<std::shared_ptr<TensorOperation>> transforms;
   std::vector<nlohmann::json> json_operations = {};
   json_operations.push_back(op_params["transform"]);
@@ -633,14 +658,14 @@ Status Serdes::BoundingBoxAugmentFromJson(nlohmann::json op_params, std::shared_
 }
 
 Status Serdes::RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Fail to find policy");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Failed to find policy");
   nlohmann::json policy_json = op_params["policy"];
   std::vector<std::vector<std::pair<std::shared_ptr<TensorOperation>, double>>> policy;
   std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> policy_items;
   for (nlohmann::json item : policy_json) {
     for (nlohmann::json item_pair : item) {
-      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Fail to find prob");
-      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Fail to find tensor_op");
+      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Failed to find prob");
+      CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Failed to find tensor_op");
       std::vector<std::shared_ptr<TensorOperation>> operations;
       std::pair<std::shared_ptr<TensorOperation>, double> policy_pair;
       std::shared_ptr<TensorOperation> operation;
@@ -659,8 +684,8 @@ Status Serdes::RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shar
 }
 
 Status Serdes::UniformAugFromJson(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Fail to find transforms");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Fail to find num_ops");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Failed to find transforms");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Failed to find num_ops");
   std::vector<std::shared_ptr<TensorOperation>> transforms = {};
   RETURN_IF_NOT_OK(ConstructTensorOps(op_params["transforms"], &transforms));
   int32_t num_ops = op_params["num_ops"];
@@ -671,12 +696,14 @@ Status Serdes::UniformAugFromJson(nlohmann::json op_params, std::shared_ptr<Tens
 Status Serdes::ConstructTensorOps(nlohmann::json operations, std::vector<std::shared_ptr<TensorOperation>> *result) {
   std::vector<std::shared_ptr<TensorOperation>> output;
   for (auto op : operations) {
-    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_name") != op.end(), "Fail to find tensor_op_name");
-    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_params") != op.end(), "Fail to find tensor_op_params");
+    CHECK_FAIL_RETURN_UNEXPECTED(op.find("is_python_front_end_op") == op.end(),
+                                 "python operation is not yet supported");
+    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_name") != op.end(), "Failed to find tensor_op_name");
+    CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_params") != op.end(), "Failed to find tensor_op_params");
     std::string op_name = op["tensor_op_name"];
     nlohmann::json op_params = op["tensor_op_params"];
     std::shared_ptr<TensorOperation> operation = nullptr;
-    CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Fail to find " + op_name);
+    CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Failed to find " + op_name);
     RETURN_IF_NOT_OK(func_ptr_[op_name](op_params, &operation));
     output.push_back(operation);
   }
@@ -691,6 +718,7 @@ Serdes::InitializeFuncPtr() {
   ops_ptr[vision::kAutoContrastOperation] = &(vision::AutoContrastOperation::from_json);
   ops_ptr[vision::kBoundingBoxAugmentOperation] = &(BoundingBoxAugmentFromJson);
   ops_ptr[vision::kCenterCropOperation] = &(vision::CenterCropOperation::from_json);
+  ops_ptr[vision::kCropOperation] = &(vision::CropOperation::from_json);
   ops_ptr[vision::kCutMixBatchOperation] = &(vision::CutMixBatchOperation::from_json);
   ops_ptr[vision::kCutOutOperation] = &(vision::CutOutOperation::from_json);
   ops_ptr[vision::kDecodeOperation] = &(vision::DecodeOperation::from_json);
@@ -730,12 +758,20 @@ Serdes::InitializeFuncPtr() {
   ops_ptr[vision::kResizeWithBBoxOperation] = &(vision::ResizeWithBBoxOperation::from_json);
   ops_ptr[vision::kRgbaToBgrOperation] = &(vision::RgbaToBgrOperation::from_json);
   ops_ptr[vision::kRgbaToRgbOperation] = &(vision::RgbaToRgbOperation::from_json);
+  ops_ptr[vision::kRgbToBgrOperation] = &(vision::RgbToBgrOperation::from_json);
+  ops_ptr[vision::kRgbToGrayOperation] = &(vision::RgbToGrayOperation::from_json);
   ops_ptr[vision::kRotateOperation] = &(vision::RotateOperation::from_json);
+  ops_ptr[vision::kSlicePatchesOperation] = &(vision::SlicePatchesOperation::from_json);
   ops_ptr[vision::kSoftDvppDecodeRandomCropResizeJpegOperation] =
     &(vision::SoftDvppDecodeRandomCropResizeJpegOperation::from_json);
   ops_ptr[vision::kSoftDvppDecodeResizeJpegOperation] = &(vision::SoftDvppDecodeResizeJpegOperation::from_json);
   ops_ptr[vision::kSwapRedBlueOperation] = &(vision::SwapRedBlueOperation::from_json);
   ops_ptr[vision::kUniformAugOperation] = &(UniformAugFromJson);
+  ops_ptr[vision::kVerticalFlipOperation] = &(vision::VerticalFlipOperation::from_json);
+  ops_ptr[transforms::kFillOperation] = &(transforms::FillOperation::from_json);
+  ops_ptr[transforms::kOneHotOperation] = &(transforms::OneHotOperation::from_json);
+  ops_ptr[transforms::kTypeCastOperation] = &(transforms::TypeCastOperation::from_json);
+  ops_ptr[text::kToNumberOperation] = &(text::ToNumberOperation::from_json);
   return ops_ptr;
 }
 
diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.h b/mindspore/ccsrc/minddata/dataset/engine/serdes.h
index ee7e43a7097..962b622c66d 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/serdes.h
+++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.h
@@ -39,6 +39,7 @@
 #include "minddata/dataset/engine/ir/datasetops/repeat_node.h"
 #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h"
 #include "minddata/dataset/engine/ir/datasetops/skip_node.h"
+#include "minddata/dataset/engine/ir/datasetops/transfer_node.h"
 #include "minddata/dataset/engine/ir/datasetops/take_node.h"
 #include "minddata/dataset/engine/ir/datasetops/zip_node.h"
 
@@ -115,8 +116,10 @@
 #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h"
+#include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h"
 #include "minddata/dataset/kernels/ir/vision/rotate_ir.h"
+#include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h"
 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.h"
 #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.h"
 #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h"
@@ -142,7 +145,7 @@ class Serdes {
   /// \param[in] filename The file name. If specified, save the generated JSON string into the file
   /// \param[out] out_json The result json string
   /// \return Status The status code returned
-  Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json);
+  static Status SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json);
 
   /// \brief function to de-serialize JSON file to IR tree
   /// \param[in] json_filepath input path of json file
@@ -150,19 +153,19 @@ class Serdes {
   /// \return Status The status code returned
   static Status Deserialize(std::string json_filepath, std::shared_ptr<DatasetNode> *ds);
 
- protected:
-  /// \brief Helper function to save JSON to a file
-  /// \param[in] json_string The JSON string to be saved to the file
-  /// \param[in] file_name The file name
-  /// \return Status The status code returned
-  Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name);
-
   /// \brief Helper function to construct IR tree, separate zip and other operations
   /// \param[in] json_obj The JSON object to be deserialized
   /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree
   /// \return Status The status code returned
   static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr<DatasetNode> *ds);
 
+ protected:
+  /// \brief Helper function to save JSON to a file
+  /// \param[in] json_string The JSON string to be saved to the file
+  /// \param[in] file_name The file name
+  /// \return Status The status code returned
+  static Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name);
+
   /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node
   /// \param[in] child_ds children datasets that is already created
   /// \param[in] json_obj json object to read out type of the node
@@ -234,6 +237,8 @@ class Serdes {
                                            std::shared_ptr<DatasetNode> *result);
   static Status CreateSkipOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                         std::shared_ptr<DatasetNode> *result);
+  static Status CreateTransferOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
+                                            std::shared_ptr<DatasetNode> *result);
   static Status CreateTakeOperationNode(std::shared_ptr<DatasetNode> ds, nlohmann::json json_obj,
                                         std::shared_ptr<DatasetNode> *result);
 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
index f3cd204996b..e3286e968ab 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h
@@ -17,10 +17,12 @@
 #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
 #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
 
+#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
+
 #include "include/api/dual_abi_helper.h"
 #include "include/api/status.h"
 #include "minddata/dataset/include/dataset/constants.h"
@@ -33,6 +35,20 @@ class TensorOperation;
 
 // Transform operations for performing computer audio.
 namespace audio {
+/// \brief Compute the angle of complex tensor input.
+class Angle final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  Angle();
+  /// \brief Destructor.
+  ~Angle() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+};
+
 /// \brief Design two-pole band filter.
 class BandBiquad final : public TensorTransform {
  public:
@@ -56,6 +72,144 @@ class BandBiquad final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
+/// \brief Design two-pole allpass filter. Similar to SoX implementation.
+class AllpassBiquad final : public TensorTransform {
+ public:
+  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
+  /// \param[in] central_freq Central frequency (in Hz).
+  /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
+  explicit AllpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707);
+
+  /// \brief Destructor.
+  ~AllpassBiquad() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief AmplitudeToDB TensorTransform.
+/// \notes Turn a tensor from the power/amplitude scale to the decibel scale.
+class AmplitudeToDB final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] stype ['kPower', 'kMagnitude']
+  /// \param[in] ref_value Calculate db_multiplier
+  /// \param[in] amin Clamp the input waveform
+  /// \param[in] top_db Decibels cut-off value
+  explicit AmplitudeToDB(ScaleType stype = ScaleType::kPower, float ref_value = 1.0, float amin = 1e-10,
+                         float top_db = 80.0);
+
+  /// \brief Destructor.
+  ~AmplitudeToDB() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief Design two-pole band-pass filter.
+class BandpassBiquad final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
+  /// \param[in] central_freq Central frequency (in Hz).
+  /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor  (Default: 0.707).
+  /// \param[in] const_skirt_gain, If ``True``, uses a constant skirt gain (peak gain = Q). If ``False``, uses a
+  /// constant 0dB peak gain. (Default: False).
+  explicit BandpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707, bool const_skirt_gain = false);
+
+  /// \brief Destructor.
+  ~BandpassBiquad() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief Design two-pole band-reject filter. Similar to SoX implementation.
+class BandrejectBiquad final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
+  /// \param[in] central_freq Central frequency (in Hz).
+  /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
+  explicit BandrejectBiquad(int32_t sample_rate, float central_freq, float Q = 0.707);
+
+  /// \brief Destructor.
+  ~BandrejectBiquad() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief Design a bass tone-control effect.
+class BassBiquad final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz).
+  /// \param[in] gain Desired gain at the boost (or attenuation) in dB.
+  /// \param[in] central_freq Central frequency (in Hz).
+  /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707).
+  explicit BassBiquad(int32_t sample_rate, float gain, float central_freq = 100, float Q = 0.707);
+
+  /// \brief Destructor.
+  ~BassBiquad() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief TimeStretch TensorTransform
+/// \notes Stretch STFT in time at a given rate, without changing the pitch.
+class TimeStretch final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] hop_length Length of hop between STFT windows. Default: None.
+  /// \param[in] n_freq Number of filter banks form STFT. Default: 201.
+  /// \param[in] fixed_rate Rate to speed up or slow down the input in time. Default: None.
+  explicit TimeStretch(float hop_length = std::numeric_limits<float>::quiet_NaN(), int n_freq = 201,
+                       float fixed_rate = std::numeric_limits<float>::quiet_NaN());
+
+  /// \brief Destructor.
+  ~TimeStretch() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
 }  // namespace audio
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
index 851ca5637e2..7af6fb81267 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h
@@ -49,6 +49,12 @@ enum class ShuffleMode {
   kInfile = 3   ///< Shuffle data within each file.
 };
 
+/// \brief Possible scale for input audio.
+enum class ScaleType {
+  kMagnitude = 0,  ///< Audio scale is magnitude.
+  kPower = 1,      ///< Audio scale is power.
+};
+
 /// \brief The method of padding.
 enum class BorderType {
   kConstant = 0,  ///< Fill the border with constant values.
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
index 53e47112da7..90017f22968 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h
@@ -1091,6 +1091,64 @@ inline std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_f
                                       cache);
 }
 
+class FlickrDataset : public Dataset {
+ public:
+  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
+                         const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
+  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
+                         const Sampler *sampler, const std::shared_ptr<DatasetCache> &cache);
+  explicit FlickrDataset(const std::vector<char> &dataset_dir, const std::vector<char> &annotation_file, bool decode,
+                         const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
+  ~FlickrDataset() = default;
+};
+
+/// \brief Function to create a FlickrDataset
+/// \notes The generated dataset has two columns ["image", "annotation"]
+/// \param[in] dataset_dir The dataset dir to be read
+/// \param[in] annotation_file The annotation file to be read
+/// \param[in] decode Decode the images after reading (default=false).
+/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
+///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
+/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
+/// \return Shared pointer to the current FlickrDataset
+inline std::shared_ptr<FlickrDataset> Flickr(
+  const std::string &dataset_dir, const std::string &annotation_file, bool decode = false,
+  const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
+  const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
+                                         cache);
+}
+
+/// \brief Function to create a FlickrDataset
+/// \notes The generated dataset has two columns ["image", "annotation"]
+/// \param[in] dataset_dir The dataset dir to be read
+/// \param[in] annotation_file The annotation file to be read
+/// \param[in] decode Decode the images after reading.
+/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
+/// \return Shared pointer to the current FlickrDataset
+inline std::shared_ptr<FlickrDataset> Flickr(const std::string &dataset_dir, const std::string &annotation_file,
+                                             bool decode, const Sampler *sampler,
+                                             const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
+                                         cache);
+}
+
+/// \brief Function to create a FlickrDataset
+/// \notes The generated dataset has two columns ["image", "annotation"]
+/// \param[in] dataset_dir The dataset dir to be read
+/// \param[in] annotation_file The annotation file to be read
+/// \param[in] decode Decode the images after reading.
+/// \param[in] sampler Sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
+/// \return Shared pointer to the current FlickrDataset
+inline std::shared_ptr<FlickrDataset> Flickr(const std::string &dataset_dir, const std::string &annotation_file,
+                                             bool decode, const std::reference_wrapper<Sampler> sampler,
+                                             const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<FlickrDataset>(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler,
+                                         cache);
+}
+
 class ImageFolderDataset : public Dataset {
  public:
   explicit ImageFolderDataset(const std::vector<char> &dataset_dir, bool decode,
@@ -1487,6 +1545,60 @@ inline std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &
   return std::make_shared<ConcatDataset>(std::vector({datasets1, datasets2}));
 }
 
+class CmuArcticDataset : public Dataset {
+ public:
+  explicit CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+                        const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
+  explicit CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
+                        const std::shared_ptr<DatasetCache> &cache);
+  explicit CmuArcticDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
+                        const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
+  ~CmuArcticDataset() = default;
+};
+
+/// \brief Function to create a CmuArcticDataset.
+/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of GTZAN, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
+///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the CmuArcticDataset.
+inline std::shared_ptr<CmuArcticDataset> CmuArctic(const std::string &dataset_dir, const std::string &usage = "all",
+                                           const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
+                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<CmuArcticDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
+
+/// \brief Function to create a CmuArcticDataset.
+/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of GTZAN, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the CmuArcticDataset.
+inline std::shared_ptr<CmuArcticDataset> CmuArctic(const std::string &dataset_dir, const std::string &usage,
+                                           const Sampler *sampler,
+                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<CmuArcticDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
+
+/// \brief Function to create a CmuArcticDataset.
+/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
+/// \param[in] dataset_dir Path to the root directory that contains the dataset.
+/// \param[in] usage Part of dataset of GTZAN, can be "training", "validation", "testing" or "all" (default = "all").
+/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
+/// \return Shared pointer to the CmuArcticDataset.
+inline std::shared_ptr<CmuArcticDataset> CmuArctic(const std::string &dataset_dir, const std::string &usage,
+                                           const std::reference_wrapper<Sampler> sampler,
+                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
+  return std::make_shared<CmuArcticDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
+}
+
+
 class RandomDataDataset : public Dataset {
  public:
   RandomDataDataset(const int32_t &total_rows, std::shared_ptr<SchemaObj> schema,
@@ -1519,60 +1631,6 @@ std::shared_ptr<RandomDataDataset> RandomData(const int32_t &total_rows = 0, con
   return ds;
 }
 
-class LibriSpeechDataset : public Dataset {
- public:
-  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
-                        const std::shared_ptr<Sampler> &sampler, const std::shared_ptr<DatasetCache> &cache);
-  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage, const Sampler *sampler,
-                        const std::shared_ptr<DatasetCache> &cache);
-  explicit LibriSpeechDataset(const std::vector<char> &dataset_dir, const std::vector<char> &usage,
-                        const std::reference_wrapper<Sampler> sampler, const std::shared_ptr<DatasetCache> &cache);
-  ~LibriSpeechDataset() = default;
-};
-
-/// \brief Function to create a LibriSpeechDataset.
-/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
-/// \param[in] dataset_dir Path to the root directory that contains the dataset.
-/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
-/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not
-///     given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
-/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
-/// \return Shared pointer to the LibriSpeechDataset.
-inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage = "all",
-                                           const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
-                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
-}
-
-
-/// \brief Function to create a LibriSpeechDataset.
-/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
-/// \param[in] dataset_dir Path to the root directory that contains the dataset.
-/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
-/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
-/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
-/// \return Shared pointer to the LibriSpeechDataset.
-inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage,
-                                           const Sampler *sampler,
-                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
-}
-
-
-/// \brief Function to create a LibriSpeechDataset.
-/// \note The generated dataset has two columns ["audio", "samplerate", "label"].
-/// \param[in] dataset_dir Path to the root directory that contains the dataset.
-/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all").
-/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
-/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
-/// \return Shared pointer to the LibriSpeechDataset.
-inline std::shared_ptr<LibriSpeechDataset> LibriSpeech(const std::string &dataset_dir, const std::string &usage,
-                                           const std::reference_wrapper<Sampler> sampler,
-                                           const std::shared_ptr<DatasetCache> &cache = nullptr) {
-  return std::make_shared<LibriSpeechDataset>(StringToChar(dataset_dir), StringToChar(usage), sampler, cache);
-}
-
-
 class TextFileDataset : public Dataset {
  public:
   explicit TextFileDataset(const std::vector<std::vector<char>> &dataset_files, int64_t num_samples,
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
index 5eb5ca2eec0..8bb241fdb3a 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h
@@ -37,11 +37,12 @@ class Sampler : std::enable_shared_from_this<Sampler> {
   friend class CLUEDataset;
   friend class CocoDataset;
   friend class CSVDataset;
+  friend class FlickrDataset;
   friend class ImageFolderDataset;
   friend class ManifestDataset;
-  friend class LibriSpeechDataset;
   friend class MindDataDataset;
   friend class MnistDataset;
+  friend class CmuArcticDataset;
   friend class RandomDataDataset;
   friend class TextFileDataset;
   friend class TFRecordDataset;
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
index 0af4b98efbf..c16b6e9e22b 100644
--- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
+++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h
@@ -57,7 +57,31 @@ class AutoContrast final : public TensorTransform {
   std::shared_ptr<Data> data_;
 };
 
-/// \brief Apply a given image transform on a random selection of bounding box regions of a given image.
+/// \brief AdjustGamma TensorTransform.
+/// \notes Apply gamma correction on input image.
+class AdjustGamma final : public TensorTransform {
+ public:
+  /// \brief Constructor.
+  /// \param[in] gamma Non negative real number, which makes the output image pixel value
+  ///     exponential in relation to the input image pixel value.
+  /// \param[in] gain The constant multiplier.
+  explicit AdjustGamma(float gamma, float gain = 1);
+
+  /// \brief Destructor.
+  ~AdjustGamma() = default;
+
+ protected:
+  /// \brief Function to convert TensorTransform object into a TensorOperation object.
+  /// \return Shared pointer to TensorOperation object.
+  std::shared_ptr<TensorOperation> Parse() override;
+
+ private:
+  struct Data;
+  std::shared_ptr<Data> data_;
+};
+
+/// \brief BoundingBoxAugment TensorTransform.
+/// \note  Apply a given image transform on a random selection of bounding box regions of a given image.
 class BoundingBoxAugment final : public TensorTransform {
  public:
   /// \brief Constructor.
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
index 8ef4bf82d87..47c4c2c8f71 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt
@@ -6,6 +6,7 @@ if(ENABLE_ACL)
     add_subdirectory(dvpp)
 endif()
 add_library(kernels-image OBJECT
+    adjust_gamma_op.cc
     affine_op.cc
     auto_contrast_op.cc
     bounding_box.cc
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc
index 338d257d547..3698482fe56 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc
@@ -15,21 +15,21 @@
  */
 
 #include "minddata/dataset/kernels/image/adjust_gamma_op.h"
-
+#include <memory>
 #include "minddata/dataset/kernels/data/data_utils.h"
 #include "minddata/dataset/kernels/image/image_utils.h"
 
 namespace mindspore {
 namespace dataset {
 
-constexpr float AdjustGammaOp::kGain = 1.0;
+const float AdjustGammaOp::kGain = 1.0;
 
 Status AdjustGammaOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
 
   // typecast
   CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING,
-                               "AdjustGamma: input tensor type should be int, float or double, but got: string.");
+                               "AdjustGamma: input tensor type should be [int, float, double], but got string.");
 
   if (input->type().IsFloat()) {
     std::shared_ptr<Tensor> input_tensor;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
index 389452da4e3..46939c4aa32 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc
@@ -41,9 +41,15 @@ Status CropOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector<T
   RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
   outputs.clear();
   TensorShape out = TensorShape{height_, width_};
-  if (inputs[0].Rank() == 2) outputs.emplace_back(out);
-  if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2]));
-  if (!outputs.empty()) return Status::OK();
+  if (inputs[0].Rank() == 2) {
+    (void)outputs.emplace_back(out);
+  }
+  if (inputs[0].Rank() == 3) {
+    (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
+  }
+  if (!outputs.empty()) {
+    return Status::OK();
+  }
   return Status(StatusCode::kMDUnexpectedError,
                 "Crop: invalid input shape, expected 2D or 3D input, but got input dimension is:" +
                   std::to_string(inputs[0].Rank()));
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
index 45c79a4f91a..5e5d4d16d5c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/hwc_to_chw_op.cc
@@ -31,8 +31,12 @@ Status HwcToChwOp::OutputShape(const std::vector<TensorShape> &inputs, std::vect
   outputs.clear();
   TensorShape in = inputs[0];
   TensorShape out = TensorShape{in[2], in[0], in[1]};
-  if (inputs[0].Rank() == 3) outputs.emplace_back(out);
-  if (!outputs.empty()) return Status::OK();
+  if (inputs[0].Rank() == 3) {
+    (void)outputs.emplace_back(out);
+  }
+  if (!outputs.empty()) {
+    return Status::OK();
+  }
   return Status(
     StatusCode::kMDUnexpectedError,
     "HWC2CHW: invalid input shape, expected 3D input, but got input dimension is:" + std::to_string(inputs[0].Rank()));
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
index 0478382e27c..1c3b7e35b1a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc
@@ -189,7 +189,7 @@ Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
     }
     cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv));
     *output = std::static_pointer_cast<Tensor>(output_cv);
     return Status::OK();
   } catch (const cv::Exception &e) {
@@ -600,7 +600,7 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso
     if (mode == InterpolationMode::kCubicPil) {
       cv::Mat input_roi = cv_in(roi);
       std::shared_ptr<CVTensor> input_image;
-      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, &input_image));
+      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image));
       LiteMat imIn, imOut;
       std::shared_ptr<Tensor> output_tensor;
       TensorShape new_shape = TensorShape({target_height, target_width, 3});
@@ -676,7 +676,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
       // use memcpy and don't compute the new shape since openCV has a rounding problem
       cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                      cv::BORDER_CONSTANT, fill_color);
-      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &output_cv));
+      RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv));
       RETURN_UNEXPECTED_IF_NULL(output_cv);
     }
     *output = std::static_pointer_cast<Tensor>(output_cv);
@@ -872,6 +872,64 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens
   return Status::OK();
 }
 
+Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
+                   const float &gain) {
+  try {
+    int num_channels = 1;
+    if (input->Rank() < 2) {
+      RETURN_STATUS_UNEXPECTED("AdjustGamma: image shape is not <...,H,W,C> or <H,W>.");
+    }
+    if (input->Rank() > 2) {
+      num_channels = input->shape()[-1];
+    }
+    if (num_channels != 1 && num_channels != 3) {
+      RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3.");
+    }
+    if (input->type().IsFloat()) {
+      for (auto itr = input->begin<float>(); itr != input->end<float>(); itr++) {
+        *itr = pow((*itr) * gain, gamma);
+        *itr = std::min(std::max((*itr), 0.0f), 1.0f);
+      }
+      *output = input;
+
+    } else {
+      std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input);
+      if (!input_cv->mat().data) {
+        RETURN_STATUS_UNEXPECTED("AdjustGamma: load image failed.");
+      }
+      cv::Mat input_img = input_cv->mat();
+      std::shared_ptr<CVTensor> output_cv;
+      RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
+      uchar LUT[256] = {};
+      for (int i = 0; i < 256; i++) {
+        float f = i / 255.0;
+        f = pow(f, gamma);
+        LUT[i] = static_cast<uchar>(floor(std::min(f * (255.0 + 1 - 1e-3) * gain, 255.0)));
+      }
+      if (input_img.channels() == 1) {
+        cv::MatIterator_<uchar> it = input_img.begin<uchar>();
+        cv::MatIterator_<uchar> it_end = input_img.end<uchar>();
+        for (; it != it_end; ++it) {
+          *it = LUT[(*it)];
+        }
+      } else {
+        cv::MatIterator_<cv::Vec3b> it = input_img.begin<cv::Vec3b>();
+        cv::MatIterator_<cv::Vec3b> it_end = input_img.end<cv::Vec3b>();
+        for (; it != it_end; ++it) {
+          (*it)[0] = LUT[(*it)[0]];
+          (*it)[1] = LUT[(*it)[1]];
+          (*it)[2] = LUT[(*it)[2]];
+        }
+      }
+      output_cv->mat() = input_img * 1;
+      *output = std::static_pointer_cast<Tensor>(output_cv);
+    }
+  } catch (const cv::Exception &e) {
+    RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what()));
+  }
+  return Status::OK();
+}
+
 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
                     const std::vector<uint32_t> &ignore) {
   try {
@@ -941,7 +999,7 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
     cv::merge(image_result, result);
     result.convertTo(result, input_cv->mat().type());
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
     (*output) = std::static_pointer_cast<Tensor>(output_cv);
     RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
   } catch (const cv::Exception &e) {
@@ -1042,7 +1100,7 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
     cv::Mat result;
     cv::merge(image_result, result);
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
     (*output) = std::static_pointer_cast<Tensor>(output_cv);
     RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape()));
   } catch (const cv::Exception &e) {
@@ -1138,7 +1196,7 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
       cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
     }
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv));
     // pad the dimension if shape information is only 2 dimensional, this is grayscale
     int num_channels = input_cv->shape()[CHANNEL_INDEX];
     if (input_cv->Rank() == DEFAULT_IMAGE_RANK && num_channels == MIN_IMAGE_CHANNELS &&
@@ -1283,7 +1341,7 @@ Status GaussianBlur(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
     cv::GaussianBlur(input_cv->mat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast<double>(sigma_x),
                      static_cast<double>(sigma_y));
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv));
     (*output) = std::static_pointer_cast<Tensor>(output_cv);
     return Status::OK();
   } catch (const cv::Exception &e) {
@@ -1356,8 +1414,9 @@ Status SlicePatches(const std::shared_ptr<Tensor> &input, std::vector<std::share
     for (int i = 0; i < num_height; ++i) {
       for (int j = 0; j < num_width; ++j) {
         std::shared_ptr<CVTensor> patch_cv;
-        cv::Rect patch(j * patch_w, i * patch_h, patch_w, patch_h);
-        RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_img(patch), &patch_cv));
+        cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h);
+        cv::Mat patch(out_img(rect));
+        RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv));
         (*output).push_back(std::static_pointer_cast<Tensor>(patch_cv));
       }
     }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
index a26671db498..6886f274bbd 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h
@@ -234,6 +234,16 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens
 Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &cutoff,
                     const std::vector<uint32_t> &ignore);
 
+/// \brief Returns image with gamma correction.
+/// \param[in] input: Tensor of shape <H,W,3>/<H,W,1>/<H,W> in RGB/Grayscale and any OpenCV compatible type,
+///     see CVTensor.
+/// \param[in] gamma: Non negative real number, same as gamma in the equation. gamma larger than 1 make the shadows
+///     darker, while gamma smaller than 1 make dark regions lighter.
+/// \param[in] gain: The constant multiplier.
+/// \param[out] output: Adjusted image of same shape and type.
+Status AdjustGamma(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const float &gamma,
+                   const float &gain);
+
 /// \brief Returns image with adjusted saturation.
 /// \param input: Tensor of shape <H,W,3> in RGB order and any OpenCv compatible type, see CVTensor.
 /// \param alpha: Alpha value to adjust saturation by. Should be a positive number.
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
index d10828c579c..3e1c6f6fe49 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc
@@ -1015,7 +1015,7 @@ std::vector<std::vector<float>> GetDefaultBoxes(BoxesConfig config) {
   }
   scales.push_back(1.0f);
   std::vector<std::vector<float>> default_boxes;
-  for (int i = 0; i < config.feature_size.size(); i++) {
+  for (auto i = 0; i < config.feature_size.size(); i++) {
     float sk1 = scales[i];
     float sk2 = scales[i + 1];
     float sk3 = sqrt(sk1 * sk2);
@@ -1069,10 +1069,10 @@ void ConvertBoxes(std::vector<std::vector<float>> &boxes, const std::vector<std:
 
 std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std::vector<float> &all_scores, float thres,
                           int max_boxes) {
-  int boxes_num = all_boxes.size();
+  size_t boxes_num = all_boxes.size();
   std::vector<float> areas(boxes_num);
   std::vector<int> order(boxes_num);
-  for (int i = 0; i < boxes_num; i++) {
+  for (auto i = 0; i < boxes_num; i++) {
     if (all_boxes[i].size() < 4) {
       return {};
     }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
index 1099941bffb..2ec3fb0fed3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc
@@ -410,7 +410,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
   int *a = &_a[0], *b = a + dst.width_;
   const int SCALE = 1 << 10;
   const int B_SIZE = 64;
-  int16_t WH[B_SIZE * B_SIZE * 2];
+  int16_t *WH = new int16_t[B_SIZE * B_SIZE * 2];
   int16_t A_Ptr[B_SIZE * B_SIZE];
   int r_delta = SCALE / kTabSz / 2;
   int x, y, x1, y1;
@@ -449,7 +449,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
       Remap(src, lite_part, _HW, _matA, borderType, borderValue);
     }
   }
-
+  delete[] WH;
   delete[] _a;
   return true;
 }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc
index de4c4ab5c07..9757ee1c5a3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc
@@ -46,7 +46,8 @@ Status PosterizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
                                  input->type().ToString());
   cv::LUT(in_image, lut_vector, output_img);
   std::shared_ptr<CVTensor> result_tensor;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor));
+
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &result_tensor));
   *output = std::static_pointer_cast<Tensor>(result_tensor);
   return Status::OK();
 }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc
index 5d1088a80bf..3a7bb7610be 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc
@@ -46,7 +46,7 @@ Status RandomColorOp::Compute(const std::shared_ptr<Tensor> &in, std::shared_ptr
   cv::Mat cv_out;
   cv::merge(temp, 3, cv_out);
   std::shared_ptr<CVTensor> cvt_out;
-  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, &cvt_out));
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, cvt_in->Rank(), &cvt_out));
   if (abs(t - 0.0) < eps) {
     // return grayscale
     *out = std::static_pointer_cast<Tensor>(cvt_out);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc
index b8fc8ef866d..33d209f37a5 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc
@@ -61,9 +61,15 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector<TensorShape> &inputs
   RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
   outputs.clear();
   TensorShape out = TensorShape{target_height_, target_width_};
-  if (inputs[0].Rank() == 2) outputs.emplace_back(out);
-  if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2]));
-  if (!outputs.empty()) return Status::OK();
+  if (inputs[0].Rank() == 2) {
+    (void)outputs.emplace_back(out);
+  }
+  if (inputs[0].Rank() == 3) {
+    (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
+  }
+  if (!outputs.empty()) {
+    return Status::OK();
+  }
   return Status(StatusCode::kMDUnexpectedError, "RandomCropAndResize: invalid input shape");
 }
 Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) {
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc
index e69fc2ab8b7..561e28b0262 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc
@@ -143,9 +143,15 @@ Status RandomCropOp::OutputShape(const std::vector<TensorShape> &inputs, std::ve
   RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs));
   outputs.clear();
   TensorShape out = TensorShape{crop_height_, crop_width_};
-  if (inputs[0].Rank() == 2) outputs.emplace_back(out);
-  if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2]));
-  if (!outputs.empty()) return Status::OK();
+  if (inputs[0].Rank() == 2) {
+    (void)outputs.emplace_back(out);
+  }
+  if (inputs[0].Rank() == 3) {
+    (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
+  }
+  if (!outputs.empty()) {
+    return Status::OK();
+  }
   return Status(StatusCode::kMDUnexpectedError,
                 "RandomCrop: invalid input shape, expected 2D or 3D input, but got input dimension is:" +
                   std::to_string(inputs[0].Rank()));
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc
index 62614b89c10..9e06072fc23 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc
@@ -61,9 +61,15 @@ Status ResizeOp::OutputShape(const std::vector<TensorShape> &inputs, std::vector
     outputW = size2_;
   }
   TensorShape out = TensorShape{outputH, outputW};
-  if (inputs[0].Rank() == 2) outputs.emplace_back(out);
-  if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2]));
-  if (!outputs.empty()) return Status::OK();
+  if (inputs[0].Rank() == 2) {
+    (void)outputs.emplace_back(out);
+  }
+  if (inputs[0].Rank() == 3) {
+    (void)outputs.emplace_back(out.AppendDim(inputs[0][2]));
+  }
+  if (!outputs.empty()) {
+    return Status::OK();
+  }
   return Status(StatusCode::kMDUnexpectedError, "Resize: invalid input wrong shape.");
 }
 }  // namespace dataset
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc
index 8dd690d2c25..b24359089ac 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc
@@ -63,7 +63,7 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
     cv::addWeighted(input_img, alpha_, result, 1.0 - alpha_, 0.0, result);
 
     std::shared_ptr<CVTensor> output_cv;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv));
     RETURN_UNEXPECTED_IF_NULL(output_cv);
 
     *output = std::static_pointer_cast<Tensor>(output_cv);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc
index b54d15dd0cf..237dc590dcc 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc
@@ -74,7 +74,8 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr<Tenso
     error_info += std::to_string(ret) + ", please check the log information for more details.";
     CHECK_FAIL_RETURN_UNEXPECTED(ret == 0, error_info);
     std::shared_ptr<CVTensor> cv_tensor = nullptr;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor));
+
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor));
     *output = std::static_pointer_cast<Tensor>(cv_tensor);
   } catch (const cv::Exception &e) {
     std::string error = "SoftDvppDecodeRandomCropResizeJpeg:" + std::string(e.what());
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc
index 0a8687d352c..211d706bf51 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc
@@ -66,7 +66,8 @@ Status SoftDvppDecodeResizeJpegOp::Compute(const std::shared_ptr<Tensor> &input,
     error_info += std::to_string(ret) + ", please check the log information for more details.";
     CHECK_FAIL_RETURN_UNEXPECTED(ret == 0, error_info);
     std::shared_ptr<CVTensor> cv_tensor = nullptr;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor));
+
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor));
     *output = std::static_pointer_cast<Tensor>(cv_tensor);
   } catch (const cv::Exception &e) {
     std::string error = "SoftDvppDecodeResizeJpeg:" + std::string(e.what());
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc
index e8ee2c85cb6..a8762e1af8a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc
@@ -41,7 +41,7 @@ Status SolarizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr
 
     std::shared_ptr<CVTensor> mask_mat_tensor;
     std::shared_ptr<CVTensor> output_cv_tensor;
-    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_cv->mat(), &mask_mat_tensor));
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &mask_mat_tensor));
 
     RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv_tensor));
     RETURN_UNEXPECTED_IF_NULL(mask_mat_tensor);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h
index 435876ad947..8fa83efa91c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h
@@ -49,8 +49,8 @@ class UniformAugOp : public TensorOp {
   std::string Name() const override { return kUniformAugOp; }
 
  private:
-  int32_t num_ops_;
   std::vector<std::shared_ptr<TensorOp>> tensor_op_list_;
+  int32_t num_ops_;
   std::mt19937 rnd_;
 };
 }  // namespace dataset
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
index 26542868c9a..ffb398c61ac 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc
@@ -135,6 +135,13 @@ Status FillOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+Status FillOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  std::shared_ptr<Tensor> fill_value;
+  RETURN_IF_NOT_OK(Tensor::from_json(op_params, &fill_value));
+  *operation = std::make_shared<transforms::FillOperation>(fill_value);
+  return Status::OK();
+}
+
 // MaskOperation
 MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr<Tensor> &constant, DataType dtype)
     : op_(op), constant_(constant), dtype_(dtype) {}
@@ -173,6 +180,13 @@ Status OneHotOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+Status OneHotOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_classes") != op_params.end(), "Failed tofind num_classes");
+  int32_t num_classes = op_params["num_classes"];
+  *operation = std::make_shared<transforms::OneHotOperation>(num_classes);
+  return Status::OK();
+}
+
 #ifndef ENABLE_ANDROID
 // PadEndOperation
 PadEndOperation::PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr<Tensor> &pad_value)
@@ -273,6 +287,13 @@ Status TypeCastOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+Status TypeCastOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed tofind data_type");
+  std::string data_type = op_params["data_type"];
+  *operation = std::make_shared<transforms::TypeCastOperation>(data_type);
+  return Status::OK();
+}
+
 #ifndef ENABLE_ANDROID
 // UniqueOperation
 Status UniqueOperation::ValidateParams() { return Status::OK(); }
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
index f0c060529e8..f4be1173d6a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h
@@ -27,6 +27,10 @@
 
 namespace mindspore {
 namespace dataset {
+
+// Transform operations for performing data transformation.
+namespace transforms {
+
 // Char arrays storing name of corresponding classes (in alphabetical order)
 constexpr char kComposeOperation[] = "Compose";
 constexpr char kConcatenateOperation[] = "Concatenate";
@@ -42,9 +46,6 @@ constexpr char kRandomChoiceOperation[] = "RandomChoice";
 constexpr char kTypeCastOperation[] = "TypeCast";
 constexpr char kUniqueOperation[] = "Unique";
 constexpr char kPluginOperation[] = "Plugin";
-
-// Transform operations for performing data transformation.
-namespace transforms {
 /* ####################################### Derived TensorOperation classes ################################# */
 
 class ComposeOperation : public TensorOperation {
@@ -109,6 +110,8 @@ class FillOperation : public TensorOperation {
 
   Status to_json(nlohmann::json *out_json) override;
 
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   std::shared_ptr<Tensor> fill_value_;
 };
@@ -145,6 +148,8 @@ class OneHotOperation : public TensorOperation {
 
   Status to_json(nlohmann::json *out_json) override;
 
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   int32_t num_classes_;
 };
@@ -248,6 +253,8 @@ class TypeCastOperation : public TensorOperation {
 
   Status to_json(nlohmann::json *out_json) override;
 
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   DataType data_type_;
 };
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc
index 668337777c2..0bc911024f9 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc
@@ -38,6 +38,11 @@ Status ValidateFloatScalarPositive(const std::string &op_name, const std::string
   return Status::OK();
 }
 
+Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar) {
+  RETURN_IF_NOT_OK(ValidateScalar(op_name, scalar_name, scalar, {0}, false));
+  return Status::OK();
+}
+
 Status ValidateVectorFillvalue(const std::string &op_name, const std::vector<uint8_t> &fill_value) {
   if (fill_value.empty() || (fill_value.size() != 1 && fill_value.size() != 3)) {
     std::string err_msg =
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h
index d420377bb0e..72bbaf570e3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h
@@ -36,6 +36,9 @@ Status ValidateIntScalarPositive(const std::string &op_name, const std::string &
 // Helper function to positive float scalar
 Status ValidateFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar);
 
+// Helper function to non-negative float scalar
+Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar);
+
 // Helper function to validate scalar
 template <typename T>
 Status ValidateScalar(const std::string &op_name, const std::string &scalar_name, const T scalar,
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt
index d46a9bfe52b..7a241b89ed3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt
@@ -2,6 +2,7 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc"
 set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
 
 set(DATASET_KERNELS_IR_VISION_SRC_FILES
+        adjust_gamma_ir.cc
         affine_ir.cc
         auto_contrast_ir.cc
         bounding_box_augment_ir.cc
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc
index 52c75289141..8b81888f965 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#include <algorithm>
+
 #include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h"
 
 #ifndef ENABLE_ANDROID
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc
index 30fc14dce81..cc05c637bb3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc
@@ -82,12 +82,12 @@ Status AffineOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status AffineOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
   float_t degrees = op_params["degrees"];
   std::vector<float> translation = op_params["translate"];
   float scale = op_params["scale"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc
index 8cf5bcb36cc..93c7cdfd589 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc
@@ -68,8 +68,8 @@ Status AutoContrastOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status AutoContrastOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Fail to find cutoff");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Fail to find ignore");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Failed to find cutoff");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Failed to find ignore");
   float cutoff = op_params["cutoff"];
   std::vector<uint32_t> ignore = op_params["ignore"];
   *operation = std::make_shared<vision::AutoContrastOperation>(cutoff, ignore);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc
index 00b4d72cb3e..174c1bf9dbd 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc
@@ -55,7 +55,7 @@ Status CenterCropOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status CenterCropOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
   std::vector<int32_t> size = op_params["size"];
   *operation = std::make_shared<CenterCropOperation>(size);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc
index e46d6682383..db5ad3478cf 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc
@@ -63,6 +63,21 @@ std::shared_ptr<TensorOp> CropOperation::Build() {
   std::shared_ptr<CropOp> tensor_op = std::make_shared<CropOp>(y, x, height, width);
   return tensor_op;
 }
+
+Status CropOperation::to_json(nlohmann::json *out_json) {
+  (*out_json)["coordinates"] = coordinates_;
+  (*out_json)["size"] = size_;
+  return Status::OK();
+}
+
+Status CropOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("coordinates") != op_params.end(), "Failed to find coordinates");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  std::vector<int32_t> coordinates = op_params["coordinates"];
+  std::vector<int32_t> size = op_params["size"];
+  *operation = std::make_shared<CropOperation>(coordinates, size);
+  return Status::OK();
+}
 }  // namespace vision
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h
index 21388f9f301..170323c0c9c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h
@@ -47,6 +47,10 @@ class CropOperation : public TensorOperation {
 
   std::string Name() const override;
 
+  Status to_json(nlohmann::json *out_json) override;
+
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   std::vector<int32_t> coordinates_;
   std::vector<int32_t> size_;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc
index a4adfa0d8bd..49df9682d66 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc
@@ -57,9 +57,9 @@ Status CutMixBatchOperation::to_json(nlohmann::json *out_json) {
 
 Status CutMixBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
   CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("image_batch_format") != op_params.end(),
-                               "Fail to find image_batch_format");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob");
+                               "Failed to find image_batch_format");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob");
   ImageBatchFormat image_batch = static_cast<ImageBatchFormat>(op_params["image_batch_format"]);
   float alpha = op_params["alpha"];
   float prob = op_params["prob"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc
index 1b8944fc8bd..50ba03f1d88 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc
@@ -53,8 +53,8 @@ Status CutOutOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status CutOutOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Fail to find length");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Fail to find num_patches");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Failed to find length");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Failed to find num_patches");
   int32_t length = op_params["length"];
   int32_t num_patches = op_params["num_patches"];
   *operation = std::make_shared<vision::CutOutOperation>(length, num_patches);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc
index d4c478cf3d2..cbc457ed167 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc
@@ -40,7 +40,7 @@ Status DecodeOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 Status DecodeOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Fail to find rgb");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Failed to find rgb");
   bool rgb = op_params["rgb"];
   *operation = std::make_shared<vision::DecodeOperation>(rgb);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc
index b45d8c7d473..88eaaed382b 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc
@@ -65,8 +65,8 @@ Status GaussianBlurOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status GaussianBlurOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Fail to find kernel_size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Fail to find sigma");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Failed to find kernel_size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Failed to find sigma");
   std::vector<int32_t> kernel_size = op_params["kernel_size"];
   std::vector<float> sigma = op_params["sigma"];
   *operation = std::make_shared<vision::GaussianBlurOperation>(kernel_size, sigma);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc
index 56e8e72878b..fb23c57d20c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc
@@ -47,7 +47,7 @@ Status MixUpBatchOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status MixUpBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha");
   float alpha = op_params["alpha"];
   *operation = std::make_shared<vision::MixUpBatchOperation>(alpha);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc
index 8095036afb0..7e9b62f0799 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc
@@ -64,9 +64,9 @@ Status NormalizePadOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status NormalizePadOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Fail to find mean");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Fail to find std");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Fail to find dtype");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Failed to find mean");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Failed to find std");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Failed to find dtype");
   std::vector<float> mean = op_params["mean"];
   std::vector<float> std = op_params["std"];
   std::string dtype = op_params["dtype"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc
index 5cf7a2ff386..3e5499b41db 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc
@@ -99,9 +99,9 @@ Status PadOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status PadOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode");
   std::vector<int32_t> padding = op_params["padding"];
   std::vector<uint8_t> fill_value = op_params["fill_value"];
   BorderType padding_mode = static_cast<BorderType>(op_params["padding_mode"]);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc
index c77707ff7f5..2c4fc91eedb 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc
@@ -24,7 +24,6 @@
 namespace mindspore {
 namespace dataset {
 namespace vision {
-
 constexpr size_t dimension_zero = 0;
 constexpr size_t dimension_one = 1;
 constexpr size_t dimension_two = 2;
@@ -157,12 +156,12 @@ Status RandomAffineOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomAffineOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
   std::vector<float_t> degrees = op_params["degrees"];
   std::vector<float_t> translate_range = op_params["translate"];
   std::vector<float_t> scale_range = op_params["scale"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc
index 53d99f00034..f8e38289b92 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc
@@ -26,7 +26,6 @@
 namespace mindspore {
 namespace dataset {
 namespace vision {
-
 constexpr size_t dimension_zero = 0;
 constexpr size_t dimension_one = 1;
 constexpr size_t size_two = 2;
@@ -96,10 +95,10 @@ Status RandomColorAdjustOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomColorAdjustOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Fail to find brightness");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Fail to find contrast");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Fail to find saturation");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Fail to find hue");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Failed to find brightness");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Failed to find contrast");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Failed to find saturation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Failed to find hue");
   std::vector<float> brightness = op_params["brightness"];
   std::vector<float> contrast = op_params["contrast"];
   std::vector<float> saturation = op_params["saturation"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc
index d70e4715b22..384945c985a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc
@@ -64,7 +64,7 @@ Status RandomColorOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomColorOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees");
   std::vector<float> degrees = op_params["degrees"];
   CHECK_FAIL_RETURN_UNEXPECTED(degrees.size() == 2, "The number of degrees should be 2");
   float t_lb = degrees[0];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc
index d2008c0018b..e9d2337662f 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc
@@ -79,11 +79,11 @@ Status RandomCropDecodeResizeOperation::to_json(nlohmann::json *out_json) {
 
 Status RandomCropDecodeResizeOperation::from_json(nlohmann::json op_params,
                                                   std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts");
   std::vector<int32_t> size = op_params["size"];
   std::vector<float> scale = op_params["scale"];
   std::vector<float> ratio = op_params["ratio"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc
index 3dc38d3eec7..19611028949 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc
@@ -119,11 +119,11 @@ Status RandomCropOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomCropOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode");
   std::vector<int32_t> size = op_params["size"];
   std::vector<int32_t> padding = op_params["padding"];
   bool pad_if_needed = op_params["pad_if_needed"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc
index c264f011fcc..2329dffae52 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc
@@ -120,11 +120,11 @@ Status RandomCropWithBBoxOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomCropWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode");
   std::vector<int32_t> size = op_params["size"];
   std::vector<int32_t> padding = op_params["padding"];
   bool pad_if_needed = op_params["pad_if_needed"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc
index e6aa5e199de..5654905da25 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc
@@ -50,7 +50,7 @@ Status RandomHorizontalFlipOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomHorizontalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob");
   float prob = op_params["prob"];
   *operation = std::make_shared<vision::RandomHorizontalFlipOperation>(prob);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc
index aec39374744..703f737a218 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc
@@ -53,7 +53,7 @@ Status RandomHorizontalFlipWithBBoxOperation::to_json(nlohmann::json *out_json)
 
 Status RandomHorizontalFlipWithBBoxOperation::from_json(nlohmann::json op_params,
                                                         std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob");
   float prob = op_params["prob"];
   *operation = std::make_shared<vision::RandomHorizontalFlipWithBBoxOperation>(prob);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc
index 174ad1fa8fe..cf95b7affd2 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc
@@ -81,7 +81,7 @@ Status RandomPosterizeOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomPosterizeOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), "Fail to find bits");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), "Failed to find bits");
   std::vector<uint8_t> bit_range = op_params["bits"];
   *operation = std::make_shared<vision::RandomPosterizeOperation>(bit_range);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc
index 80e6d79a913..c4542b534ce 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc
@@ -64,7 +64,7 @@ Status RandomResizeOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
   std::vector<int32_t> size = op_params["size"];
   *operation = std::make_shared<vision::RandomResizeOperation>(size);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc
index 4dfeddb5a00..46e6b568335 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc
@@ -65,7 +65,7 @@ Status RandomResizeWithBBoxOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
   std::vector<int32_t> size = op_params["size"];
   *operation = std::make_shared<vision::RandomResizeWithBBoxOperation>(size);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc
index c2f04243e47..535537851d0 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc
@@ -90,11 +90,11 @@ Status RandomResizedCropOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomResizedCropOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts");
   std::vector<int32_t> size = op_params["size"];
   std::vector<float> scale = op_params["scale"];
   std::vector<float> ratio = op_params["ratio"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc
index 252e29015e5..e33d4dfc02c 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc
@@ -86,11 +86,11 @@ Status RandomResizedCropWithBBoxOperation::to_json(nlohmann::json *out_json) {
 
 Status RandomResizedCropWithBBoxOperation::from_json(nlohmann::json op_params,
                                                      std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts");
   std::vector<int32_t> size = op_params["size"];
   std::vector<float> scale = op_params["scale"];
   std::vector<float> ratio = op_params["ratio"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc
index 91b95ac68f7..4926d3ab574 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc
@@ -119,11 +119,11 @@ Status RandomRotationOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomRotationOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
   std::vector<float> degrees = op_params["degrees"];
   InterpolationMode resample = static_cast<InterpolationMode>(op_params["resample"]);
   bool expand = op_params["expand"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc
index a2729d9e7d7..82c88eea9e1 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc
@@ -66,7 +66,7 @@ Status RandomSharpnessOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomSharpnessOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees");
   std::vector<float> degrees = op_params["degrees"];
   *operation = std::make_shared<vision::RandomSharpnessOperation>(degrees);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc
index 988c6da07ff..fecdb96acac 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc
@@ -47,7 +47,7 @@ Status RandomSolarizeOperation::ValidateParams() {
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_SYNTAX_ERROR(err_msg);
   }
-  for (int32_t i = 0; i < threshold_.size(); ++i) {
+  for (size_t i = 0; i < threshold_.size(); ++i) {
     if (threshold_[i] < 0 || threshold_[i] > kThresholdMax) {
       std::string err_msg =
         "RandomSolarize: threshold has to be between 0 and 255, got:" + std::to_string(threshold_[i]);
@@ -74,7 +74,7 @@ Status RandomSolarizeOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomSolarizeOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Fail to find threshold");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Failed to find threshold");
   std::vector<uint8_t> threshold = op_params["threshold"];
   *operation = std::make_shared<vision::RandomSolarizeOperation>(threshold);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc
index 389daf2fd4a..c0442ffb217 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc
@@ -51,7 +51,7 @@ Status RandomVerticalFlipOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RandomVerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob");
   float prob = op_params["prob"];
   *operation = std::make_shared<vision::RandomVerticalFlipOperation>(prob);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc
index 2b3fa07bd0d..5c94515b518 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc
@@ -54,7 +54,7 @@ Status RandomVerticalFlipWithBBoxOperation::to_json(nlohmann::json *out_json) {
 
 Status RandomVerticalFlipWithBBoxOperation::from_json(nlohmann::json op_params,
                                                       std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob");
   float prob = op_params["prob"];
   *operation = std::make_shared<vision::RandomVerticalFlipWithBBoxOperation>(prob);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc
index 9c0024943b2..7e61d6212b9 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc
@@ -57,8 +57,8 @@ Status RescaleOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status RescaleOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Fail to find rescale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Fail to find shift");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Failed to find rescale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Failed to find shift");
   float rescale = op_params["rescale"];
   float shift = op_params["shift"];
   *operation = std::make_shared<vision::RescaleOperation>(rescale, shift);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc
index 8aeee7f82cd..50d328745bb 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc
@@ -64,8 +64,8 @@ Status ResizeOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status ResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation");
   std::vector<int32_t> size = op_params["size"];
   InterpolationMode interpolation = static_cast<InterpolationMode>(op_params["interpolation"]);
   *operation = std::make_shared<vision::ResizeOperation>(size, interpolation);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc
index 5c22e1894d3..48bf6cf8721 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc
@@ -48,9 +48,9 @@ Status ResizePreserveAROperation::to_json(nlohmann::json *out_json) {
 }
 
 Status ResizePreserveAROperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Fail to find height");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Fail to find width");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Fail to find img_orientation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Failed to find height");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Failed to find width");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Failed to find img_orientation");
   int32_t height = op_params["height"];
   int32_t width = op_params["width"];
   int32_t img_orientation = op_params["img_orientation"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc
index 2ed1877a027..05503c348e3 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc
@@ -65,8 +65,8 @@ Status ResizeWithBBoxOperation::to_json(nlohmann::json *out_json) {
 }
 
 Status ResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation");
   std::vector<int32_t> size = op_params["size"];
   InterpolationMode interpolation = static_cast<InterpolationMode>(op_params["interpolation"]);
   *operation = std::make_shared<vision::ResizeWithBBoxOperation>(size, interpolation);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc
index 8c14f5d88c7..4f23dcffb07 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc
@@ -37,6 +37,11 @@ Status RgbToBgrOperation::ValidateParams() { return Status::OK(); }
 
 std::shared_ptr<TensorOp> RgbToBgrOperation::Build() { return std::make_shared<RgbToBgrOp>(); }
 
+Status RgbToBgrOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  *operation = std::make_shared<vision::RgbToBgrOperation>();
+  return Status::OK();
+}
+
 }  // namespace vision
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h
index 339e68a4d7d..82aac13c06a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h
@@ -46,6 +46,8 @@ class RgbToBgrOperation : public TensorOperation {
   Status ValidateParams() override;
 
   std::string Name() const override;
+
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
 };
 
 }  // namespace vision
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc
index c1c1e19c228..b041ecbc902 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc
@@ -34,6 +34,12 @@ std::string RgbToGrayOperation::Name() const { return kRgbToGrayOperation; }
 Status RgbToGrayOperation::ValidateParams() { return Status::OK(); }
 
 std::shared_ptr<TensorOp> RgbToGrayOperation::Build() { return std::make_shared<RgbToGrayOp>(); }
+
+Status RgbToGrayOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  *operation = std::make_shared<vision::RgbToGrayOperation>();
+  return Status::OK();
+}
+
 }  // namespace vision
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h
index f1a0135923e..45c6630073a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h
@@ -46,6 +46,8 @@ class RgbToGrayOperation : public TensorOperation {
   Status ValidateParams() override;
 
   std::string Name() const override;
+
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
 };
 
 }  // namespace vision
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc
index 394e3c7efd0..1e402873a4f 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc
@@ -25,7 +25,6 @@
 
 namespace mindspore {
 namespace dataset {
-
 namespace vision {
 #ifndef ENABLE_ANDROID
 // RgbaToBgrOperation.
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc
index 24a6ccf4c46..ff0a3d548e9 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc
@@ -85,11 +85,11 @@ Status RotateOperation::to_json(nlohmann::json *out_json) {
 
 Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
 #ifndef ENABLE_ANDROID
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Fail to find degree");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Failed to find degree");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
   float degrees = op_params["degree"];
   InterpolationMode resample = static_cast<InterpolationMode>(op_params["resample"]);
   bool expand = op_params["expand"];
@@ -97,7 +97,7 @@ Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr<Tens
   std::vector<uint8_t> fill_value = op_params["fill_value"];
   *operation = std::make_shared<vision::RotateOperation>(degrees, resample, expand, center, fill_value);
 #else
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Fail to find angle_id");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Failed to find angle_id");
   uint64_t angle_id = op_params["angle_id"];
   std::shared_ptr<RotateOperation> rotate_operation = std::make_shared<vision::RotateOperation>();
   rotate_operation.get()->setAngle(angle_id);
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc
index c8fefe54389..0edaa28ba53 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc
@@ -57,6 +57,18 @@ Status SlicePatchesOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+Status SlicePatchesOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_height") != op_params.end(), "Failed to find num_height");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_width") != op_params.end(), "Failed to find num_width");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("slice_mode") != op_params.end(), "Failed to find slice_mode");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value");
+  int32_t num_height = op_params["num_height"];
+  int32_t num_width = op_params["num_width"];
+  SliceMode slice_mode = static_cast<SliceMode>(op_params["slice_mode"]);
+  uint8_t fill_value = op_params["fill_value"];
+  *operation = std::make_shared<vision::SlicePatchesOperation>(num_height, num_width, slice_mode, fill_value);
+  return Status::OK();
+}
 }  // namespace vision
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h
index e65954d3d85..b7b00d86b2d 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h
@@ -48,6 +48,8 @@ class SlicePatchesOperation : public TensorOperation {
 
   Status to_json(nlohmann::json *out_json) override;
 
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   int32_t num_height_;
   int32_t num_width_;
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc
index 80e130de420..c939aa426d9 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc
@@ -44,7 +44,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeRandomCropResizeJpeg", size_));
   constexpr int32_t value_one = 1;
   constexpr int32_t value_two = 2;
-  for (int32_t i = 0; i < size_.size(); i++) {
+  for (size_t i = 0; i < size_.size(); i++) {
     if (size_[i] % value_two == value_one) {
       std::string err_msg = "SoftDvppDecodeRandomCropResizeJpeg: size[" + std::to_string(i) +
                             "] must be even values, got: " + std::to_string(size_[i]);
@@ -96,10 +96,10 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::to_json(nlohmann::json *out_
 
 Status SoftDvppDecodeRandomCropResizeJpegOperation::from_json(nlohmann::json op_params,
                                                               std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio");
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts");
   std::vector<int32_t> size = op_params["size"];
   std::vector<float> scale = op_params["scale"];
   std::vector<float> ratio = op_params["ratio"];
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc
index c00b0d6ddd1..fc1b320438a 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc
@@ -38,7 +38,7 @@ Status SoftDvppDecodeResizeJpegOperation::ValidateParams() {
   RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeResizeJpeg", size_));
   constexpr int32_t value_one = 1;
   constexpr int32_t value_two = 2;
-  for (int32_t i = 0; i < size_.size(); i++) {
+  for (size_t i = 0; i < size_.size(); i++) {
     if (size_[i] % value_two == value_one) {
       std::string err_msg = "SoftDvppDecodeResizeJpeg: size[" + std::to_string(i) +
                             "] must be even values, got: " + std::to_string(size_[i]);
@@ -74,7 +74,7 @@ Status SoftDvppDecodeResizeJpegOperation::to_json(nlohmann::json *out_json) {
 
 Status SoftDvppDecodeResizeJpegOperation::from_json(nlohmann::json op_params,
                                                     std::shared_ptr<TensorOperation> *operation) {
-  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size");
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size");
   std::vector<int32_t> size = op_params["size"];
   *operation = std::make_shared<vision::SoftDvppDecodeResizeJpegOperation>(size);
   return Status::OK();
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc
index f12774aadd1..42989e66b42 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc
@@ -39,6 +39,12 @@ std::shared_ptr<TensorOp> VerticalFlipOperation::Build() {
   std::shared_ptr<VerticalFlipOp> tensor_op = std::make_shared<VerticalFlipOp>();
   return tensor_op;
 }
+
+Status VerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  *operation = std::make_shared<vision::VerticalFlipOperation>();
+  return Status::OK();
+}
+
 #endif
 
 }  // namespace vision
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h
index 35ecf11b683..2c518effba7 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h
@@ -43,6 +43,8 @@ class VerticalFlipOperation : public TensorOperation {
   Status ValidateParams() override;
 
   std::string Name() const override;
+
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
 };
 
 }  // namespace vision
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
index d00a5914820..8c4308d41f6 100644
--- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
+++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h
@@ -53,6 +53,7 @@ namespace dataset {
 constexpr char kTensorOp[] = "TensorOp";
 
 // image
+constexpr char kAdjustGammaOp[] = "AdjustGammaOp";
 constexpr char kAffineOp[] = "AffineOp";
 constexpr char kAutoContrastOp[] = "AutoContrastOp";
 constexpr char kBoundingBoxAugmentOp[] = "BoundingBoxAugmentOp";
@@ -137,7 +138,14 @@ constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp";
 constexpr char kSentencepieceTokenizerOp[] = "SentencepieceTokenizerOp";
 
 // audio
+constexpr char kAllpassBiquadOp[] = "AllpassBiquadOp";
+constexpr char kAmplitudeToDBOp[] = "AmplitudeToDBOp";
+constexpr char kAngleOp[] = "AngleOp";
 constexpr char kBandBiquadOp[] = "BandBiquadOp";
+constexpr char kBandpassBiquadOp[] = "BandpassBiquadOp";
+constexpr char kBandrejectBiquadOp[] = "BandrejectBiquadOp";
+constexpr char kBassBiquadOp[] = "BassBiquadOp";
+constexpr char kTimeStretchOp[] = "TimeStretchOp";
 
 // data
 constexpr char kConcatenateOp[] = "ConcatenateOp";
diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc
index 64c7dacb188..028111bfea2 100644
--- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc
+++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc
@@ -396,6 +396,13 @@ Status ToNumberOperation::to_json(nlohmann::json *out_json) {
   return Status::OK();
 }
 
+Status ToNumberOperation::from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation) {
+  CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed to find data_type");
+  std::string data_type = op_params["data_type"];
+  *operation = std::make_shared<text::ToNumberOperation>(data_type);
+  return Status::OK();
+}
+
 // TruncateSequencePairOperation
 TruncateSequencePairOperation::TruncateSequencePairOperation(int32_t max_length) : max_length_(max_length) {}
 
diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h
index 8b2cee15618..43dbe213584 100644
--- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h
+++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h
@@ -288,6 +288,8 @@ class ToNumberOperation : public TensorOperation {
 
   Status to_json(nlohmann::json *out_json) override;
 
+  static Status from_json(nlohmann::json op_params, std::shared_ptr<TensorOperation> *operation);
+
  private:
   DataType data_type_;
 };
diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc
index 21e223be24e..59a68116912 100644
--- a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc
+++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc
@@ -223,7 +223,7 @@ MSRStatus ShardIndexGenerator::CreateShardNameTable(sqlite3 *db, const std::stri
   sql = "INSERT INTO SHARD_NAME (NAME) VALUES (:SHARD_NAME);";
   sqlite3_stmt *stmt = nullptr;
   if (sqlite3_prepare_v2(db, common::SafeCStr(sql), -1, &stmt, 0) != SQLITE_OK) {
-    if (stmt) {
+    if (stmt != nullptr) {
       (void)sqlite3_finalize(stmt);
     }
     MS_LOG(ERROR) << "SQL error: could not prepare statement, sql: " << sql;
diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc
index aff17e3efc5..f182d503b1e 100644
--- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc
+++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc
@@ -877,7 +877,9 @@ std::pair<MSRStatus, std::vector<json>> ShardReader::GetLabels(int page_id, int
       sqlite3_free(errmsg);
     }
     std::vector<json> ret;
-    for (unsigned int i = 0; i < labels_ptr->size(); ++i) ret.emplace_back(json{});
+    for (unsigned int i = 0; i < labels_ptr->size(); ++i) {
+      (void)ret.emplace_back(json{});
+    }
     for (unsigned int i = 0; i < labels_ptr->size(); ++i) {
       json construct_json;
       for (unsigned int j = 0; j < columns.size(); ++j) {
diff --git a/mindspore/ccsrc/pipeline/jit/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc
index 6460b9786dc..df4f77a8f18 100644
--- a/mindspore/ccsrc/pipeline/jit/action.cc
+++ b/mindspore/ccsrc/pipeline/jit/action.cc
@@ -121,6 +121,28 @@ using CompileGraphs = compile::CompileGraphs;
 using abstract::AnalysisResult;
 using mindspore::abstract::AnalysisContextPtr;
 
+inline bool ResetCNodeFromLoad(const AnfNodePtr &node) {
+  if (node->isa<CNode>() && node->cast<CNodePtr>()->get_load_flag()) {
+    // Process partial("DeadNode",args) when the graph is loaded.
+    auto operatorPtr = node->cast<CNodePtr>()->input(0);
+    // Set abstract of switch(c,f,t) to null
+    auto prim = GetValueNode<PrimitivePtr>(operatorPtr);
+    if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) {
+      node->set_abstract(nullptr);
+      return true;
+    }
+    // Set abstract of switch(c,f,t)() to null
+    prim = GetCNodePrimitive(operatorPtr);
+    if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) {
+      node->set_abstract(nullptr);
+      return true;
+    }
+    // Previous inferred value
+    return true;
+  }
+  return false;
+}
+
 abstract::AnalysisResult AbstractAnalyze(const ResourcePtr &res, const FuncGraphPtr &func_graph,
                                          const abstract::AbstractBasePtrList &args_spec, bool clear) {
   MS_LOG(DEBUG) << "AbstractAnalyze start";
@@ -133,10 +155,19 @@ abstract::AnalysisResult AbstractAnalyze(const ResourcePtr &res, const FuncGraph
     for (auto &node : manager->all_nodes()) {
       MS_EXCEPTION_IF_NULL(node);
       const AbstractBasePtr &prev_inferred = node->abstract();
-      // Keep previous inferred value for CNode if is loaded from MindIR.
-      if (node->isa<CNode>() && node->cast<CNodePtr>()->get_load_flag()) {
+
+      // AbstractFunction has context,but contexts in cache have been cleaned.
+      if (prev_inferred != nullptr && prev_inferred->isa<abstract::AbstractFunction>()) {
+        node->set_abstract(nullptr);
+        MS_LOG(DEBUG) << "Abstract of node " << node->ToString() << " is set to nullptr";
         continue;
       }
+
+      // Handle previous inferred value for CNode if is loaded from MindIR
+      if (res->is_load() && ResetCNodeFromLoad(node)) {
+        continue;
+      }
+
       // Keep previous inferred value for ValueNode if the inferred value is not AbstractFunction.
       if (!node->isa<ValueNode>() || (prev_inferred != nullptr && prev_inferred->isa<abstract::AbstractFunction>())) {
         node->set_abstract(nullptr);
@@ -200,6 +231,7 @@ const FuncGraphPtr GetLoadedGraph(const ResourcePtr &res) {
     if (graph->has_attr("is_load")) {
       loaded_graph = graph;
       loaded_graph_num += 1;
+      res->set_is_load(true);
     }
   }
   if (loaded_graph_num == 0) {
@@ -218,6 +250,8 @@ void CheckRootInputShapeAndType(const ResourcePtr &res, const FuncGraphPtr &load
   FuncGraphPtr root_graph = *(manager->roots().begin());
   auto root_inputs = root_graph->get_inputs();
   auto loaded_inputs = loaded_graph->get_inputs();
+  MS_LOG(DEBUG) << "root_graph: " << root_graph->ToString();
+  MS_LOG(DEBUG) << "loaded_graph: " << loaded_graph->ToString();
 
   size_t root_inputs_num = root_inputs.size();
   size_t loaded_inputs_num = loaded_inputs.size();
@@ -229,10 +263,18 @@ void CheckRootInputShapeAndType(const ResourcePtr &res, const FuncGraphPtr &load
     auto root_input = root_inputs[index];
     auto loaded_input = loaded_inputs[index];
 
+    MS_LOG(DEBUG) << "root_input[" << index << "]: " << root_input->DebugString(1);
+    MS_LOG(DEBUG) << "loaded_input[" << index << "]: " << loaded_input->DebugString(1);
+    MS_LOG(DEBUG) << "root_input abstract[" << index
+                  << "]: " << (root_input->abstract() ? root_input->abstract()->ToString() : "NULL");
+    MS_LOG(DEBUG) << "loaded_input abstract [" << index
+                  << "]: " << (loaded_input->abstract() ? loaded_input->abstract()->ToString() : "NULL");
+
     auto root_shape = root_input->Shape() == nullptr ? nullptr : dyn_cast<abstract::Shape>(root_input->Shape());
     auto loaded_shape = loaded_input->Shape() == nullptr ? nullptr : dyn_cast<abstract::Shape>(loaded_input->Shape());
     auto root_type = root_input->Type() == nullptr ? nullptr : dyn_cast<Type>(root_input->Type());
     auto loaded_type = loaded_input->Type() == nullptr ? nullptr : dyn_cast<Type>(loaded_input->Type());
+
     MS_EXCEPTION_IF_NULL(root_shape);
     MS_EXCEPTION_IF_NULL(loaded_shape);
     MS_EXCEPTION_IF_NULL(root_type);
@@ -454,6 +496,7 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
   }
   // Analyze
   AnalysisResult result = AbstractAnalyze(res, func_graph, args_spec);
+
   // The top graph may be replaced by infer, update the top graph when the infer is done
   parse::Parser::UpdateTopFuncGraph(result.context->func_graph());
 
diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc
index 1ba42b20733..9e9110f2fbf 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc
+++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc
@@ -20,6 +20,7 @@
 
 #include <string>
 #include <memory>
+#include <algorithm>
 
 #include "pybind11/pybind11.h"
 #include "pipeline/jit/parse/resolve.h"
@@ -329,10 +330,10 @@ bool FunctionBlock::CollectRemovablePhi(const ParameterPtr &phi) {
 
 // A block should be marked matured if its predecessor blocks have been processed
 void FunctionBlock::Mature() {
-  const auto &graphParamVec = func_graph_->parameters();
-  for (auto &paramItr : graphParamVec) {
-    MS_EXCEPTION_IF_NULL(paramItr);
-    auto param = paramItr->cast<ParameterPtr>();
+  const auto &graph_params = func_graph_->parameters();
+  for (auto &param_itr : graph_params) {
+    MS_EXCEPTION_IF_NULL(param_itr);
+    auto param = param_itr->cast<ParameterPtr>();
     if (phi_nodes_.find(param) != phi_nodes_.cend()) {
       SetPhiArgument(param);
     }
@@ -356,7 +357,7 @@ CNodePtr FunctionBlock::ForceToWhileCond(const AnfNodePtr &cond) {
 }
 
 // Perform a jump from this block to target block
-void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const AnfNodePtr &node) {
+void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const std::vector<AnfNodePtr> &args) {
   MS_EXCEPTION_IF_NULL(target_block);
   if (func_graph_->get_return() != nullptr) {
     MS_LOG(EXCEPTION) << "Failure: have return node! NodeInfo: "
@@ -364,9 +365,7 @@ void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const AnfNodePtr
   }
   std::vector<AnfNodePtr> input_nodes;
   input_nodes.emplace_back(NewValueNode(target_block->func_graph()));
-  if (node != nullptr) {
-    input_nodes.emplace_back(node);
-  }
+  (void)std::copy(args.begin(), args.end(), std::back_inserter(input_nodes));
 
   CNodePtr jump = func_graph_->NewCNodeInOrder(input_nodes);
   jumps_[target_block.get()] = jump;
diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.h b/mindspore/ccsrc/pipeline/jit/parse/function_block.h
index b9a26193ceb..ff45747c828 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/function_block.h
+++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.h
@@ -57,7 +57,7 @@ class FunctionBlock : public std::enable_shared_from_this<FunctionBlock> {
   void Mature();
   CNodePtr ForceToBoolNode(const AnfNodePtr &cond);
   CNodePtr ForceToWhileCond(const AnfNodePtr &cond);
-  void Jump(const FunctionBlockPtr &block, const AnfNodePtr &node);
+  void Jump(const FunctionBlockPtr &block, const std::vector<AnfNodePtr> &args);
   AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi);
   void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock,
                        bool unroll_loop = true);
diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/parse/parse.cc
index e70ff90493a..37d4cede426 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/parse.cc
+++ b/mindspore/ccsrc/pipeline/jit/parse/parse.cc
@@ -130,6 +130,8 @@ void Parser::BuildMethodMap() {
   expr_method_map_["UnaryOp"] = &Parser::ParseUnaryOp;
   expr_method_map_["Dict"] = &Parser::ParseDict;
   expr_method_map_["Ellipsis"] = &Parser::ParseEllipsis;
+  expr_method_map_["ListComp"] = &Parser::ParseListComp;
+  expr_method_map_["GeneratorExp"] = &Parser::ParseListComp;  // We treat 'GeneratorExp' the same as 'ListComp'.
 }
 
 void Parser::UpdateTopFuncGraph(const FuncGraphPtr &func_graph) { top_func_graph_ = FuncGraphWeakPtr(func_graph); }
@@ -156,8 +158,8 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr<ParseAst> &as
     }
     py::object node = ast->GetAstNode();
     py::list ret = ast->CallParserObjMethod(PYTHON_PARSE_GET_LOCATION, node);
-    constexpr auto kMinListSize = 2;
-    if (ret.size() < kMinListSize) {
+    constexpr auto min_list_size = 2;
+    if (ret.size() < min_list_size) {
       MS_LOG(EXCEPTION) << "list size:" << ret.size() << " is less than 2.";
     }
     py::str desc =
@@ -169,18 +171,15 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr<ParseAst> &as
 FuncGraphPtr Parser::ParseFuncGraph() {
   // Get ast FunctionDef node
   py::object node = ast_->GetAstNode();
-  FunctionBlockPtr pFnBlock = ParseFunction(node);
+  FunctionBlockPtr fn_block = ParseFunction(node);
   if (errcode() != PARSE_SUCCESS) {
     MS_LOG(ERROR) << "Parse function error, code is " << errcode();
     return nullptr;
   }
-
   RemoveUnnecessaryPhis();
-
-  MS_EXCEPTION_IF_NULL(pFnBlock);
-  CheckFuncReturn(pFnBlock->func_graph(), ast_);
-
-  return pFnBlock->func_graph();
+  MS_EXCEPTION_IF_NULL(fn_block);
+  CheckFuncReturn(fn_block->func_graph(), ast_);
+  return fn_block->func_graph();
 }
 
 void Parser::GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &fn_node) {
@@ -261,14 +260,14 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo
   // The node created in the parsefunction context, will inherit the scope created using scope_guard
   ScopeGuard scope_guard(scope);
   TraceGuard trace_guard(data_converter::GetObjKey(ast_->obj())[0], GetLocation(node));
-  FunctionBlockPtr pFunBlock = MakeFunctionBlock(*this);
+  FunctionBlockPtr func_block = MakeFunctionBlock(*this);
   if (block != nullptr) {
-    pFunBlock->AddPrevBlock(block);
+    func_block->AddPrevBlock(block);
   } else {
-    func_graph_ = pFunBlock->func_graph();
+    func_graph_ = func_block->func_graph();
   }
-  pFunBlock->Mature();
-  auto current_fg = pFunBlock->func_graph();
+  func_block->Mature();
+  auto current_fg = func_block->func_graph();
   auto function_name = py::cast<std::string>(python_adapter::GetPyObjAttr(node, "name"));
   MS_LOG(DEBUG) << "The function name is " << function_name;
   current_fg->debug_info()->set_name(function_name);
@@ -286,27 +285,27 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo
     MS_LOG(ERROR) << "Set flags failed";
     return nullptr;
   }
-  GenerateArgsNodeForFunction(pFunBlock, node);
+  GenerateArgsNodeForFunction(func_block, node);
 
   // When parsing the top graph of construct, save the top graph
   if (GetTopFuncGraph() == nullptr) {
-    UpdateTopFuncGraph(pFunBlock->func_graph());
+    UpdateTopFuncGraph(func_block->func_graph());
   }
 
   // Save the function node to block
-  pFunBlock->WriteVariable(function_name, NewValueNode(current_fg));
+  func_block->WriteVariable(function_name, NewValueNode(current_fg));
 
   py::object funcObj = python_adapter::GetPyObjAttr(node, "body");
-  (void)ParseStatements(pFunBlock, funcObj);
+  (void)ParseStatements(func_block, funcObj);
 
   // Add unused variables as isolate nodes.
-  for (auto &func_block : func_block_list_) {
-    MS_EXCEPTION_IF_NULL(func_block);
-    if (func_block->func_graph()->get_return() != nullptr) {
+  for (auto &func_block_item : func_block_list_) {
+    MS_EXCEPTION_IF_NULL(func_block_item);
+    if (func_block_item->func_graph()->get_return() != nullptr) {
       // Find unused variables.
-      func_block->FindIsolatedNodes();
+      func_block_item->FindIsolatedNodes();
       // Attach all isolated nodes.
-      func_block->AttachIsolatedNodesBeforeReturn();
+      func_block_item->AttachIsolatedNodesBeforeReturn();
     }
   }
 
@@ -315,8 +314,8 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo
     py::str desc = python_adapter::CallPyModFn(ast_->module(), PYTHON_MOD_GET_OBJECT_DESCRIPTION, node, ret[0], ret[1]);
     MS_EXCEPTION(TypeError) << "Missing return statement in " << desc.cast<std::string>() << ".";
   }
-  GenerateArgsDefaultValueForFunction(pFunBlock, node);
-  return pFunBlock;
+  GenerateArgsDefaultValueForFunction(func_block, node);
+  return func_block;
 }
 
 FunctionBlockPtr Parser::ParseStatements(FunctionBlockPtr block, const py::object &nodes) {
@@ -461,14 +460,14 @@ FunctionBlockPtr Parser::ParseReturn(const FunctionBlockPtr &block, const py::ob
   MS_LOG(DEBUG) << "Process ast return";
   MS_EXCEPTION_IF_NULL(block);
   // Create return valuenode
-  AnfNodePtr pReturnValueNode = NewValueNode(prim::kPrimReturn);
+  AnfNodePtr return_value_node = NewValueNode(prim::kPrimReturn);
   // Parse the return Statements value
   py::object value = python_adapter::GetPyObjAttr(node, "value");
-  AnfNodePtr pReturnStatementNode = ParseExprNode(block, value);
+  AnfNodePtr return_expr_node = ParseExprNode(block, value);
   // Create the cnode
   auto block_fg = block->func_graph();
-  CNodePtr pReturnCNode = block_fg->NewCNodeInOrder({pReturnValueNode, pReturnStatementNode});
-  block_fg->set_return(pReturnCNode);
+  CNodePtr return_node = block_fg->NewCNodeInOrder({return_value_node, return_expr_node});
+  block_fg->set_return(return_node);
   return block;
 }
 
@@ -583,6 +582,7 @@ AnfNodePtr Parser::ParseNameConstant(const FunctionBlockPtr &, const py::object
   errcode_ = PARSE_NODE_TYPE_UNKNOWN;
   MS_LOG(EXCEPTION) << "Unsupported NameConstant type: " << (std::string)py::str(obj);
 }
+
 AnfNodePtr Parser::GenerateMakeTuple(const FunctionBlockPtr &block, const std::vector<AnfNodePtr> &element_nodes) {
   MS_EXCEPTION_IF_NULL(block);
   AnfNodePtr make_tuple_op = block->MakeResolveOperation(NAMED_PRIMITIVE_MAKETUPLE);
@@ -1117,18 +1117,18 @@ FunctionBlockPtr Parser::ParseIf(const FunctionBlockPtr &block, const py::object
   py::object bodyNode = python_adapter::GetPyObjAttr(node, "body");
   FunctionBlockPtr true_end = ParseStatements(true_block, bodyNode);
 
-  // If the return_ is set ,it has its own continuation block
+  // If the return_ is set, it has its own continuation block
   if (true_end->func_graph()->get_return() == nullptr) {
-    true_end->Jump(after_block, nullptr);
+    true_end->Jump(after_block, {});
   }
 
   // Process the orelse branch
   py::object orelseNode = python_adapter::GetPyObjAttr(node, "orelse");
   FunctionBlockPtr false_end = ParseStatements(false_block, orelseNode);
 
-  // If the return_ is set ,it has its own continuation block
+  // If the return_ is set, it has its own continuation block
   if (false_end->func_graph()->get_return() == nullptr) {
-    false_end->Jump(after_block, nullptr);
+    false_end->Jump(after_block, {});
   }
 
   block->ConditionalJump(bool_node, true_block, false_block);
@@ -1158,7 +1158,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj
 
   body_block->AddPrevBlock(header_block);
   after_block->AddPrevBlock(header_block);
-  block->Jump(header_block, nullptr);
+  block->Jump(header_block, {});
 
   py::object test_node = python_adapter::GetPyObjAttr(node, "test");
   AnfNodePtr condition_node = ParseExprNode(header_block, test_node);
@@ -1171,7 +1171,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj
   py::object body_node = python_adapter::GetPyObjAttr(node, "body");
   FunctionBlockPtr after_body = ParseStatements(body_block, body_node);
   if (after_body->func_graph()->get_return() == nullptr) {
-    after_body->Jump(header_block, nullptr);
+    after_body->Jump(header_block, {});
   }
 
   header_block->Mature();
@@ -1179,7 +1179,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj
   auto &end_block = loop_context.EndBlock();
   if (end_block) {
     // end_block exists if we encounter 'break' in loop body.
-    after_block->Jump(end_block, nullptr);
+    after_block->Jump(end_block, {});
     end_block->Mature();
     return end_block;
   }
@@ -1200,16 +1200,17 @@ CNodePtr Parser::GenerateCondInFor(const ParameterPtr &iter_param, const Functio
   return header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param});
 }
 
-FunctionBlockPtr Parser::GenerateBlockInFor(const TraceInfoPtr &trace_info) {
+FunctionBlockPtr Parser::GenerateBlock(const TraceInfoPtr &trace_info) {
   TraceGuard trace_guard(trace_info);
-  FunctionBlockPtr body_block = MakeFunctionBlock(*this);
-  return body_block;
+  FunctionBlockPtr block = MakeFunctionBlock(*this);
+  MS_EXCEPTION_IF_NULL(block);
+  return block;
 }
 
 int64_t Parser::GetForTransToWhileLoop() {
   // int64 support 63bits positive num mostly.
-  constexpr auto kMaxNumLength = 10;
-  if (max_for_loop_count_str_.size() > kMaxNumLength || max_for_loop_count_str_.empty()) {
+  constexpr auto max_num_length = 10;
+  if (max_for_loop_count_str_.size() > max_num_length || max_for_loop_count_str_.empty()) {
     return MAX_FOR_LOOP_COUNT;
   }
   if (std::any_of(max_for_loop_count_str_.begin(), max_for_loop_count_str_.end(),
@@ -1222,6 +1223,7 @@ int64_t Parser::GetForTransToWhileLoop() {
   ss >> loop_count;
   return loop_count;
 }
+
 // A for loop will generate 3 functions :the test, the body, and the continuation
 // for x in xs:
 //    body
@@ -1260,10 +1262,10 @@ FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::objec
   }
 
   FunctionBlockPtr true_end = ParseForIter(true_block, node);
-  true_end->Jump(after_block, nullptr);
+  true_end->Jump(after_block, {});
 
   FunctionBlockPtr false_end = ParseForLoop(false_block, node);
-  false_end->Jump(after_block, nullptr);
+  false_end->Jump(after_block, {});
 
   block->ConditionalJump(bool_node, true_block, false_block);
   after_block->Mature();
@@ -1288,14 +1290,13 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o
   // Generate the iterator apply
   CNodePtr iter_apply = GenerateIteratorInFor(block, node, op_iter);
   MS_EXCEPTION_IF_NULL(iter_apply);
-  FunctionBlockPtr header_block =
-    GenerateBlockInFor(std::make_shared<TraceForHeader>(block->func_graph()->debug_info()));
+  FunctionBlockPtr header_block = GenerateBlock(std::make_shared<TraceForHeader>(block->func_graph()->debug_info()));
   MS_EXCEPTION_IF_NULL(header_block);
   // Generate the hasnext apply which is a condition
   ParameterPtr iter_param = header_block->func_graph()->add_parameter();
   CNodePtr cond_apply = GenerateCondInFor(iter_param, header_block, op_hasnext);
   // Generate the body of the for statement
-  FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared<TraceForBody>(block->func_graph()->debug_info()));
+  FunctionBlockPtr body_block = GenerateBlock(std::make_shared<TraceForBody>(block->func_graph()->debug_info()));
   MS_EXCEPTION_IF_NULL(body_block);
   body_block->AddPrevBlock(header_block);
   // Generate the iterator next apply
@@ -1323,7 +1324,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o
   MS_EXCEPTION_IF_NULL(after_block);
   after_block->AddPrevBlock(header_block);
 
-  block->Jump(header_block, iter_apply);
+  block->Jump(header_block, {iter_apply});
   body_block->Mature();
   header_block->ConditionalJump(cond_apply, body_block, after_block);
 
@@ -1332,7 +1333,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o
   py::object body_node = python_adapter::GetPyObjAttr(node, "body");
   FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node);
   if (after_body_block->func_graph()->get_return() == nullptr) {
-    after_body_block->Jump(header_block, iter2_app);
+    after_body_block->Jump(header_block, {iter2_app});
   }
 
   header_block->Mature();
@@ -1340,7 +1341,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o
   auto &end_block = loop_context.EndBlock();
   if (end_block) {
     // end_block exists if we encounter 'break' in loop body.
-    after_block->Jump(end_block, nullptr);
+    after_block->Jump(end_block, {});
     end_block->Mature();
     return end_block;
   }
@@ -1377,8 +1378,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o
 
   CNodePtr len_iter = block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, scalar_len});
 
-  FunctionBlockPtr header_block =
-    GenerateBlockInFor(std::make_shared<TraceForHeader>(block->func_graph()->debug_info()));
+  FunctionBlockPtr header_block = GenerateBlock(std::make_shared<TraceForHeader>(block->func_graph()->debug_info()));
   MS_EXCEPTION_IF_NULL(header_block);
   // Create loop variable 'i'
   ParameterPtr loop_var = header_block->func_graph()->add_parameter();
@@ -1388,7 +1388,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o
   CNodePtr cond_node = header_block->func_graph()->NewCNodeInOrder({less_node, loop_var, len_iter});
 
   // Generate the body of the for statement
-  FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared<TraceForBody>(block->func_graph()->debug_info()));
+  FunctionBlockPtr body_block = GenerateBlock(std::make_shared<TraceForBody>(block->func_graph()->debug_info()));
   MS_EXCEPTION_IF_NULL(body_block);
   body_block->AddPrevBlock(header_block);
   // Create 'x = xs[i]'
@@ -1419,7 +1419,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o
 
   CNodePtr zero_tensor =
     block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, NewValueNode(static_cast<int64_t>(0))});
-  block->Jump(header_block, zero_tensor);
+  block->Jump(header_block, {zero_tensor});
   body_block->Mature();
 
   header_block->ConditionalJump(cond_node, body_block, after_block, false);
@@ -1429,7 +1429,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o
   py::object body_node = python_adapter::GetPyObjAttr(node, "body");
   FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node);
   if (after_body_block->func_graph()->get_return() == nullptr) {
-    after_body_block->Jump(header_block, loop_var_inc);
+    after_body_block->Jump(header_block, {loop_var_inc});
   }
 
   header_block->Mature();
@@ -1437,7 +1437,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o
   auto &end_block = loop_context.EndBlock();
   if (end_block) {
     // end_block exists if we encounter 'break' in loop body.
-    after_block->Jump(end_block, nullptr);
+    after_block->Jump(end_block, {});
     end_block->Mature();
     return end_block;
   }
@@ -1489,6 +1489,155 @@ AnfNodePtr Parser::ParseIfExp(const FunctionBlockPtr &block, const py::object &n
   return switch_app_call;
 }
 
+FunctionBlockPtr Parser::ParseListCompIter(const FunctionBlockPtr &block, const py::object &node,
+                                           const py::object &generator_node) {
+  // Create a header block.
+  FunctionBlockPtr top_block = GenerateBlock(std::make_shared<TraceListComp>(block->func_graph()->debug_info()));
+  // Handle iter attribute.
+  py::object iter_node = python_adapter::GetPyObjAttr(generator_node, "iter");
+  AnfNodePtr iter_anf_node = ParseExprNode(block, iter_node);
+  AnfNodePtr op_iter = top_block->MakeResolveOperation(NAMED_PRIMITIVE_ITER);
+  CNodePtr iter_apply = top_block->func_graph()->NewCNodeInOrder({op_iter, iter_anf_node});
+
+  // Create header graph.
+  FunctionBlockPtr list_header_block =
+    GenerateBlock(std::make_shared<TraceForHeader>(block->func_graph()->debug_info()));
+  list_header_block->AddPrevBlock(top_block);
+
+  // Create hasNext apply.
+  AnfNodePtr op_hasnext = top_block->MakeResolveOperation(NAMED_PRIMITIVE_HASNEXT);
+  ParameterPtr iter_param = list_header_block->func_graph()->add_parameter();
+  constexpr auto iter_param_name = "iter";
+  iter_param->set_name(iter_param_name);
+  iter_param->debug_info()->set_name(iter_param_name);
+  CNodePtr cond_apply = list_header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param});
+
+  // Call the header graph with iter.
+  ParameterPtr list_param = list_header_block->func_graph()->add_parameter();
+  constexpr auto list_param_name = "list";
+  list_param->set_name(list_param_name);
+  list_param->debug_info()->set_name(list_param_name);
+  auto empty_list = std::vector<ValuePtr>();
+  AnfNodePtr empty_list_node = NewValueNode(std::make_shared<ValueList>(empty_list));
+  top_block->Jump(list_header_block, {iter_apply, empty_list_node});
+
+  // Create body graph.
+  FunctionBlockPtr list_body_block = GenerateBlock(std::make_shared<TraceForBody>(block->func_graph()->debug_info()));
+  list_body_block->AddPrevBlock(list_header_block);
+  AnfNodePtr op_next = top_block->MakeResolveOperation(NAMED_PRIMITIVE_NEXT);
+  CNodePtr next_apply = list_body_block->func_graph()->NewCNodeInOrder({op_next, iter_param});
+  AnfNodePtr op_getitem = top_block->MakeResolveOperation(NAMED_PRIMITIVE_GETITEM);
+  CNodePtr item_apply =
+    list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast<int64_t>(0))});
+  CNodePtr new_iter =
+    list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast<int64_t>(1))});
+
+  // Save the `target` in a variable.
+  py::object gen_target_node = python_adapter::GetPyObjAttr(generator_node, "target");
+  WriteAssignVars(list_body_block, gen_target_node, item_apply);
+
+  auto ifs_new_list = ParseListCompIfs(list_body_block, list_param, node, generator_node);
+  list_body_block->Jump(list_header_block, {new_iter, ifs_new_list});
+
+  // Create after graph.
+  FunctionBlockPtr list_after_block = GenerateBlock(std::make_shared<TraceForAfter>(block->func_graph()->debug_info()));
+  list_after_block->AddPrevBlock(list_header_block);
+  // Return the list in after graph.
+  list_after_block->func_graph()->set_output(list_param);
+
+  // Run the branches.
+  list_header_block->ConditionalJump(cond_apply, list_body_block, list_after_block);
+
+  top_block->Mature();
+  list_header_block->Mature();
+  list_body_block->Mature();
+  list_after_block->Mature();
+  return top_block;
+}
+
+AnfNodePtr Parser::ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param,
+                                    const py::object &node, const py::object &generator_node) {
+  // Handle ifs attribute.
+  py::list ifs_node = python_adapter::GetPyObjAttr(generator_node, "ifs");
+  AnfNodePtr ifs_bool_node;
+  if (ifs_node.empty()) {
+    ifs_bool_node = NewValueNode(true);
+  } else {
+    ifs_bool_node = ProcessBoolOpValueList(list_body_block, ifs_node, AST_SUB_TYPE_AND);
+  }
+
+  // Create if-true graph.
+  FunctionBlockPtr if_true_block =
+    GenerateBlock(std::make_shared<TraceIfStmtTrueBranch>(list_body_block->func_graph()->debug_info()));
+  if_true_block->AddPrevBlock(list_body_block);
+  // Handle elt attribute in body block.
+  py::object elt_obj = python_adapter::GetPyObjAttr(node, "elt");
+  AnfNodePtr elt_node = ParseExprNode(list_body_block, elt_obj);
+  // Append the element.
+  auto list_append_op = prim::kPrimListAppend;
+  auto new_list = list_body_block->func_graph()->NewCNodeInOrder({NewValueNode(list_append_op), list_param, elt_node});
+  // Return new list in true branch graph.
+  if_true_block->func_graph()->set_output(new_list);
+
+  // Create if-false graph.
+  FunctionBlockPtr if_false_block =
+    GenerateBlock(std::make_shared<TraceIfStmtFalseBranch>(list_body_block->func_graph()->debug_info()));
+  if_false_block->AddPrevBlock(list_body_block);
+  // Return original list in false branch graph.
+  if_false_block->func_graph()->set_output(list_param);
+
+  // We don't want to create a header graph, where to get and wrap the result of Switch().
+  // So just call ConditionalJump() to set Switch() as output, and reset it later, as tricky.
+  list_body_block->ConditionalJump(ifs_bool_node, if_true_block, if_false_block);
+  // Output is Switch() result, i.e. updated list.
+  auto switch_apply_node = list_body_block->func_graph()->output();
+  auto ifs_new_list = switch_apply_node;
+  // Since we call ConditionalJump() above, to reset the Return as null before call Jump().
+  list_body_block->func_graph()->set_return(nullptr);
+  if_true_block->Mature();
+  if_false_block->Mature();
+  return ifs_new_list;
+}
+
+// A ListComp contains: `elt` and `generators`.
+// `generators` contains: `target`, `iter` and `ifs`.
+// For example:
+// [x * x for x in range(0, 10) if x % 2 == 0]
+// It is compiled to be following statement:
+// list = []
+// for x in range(0, 10):
+//    if x % 2 == 0:
+//        list.append(x * x)
+// return list
+AnfNodePtr Parser::ParseListComp(const FunctionBlockPtr &block, const py::object &node) {
+  MS_LOG(DEBUG) << "Process ast ListComp";
+  MS_EXCEPTION_IF_NULL(block);
+
+  // Handle generators attribute.
+  py::list generators_node = python_adapter::GetPyObjAttr(node, "generators");
+  if (generators_node.size() != 1) {
+    MS_EXCEPTION(TypeError) << "The `generators` supports one `comprehension` in ListComp/GeneratorExp, but got "
+                            << generators_node.size() << " comprehensions.";
+  }
+  py::object generator_node = generators_node[0];
+  auto generator_node_type = ast_->GetNodeType(generator_node);
+  auto generator_node_name = generator_node_type->node_name();
+  constexpr auto comprehension_name = "comprehension";
+  if (generator_node_name != comprehension_name) {
+    MS_LOG(EXCEPTION) << "Generator node name should be " << comprehension_name << ", but got " << generator_node_name;
+  }
+
+  // Parse ListComp's `iter` and add `elt` in it.
+  auto top_block = ParseListCompIter(block, node, generator_node);
+
+  // Call the top graph and return the list.
+  auto call_function_anf_node = NewValueNode(top_block->func_graph());
+  std::vector<AnfNodePtr> func_call_nodes;
+  func_call_nodes.push_back(call_function_anf_node);
+  AnfNodePtr output = block->func_graph()->NewCNodeInOrder(func_call_nodes);
+  return output;
+}
+
 void Parser::HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node) {
   MS_EXCEPTION_IF_NULL(block);
   MS_EXCEPTION_IF_NULL(assigned_node);
@@ -1644,7 +1793,7 @@ FunctionBlockPtr Parser::ParseBreak(const FunctionBlockPtr &block, const py::obj
     loop.end = MakeFunctionBlock(*this);
   }
   // Jump to the end_block.
-  block->Jump(loop.end, nullptr);
+  block->Jump(loop.end, {});
   return block;
 }
 
@@ -1655,7 +1804,11 @@ FunctionBlockPtr Parser::ParseContinue(const FunctionBlockPtr &block, const py::
   }
   // Jump to the header of the loop with iterator called.
   Loop &loop = loops_.top();
-  block->Jump(loop.header, loop.iterator);
+  std::vector<AnfNodePtr> args;
+  if (loop.iterator != nullptr) {
+    args.emplace_back(loop.iterator);
+  }
+  block->Jump(loop.header, args);
   return block;
 }
 
diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.h b/mindspore/ccsrc/pipeline/jit/parse/parse.h
index a62090e1e6e..06a2dde140c 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/parse.h
+++ b/mindspore/ccsrc/pipeline/jit/parse/parse.h
@@ -38,19 +38,19 @@ namespace parse {
 // Parse status define
 enum ParseStatusCode : int64_t {
   PARSE_SUCCESS = 0,
-  PARSE_FUNCTION_IS_NULL,            // python function is null
-  PARSE_PARAMETER_INVALID,           // parameter is invalid
-  PARSE_NO_RETURN,                   // function no return node
-  PARSE_NODE_TYPE_NO_MATCH,          // ast node type is error
-  PARSE_NODE_TYPE_UNKNOWN,           // node type is unknown
-  PARSE_NODE_METHOD_UNSUPPORTED,     // no method to parse the node
-  PARSE_DONT_RESOLVE_SYMBOL,         // can't resolve the string
-  PARSE_NOT_SUPPORTED_COMPARE_EXPR,  // the comparison is not supported
+  PARSE_FUNCTION_IS_NULL,            // Python function is null
+  PARSE_PARAMETER_INVALID,           // Parameter is invalid
+  PARSE_NO_RETURN,                   // Function no return node
+  PARSE_NODE_TYPE_NO_MATCH,          // Ast node type is error
+  PARSE_NODE_TYPE_UNKNOWN,           // Node type is unknown
+  PARSE_NODE_METHOD_UNSUPPORTED,     // No method to parse the node
+  PARSE_DONT_RESOLVE_SYMBOL,         // Can't resolve the string
+  PARSE_NOT_SUPPORTED_COMPARE_EXPR,  // The comparison is not supported
   PARSE_FAILURE = 0xFF
 };
 
-// max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it
-//  will be sunk(i.e. not unrolled)
+// Max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it
+// will be sunk(i.e. not unrolled)
 // NOTE: Since when the for loop was unrolled, it depends backend operators `tuple_getitem` and `scalar_add` which were
 //  not implemented, so here set MAX_FOR_LOOP_COUNT to int64_t max limit to override default value `600`. This will make
 //  the for loop will always be unrolled, but don't worry about the memory were exhausted, an exception will be raised
@@ -97,7 +97,7 @@ class Parser {
   FuncGraphPtr func_graph() const { return func_graph_; }
   ParseStatusCode errcode() const { return errcode_; }
   std::shared_ptr<ParseAst> ast() const { return ast_; }
-  // get location info from the ast node
+  // Get location info from the ast node
   LocationPtr GetLocation(const py::object &node) const;
   static void InitParserEnvironment(const py::object &obj);
   static void CleanParserResource();
@@ -105,114 +105,118 @@ class Parser {
   static void UpdateTopFuncGraph(const FuncGraphPtr &func_graph);
 
  private:
-  // process the stmt node method list
+  // Process the stmt node method list
   FunctionBlockPtr ParseReturn(const FunctionBlockPtr &block, const py::object &node);
-  // parse expression
+  // Parse expression
   FunctionBlockPtr ParseExpr(const FunctionBlockPtr &block, const py::object &node);
-  // process a if statement
+  // Process a if statement
   FunctionBlockPtr ParseIf(const FunctionBlockPtr &block, const py::object &node);
-  // process a while statement
+  // Process a while statement
   FunctionBlockPtr ParseWhile(const FunctionBlockPtr &block, const py::object &node);
-  // process a for statement
+  // Process a for statement
   FunctionBlockPtr ParseFor(const FunctionBlockPtr &block, const py::object &node);
   FunctionBlockPtr ParseForIter(const FunctionBlockPtr &block, const py::object &node);
   FunctionBlockPtr ParseForLoop(const FunctionBlockPtr &block, const py::object &node);
-  // process a function def statement
+  // Process a function def statement
   FunctionBlockPtr ParseFunctionDef(const FunctionBlockPtr &block, const py::object &node);
-  // process a augment assign
+  // Process a augment assign
   FunctionBlockPtr ParseAugAssign(const FunctionBlockPtr &block, const py::object &node);
-  // process a global declaration
+  // Process a global declaration
   FunctionBlockPtr ParseGlobal(const FunctionBlockPtr &block, const py::object &node);
-  // process assign statement
+  // Process assign statement
   FunctionBlockPtr ParseAssign(const FunctionBlockPtr &block, const py::object &node);
-  // process break statement
+  // Process break statement
   FunctionBlockPtr ParseBreak(const FunctionBlockPtr &block, const py::object &node);
-  // process continue statement
+  // Process continue statement
   FunctionBlockPtr ParseContinue(const FunctionBlockPtr &block, const py::object &node);
-  // process pass statement
+  // Process pass statement
   FunctionBlockPtr ParsePass(const FunctionBlockPtr &block, const py::object &node);
-  // process the expr and slice node method list
+
+  // Process the expr and slice node method list
   AnfNodePtr ParseBinOp(const FunctionBlockPtr &block, const py::object &node);
-  // process a variable name
+  // Process a variable name
   AnfNodePtr ParseName(const FunctionBlockPtr &block, const py::object &node);
-  // process NoneType
+  // Process NoneType
   AnfNodePtr ParseNone(const FunctionBlockPtr &block, const py::object &node);
-  // process Ellipsis
+  // Process Ellipsis
   AnfNodePtr ParseEllipsis(const FunctionBlockPtr &block, const py::object &node);
-  // process a integer or float number
+  // Process a integer or float number
   AnfNodePtr ParseNum(const FunctionBlockPtr &block, const py::object &node);
-  // process a string variable
+  // Process a string variable
   AnfNodePtr ParseStr(const FunctionBlockPtr &block, const py::object &node);
-  // process a Constant
+  // Process a Constant
   AnfNodePtr ParseConstant(const FunctionBlockPtr &block, const py::object &node);
-  // process a name
+  // Process a name
   AnfNodePtr ParseNameConstant(const FunctionBlockPtr &block, const py::object &node);
-  // process a function call
+  // Process a function call
   AnfNodePtr ParseCall(const FunctionBlockPtr &block, const py::object &node);
-  // process function 'super'
+  // Process function 'super'
   AnfNodePtr ParseSuper(const FunctionBlockPtr &block, const py::list &args);
-  // process the if expression
+  // Process the if expression
   AnfNodePtr ParseIfExp(const FunctionBlockPtr &block, const py::object &node);
-  // process class type define
+  // Process class type define
   AnfNodePtr ParseAttribute(const FunctionBlockPtr &block, const py::object &node);
-  // process a compare expression
+  // Process a compare expression
   AnfNodePtr ParseCompare(const FunctionBlockPtr &block, const py::object &node);
-  // process a bool operation
+  // Process a bool operation
   AnfNodePtr ParseBoolOp(const FunctionBlockPtr &block, const py::object &node);
-  // process a lambda operation
+  // Process a lambda operation
   AnfNodePtr ParseLambda(const FunctionBlockPtr &block, const py::object &node);
-  // process a tuple
+  // Process a tuple
   AnfNodePtr ParseTuple(const FunctionBlockPtr &block, const py::object &node);
-  // process a tuple
+  // Process a tuple
   AnfNodePtr ParseList(const FunctionBlockPtr &block, const py::object &node);
-  // process a tuple
+  // Process a tuple
   AnfNodePtr ParseSubscript(const FunctionBlockPtr &block, const py::object &node);
-  // process a slice
+  // Process a slice
   AnfNodePtr ParseSlice(const FunctionBlockPtr &block, const py::object &node);
-
-  // process a extslice
+  // Process a extslice
   AnfNodePtr ParseExtSlice(const FunctionBlockPtr &block, const py::object &node);
-
-  // process a tuple
+  // Process a tuple
   AnfNodePtr ParseIndex(const FunctionBlockPtr &block, const py::object &node);
-
-  // process a unaryop
+  // Process a unaryop
   AnfNodePtr ParseUnaryOp(const FunctionBlockPtr &block, const py::object &node);
-
-  // process a dict ast node expression
+  // Process a dict ast node expression
   AnfNodePtr ParseDict(const FunctionBlockPtr &block, const py::object &node);
-  // generate argument nodes for ast  function node
+  // Process ListComp expression
+  AnfNodePtr ParseListComp(const FunctionBlockPtr &block, const py::object &node);
+  FunctionBlockPtr ParseListCompIter(const FunctionBlockPtr &block, const py::object &node,
+                                     const py::object &generator_node);
+  AnfNodePtr ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param,
+                              const py::object &node, const py::object &generator_node);
+
+  // Generate argument nodes for ast  function node
   void GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &function_node);
-  // generate argument default value for ast  function node
+  // Generate argument default value for ast  function node
   void GenerateArgsDefaultValueForFunction(const FunctionBlockPtr &block, const py::object &function_node);
-  // parse ast function node
+  // Parse ast function node
   FunctionBlockPtr ParseFunction(const py::object &function_node, const FunctionBlockPtr &block = nullptr);
-  // parse ast statements
+  // Parse ast statements
   FunctionBlockPtr ParseStatements(FunctionBlockPtr block, const py::object &stmt_node);
-  // parse one ast statement node
+  // Parse one ast statement node
   FunctionBlockPtr ParseStatement(const FunctionBlockPtr &block, const py::object &node);
-  // parse an ast expression node
+  // Parse an ast expression node
   AnfNodePtr ParseExprNode(const FunctionBlockPtr &block, const py::object &node);
 
   void MakeConditionBlocks(const FunctionBlockPtr &block, const FunctionBlockPtr &trueBlock,
                            const FunctionBlockPtr &falseBlock);
   void RemoveUnnecessaryPhis();
-  // write a new var
+  // Write a new var
   void WriteAssignVars(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &value_node);
 
-  // assign value to single variable name
+  // Assign value to single variable name
   void HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node);
 
-  // assign value to tuple
+  // Assign value to tuple
   void HandleAssignTuple(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node);
 
-  // assign value to class member
+  // Assign value to class member
   void HandleAssignClassMember(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node);
 
-  // assign value to subscript
+  // Assign value to subscript
   void HandleAssignSubscript(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node);
 
-  // process a bool operation value list
+  // Process a bool operation value list
   AnfNodePtr ProcessBoolOpValueList(const FunctionBlockPtr &block, const py::list &value_list, AstSubType mode);
 
   CNodePtr GenerateIteratorInFor(const FunctionBlockPtr &block, const pybind11::object &node,
@@ -221,7 +225,7 @@ class Parser {
   CNodePtr GenerateCondInFor(const ParameterPtr &iter_param, const FunctionBlockPtr &header_block,
                              const AnfNodePtr &op_hasnext);
 
-  FunctionBlockPtr GenerateBlockInFor(const TraceInfoPtr &trace_info);
+  FunctionBlockPtr GenerateBlock(const TraceInfoPtr &trace_info);
 
   bool ParseKeywordsInCall(const FunctionBlockPtr &block, const py::object &node,
                            std::vector<AnfNodePtr> *packed_arguments);
@@ -249,27 +253,27 @@ class Parser {
     func_block_list_.push_back(block);
     return block;
   }
-  // return a make tuple for input elements list
+  // Return a make tuple for input elements list
   AnfNodePtr GenerateMakeTuple(const FunctionBlockPtr &block, const std::vector<AnfNodePtr> &element_nodes);
   int64_t GetForTransToWhileLoop();
 
-  // shared_ptr will be hold by GraphManager, so just hold a weak ref here.
+  // The shared_ptr will be hold by GraphManager, so just hold a weak ref here.
   static FuncGraphWeakPtr top_func_graph_;
   // Python function id, used to indicate whether two CNodes come from the same Python function
   const std::shared_ptr<ParseAst> &ast_;
   FuncGraphPtr func_graph_;
-  // error code setwhen parsing ast tree
+  // Error code setwhen parsing ast tree
   ParseStatusCode errcode_;
 
-  // hold all reference for FunctionBlock in this round of parsing,
+  // Hold all reference for FunctionBlock in this round of parsing,
   // so in FunctionBlock class we can use FunctionBlock* in member
   // pre_blocks_ and jumps_ to break reference cycle.
   std::vector<FunctionBlockPtr> func_block_list_;
   using pStmtFunc = FunctionBlockPtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node);
   using pExprFunc = AnfNodePtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node);
-  // define the function map to parse ast Statement
+  // Define the function map to parse ast Statement
   std::map<std::string, pStmtFunc> stmt_method_map_;
-  // define the function map to parse ast expression
+  // Define the function map to parse ast expression
   std::map<std::string, pExprFunc> expr_method_map_;
   // Save current loops to support 'continue', 'break' statement.
   std::stack<Loop> loops_;
@@ -350,10 +354,10 @@ class ParseAst {
   bool IsClassMember(const py::object &node);
 
  private:
-  // save obj,eg: class instance or function
+  // Save obj,eg: class instance or function
   py::object obj_;
 
-  // function or class method.
+  // Function or class method.
   py::function function_;
 
   py::object ast_tree_;
@@ -369,7 +373,7 @@ class ParseAst {
   int64_t function_line_offset_;
 };
 
-// update the graph flags
+// Update the graph flags
 bool UpdateFuncGraphFlags(const py::object &obj, const FuncGraphPtr &func_graph);
 
 AnfNodePtr GetMixedPrecisionCastHelp(const FuncGraphPtr &func_graph, const AnfNodePtr &param);
diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc
index 00c583467ce..cc636afbc9c 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc
+++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc
@@ -28,6 +28,7 @@
 #include "frontend/operator/ops.h"
 #include "frontend/optimizer/opt.h"
 #include "frontend/optimizer/irpass.h"
+#include "frontend/optimizer/irpass/symbol_resolver.h"
 
 namespace mindspore {
 namespace parse {
@@ -306,7 +307,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr
 }
 
 AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space,
-                               const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr) {
+                               const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr) {
   MS_EXCEPTION_IF_NULL(node);
   TraceGuard trace_guard(std::make_shared<TraceResolve>(node->debug_info()));
   if (node->func_graph() == nullptr || manager == nullptr) {
@@ -319,14 +320,19 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa
 
   py::object obj = symbol_resolver.result();
   if (!data_converter::IsCellInstance(obj)) {
-    return nullptr;
+    AnfNodePtr resolved_node = ResolveObjectAndAddToManager(manager, obj, node);
+    AnfNodePtrList inputs = {NewValueNode(prim::kPrimGetAttr), resolved_node, attr};
+    AnfNodePtr res_node = node->func_graph()->NewCNode(inputs);
+    TraceManager::ClearParseOrResolveDebugInfo();
+    return res_node;
   }
 
   const std::string fn = PYTHON_MOD_GET_MEMBER_NAMESPACE_SYMBOL;
   const std::string module = "mindspore._extends.parse.parser";
   py::object namespace_obj = parse::python_adapter::GetPyFn(module, fn)(obj);
   auto new_namespace = std::make_shared<NameSpace>(RESOLVE_NAMESPACE_NAME_CLASS_MEMBER, namespace_obj);
-  auto new_symbol = std::make_shared<Symbol>(attr);
+  std::string attr_as_string = GetValueNode<StringImmPtr>(attr)->value();
+  auto new_symbol = std::make_shared<Symbol>(attr_as_string);
 
   AnfNodePtrList inputs = {NewValueNode(prim::kPrimResolve), NewValueNode(new_namespace), NewValueNode(new_symbol)};
   AnfNodePtr resolved_node = node->func_graph()->NewCNode(inputs);
@@ -336,11 +342,11 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa
 
 namespace {
 opt::OptPassGroupMap GetOptResolvePasses(const opt::irpass::ResolveIRPassLib &irpass) {
+  // For resolve and getattr primitive.
   opt::OptPassGroupMap map({
     {"resolve",
      {
-       // For resolve and getattr primitive;
-       irpass.resolver_resolve_and_getattr_,
+       irpass.resolver_getattr_resolve_,
      }},
   });
   return map;
diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.h b/mindspore/ccsrc/pipeline/jit/parse/resolve.h
index ad8bdc27454..bfc0e818b41 100644
--- a/mindspore/ccsrc/pipeline/jit/parse/resolve.h
+++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.h
@@ -147,7 +147,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr
 
 // Resolve Cell with attr name.
 AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space,
-                               const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr);
+                               const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr);
 
 // Resolve one graph which normally is the root graph. FuncGraph shall be managed by res->manager().
 bool ResolveFuncGraph(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &res, bool use_profile = true);
diff --git a/mindspore/ccsrc/pipeline/jit/pass.cc b/mindspore/ccsrc/pipeline/jit/pass.cc
index 2e6e4292809..43bde4e9cd5 100644
--- a/mindspore/ccsrc/pipeline/jit/pass.cc
+++ b/mindspore/ccsrc/pipeline/jit/pass.cc
@@ -239,7 +239,12 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) {
 
     // Safe inlining
     irpass.inline_,
-    irpass.updatestate_eliminater_,
+    irpass.updatestate_depend_eliminater_,
+    irpass.updatestate_assign_eliminater_,
+    irpass.updatestate_maketuple_eliminater_,
+    irpass.updatestate_only_used_node_eliminater_,
+    irpass.updatestate_loads_eliminater_,
+    irpass.updatestate_pure_node_eliminater_,
     irpass.load_eliminater_,
     irpass.stopgrad_eliminater_,
     irpass.partial_eliminate_,
@@ -273,7 +278,12 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) {
 
     // Safe inlining
     irpass.inline_,
-    irpass.updatestate_eliminater_,
+    irpass.updatestate_depend_eliminater_,
+    irpass.updatestate_assign_eliminater_,
+    irpass.updatestate_maketuple_eliminater_,
+    irpass.updatestate_only_used_node_eliminater_,
+    irpass.updatestate_loads_eliminater_,
+    irpass.updatestate_pure_node_eliminater_,
     irpass.load_eliminater_,
     irpass.stopgrad_eliminater_,
     irpass.sparse_tensor_eliminate_,
@@ -357,7 +367,12 @@ OptPassGroupMap GetOptPassesAfterCconv(const opt::irpass::OptimizeIRPassLib &irp
   opt::OptPassConfig c_1 = opt::OptPassConfig({
     // Safe inlining,
     irpass.inline_,
-    irpass.updatestate_eliminater_,
+    irpass.updatestate_depend_eliminater_,
+    irpass.updatestate_assign_eliminater_,
+    irpass.updatestate_maketuple_eliminater_,
+    irpass.updatestate_only_used_node_eliminater_,
+    irpass.updatestate_loads_eliminater_,
+    irpass.updatestate_pure_node_eliminater_,
     irpass.load_eliminater_,
     irpass.switch_call_monad_eliminater_,
     irpass.stopgrad_eliminater_,
@@ -394,7 +409,12 @@ OptPassGroupMap GetOptPassesB(const opt::irpass::OptimizeIRPassLib &irpass) {
                                                irpass.float_tuple_getitem_switch_,
                                                irpass.reset_defer_inline_,
                                                irpass.inline_,
-                                               irpass.updatestate_eliminater_,
+                                               irpass.updatestate_depend_eliminater_,
+                                               irpass.updatestate_assign_eliminater_,
+                                               irpass.updatestate_maketuple_eliminater_,
+                                               irpass.updatestate_only_used_node_eliminater_,
+                                               irpass.updatestate_loads_eliminater_,
+                                               irpass.updatestate_pure_node_eliminater_,
                                                irpass.load_eliminater_,
                                                irpass.stopgrad_eliminater_,
                                                irpass.special_op_eliminate_,
@@ -663,10 +683,35 @@ bool AutoMonadElimOptPass(const FuncGraphPtr &func_graph) {
   res->set_manager(func_graph->manager());
 
   // opt::irpass::OptimizeIRPassLib is not used here to avoid double free problems in external calls.
-  opt::SubstitutionPtr updatestate_eliminater = opt::MakeSubstitution(
-    std::make_shared<opt::irpass::UpdatestateEliminater>(), "updatestate_eliminater", prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_depend_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestateDependEliminater>(), "updatestate_depend_eliminater",
+                          prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_assign_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestateAssignEliminater>(), "updatestate_assign_eliminater",
+                          prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_maketuple_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestateMakeTupleEliminater>(),
+                          "updatestate_maketuple_eliminater", prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_only_used_node_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestateOnlyUsedNodeEliminater>(),
+                          "updatestate_only_used_node_eliminater", prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_loads_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestateLoadsEliminater>(), "updatestate_loads_eliminater",
+                          prim::kPrimUpdateState);
+  opt::SubstitutionPtr updatestate_pure_node_eliminater =
+    opt::MakeSubstitution(std::make_shared<opt::irpass::UpdatestatePureNodeEliminater>(),
+                          "updatestate_pure_node_eliminater", prim::kPrimUpdateState);
+
+  opt::OptPassConfig updatestate_eliminater = opt::OptPassConfig({
+    updatestate_depend_eliminater,
+    updatestate_assign_eliminater,
+    updatestate_maketuple_eliminater,
+    updatestate_only_used_node_eliminater,
+    updatestate_loads_eliminater,
+    updatestate_pure_node_eliminater,
+  });
   opt::OptPassGroupMap elim_map({
-    {"updatestate_eliminate", opt::OptPassConfig({updatestate_eliminater})},
+    {"updatestate_eliminater", updatestate_eliminater},
     {"auto_monad_eliminator", opt::OptPassConfig(opt::AutoMonadEliminator())},
   });
 
diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc
index 87f194a304e..6d5cff6cc18 100644
--- a/mindspore/ccsrc/pipeline/jit/pipeline.cc
+++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc
@@ -206,7 +206,8 @@ void CacheFuncGraph(const ResourcePtr &resource) {
   ChangeFileMode(realpath.value(), S_IRWXU);
   std::ofstream fout(realpath.value());
   if (!fout.is_open()) {
-    MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!";
+    MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!"
+                      << " Errno:" << errno << " ErrInfo:" << strerror(errno);
   }
   FuncGraphPtr fg = resource->func_graph();
   mind_ir::ModelProto fg_model = GetBinaryProto(fg, true);
@@ -707,6 +708,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons
   SaveCompiledGraph(phase_s);
 
   opt::python_pass::PyPassManager::GetInstance()->ClearPipelineRes();
+  abstract::AnalysisContext::ClearContext();
   // Reclaim all resource used by optimizer;
   ReclaimOptimizer();
   resource->Clean();
@@ -1336,6 +1338,7 @@ void ClearResAtexit() {
   ReleaseGeTsd();
   parse::python_adapter::ResetPythonScope();
   abstract::AnalysisResultCacheMgr::GetInstance().Clear();
+  abstract::AnalysisContext::ClearContext();
 #ifdef ENABLE_DEBUGGER
   Debugger::GetInstance()->Reset();
 #endif
diff --git a/mindspore/ccsrc/pipeline/jit/resource.h b/mindspore/ccsrc/pipeline/jit/resource.h
index 8981d825acf..f31bf37376c 100644
--- a/mindspore/ccsrc/pipeline/jit/resource.h
+++ b/mindspore/ccsrc/pipeline/jit/resource.h
@@ -79,6 +79,8 @@ class Resource : public ResourceBase {
     gpu_loopsink_flag_ = flag;
     gpu_loopsink_size_ = size;
   }
+  void set_is_load(bool flag) { is_load_ = flag; }
+  bool is_load() { return is_load_; }
   bool gpu_loopsink_flag() { return gpu_loopsink_flag_; }
   int64_t gpu_loopsink_size() { return gpu_loopsink_size_; }
   // Reclaim resource and clear the cache.
@@ -93,6 +95,8 @@ class Resource : public ResourceBase {
   py::object input_;
   bool is_cleaned_;
   bool gpu_loopsink_flag_{false};
+  // The func_graph_ is loaded from mindir
+  bool is_load_{false};
   int64_t gpu_loopsink_size_{1};
 };
 
diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
index 922a085a440..b13061ee351 100644
--- a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
+++ b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc
@@ -161,9 +161,17 @@ class OrderEnforcer {
       auto update_state = FindLastUpdateState(maketuple);
       if (update_state != nullptr) {
         std::unordered_set<AnfNodePtr> maketuple_users = GetSpecialOperatorRealUsers(maketuple);
+        std::unordered_set<AnfNodePtr> no_push_maketuple_users;
+        // Push and Pull at the end of the execution order,
+        // In order to ensure push and pull operator cut into the same graph, do not put push operator into updatestate
+        for (auto maketuple_user : maketuple_users) {
+          if (!IsPrimitiveCNode(maketuple_user, prim::kPrimPush)) {
+            no_push_maketuple_users.insert(maketuple_user);
+          }
+        }
         auto update_state_cnode = update_state->cast<CNodePtr>();
         MS_EXCEPTION_IF_NULL(update_state_cnode);
-        AddInputEdges(update_state_cnode, maketuple_users);
+        AddInputEdges(update_state_cnode, no_push_maketuple_users);
       }
     }
   }
@@ -207,7 +215,7 @@ class OrderEnforcer {
     if (!IsPrimitiveCNode(last_input, prim::kPrimUpdateState)) {
       return;
     }
-    const std::set<PrimitivePtr> special_operators = {prim::kPrimExpandDims};
+    const std::set<PrimitivePtr> special_operators = {prim::kPrimExpandDims, prim::kPrimBatchNormGrad};
     for (size_t i = 1; i < inputs.size(); ++i) {
       auto &input = inputs.at(i);
       if (!IsRef(input)) {
diff --git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
index c5e7dec5f37..8ffa2642bc6 100644
--- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
+++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc
@@ -2309,6 +2309,7 @@ void GradExecutor::GradNetInner(py::object *ret, const prim::GradOperationPtr &g
   MS_LOG(DEBUG) << "Start update top cell info when run finish";
   UpdateTopCellInfo(false, false, true);
   resource->Clean();
+  abstract::AnalysisContext::ClearContext();
 }
 
 std::vector<AnfNodePtr> GradExecutor::GetWeightsArgs(const py::object &weights, const FuncGraphPtr &df_builder) {
diff --git a/mindspore/ccsrc/profiler/device/data_saver.cc b/mindspore/ccsrc/profiler/device/data_saver.cc
index 17742d0a7e7..c1e6ef01783 100644
--- a/mindspore/ccsrc/profiler/device/data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/data_saver.cc
@@ -17,7 +17,6 @@
 #include <fstream>
 #include <numeric>
 #include "sys/stat.h"
-#include "utils/log_adapter.h"
 #include "utils/ms_utils.h"
 #include "utils/ms_context.h"
 
@@ -31,6 +30,10 @@ OpDetailInfo::OpDetailInfo(const std::shared_ptr<OpInfo> op_info, float proporti
   auto op_type_end_iter = op_full_name_.rfind('-');
   op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter);
   op_name_ = op_full_name_.substr(op_type_begin_iter);
+  if (op_info->op_count == 0) {
+    MS_LOG(ERROR) << "The num of operations can not be 0.";
+    return;
+  }
   op_avg_time_ = op_info->op_host_cost_time / op_info->op_count;
 }
 
@@ -39,6 +42,10 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
   float total_time_sum = GetTotalOpTime(op_info_maps);
   for (auto item : op_info_maps) {
     op_timestamps_map_[item.first] = item.second.start_duration;
+    if (total_time_sum == 0.0) {
+      MS_LOG(ERROR) << "The total operation times can not be 0.";
+      return;
+    }
     float proportion = item.second.op_host_cost_time / total_time_sum;
     auto op_info = std::make_shared<OpInfo>(item.second);
     if (op_info == nullptr) {
@@ -52,6 +59,10 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) {
   // update average time of op type
   for (auto &op_type : op_type_infos_) {
     // device_infos: <type_name, op_type_info>
+    if (op_type.second.count_ == 0) {
+      MS_LOG(ERROR) << "The num of operation type can not be 0.";
+      return;
+    }
     op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_;
   }
   MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items.";
diff --git a/mindspore/ccsrc/profiler/device/data_saver.h b/mindspore/ccsrc/profiler/device/data_saver.h
index 13c3ab80227..759a85b04de 100644
--- a/mindspore/ccsrc/profiler/device/data_saver.h
+++ b/mindspore/ccsrc/profiler/device/data_saver.h
@@ -23,6 +23,7 @@
 #include <string>
 #include <memory>
 #include "profiler/device/profiling.h"
+#include "utils/log_adapter.h"
 namespace mindspore {
 namespace profiler {
 struct OpDetailInfo {
@@ -73,6 +74,14 @@ struct OpType {
   std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; }
 
   void OutputCpuOpTypeInfo(std::ostream &os) const {
+    if (step_ == 0) {
+      MS_LOG(ERROR) << "The run step can not be 0.";
+      return;
+    }
+    if (count_ == 0) {
+      MS_LOG(ERROR) << "The num of operation type can not be 0.";
+      return;
+    }
     os << op_type_ << ',' << count_ << ',' << count_ / step_ << ',' << total_time_ << ',' << total_time_ / count_ << ','
        << proportion_ << std::endl;
   }
diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc
index b939ab36bee..3300b2e4925 100644
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc
@@ -68,6 +68,10 @@ void GpuDataSaver::ParseEvent(const std::vector<Event> &events) {
   for (auto &device_infos : activity_infos_) {
     // device_infos: <device_id, DeviceActivityInfos>
     for (auto &activity_info : device_infos.second) {
+      if (activity_info.second.count_ == 0) {
+        MS_LOG(ERROR) << "The num of operations can not be 0.";
+        return;
+      }
       // activity_info: <kernel_name, Activity>
       activity_info.second.avg_duration_ = activity_info.second.total_duration_ / activity_info.second.count_;
     }
diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
index ffa1d513c1d..3553e18bf01 100644
--- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
+++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc
@@ -339,6 +339,10 @@ void GPUProfiler::OpsParser() {
   std::sort(order_vec.begin(), order_vec.end(), cmp_func);
 
   for (auto iter = order_vec.begin(); iter != order_vec.end(); iter++) {
+    if (iter->second.op_count == 0) {
+      MS_LOG(ERROR) << "The num of operations can not be 0.";
+      return;
+    }
     MS_LOG(DEBUG) << "GPU_profiler"
                   << "," << iter->first << "," << iter->second.op_count << "," << iter->second.op_kernel_count << ","
                   << iter->second.op_kernel_api_count << ","
diff --git a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc
index fccb4ab4d1e..a8f376e97a5 100644
--- a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc
+++ b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc
@@ -42,9 +42,12 @@ bool HttpCommunicator::Start() {
 
 bool HttpCommunicator::Stop() {
   MS_EXCEPTION_IF_NULL(http_server_);
-  bool res = http_server_->Stop();
+  if (!http_server_->Stop()) {
+    MS_LOG(ERROR) << "Stopping http server failed.";
+    return false;
+  }
   running_ = false;
-  return res;
+  return true;
 }
 
 void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const MessageCallback &cb) {
@@ -60,6 +63,7 @@ void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Me
 
   std::string url = "/";
   url += msg_type;
+  MS_EXCEPTION_IF_NULL(http_server_);
   bool is_succeed = http_server_->RegisterRoute(url, &http_msg_callbacks_[msg_type]);
   if (!is_succeed) {
     MS_LOG(EXCEPTION) << "Http server register handler for url " << url << " failed.";
diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc
index 468dcf9f3ce..e8378c4c74b 100644
--- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc
+++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc
@@ -57,7 +57,10 @@ bool TcpCommunicator::Start() {
     std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
   server_node_->set_handler(tcp_msg_callback_);
 
-  server_node_->Start();
+  if (!server_node_->Start()) {
+    MS_LOG(EXCEPTION) << "Starting server node failed.";
+    return false;
+  }
   running_ = true;
   running_thread_ = std::thread([&]() {
     while (running_) {
@@ -69,8 +72,14 @@ bool TcpCommunicator::Start() {
 
 bool TcpCommunicator::Stop() {
   MS_EXCEPTION_IF_NULL(server_node_);
-  server_node_->Finish();
-  server_node_->Stop();
+  if (!server_node_->Finish()) {
+    MS_LOG(ERROR) << "Finishing server node failed.";
+    return false;
+  }
+  if (!server_node_->Stop()) {
+    MS_LOG(ERROR) << "Stopping server node failed.";
+    return false;
+  }
   running_ = false;
   return true;
 }
@@ -81,6 +90,7 @@ void TcpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Mes
 }
 
 void TcpCommunicator::RegisterEventCallback(const core::ClusterEvent &event, const EventCallback &event_cb) {
+  MS_EXCEPTION_IF_NULL(server_node_);
   server_node_->RegisterEventCallback(event, event_cb);
 }
 
diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h
index 784397165b6..f6ef04fc5e2 100644
--- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h
+++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h
@@ -52,7 +52,6 @@ enum class TcpUserCommand {
   kPrepareForNextIter,
   kProceedToNextIter,
   kEndLastIter,
-
   kStartFLJob,
   kUpdateModel,
   kGetModel
@@ -102,6 +101,7 @@ class TcpCommunicator : public CommunicatorBase {
                      std::shared_ptr<std::vector<unsigned char>> *output = nullptr) {
     const std::string &msg_str = pb_msg.SerializeAsString();
     std::shared_ptr<unsigned char[]> msg(new unsigned char[msg_str.size()]);
+    MS_ERROR_IF_NULL_W_RET_VAL(msg, false);
     size_t dest_size = msg_str.size();
     size_t src_size = msg_str.size();
     auto ret = memcpy_s(msg.get(), dest_size, msg_str.c_str(), src_size);
diff --git a/mindspore/ccsrc/ps/optimizer_info.cc b/mindspore/ccsrc/ps/optimizer_info.cc
index fc8ba289283..5bb8019cb52 100644
--- a/mindspore/ccsrc/ps/optimizer_info.cc
+++ b/mindspore/ccsrc/ps/optimizer_info.cc
@@ -23,7 +23,10 @@
 
 namespace mindspore {
 namespace ps {
-void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); }
+void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) {
+  MS_EXCEPTION_IF_NULL(workspace);
+  workspaces_.push_back(workspace);
+}
 
 const std::vector<AddressPtr> &OptimizerInfo::inputs() const { return inputs_; }
 
@@ -42,6 +45,7 @@ size_t OptimizerInfo::indices_index() { return 0; }
 template <typename T>
 void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data,
                                           const Lengths &lens) {
+  MS_EXCEPTION_IF_NULL(data);
   if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) {
     MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported.";
   }
@@ -96,8 +100,8 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
 
 void DenseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &, size_t n, size_t, size_t) {
   if (n > 1) {
+    MS_EXCEPTION_IF_NULL(gradient()->addr);
     float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
-    MS_EXCEPTION_IF_NULL(accum_grad_data);
     size_t size = gradient()->size / sizeof(float);
     for (size_t i = 0; i < size; i++) {
       accum_grad_data[i] /= n;
@@ -116,8 +120,8 @@ void DenseOptimInfo::Reset() {
 
 void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   // Append grad data to the end
+  MS_EXCEPTION_IF_NULL(gradient()->addr);
   float *accum_grad_data = reinterpret_cast<float *>(gradient()->addr);
-  MS_EXCEPTION_IF_NULL(accum_grad_data);
 
   size_t grad_index = this->grad_index();
   size_t grad_offset = 0;
@@ -143,6 +147,7 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   gradient()->size += incr_grad_size;
 
   // Append indice data to the end
+  MS_EXCEPTION_IF_NULL(indices()->addr);
   int *accum_indices_data = reinterpret_cast<int *>(indices()->addr);
   MS_EXCEPTION_IF_NULL(accum_indices_data);
 
@@ -153,10 +158,10 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
   }
 
   void *incr_indice_data_temp = const_cast<float *>(values.data()) + indice_offset;
-  int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
-
   MS_EXCEPTION_IF_NULL(incr_indice_data_temp);
+  int *incr_indice_data = reinterpret_cast<int *>(incr_indice_data_temp);
   MS_EXCEPTION_IF_NULL(incr_indice_data);
+
   size_t incr_indice_size = lengths[indices_index];
   size_t incr_indice_data_size = incr_indice_size * sizeof(int);
   dst_size = incr_indice_data_size;
@@ -176,8 +181,9 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) {
 
 void SparseOptimInfo::ComputeMean(const std::vector<std::vector<size_t>> &shapes, size_t n, size_t server_num,
                                   size_t rank_id) {
-  MS_EXCEPTION_IF_NULL(gradient());
-  MS_EXCEPTION_IF_NULL(indices());
+  if (n == 0 || indices()->size == 0) {
+    MS_LOG(EXCEPTION) << "The size of shapes or indices are 0.";
+  }
   size_t indices_size = static_cast<size_t>(indices()->size / sizeof(int));
   size_t segment_size = gradient()->size / indices()->size;
 
@@ -259,6 +265,11 @@ void SparseOptimInfo::Reset() {
 MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate,
                                      const AddressPtr &learning_rate, const AddressPtr &gradient,
                                      const AddressPtr &momentum) {
+  MS_EXCEPTION_IF_NULL(weight);
+  MS_EXCEPTION_IF_NULL(accumulate);
+  MS_EXCEPTION_IF_NULL(learning_rate);
+  MS_EXCEPTION_IF_NULL(gradient);
+  MS_EXCEPTION_IF_NULL(momentum);
   inputs_.push_back(weight);
   inputs_.push_back(accumulate);
   inputs_.push_back(learning_rate);
@@ -275,12 +286,14 @@ const size_t SparseOptimInfo::indice_size() const { return indices_offset_; }
 const AddressPtr &MomentumOptimInfo::gradient() {
   size_t origin_grad_index = kMomentumOriginIdx.at("grad");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
   return inputs_[origin_grad_index];
 }
 
 const AddressPtr &MomentumOptimInfo::indices() {
   size_t origin_grad_index = kMomentumOriginIdx.at("grad");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
   return inputs_[origin_grad_index];
 }
 
@@ -294,6 +307,17 @@ SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const Address
                                          const AddressPtr &learning_rate, const AddressPtr &beta1,
                                          const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad,
                                          const AddressPtr &indices, bool sharded) {
+  MS_EXCEPTION_IF_NULL(weight);
+  MS_EXCEPTION_IF_NULL(m);
+  MS_EXCEPTION_IF_NULL(v);
+  MS_EXCEPTION_IF_NULL(beta1_power);
+  MS_EXCEPTION_IF_NULL(beta2_power);
+  MS_EXCEPTION_IF_NULL(learning_rate);
+  MS_EXCEPTION_IF_NULL(beta1);
+  MS_EXCEPTION_IF_NULL(beta2);
+  MS_EXCEPTION_IF_NULL(epsilon);
+  MS_EXCEPTION_IF_NULL(grad);
+  MS_EXCEPTION_IF_NULL(indices);
   inputs_.push_back(weight);
   inputs_.push_back(m);
   inputs_.push_back(v);
@@ -322,12 +346,14 @@ void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) {
 const AddressPtr &SparseAdamOptimInfo::gradient() {
   size_t origin_grad_index = kSparseAdamOriginIdx.at("grad");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
   return inputs_[origin_grad_index];
 }
 
 const AddressPtr &SparseAdamOptimInfo::indices() {
   size_t origin_indices_index = kSparseAdamOriginIdx.at("indices");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
   return inputs_[origin_indices_index];
 }
 
@@ -345,6 +371,11 @@ size_t SparseAdamOptimInfo::indices_index() {
 
 SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear,
                                          const AddressPtr &grad, const AddressPtr &indices, bool sharded) {
+  MS_EXCEPTION_IF_NULL(weight);
+  MS_EXCEPTION_IF_NULL(accum);
+  MS_EXCEPTION_IF_NULL(linear);
+  MS_EXCEPTION_IF_NULL(grad);
+  MS_EXCEPTION_IF_NULL(indices);
   inputs_.push_back(weight);
   inputs_.push_back(accum);
   inputs_.push_back(linear);
@@ -358,12 +389,14 @@ SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const Address
 const AddressPtr &SparseFtrlOptimInfo::gradient() {
   size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]);
   return inputs_[origin_grad_index];
 }
 
 const AddressPtr &SparseFtrlOptimInfo::indices() {
   size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices");
   EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index);
+  MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]);
   return inputs_[origin_indices_index];
 }
 
diff --git a/mindspore/ccsrc/ps/optimizer_info_builder.cc b/mindspore/ccsrc/ps/optimizer_info_builder.cc
index 5a1f60149c7..68db3d280c0 100644
--- a/mindspore/ccsrc/ps/optimizer_info_builder.cc
+++ b/mindspore/ccsrc/ps/optimizer_info_builder.cc
@@ -29,6 +29,7 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel>
                                            const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num,
                                            bool sharded) {
   MS_EXCEPTION_IF_NULL(pserver_kernel);
+  MS_EXCEPTION_IF_NULL(weight);
   MS_EXCEPTION_IF_NULL(inputs_shape);
   OptimizerInfo *optim_info =
     BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel, sharded);
@@ -40,6 +41,7 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr<PServerKernel>
 }
 
 void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector<size_t> &ws_sizes, size_t) {
+  MS_EXCEPTION_IF_NULL(info);
   for (size_t i = 0; i < ws_sizes.size(); i++) {
     size_t size = ws_sizes[i];
     AddressPtr workspace = std::make_shared<kernel::Address>();
@@ -116,6 +118,7 @@ AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type,
 OptimizerInfo *MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values,
                                                      const Lengths &lens, const InputsShapePtr &, size_t,
                                                      const std::shared_ptr<PServerKernel> &, bool) {
+  MS_EXCEPTION_IF_NULL(weight);
   AddressPtr weight_addr = std::make_shared<kernel::Address>();
   MS_EXCEPTION_IF_NULL(weight_addr);
   weight_addr->addr = weight->data();
diff --git a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc
index 4c4e97f7939..353136b83c2 100644
--- a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc
+++ b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc
@@ -237,9 +237,6 @@ void PsCacheManager::AllocMemForHashTable() {
   embedding_device_cache_->hash_swap_value_addr_ = reinterpret_cast<float *>(
     embedding_device_cache_->cache_->MallocMemory(max_embedding_size * batch_elements_ * sizeof(float)));
   MS_EXCEPTION_IF_NULL(embedding_device_cache_->hash_swap_value_addr_);
-  if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) {
-    MS_LOG(EXCEPTION) << "MallocConstantMemory failed.";
-  }
 }
 
 void PsCacheManager::SetLocalIdRank() {
@@ -328,6 +325,14 @@ void PsCacheManager::ProcessDataTask(uint32_t device_id, const void *context) {
   MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_);
   MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_->cache_);
   embedding_device_cache_->cache_->InitDevice(device_id, context);
+
+  // MallocConstantMemory need stream on device Ascend, should be called after InitDevice.
+  if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) {
+    MS_LOG(ERROR) << "MallocConstantMemory failed.";
+    running_ = false;
+    return;
+  }
+
   InitParameterServer();
   InitDataChannel();
   while (running_) {
@@ -636,6 +641,7 @@ bool PsCacheManager::ParseHostDataHostToDevice(size_t id) {
 
 bool PsCacheManager::ParseHostDataDeviceToHost() {
   MS_ERROR_IF_NULL(embedding_device_cache_);
+  MS_ERROR_IF_NULL(embedding_host_cache_);
   int *device_to_host_ids = embedding_device_cache_->device_to_host_ids.get();
   int *device_to_host_index = embedding_host_cache_->device_to_host_index.get();
   MS_ERROR_IF_NULL(device_to_host_ids);
@@ -1053,6 +1059,7 @@ bool PsCacheManager::SyncHostEmbeddingTable() {
 
 bool PsCacheManager::SyncDeviceEmbeddingTable() {
   MS_ERROR_IF_NULL(embedding_device_cache_);
+  MS_ERROR_IF_NULL(embedding_device_cache_->cache_);
   const auto &device_hash_map = embedding_device_cache_->device_hash_map_;
   MS_ERROR_IF_NULL(device_hash_map);
   const auto &hash_id_to_index = device_hash_map->hash_id_to_index();
@@ -1105,6 +1112,8 @@ bool PsCacheManager::SyncDeviceEmbeddingTable() {
 }
 
 void PsCacheManager::DumpHashTables(bool dump_device_tables) const {
+  MS_EXCEPTION_IF_NULL(embedding_device_cache_);
+  MS_EXCEPTION_IF_NULL(embedding_device_cache_->cache_);
   for (const auto &item : hash_tables_) {
     const auto &param_name = item.first;
     size_t cache_vocab_size = item.second.cache_vocab_size;
diff --git a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc
index 17df2f0ad28..eca9209af37 100644
--- a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc
+++ b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc
@@ -31,6 +31,7 @@ void PsDataPrefetch::CreateDataChannel(const std::string &channel_name, size_t s
   if (iter != ps_data_channel_map_.end()) {
     MS_LOG(WARNING) << "The ps data channel already exists, channel name:" << channel_name;
     auto channel = iter->second;
+    MS_ERROR_IF_NULL_WO_RET_VAL(channel);
     channel->set_step_num(step_num);
   } else {
     auto channel = std::make_shared<PsDataChannel>(channel_name, step_num);
diff --git a/mindspore/ccsrc/ps/ps_context.cc b/mindspore/ccsrc/ps/ps_context.cc
index cbaeec47987..36a48183055 100644
--- a/mindspore/ccsrc/ps/ps_context.cc
+++ b/mindspore/ccsrc/ps/ps_context.cc
@@ -270,6 +270,7 @@ void PSContext::GenerateResetterRound() {
   bool is_parameter_server_mode = false;
   bool is_federated_learning_mode = false;
   bool is_mixed_training_mode = false;
+  bool use_pairwise_encrypt = (encrypt_type_ == kPWEncryptType);
 
   if (server_mode_ == kServerModePS) {
     is_parameter_server_mode = true;
@@ -285,7 +286,7 @@ void PSContext::GenerateResetterRound() {
 
   binary_server_context = ((unsigned int)is_parameter_server_mode << 0) |
                           ((unsigned int)is_federated_learning_mode << 1) |
-                          ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)secure_aggregation_ << 3);
+                          ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)use_pairwise_encrypt << 3);
   if (kServerContextToResetRoundMap.count(binary_server_context) == 0) {
     resetter_round_ = ResetterRound::kNoNeedToReset;
   } else {
diff --git a/mindspore/ccsrc/ps/ps_context.h b/mindspore/ccsrc/ps/ps_context.h
index ddf88d8fe05..291a7246038 100644
--- a/mindspore/ccsrc/ps/ps_context.h
+++ b/mindspore/ccsrc/ps/ps_context.h
@@ -44,14 +44,13 @@ constexpr char kNotEncryptType[] = "NOT_ENCRYPT";
 // 0: Server is in parameter server mode.
 // 1: Server is in federated learning mode.
 // 2: Server is in mixed training mode.
-// 3: Server enables sucure aggregation.
-// For example: 1010 stands for that the server is in federated learning mode and sucure aggregation  is enabled.
+// 3: Server enables pairwise encrypt algorithm.
+// For example: 1010 stands for that the server is in federated learning mode and pairwise encrypt algorithm is enabled.
 enum class ResetterRound { kNoNeedToReset, kUpdateModel, kReconstructSeccrets, kPushWeight };
 const std::map<uint32_t, ResetterRound> kServerContextToResetRoundMap = {{0b0010, ResetterRound::kUpdateModel},
                                                                          {0b1010, ResetterRound::kReconstructSeccrets},
                                                                          {0b1100, ResetterRound::kPushWeight},
-                                                                         {0b0100, ResetterRound::kPushWeight},
-                                                                         {0b0100, ResetterRound::kUpdateModel}};
+                                                                         {0b0100, ResetterRound::kPushWeight}};
 
 class PSContext {
  public:
diff --git a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc
index d59c197fc55..3a44d7a8ee1 100644
--- a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc
+++ b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc
@@ -34,6 +34,7 @@ REGISTER_PYBIND_DEFINE(ParamInfo, ([](const py::module *m) {
                            .def_property("comm_fusion", &ParamInfo::comm_fusion, &ParamInfo::set_comm_fusion)
                            .def_property("cache_enable", &ParamInfo::cache_enable, &ParamInfo::set_cache_enable)
                            .def_property("cache_shape", &ParamInfo::cache_shape, &ParamInfo::set_cache_shape)
+                           .def_property("requires_aggr", &ParamInfo::requires_aggr, &ParamInfo::set_requires_aggr)
                            .def(py::pickle(
                              [](const ParamInfo &p) {  // __getstate__
                                return py::make_tuple(p.name(), p.requires_grad(), p.layerwise_parallel());
diff --git a/mindspore/ccsrc/runtime/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt
index c7e99adbbea..56cd9fe6275 100644
--- a/mindspore/ccsrc/runtime/device/CMakeLists.txt
+++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt
@@ -42,7 +42,7 @@ if(ENABLE_MPI)
     if(ENABLE_GPU)
         set_property(SOURCE "gpu/mpi/mpi_initializer.cc"
             PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE)
-        pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc")
+        pybind11_add_module(_ms_mpi NO_EXTRAS "gpu/mpi/mpi_initializer.cc")
         target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi)
     endif()
 
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
index 666d79f2fc9..c7473255a40 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc
@@ -124,8 +124,8 @@ void AscendBucket::LaunchAllReduce() {
     MS_LOG(EXCEPTION) << "allreduce input have different dtype";
   }
 
-  auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type);
-  if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) {
+  auto iter = kConstOpHcomDataTypeMap.find(type);
+  if (iter == kConstOpHcomDataTypeMap.end()) {
     MS_LOG(EXCEPTION) << "unknown data type:" << type;
   }
 
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
index 9ddd0ef3f95..aa2874e022a 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc
@@ -175,6 +175,9 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size
                                            void *host_ptr) const {
   MS_LOG(INFO) << "SyncDeviceToHost, Device(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_)
                << ", size:" << size_ << "), Host(type_id:" << TypeIdLabel(type) << ", size:" << size << ")";
+  if (type_id_ > kMonadTypeBegin && type_id_ < kMonadTypeEnd) {
+    return true;
+  }
   SyncStream();
   bool sync_ok = false;
   std::vector<size_t> host_shape;
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc
index 676b311244c..eb063c54c6a 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc
@@ -53,6 +53,10 @@ void AscendEvent::WaitEvent() {
   if (ret != RT_ERROR_NONE) {
     MS_LOG(EXCEPTION) << "rtStreamWaitEvent failed, ret:" << ret;
   }
+  ret = rtEventReset(event_, wait_stream_);
+  if (ret != RT_ERROR_NONE) {
+    MS_LOG(EXCEPTION) << "rtEventReset failed, ret:" << ret;
+  }
   need_wait_ = false;
 }
 
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
index 950c9aa97a2..6dad9375810 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc
@@ -22,6 +22,7 @@
 #include "utils/signal_util.h"
 #include "debug/data_dump/e2e_dump.h"
 #include "runtime/device/ascend/ascend_device_address.h"
+#include "runtime/device/ascend/distribute/ascend_collective.h"
 #include "utils/ms_context.h"
 #include "utils/context/context_extends.h"
 #include "utils/mpi/mpi_config.h"
@@ -46,7 +47,6 @@
 #include "backend/optimizer/mem_reuse/mem_reuse_checker.h"
 #include "debug/env_config_parser.h"
 #endif
-#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
 #include "runtime/device/ascend/executor/hccl_dynamic_kernel.h"
 #include "utils/config_manager.h"
 #include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h"
@@ -64,6 +64,7 @@ using mindspore::device::ascend::ProfilingManager;
 using mindspore::device::ascend::ProfilingUtils;
 using mindspore::device::ascend::tasksink::TaskGenerator;
 using mindspore::ge::model_runner::ModelRunner;
+using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup;
 using mindspore::kernel::tbe::TbeUtils;
 using std::vector;
 
@@ -78,32 +79,17 @@ namespace mindspore::device::ascend {
 static thread_local rtContext_t thread_local_rt_context{nullptr};
 namespace {
 std::string GetRankId() {
-  std::string rank_id_str;
-#ifdef ENABLE_MPI
-  auto mpi_config_ptr = MpiConfig::GetInstance();
-  MS_EXCEPTION_IF_NULL(mpi_config_ptr);
-  if (mpi_config_ptr->enable_mpi()) {
-    int rank_id = GetMPIRankId();
-    const std::string offset = common::GetEnv("RANK_OFFSET");
-    if (offset.empty()) {
-      try {
-        int rank_offset = std::stoi(offset);
-        rank_id += rank_offset;
-      } catch (std::invalid_argument) {
-        MS_LOG(EXCEPTION) << "Call stoi invalid argument:" << offset;
-      } catch (std::out_of_range) {
-        MS_LOG(EXCEPTION) << "Call stoi out_of_range:" << offset;
-      }
-    }
-    rank_id_str = std::to_string(rank_id);
-  } else {
-    rank_id_str = common::GetEnv("RANK_ID");
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  if (!context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK)) {
+    MS_LOG(INFO) << "Get hccl rankid from mpi";
+    auto rank = HcclCollectiveGroup::instance().GetRankId();
+    return std::to_string(rank);
   }
-#else
-  rank_id_str = common::GetEnv("RANK_ID");
-#endif
+  std::string rank_id_str;
+  rank_id_str = std::getenv("RANK_ID");
   if (rank_id_str.empty()) {
-    MS_LOG(ERROR) << "Get hccl rankid failed, please set env RANK_ID";
+    MS_LOG(EXCEPTION) << "Get hccl rankid failed, please set env RANK_ID";
   }
   return rank_id_str;
 }
@@ -246,7 +232,10 @@ void AscendKernelRuntime::ReleaseDeviceRes() {
 #ifdef ENABLE_DEBUGGER
   if (debugger_ && debugger_->debugger_enabled()) {
     debugger_->SetTrainingDone(true);
-    debugger_->SendMetadata(false);
+    bool ret = debugger_->SendMetadata(false);
+    if (!ret) {
+      MS_LOG(ERROR) << "Failed to SendMetadata when finalize";
+    }
   }
 #endif
   if (!initialized_) {
@@ -304,9 +293,7 @@ bool AscendKernelRuntime::Init() {
     MS_LOG(WARNING) << "Init ErrorManager failed.";
   }
   try {
-    OpTilingCalculater::GetInstance().Init();
     // Start up profiling before rtSetDevice
-
     bool ret = InitDevice();
     if (!ret) {
       return ret;
@@ -744,6 +731,7 @@ bool AscendKernelRuntime::SyncStream() {
     MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";
     return false;
   }
+
   if (RT_ERROR_NONE != rtStreamSynchronize(communication_stream_)) {  // o for switch stream
     MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error.";
     return false;
@@ -832,7 +820,6 @@ bool AscendKernelRuntime::ResetDevice(uint32_t device_id) {
     }
     stream_ = nullptr;
   }
-
   if (communication_stream_ != nullptr) {
     ret = rtStreamDestroy(communication_stream_);
     if (ret != RT_ERROR_NONE) {
@@ -840,7 +827,6 @@ bool AscendKernelRuntime::ResetDevice(uint32_t device_id) {
     }
     communication_stream_ = nullptr;
   }
-
   ret = rtDeviceReset(device_id);
   if (ret != RT_ERROR_NONE) {
     MS_EXCEPTION(DeviceProcessError) << "Call rtDeviceReset, ret[" << ret << "]";
@@ -857,6 +843,19 @@ bool AscendKernelRuntime::HcclInit() {
     MS_LOG(EXCEPTION) << "Hccl dependent tsd is not open";
   }
   MS_LOG(INFO) << "Do hcom init";
+  bool is_task_sink = context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
+  auto mode = context_ptr->get_param<int>(MS_CTX_EXECUTION_MODE);
+  if (!is_task_sink && mode == kGraphMode) {
+    hccl::HcclAdapter::GetInstance().InitHccl();
+    std::vector<unsigned int> ranks;
+    auto rank_size = HcclCollectiveGroup::instance().GetRankSize();
+    for (size_t i = 0; i < IntToSize(rank_size); ++i) {
+      ranks.push_back(i);
+    }
+    HcclCollectiveGroup::instance().CreateCommGroup(kHcclWorldGroup, ranks);
+    return true;
+  }
+
   auto config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH");
   if (config_path_str == nullptr) {
     config_path_str = std::getenv("RANK_TABLE_FILE");
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
index e1a773864c8..a2850cdc33d 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc
@@ -1992,6 +1992,28 @@ CNodePtr AscendStreamAssign::CreateRecvApplyKernel(const NotNull<KernelGraphPtr>
   return recv_node_ptr;
 }
 
+bool AscendStreamAssign::IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node,
+                                         const CNodePtr &cur_node, bool exclude_hcom) {
+  MS_EXCEPTION_IF_NULL(nop_node);
+  auto cnode = nop_node->cast<CNodePtr>();
+  auto new_inputs = cnode->inputs();
+  for (size_t i = 1; i < new_inputs.size(); i++) {
+    if (opt::IsNopNode(new_inputs[i])) {
+      if (IsNopNodeTarget(new_inputs[i], target_node, cur_node, exclude_hcom)) {
+        return true;
+      }
+    } else {
+      auto new_real_input = AnfAlgo::VisitKernel(new_inputs[i], 0);
+      if (target_node == new_real_input.first) {
+        if (!(exclude_hcom && IsHcom(cur_node))) {
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 vector<CNodePtr>::iterator AscendStreamAssign::FindTargetOp(vector<CNodePtr>::iterator begin,
                                                             vector<CNodePtr>::iterator end, const CNodePtr &node,
                                                             bool exclude_hcom) {
@@ -2000,18 +2022,8 @@ vector<CNodePtr>::iterator AscendStreamAssign::FindTargetOp(vector<CNodePtr>::it
     for (size_t i = 1; i < inputs.size(); i++) {
       auto input = inputs[i];
       if (opt::IsNopNode(input)) {
-        CNodePtr cnode = input->cast<CNodePtr>();
-        auto new_inputs = cnode->inputs();
-        for (size_t j = 1; j < new_inputs.size(); j++) {
-          auto new_real_input = AnfAlgo::VisitKernel(new_inputs[j], 0);
-          // find target node except hcom op. insert event for hcom in:InsertEventHcomDependCommonBak function
-          // only insert one time
-          if (node == new_real_input.first) {
-            if (!(exclude_hcom && IsHcom(*begin))) {
-              MS_LOG(DEBUG) << "Nop node find target op[" << (*begin)->DebugString() << "]";
-              return begin;
-            }
-          }
+        if (IsNopNodeTarget(input, node, *begin, exclude_hcom)) {
+          return begin;
         }
       } else {
         auto real_input = AnfAlgo::VisitKernel(input, 0);
diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
index 8f7773e77bd..bfe55a440dc 100644
--- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
+++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h
@@ -175,7 +175,8 @@ class AscendStreamAssign {
   uint32_t GetIndexByKey(const NotNull<KernelGraphPtr> &graph_ptr, const CNodeKey &key);
   uint32_t GetIndependentStreamSwitchStreamId(const NotNull<KernelGraphPtr> &graph_ptr);
   void GetIndependentMaxTarget(const NotNull<KernelGraphPtr> &graph_ptr);
-
+  bool IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node, const CNodePtr &cur_node,
+                       bool exclude_hcom);
   bool IsTaskSink();
   bool IsHcom(const CNodePtr &cur_cnode_ptr);
   bool IsIndependentNode(const CNodePtr &node_ptr);
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc
index 21fa2d4263c..c37be1a0f43 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc
@@ -19,12 +19,12 @@
 #include <memory>
 #include "framework/common/debug/log.h"
 #include "utils/log_adapter.h"
-#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
 #include "register/op_tiling.h"
 #include "utils/convert_utils_base.h"
 #include "utils/ms_context.h"
 #include "runtime/device/kernel_runtime_manager.h"
 #include "pipeline/jit/static_analysis/static_analysis.h"
+#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h"
 #include "common/trans.h"
 
 namespace mindspore {
@@ -131,14 +131,17 @@ void AiCoreDynamicKernel::ComputeTiling() {
   auto cnode = cnode_ptr_.lock();
   MS_EXCEPTION_IF_NULL(cnode);
   MS_LOG(INFO) << "Start compute tiling of:" << cnode->fullname_with_scope();
-  optiling::OpRunInfo op_run_info;
+  // start compute tiling
+  optiling::utils::OpRunInfo op_run_info_v2(-1, true, 0);
+  tiling::OpTilingCalculateAdapter converter;
+  ge::ComputeGraphPtr ge_graph = std::make_shared<ge::ComputeGraph>("default");
+  auto ge_node = converter.AnfNodeToGeNodeAdapter(cnode, &ge_graph, depend_tensor_map_);
+  (void)optiling::OpParaCalculateV2(*ge_node, op_run_info_v2);
 
-  OpTilingCalculater::GetInstance().CalculateTiling(NOT_NULL(cnode), op_compile_info_, depend_tensor_map_,
-                                                    NOT_NULL(&op_run_info));
-  block_dim_ = op_run_info.block_dim;
-  workspaces_size_ = op_run_info.workspaces;
-  tiling_data_ = op_run_info.tiling_data.str();
-  tiling_key_ = op_run_info.tiling_key;
+  block_dim_ = op_run_info_v2.GetBlockDim();
+  op_run_info_v2.GetAllWorkspaces(workspaces_size_);
+  tiling_data_ = op_run_info_v2.GetAllTilingData().str();
+  tiling_key_ = op_run_info_v2.GetTilingKey();
 }
 
 void AiCoreDynamicKernel::AllocateWorkspace() {
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
index 706c1dd46c3..c3b897ca391 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc
@@ -182,7 +182,7 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() {
     MS_LOG(INFO) << "Get output:" << output_num_ << " Shape";
     std::vector<int64_t> shape;
     TypeId type_id;
-    ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id));
+    (void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id));
 
     for (auto x : shape) {
       MS_LOG(INFO) << "Update output:" << i << " shape:" << x;
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc
index f7ebd026df5..5d22d300520 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc
@@ -138,10 +138,14 @@ void OpTilingCalculateAdapter::ConvertOutputShapeAndType(const CNodePtr &node, g
 void OpTilingCalculateAdapter::ConvertCompileInfo(const CNodePtr &node, ge::OpDescPtr *op_desc) {
   MS_EXCEPTION_IF_NULL(node);
   MS_EXCEPTION_IF_NULL(*op_desc);
-  MS_LOG(INFO) << "For op " << op_name_ << ", get compile_info: " << op_compile_info_;
-  std::string compile_info_key = std::to_string(std::hash<std::string>()(op_compile_info_));
+  if (!AnfAlgo::HasNodeAttr(kAttrCompileInfo, node)) {
+    MS_LOG(EXCEPTION) << "Get compile_info failed";
+  }
+  auto compile_info_attr = AnfAlgo::GetNodeAttr<std::string>(node, kAttrCompileInfo);
+  MS_LOG(INFO) << "For op " << op_name_ << ", get compile_info: " << compile_info_attr;
+  std::string compile_info_key = std::to_string(std::hash<std::string>()(compile_info_attr));
   (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_KEY, compile_info_key);
-  (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_JSON, op_compile_info_);
+  (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_JSON, compile_info_attr);
 }
 
 ge::NodePtr OpTilingCalculateAdapter::NewConstantOp(const CNodePtr &node, const std::string &name,
@@ -265,11 +269,9 @@ void OpTilingCalculateAdapter::InitOpIoName(const CNodePtr &node) {
 }
 
 ge::NodePtr OpTilingCalculateAdapter::AnfNodeToGeNodeAdapter(
-  const CNodePtr &node, ge::ComputeGraphPtr *ge_graph, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
-  const std::string &op_compile_info) {
+  const CNodePtr &node, ge::ComputeGraphPtr *ge_graph, const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map) {
   MS_EXCEPTION_IF_NULL(node);
   op_name_ = AnfAlgo::GetCNodeName(node);
-  op_compile_info_ = op_compile_info;
   auto op_type = GetRealOpType(op_name_);
   (void)InitOpIoName(node);
   ge::OpDescPtr op_desc = std::make_shared<ge::OpDesc>(op_name_, op_type);
diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h
index 5c92c2bfc0d..9dbfd7ab8ca 100644
--- a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h
+++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h
@@ -37,8 +37,7 @@ class OpTilingCalculateAdapter {
   ~OpTilingCalculateAdapter() = default;
 
   ge::NodePtr AnfNodeToGeNodeAdapter(const CNodePtr &node, ge::ComputeGraphPtr *ge_graph,
-                                     const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
-                                     const std::string &op_compile_info);
+                                     const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map);
 
  private:
   void ConvertInputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc);
@@ -56,7 +55,6 @@ class OpTilingCalculateAdapter {
   std::string GetOutputName(const CNodePtr &node, size_t index);
   void InitOpIoName(const CNodePtr &node);
   std::string op_name_;
-  std::string op_compile_info_;
   std::vector<std::string> input_names_;
   std::vector<std::string> output_names_;
 };
diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
index 48725ce7f6d..60ff6bc8b2f 100644
--- a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc
@@ -525,7 +525,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern
 }
 
 void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) {
-  auto kernel_info = static_cast<device::KernelInfo *>(kernel_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(kernel_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto kernel_build_info = kernel_info->select_kernel_build_info();
   MS_EXCEPTION_IF_NULL(kernel_build_info);
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h
index ace8c4631d3..61f9b268c05 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h
@@ -18,10 +18,10 @@
 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_PROFILING_CALLBACK_REGISTER_H_
 
 #include "toolchain/prof_callback.h"
+#include "toolchain/prof_acl_api.h"
 
 #define MAX_DEV_NUM (64)
 
-using Status = uint32_t;
 enum ProfCommandHandleType {
   kProfCommandhandleInit = 0,
   kProfCommandhandleStart,
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
index 0d33fa4219e..666d266bc74 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc
@@ -24,7 +24,6 @@
 #include "utils/ms_utils.h"
 #include "utils/convert_utils.h"
 #include "runtime/base.h"
-#include "toolchain/prof_acl_api.h"
 #include "runtime/device/ascend/profiling/profiling_callback_register.h"
 #include <nlohmann/json.hpp>
 
diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
index 0ca8d7971a2..d6b57f373b9 100644
--- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
+++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h
@@ -24,11 +24,11 @@
 #include "utils/contract.h"
 #include "utils/ms_context.h"
 #include "toolchain/prof_callback.h"
+#include "toolchain/prof_acl_api.h"
 #include "runtime/device/ascend/profiling/profiling_callback_register.h"
 
 using std::map;
 using std::string;
-using Status = uint32_t;
 namespace mindspore {
 namespace device {
 namespace ascend {
diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc
index 96c51cd1c34..d096401288f 100644
--- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc
+++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc
@@ -31,6 +31,8 @@ namespace cpu {
 using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;
 using mindspore::kernel::KernelBuildInfo;
 namespace {
+constexpr auto kParamDynamic = "dynamic";
+
 bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) {
   auto input_node = AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first;
   MS_EXCEPTION_IF_NULL(input_node);
@@ -66,6 +68,13 @@ void GetOutputDtypes(const CNodePtr &kernel_node, std::vector<TypeId> *output_ty
   }
 }
 
+void GetOutputFormat(const CNodePtr &kernel_node, std::vector<std::string> *output_formats) {
+  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
+  for (size_t output_index = 0; output_index < output_num; ++output_index) {
+    output_formats->emplace_back(kOpFormat_DEFAULT);
+  }
+}
+
 void GetInputDtypes(const CNodePtr &kernel_node, std::vector<TypeId> *input_types,
                     std::vector<size_t> *input_no_cnode_indexes) {
   size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
@@ -81,6 +90,13 @@ void GetInputDtypes(const CNodePtr &kernel_node, std::vector<TypeId> *input_type
   }
 }
 
+void GetInputFormat(const CNodePtr &kernel_node, std::vector<std::string> *input_formats) {
+  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+  for (size_t input_index = 0; input_index < input_num; ++input_index) {
+    input_formats->emplace_back(kOpFormat_DEFAULT);
+  }
+}
+
 void GetOutputFormatsAndDtypes(const CNodePtr &kernel_node, const KernelAttr &kernel_attr,
                                std::vector<std::string> *output_formats, std::vector<TypeId> *output_types) {
   size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
@@ -200,7 +216,57 @@ void KernelNotSupportException(const AnfNodePtr &kernel_node, const std::vector<
   operator_info << "is not support.";
   MS_EXCEPTION(TypeError) << operator_info.str() << " Trace: " << trace::DumpSourceLines(kernel_node);
 }
+
+void UpdateDynamicKernelBuildInfoAndAttrs(const CNodePtr &kernel_node) {
+  const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node);
+  MS_LOG(INFO) << "Operator name: " << op_name;
+  // Set kernel build info
+  std::vector<TypeId> input_types;
+  std::vector<size_t> input_not_cnode_indexes;
+  GetInputDtypes(kernel_node, &input_types, &input_not_cnode_indexes);
+  std::vector<TypeId> output_types;
+  GetOutputDtypes(kernel_node, &output_types);
+  std::vector<std::string> input_formats;
+  GetInputFormat(kernel_node, &input_formats);
+  std::vector<std::string> output_formats;
+  GetOutputFormat(kernel_node, &output_formats);
+  SetKernelBuildInfo(input_formats, input_types, output_formats, output_types, kernel_node.get());
+
+  // Set kernel attrs
+  KernelAttr attr;
+  for (size_t i = 0; i < input_types.size(); i++) {
+    attr.AddInputAttr(input_types[i]);
+  }
+  for (size_t j = 0; j < output_types.size(); j++) {
+    attr.AddInputAttr(output_types[j]);
+  }
+  std::vector<KernelAttr> kernel_attrs =
+    kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node));
+  kernel_attrs.emplace_back(attr);
+  kernel::CPUKernelFactory::GetInstance().UpdateKernelAttrs(op_name, kernel_attrs);
+  return;
+}
 }  // namespace
+
+bool IsDynamicParamKernel(const std::string &op_name) {
+  const auto &op_info = kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU);
+  if (op_info == nullptr) {
+    return false;
+  }
+
+  const auto &input_io_info = op_info->inputs_ptr();
+  if (input_io_info.size() != 1 || input_io_info[0]->param_type() != kParamDynamic) {
+    return false;
+  }
+
+  const auto &output_io_info = op_info->outputs_ptr();
+  if (output_io_info.size() != 1 || output_io_info[0]->param_type() != kParamDynamic) {
+    return false;
+  }
+
+  return true;
+}
+
 bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr,
                   const std::vector<KernelAttr> &kernel_attrs, const std::vector<TypeId> &input_types,
                   const std::vector<size_t> &input_not_cnode_indexes, const std::vector<TypeId> &output_types,
@@ -229,7 +295,14 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr,
   }
   return false;
 }
+
 void SetKernelInfo(const CNodePtr &kernel_node) {
+  // Select for dynamic kernel(both the number and data type are undetermined).
+  const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node);
+  if (IsDynamicParamKernel(op_name)) {
+    return UpdateDynamicKernelBuildInfoAndAttrs(kernel_node);
+  }
+
   std::vector<std::string> input_formats;
   std::vector<TypeId> input_types;
   std::vector<size_t> input_not_cnode_indexes;
@@ -241,7 +314,6 @@ void SetKernelInfo(const CNodePtr &kernel_node) {
     kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node));
   if (kernel_attrs.empty() || (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0)) {
     MS_LOG(DEBUG) << "Operator[" << AnfAlgo::GetCNodeName(kernel_node) << "] will get ops attr info.";
-    std::string op_name = AnfAlgo::GetCNodeName(kernel_node);
     auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU);
     if (op_info_ptr == nullptr) {
       MS_LOG(EXCEPTION) << "Not find op[" << op_name << "] in cpu";
diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h
index 9fd5c55b7d5..867676cd85e 100644
--- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h
+++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h
@@ -29,6 +29,8 @@ namespace mindspore {
 namespace device {
 namespace cpu {
 void SetKernelInfo(const CNodePtr &apply_kernel_ptr);
+// Indicate whether the kernel input/output number are variable.
+bool IsDynamicParamKernel(const std::string &op_name);
 
 class KernelAttr {
  public:
diff --git a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h
index ebb97f0866b..5c9275e36c1 100644
--- a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h
+++ b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h
@@ -30,7 +30,7 @@
 
 namespace mindspore {
 namespace device {
-enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_NOT_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
+enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT };
 
 struct DataItemGpu {
   int32_t worker_id_;
diff --git a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
index 9186488945c..e162ffd157f 100644
--- a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc
@@ -54,6 +54,10 @@ bool CudaEnvChecker::CheckNvccInPath() {
 }
 
 void CudaEnvChecker::GetRealPaths(std::set<std::string> *paths) const {
+  if (paths == nullptr) {
+    MS_LOG(ERROR) << "The pointer paths is nullptr";
+    return;
+  }
   auto env_paths_ptr = std::getenv(kPathEnv);
   if (env_paths_ptr == nullptr) {
     MS_LOG(ERROR) << "Please export environment variable PATH";
diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc
index eac50cb9369..3c4745cb0b5 100644
--- a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc
@@ -55,6 +55,9 @@ bool MPIWrapper::CreateCommGroup(const std::string &group_name, const std::vecto
   }
 
   ncclUniqueId group_unique_id;
+  if (ranks.size() == 0) {
+    return false;
+  }
   if (rank_id_ == ranks[0]) {
     group_unique_id = NCCLWrapper::instance().nccl_unique_id();
   }
@@ -138,9 +141,10 @@ void MPIWrapper::AssignLocalRankID() {
 
   const int kRankSize = rank_size_;
   size_t all_host_hashs[kRankSize];
+  CHECK_RET((rank_id_ < kRankSize), true, "The rank id is not less than rank size.");
   all_host_hashs[rank_id_] = host_hash;
   CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD),
-            MPI_SUCCESS, "MPI_Allgather host hashs failed.");
+            MPI_SUCCESS, "MPI_Allgather host hashes failed.");
   for (int global_rank = 0; global_rank < kRankSize; global_rank++) {
     if (global_rank == rank_id_) {
       break;
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc
index 947490dfde9..02f0bf2fcb6 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc
@@ -52,8 +52,8 @@ BlockQueueStatus_T GpuBufferMgr::Create(unsigned int device_id, const std::strin
                                         const std::vector<size_t> &shape, const size_t &capacity) {
   std::string name = std::to_string(device_id) + std::string("_") + channel_name;
   if (name_queue_map_.count(name)) {
-    MS_LOG(ERROR) << "Queue not exist " << name;
-    return QUEUE_NOT_EXIST;
+    MS_LOG(ERROR) << "Queue already exist: " << name;
+    return QUEUE_EXIST;
   }
   std::shared_ptr<BlockingQueue> queue = std::make_shared<BlockingQueue>();
   BlockQueueStatus_T rt = queue->Create(addr, shape, capacity);
@@ -205,6 +205,10 @@ size_t GpuBufferMgr::Size(unsigned int handle) {
     MS_LOG(ERROR) << "handle is invalid";
     return 0;
   }
+  if (handle_queue_map_.count(handle) == 0) {
+    MS_LOG(ERROR) << "Handle not exist " << handle;
+    return 0;
+  }
   return handle_queue_map_.at(handle)->Size();
 }
 
@@ -222,6 +226,10 @@ size_t GpuBufferMgr::Capacity(unsigned int handle) {
     MS_LOG(ERROR) << "handle is invalid";
     return 0;
   }
+  if (handle_queue_map_.count(handle) == 0) {
+    MS_LOG(ERROR) << "Handle not exist " << handle;
+    return 0;
+  }
   return handle_queue_map_.at(handle)->Capacity();
 }
 
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
index eed333d7a17..604ca05328c 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc
@@ -135,6 +135,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi
     return true;
   }
 
+  MS_EXCEPTION_IF_NULL(Debugger::GetInstance());
   if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) {
     MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again.";
     return true;
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc
index 5be77aef128..b176799dfae 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc
@@ -33,6 +33,7 @@ void CreateGPUKernel(const std::vector<CNodePtr> &kernels) {
   bool already_check_nvcc = false;
   std::vector<AnfNodePtr> akg_nodes;
   for (const auto &kernel : kernels) {
+    MS_EXCEPTION_IF_NULL(kernel);
     std::string kernel_name = session::AnfRuntimeAlgorithm::GetCNodeName(kernel);
     if (kernel_name == prim::kPrimTupleGetItem->name() || kernel_name == prim::kPrimMakeTuple->name() ||
         kernel_name == prim::kPrimDepend->name() || kernel_name == prim::kPrimStateSetItem->name()) {
@@ -41,8 +42,7 @@ void CreateGPUKernel(const std::vector<CNodePtr> &kernels) {
 
     if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) {
       if (!bin_map->initialized()) {
-        auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid();
-        bin_map->Initialize(pid);
+        bin_map->Initialize();
       }
       if (!already_check_nvcc) {
         already_check_nvcc = true;
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
index e56bdcfa5ad..a6ba90f0ee4 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc
@@ -124,6 +124,8 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_workspaces,
                     const std::vector<mindspore::kernel::AddressPtr> &kernel_outputs, int exec_order, void *stream_ptr,
                     bool dump_enabled, bool last_kernel) {
+  MS_EXCEPTION_IF_NULL(debugger);
+  MS_EXCEPTION_IF_NULL(kernel);
   // check if we should read the kernel data
   bool read_data = false;
   auto &dump_json_parser = DumpJsonParser::GetInstance();
@@ -147,6 +149,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
     auto input_size = AnfAlgo::GetInputTensorNum(kernel);
     for (size_t j = 0; j < input_size; ++j) {
       auto input_kernel = kernel->input(j + 1);
+      MS_EXCEPTION_IF_NULL(input_kernel);
       std::string input_kernel_name = input_kernel->fullname_with_scope();
       auto addr = kernel_inputs[j];
       auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
@@ -155,6 +158,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
         continue;
       }
       auto format = kOpFormat_DEFAULT;
+      MS_EXCEPTION_IF_NULL(addr);
       auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
       string input_tensor_name = input_kernel_name + ':' + "0";
       ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
@@ -181,6 +185,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
         continue;
       }
       auto format = kOpFormat_DEFAULT;
+      MS_EXCEPTION_IF_NULL(addr);
       auto gpu_addr = std::make_unique<GPUDeviceAddress>(addr->addr, addr->size, format, type);
       string tensor_name = kernel_name + ':' + std::to_string(j);
       ShapeVector int_shapes = trans::GetRuntimePaddingShape(kernel, j);
@@ -246,7 +251,10 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
 #ifdef ENABLE_DEBUGGER
   if (debugger_ && debugger_->debugger_enabled()) {
     debugger_->SetTrainingDone(true);
-    debugger_->SendMetadata(false);
+    bool ret = debugger_->SendMetadata(false);
+    if (!ret) {
+      MS_LOG(ERROR) << "Failed to SendMetadata when finalize";
+    }
   }
 #endif
   if (GpuBufferMgr::GetInstance().IsInit()) {
@@ -272,14 +280,6 @@ void GPUKernelRuntime::ReleaseDeviceRes() {
   if (mem_manager_ != nullptr) {
     mem_manager_->FreeDeviceMemory();
   }
-
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
-    kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
-    MS_EXCEPTION_IF_NULL(bin_map);
-    bin_map->RemoveKernelCache();
-  }
 }
 
 void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector<AnfNodePtr> &inputs,
@@ -292,6 +292,7 @@ void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::v
 }
 
 void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
   if (is_alloc_inplace_res_[graph->graph_id()]) {
     return;
   }
@@ -304,6 +305,7 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph)
       continue;
     }
     auto primitive = AnfAlgo::GetCNodePrimitive(kernel);
+    MS_EXCEPTION_IF_NULL(primitive);
     auto group_attr = primitive->GetAttr("inplace_group");
     MS_EXCEPTION_IF_NULL(group_attr);
     auto group_id = GetValue<uint32_t>(group_attr);
@@ -318,14 +320,18 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph)
     }
 
     auto primitive = AnfAlgo::GetCNodePrimitive(item[0]);
+    MS_EXCEPTION_IF_NULL(primitive);
     auto output_index = GetValue<uint32_t>(primitive->GetAttr("inplace_output_index"));
     auto device_address = GetMutableOutputAddr(item[0], output_index, false);
+    MS_EXCEPTION_IF_NULL(device_address);
     if (device_address->GetPtr() != nullptr) {
       continue;
     }
 
     auto kernel_mod = AnfAlgo::GetKernelMod(item[0]);
+    MS_EXCEPTION_IF_NULL(kernel_mod);
     auto output_size = kernel_mod->GetOutputSizeList();
+    MS_EXCEPTION_IF_NULL(mem_manager_);
     auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_size[output_index]);
     if (!ret) {
       MS_LOG(EXCEPTION) << "Device memory isn't enough and alloc failed, alloc size:" << output_size[output_index];
@@ -333,6 +339,7 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph)
 
     for (auto &node : item) {
       auto prim = AnfAlgo::GetCNodePrimitive(node);
+      MS_EXCEPTION_IF_NULL(prim);
       auto index = GetValue<uint32_t>(prim->GetAttr("inplace_output_index"));
       AnfAlgo::SetOutputAddr(device_address, index, node.get());
     }
@@ -486,6 +493,7 @@ std::shared_ptr<DeviceEvent> GPUKernelRuntime::CreateDeviceEvent() {
 }
 
 bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
   auto graph_id = graph->graph_id();
   if (!is_first_step_map_[graph_id] || graph->is_dynamic_shape()) {
     // Normally run graph
@@ -508,6 +516,8 @@ bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) {
 }
 
 bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_swap_manager_);
   MS_LOG(INFO) << "Run out of memory and try memory swapping, it may take some time, please wait a moment.";
   bool ret = false;
   ClearKernelOldOutputAndWorkspace(graph);
@@ -538,6 +548,8 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) {
 }
 
 bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_swap_manager_);
   MS_LOG(INFO) << "Refine memory swap scheme, it may take some time, please wait a moment.";
   auto &kernels = graph->execution_order();
   for (const auto &kernel : kernels) {
@@ -650,6 +662,7 @@ void GPUKernelRuntime::ClearKernelOldOutputAndWorkspace(const session::KernelGra
 
 void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto &kernels = graph->execution_order();
   for (const auto &kernel : kernels) {
     if (IsGraphOutput(graph, kernel)) {
@@ -674,6 +687,7 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap
 
 void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *graph) {
   MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   auto &kernels = graph->execution_order();
   for (const auto &kernel : kernels) {
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
@@ -690,6 +704,7 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g
 }
 
 CNodePtr GetLastKernel(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
   const auto &kernels = graph->execution_order();
   CNodePtr last_kernel;
   for (const auto &kernel : kernels) {
@@ -735,6 +750,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
     kernel::GpuKernel *gpu_kernel = nullptr;
     if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) {
       gpu_kernel = dynamic_cast<kernel::GpuKernel *>(kernel_mod);
+      MS_EXCEPTION_IF_NULL(gpu_kernel);
       dynamic_kernel = gpu_kernel->DynamicKernel();
     }
 
@@ -749,6 +765,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
     auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock);
     if (!ret) {
       if (!mock) {
+        MS_EXCEPTION_IF_NULL(debugger_);
         // invalidate current data collected by the debugger
         debugger_->ClearCurrentData();
       }
@@ -796,6 +813,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo
 void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel,
                                                const AddressPtrList &inputs, const AddressPtrList &workspaces,
                                                const AddressPtrList &outputs, bool profiling) {
+  MS_EXCEPTION_IF_NULL(graph);
+  MS_EXCEPTION_IF_NULL(kernel);
+
   auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance();
   MS_EXCEPTION_IF_NULL(profiler_inst);
 
@@ -810,6 +830,7 @@ void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph
       profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_);
     }
     auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
+    MS_EXCEPTION_IF_NULL(kernel_mod);
     if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) {
       MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope();
     }
@@ -836,6 +857,7 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap
     kernel::GpuKernel *gpu_kernel = nullptr;
     if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) {
       gpu_kernel = dynamic_cast<kernel::GpuKernel *>(kernel_mod);
+      MS_EXCEPTION_IF_NULL(gpu_kernel);
       dynamic_kernel = gpu_kernel->DynamicKernel();
     }
     // pre-processing for dynamic shape kernel
@@ -862,6 +884,7 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap
 
 void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs,
                                                      const AddressPtrList &workspace, const AddressPtrList &outputs) {
+  MS_EXCEPTION_IF_NULL(mem_swap_manager_);
   auto kernel_mod = AnfAlgo::GetKernelMod(kernel);
   MS_EXCEPTION_IF_NULL(kernel_mod);
   float cost_time = 0;
@@ -886,6 +909,7 @@ void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, c
 
 bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bool profiling) {
   MS_EXCEPTION_IF_NULL(mem_swap_manager_);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   const MemSwapInfoSet &mem_swap_info_set = mem_swap_manager_->QueryKernelMemSwapInfo(kernel);
   for (auto &mem_swap_info : mem_swap_info_set) {
     auto need_swap_kernel = mem_swap_manager_->QueryKernelByTopoOrder(mem_swap_info.topo_order_);
@@ -893,6 +917,7 @@ bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bo
     const HostAddress &host_address =
       mem_swap_manager_->QueryKernelHostAddr(need_swap_kernel, mem_swap_info.output_idx_);
     auto device_address = GetMutableOutputAddr(need_swap_kernel, mem_swap_info.output_idx_, false);
+    MS_EXCEPTION_IF_NULL(device_address);
 
     if (mem_swap_info.swap_kind_ == SwapKind::kDeviceToHost) {
       if (mem_swap_manager_->QueryKernelHostAddrIsDirty(need_swap_kernel, mem_swap_info.output_idx_)) {
@@ -943,6 +968,7 @@ bool GPUKernelRuntime::UpdateMemorySwapTask(const AnfNodePtr &kernel, bool mock,
 }
 
 void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_address, bool mock) {
+  MS_EXCEPTION_IF_NULL(device_address);
   MS_EXCEPTION_IF_NULL(mem_swap_manager_);
   if (!mem_swap_manager_->trigger_swap()) {
     return;
@@ -977,6 +1003,7 @@ void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_addre
 
 void GPUKernelRuntime::UpdateHostSwapOutQueue(bool mock) {
   MS_EXCEPTION_IF_NULL(mem_swap_manager_);
+  MS_EXCEPTION_IF_NULL(mem_manager_);
   if (!mem_swap_manager_->trigger_swap()) {
     return;
   }
@@ -1059,6 +1086,7 @@ bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &k
     // Get in-place output_address
     if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) {
       auto primitive = AnfAlgo::GetCNodePrimitive(kernel);
+      MS_EXCEPTION_IF_NULL(primitive);
       auto input_index = GetValue<uint32_t>(primitive->GetAttr("aggregate_input_index"));
       if (i == input_index) {
         auto skip_node = AnfAlgo::GetInputNode(utils::cast<CNodePtr>(kernel), input_index);
@@ -1115,6 +1143,7 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K
       continue;
     }
     auto device_address = AnfAlgo::GetMutableWorkspaceAddr(kernel, i);
+    MS_EXCEPTION_IF_NULL(device_address);
     if (device_address->ptr_ == nullptr && !AttemptMallocMem(device_address, workspace_sizes[i], mock)) {
       return false;
     }
@@ -1128,12 +1157,12 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K
 }
 
 void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph *graph) {
+  MS_EXCEPTION_IF_NULL(graph);
   if (is_alloc_communication_res_[graph->graph_id()]) {
     return;
   }
   is_alloc_communication_res_[graph->graph_id()] = true;
 
-  MS_EXCEPTION_IF_NULL(graph);
   auto &kernels = graph->execution_order();
   for (auto &kernel : kernels) {
     MS_EXCEPTION_IF_NULL(kernel);
@@ -1226,6 +1255,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel)
   for (size_t i = 0; i < input_num; ++i) {
     if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) {
       auto primitive = AnfAlgo::GetCNodePrimitive(kernel);
+      MS_EXCEPTION_IF_NULL(primitive);
       auto index = GetValue<uint32_t>(primitive->GetAttr("aggregate_input_index"));
       if (i == index) {
         continue;
@@ -1250,6 +1280,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel)
         device_address = GetPrevNodeMutableOutputAddr(kernel, i, true);
       }
       mem_manager_->FreeMemFromMemPool(device_address);
+      MS_EXCEPTION_IF_NULL(device_address);
       device_address->set_status(DeviceAddressStatus::kInDevice);
     }
   }
@@ -1262,6 +1293,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel)
     }
     if (kernel_ref_count_ptr->ref_count_dynamic_use_ == 0) {
       auto device_address = GetMutableOutputAddr(kernel, i, false);
+      MS_EXCEPTION_IF_NULL(device_address);
       mem_manager_->FreeMemFromMemPool(device_address);
       device_address->set_status(DeviceAddressStatus::kInDevice);
     }
@@ -1296,7 +1328,7 @@ DeviceAddressPtr GPUKernelRuntime::GetPrevNodeMutableOutputAddr(const AnfNodePtr
   }
 
   session::KernelWithIndex prev_node_with_index = addr_iter->second[i];
-  auto kernel_info = static_cast<device::KernelInfo *>(prev_node_with_index.first->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(prev_node_with_index.first->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto addr = kernel_info->GetMutableOutputAddr(prev_node_with_index.second);
 
diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc
index 574ddca14d7..60f06c5733a 100644
--- a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc
@@ -61,6 +61,8 @@ void AssignGpuStream(const std::shared_ptr<session::KernelGraph> &kernel_graph)
 
 bool FindAllReduceStreamSwitchPos(const std::shared_ptr<session::KernelGraph> &kernel_graph,
                                   std::vector<SendRecvPair> *send_recv_pairs) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
+  MS_EXCEPTION_IF_NULL(send_recv_pairs);
   auto execution_kernels = kernel_graph->execution_order();
   std::vector<CNodePtr>::iterator iter, iter_begin;
   iter = iter_begin = execution_kernels.begin();
@@ -126,6 +128,7 @@ std::vector<CNodePtr>::iterator FindRecvNodePos(std::vector<CNodePtr>::iterator
   for (auto iter = begin; iter != end; iter++) {
     auto node = *iter;
     if (stream_switch_type == kAllReduceStreamSwitch) {
+      MS_EXCEPTION_IF_NULL(node);
       for (auto input : node->inputs()) {
         if (mock_send_node == AnfAlgo::VisitKernel(input, 0).first) {
           if (AnfAlgo::GetCNodeName(node) != kAllReduceOpName) {
@@ -142,6 +145,7 @@ std::vector<CNodePtr>::iterator FindRecvNodePos(std::vector<CNodePtr>::iterator
 
 void InsertStreamSwitchNode(const std::shared_ptr<session::KernelGraph> &kernel_graph,
                             const std::vector<SendRecvPair> &send_recv_pairs) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   std::set<StreamSwitchNode> ordered_stream_switch_nodes;
   for (SendRecvPair pair : send_recv_pairs) {
     StreamSwitchType stream_switch_type = pair.stream_switch_type;
@@ -194,6 +198,7 @@ bool GenSendRecvCNodesForAllReduce(const std::shared_ptr<session::KernelGraph> &
 }
 
 CNodePtr CreateStreamSwitchNode(const std::shared_ptr<session::KernelGraph> &kernel_graph, const std::string &name) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   auto op = std::make_shared<Primitive>(name);
   MS_EXCEPTION_IF_NULL(op);
   auto apply = std::make_shared<ValueNode>(op);
diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
index 46b99a7766c..36a5271cfcd 100644
--- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
+++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc
@@ -242,6 +242,9 @@ bool IsNeedProcessFormatInfo(const CNodePtr &kernel_node, const std::vector<Type
 void UpdateKernelFormatInfo(const CNodePtr &kernel_node, const std::vector<TypeId> &inputs_type,
                             std::vector<std::string> *inputs_format, std::vector<std::string> *outputs_format,
                             std::string *origin_data_format) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  MS_EXCEPTION_IF_NULL(inputs_format);
+  MS_EXCEPTION_IF_NULL(outputs_format);
   auto kernel_name = AnfAlgo::GetCNodeName(kernel_node);
   auto iter = kKernelFormatPositionMap.find(kernel_name);
   if (iter == kKernelFormatPositionMap.end()) {
@@ -351,6 +354,7 @@ void PrintUnsupportedTypeException(const CNodePtr &kernel_node, const std::vecto
 }  // namespace
 
 void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptr<session::KernelGraph> &kernel_graph) {
+  MS_EXCEPTION_IF_NULL(kernel_graph);
   // TensorCore can be used only in Volta or newer devices.
   const int marjor_sm = GET_MAJOR_SM;
   if (marjor_sm < RECOMMEND_SM) {
@@ -387,6 +391,7 @@ void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptr<s
 }
 
 void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
   if (AnfAlgo::IsGraphKernel(kernel_node)) {
     auto func_graph = AnfAlgo::GetCNodeFuncGraphPtr(kernel_node);
     MS_EXCEPTION_IF_NULL(func_graph);
diff --git a/mindspore/ccsrc/runtime/device/kernel_runtime.cc b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
index 7d2af553466..8b1343d43c8 100644
--- a/mindspore/ccsrc/runtime/device/kernel_runtime.cc
+++ b/mindspore/ccsrc/runtime/device/kernel_runtime.cc
@@ -482,9 +482,12 @@ void KernelRuntime::GenKernelEvents(const session::KernelGraph *graph) {
     for (size_t j = i + 1; j < kernels.size(); ++j) {
       auto &child = kernels[j];
       MS_EXCEPTION_IF_NULL(child);
+      if (AnfAlgo::IsCommunicationOp(child)) {
+        continue;
+      }
       auto input_size = child->inputs().size() - 1;
       for (size_t k = 0; k < input_size; ++k) {
-        auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0);
+        auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0, true);
         if (kernel_index.first == kernel) {
           found_nearest_child = true;
           break;
@@ -617,7 +620,6 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP
   if (addr_size.empty()) {
     return;
   }
-
   if (type == kSomasReuseDynamicMem) {
     bool not_reuse = KernelMemNotReuse(node);
     if (not_reuse) {
@@ -695,7 +697,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
   std::vector<tensor::TensorPtr> tensors;
   TensorValueToTensor(node_value, &tensors);
   // Graph id should be passed to record static memory if profiling is enabled.
-  auto kernel_info = static_cast<device::KernelInfo *>(value_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(value_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   uint32_t graph_id = kernel_info->graph_id();
   for (const auto &tensor : tensors) {
@@ -709,7 +711,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const
                              value_node.get());
       continue;
     }
-    size_t tensor_size = tensor->data().nbytes();
+    size_t tensor_size = LongToSize(tensor->data().nbytes());
     auto node_size = AnfAlgo::GetOutputTensorMemSize(value_node, output_idx);
     TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx);
     if (output_type_id == kTypeUnknown) {
diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
index afd9f03e5c4..d525045a003 100644
--- a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc
@@ -29,7 +29,6 @@
 
 namespace mindspore {
 namespace runtime {
-
 void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
                        const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
                        const AID *from_aid) {
diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
index fe867d82e30..84996aa42fc 100644
--- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc
@@ -75,7 +75,7 @@ void GatherActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *cont
   }
 }
 
-void GatherActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context) {
+void GatherActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   auto &sequential_num = context->sequential_num_;
   input_branch_ids_[sequential_num] = branch_id;
@@ -97,7 +97,7 @@ void GatherActor::FetchBackendInputNode(const FuncGraphPtr &func_graph, const Co
   }
 }
 
-void GatherActor::SendOutput(OpContext<DeviceTensor> *context) const {
+void GatherActor::SendOutput(OpContext<DeviceTensor> *const context) const {
   MS_EXCEPTION_IF_NULL(context);
   // Must be the execution order: send branch id --> send result --> send data --> send control, avoid the illegal
   // timing problem.
@@ -138,7 +138,7 @@ void GatherActor::SendOutput(OpContext<DeviceTensor> *context) const {
   }
 }
 
-void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *context) {
+void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   auto data_iter = input_data_.find(context->sequential_num_);
   if (data_iter != input_data_.end()) {
@@ -175,7 +175,7 @@ void GatherActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *context) {
   }
 }
 
-bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
+bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
   MS_EXCEPTION_IF_NULL(context);
 
   // Fetch input data.
@@ -214,7 +214,7 @@ bool GatherActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
   return true;
 }
 
-void GatherActor::EraseInput(OpContext<DeviceTensor> *context) {
+void GatherActor::EraseInput(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
 
   // Erase input data.
diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
index 3a0f45de737..e446ca59e8c 100644
--- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h
@@ -67,7 +67,7 @@ class GatherActor : public OpActor<DeviceTensor> {
   // The gather actor run when receive the input control.
   void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context) override;
   // The gather actor run when receive the input branch id.
-  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context);
+  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context);
   void Init() override;
 
  private:
@@ -75,12 +75,12 @@ class GatherActor : public OpActor<DeviceTensor> {
 
   // Collect the inputs of gather actor.
   void FetchBackendInputNode(const FuncGraphPtr &func_graph, const ControlNodeParserPtr &parser);
-  void FetchInputDeviceTensor(OpContext<DeviceTensor> *context);
+  void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
   // Check whether satisfy the condition for launch.
-  bool CheckLaunchCondition(OpContext<DeviceTensor> *context) const;
-  void SendOutput(OpContext<DeviceTensor> *context) const;
+  bool CheckLaunchCondition(OpContext<DeviceTensor> *const context) const;
+  void SendOutput(OpContext<DeviceTensor> *const context) const;
   // Erase input data and input controls when finish gather launch.
-  void EraseInput(OpContext<DeviceTensor> *context);
+  void EraseInput(OpContext<DeviceTensor> *const context);
 
   // The device tensors for launch.
   std::vector<DeviceTensor *> input_device_tensors_;
diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
index 9754a5a8fac..cae678fa23d 100644
--- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc
@@ -30,7 +30,7 @@ void KernelActor::Init() {
 
   MS_EXCEPTION_IF_NULL(kernel_);
   real_input_num_ = AnfAlgo::GetInputTensorNum(kernel_);
-  kernel_info_ = static_cast<KernelInfo *>(kernel_->kernel_info());
+  kernel_info_ = dynamic_cast<KernelInfo *>(kernel_->kernel_info());
   is_dynamic_shape_ = AnfAlgo::IsDynamicShape(kernel_);
 
   // Init the device tensors and kernel launch info.
diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
index 26753a2a02b..30527331fda 100644
--- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
+++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc
@@ -66,7 +66,7 @@ void SwitchActor::RunOpControl(AID *input_control, OpContext<DeviceTensor> *cont
   }
 }
 
-void SwitchActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context) {
+void SwitchActor::CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   auto &sequential_num = context->sequential_num_;
   input_branch_ids_[sequential_num].push(branch_id);
@@ -262,7 +262,7 @@ void SwitchActor::AddInput(const AnfNodePtr &node, const size_t branch) {
   }
 }
 
-size_t SwitchActor::GetIndex(OpContext<DeviceTensor> *context) {
+size_t SwitchActor::GetIndex(const OpContext<DeviceTensor> *const context) {
   if (need_branch_id_input_) {
     if (input_branch_ids_.find(context->sequential_num_) == input_branch_ids_.end() ||
         input_branch_ids_[context->sequential_num_].empty()) {
@@ -313,7 +313,7 @@ size_t SwitchActor::GetIndex(OpContext<DeviceTensor> *context) {
   return static_cast<size_t>(index);
 }
 
-bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
+bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *const context) const {
   MS_EXCEPTION_IF_NULL(context);
   if (input_datas_num_ != 0) {
     auto data_iter = input_data_.find(context->sequential_num_);
@@ -346,7 +346,7 @@ bool SwitchActor::CheckLaunchCondition(OpContext<DeviceTensor> *context) const {
   return true;
 }
 
-void SwitchActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *context) {
+void SwitchActor::FetchInputDeviceTensor(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   input_device_tensors_.resize(input_nodes_.size());
   auto data_iter = input_data_.find(context->sequential_num_);
@@ -452,7 +452,7 @@ void SwitchActor::SendOutput(OpContext<DeviceTensor> *context) {
   }
 }
 
-void SwitchActor::EraseInput(OpContext<DeviceTensor> *context) {
+void SwitchActor::EraseInput(OpContext<DeviceTensor> *const context) {
   MS_EXCEPTION_IF_NULL(context);
   auto data_iter = input_data_.find(context->sequential_num_);
   if (data_iter != input_data_.end() && std::all_of(data_iter->second.begin(), data_iter->second.end(),
diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
index 42fb313bb71..5337c520799 100644
--- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
+++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h
@@ -75,7 +75,7 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> {
   // The switch actor run when receive the input control.
   void RunOpControl(AID *input_control, OpContext<DeviceTensor> *context);
   // The switch actor run when receive the input branch id.
-  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *context);
+  void CollectBranchId(const int branch_id, OpContext<DeviceTensor> *const context);
   // Parse the input node information of the switch actor according to node_.
   void ParseInput(const ControlNodeParserPtr &parser);
   // Add input for all branches.
@@ -96,18 +96,18 @@ class SwitchActor : public SwitchActorBase<DeviceTensor> {
   // Initialize the size of the vector members.
   void InitVectorSize(const size_t num);
   // Get index from DeviceTensor.
-  size_t GetIndex(OpContext<DeviceTensor> *context);
+  size_t GetIndex(const OpContext<DeviceTensor> *const context);
   // Add input for the branch.
   void AddInput(const AnfNodePtr &node, size_t branch);
   void AddInput(const KernelWithIndex node_with_index, const size_t branch);
 
   // Check whether satisfy the condition for send outputs.
-  bool CheckLaunchCondition(OpContext<DeviceTensor> *context) const;
+  bool CheckLaunchCondition(OpContext<DeviceTensor> *const context) const;
   // Fetch the args of switch branch.
-  void FetchInputDeviceTensor(OpContext<DeviceTensor> *context);
-  void SendOutput(OpContext<DeviceTensor> *context);
+  void FetchInputDeviceTensor(OpContext<DeviceTensor> *const context);
+  void SendOutput(OpContext<DeviceTensor> *const context);
   // Erase input data and input controls when finish switch launch.
-  void EraseInput(OpContext<DeviceTensor> *context);
+  void EraseInput(OpContext<DeviceTensor> *const context);
   void SendMemoryFreeReq(OpContext<DeviceTensor> *const context);
 
   // Collect all the backend inputs of switch actor.
diff --git a/mindspore/ccsrc/runtime/framework/control_node_parser.cc b/mindspore/ccsrc/runtime/framework/control_node_parser.cc
index 1cbf40b8e3c..6e73837fc78 100644
--- a/mindspore/ccsrc/runtime/framework/control_node_parser.cc
+++ b/mindspore/ccsrc/runtime/framework/control_node_parser.cc
@@ -157,7 +157,7 @@ void CreateDeviceTensorForValueNode(const AnfNodePtr &front_node, const AnfNodeP
   }
 
   // Get the select kernel build info.
-  auto kernel_info = static_cast<device::KernelInfo *>(backend_node->kernel_info());
+  auto kernel_info = dynamic_cast<device::KernelInfo *>(backend_node->kernel_info());
   MS_EXCEPTION_IF_NULL(kernel_info);
   auto build_info = kernel_info->GetMutableSelectKernelBuildInfo();
   MS_EXCEPTION_IF_NULL(build_info);
diff --git a/mindspore/ccsrc/runtime/framework/graph_compiler.cc b/mindspore/ccsrc/runtime/framework/graph_compiler.cc
index df56b0412e3..ad225024aee 100644
--- a/mindspore/ccsrc/runtime/framework/graph_compiler.cc
+++ b/mindspore/ccsrc/runtime/framework/graph_compiler.cc
@@ -320,19 +320,24 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic
     DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id()));
   }
 
-  // Execute optimization pass.
+  MS_LOG(INFO) << "Get graph outputs before optimizer, graph id: " << graph->graph_id();
   auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
+
+  // Execute optimization pass.
   device_context->OptimizeGraph(graph);
-  auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
-  // Update the output map of kernel graph by modified output nodes.
-  graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer);
 
   // Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel,
   // 'KernelMod' is real executive object of kernel.
   device_context->CreateKernel(graph->execution_order());
 
+  // Adjust kernel graph before run graph.
   device_context->PreprocessBeforeRunGraph(graph);
 
+  MS_LOG(INFO) << "Get graph outputs after optimizer, graph id: " << graph->graph_id();
+  auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output());
+  // Update the output map of kernel graph by modified output nodes.
+  graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer);
+
   if (ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) {
     // Create device address for all anf nodes of graph.
     CreateDeviceAddress(graph, device_context);
diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
index c8e88ee3adb..a3ddbd9d0a0 100644
--- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
+++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc
@@ -282,7 +282,7 @@ void PrepareDataForControlWeightNode(
 
 void PrepareDataForHostDataSourceActor(const std::unordered_map<AnfNodePtr, size_t> &data_node_position_map,
                                        const AnfNodePtr &node, const TensorPtr &tensor,
-                                       std::vector<TensorPtr> *host_tensors) {
+                                       std::vector<TensorPtr> *const host_tensors) {
   MS_EXCEPTION_IF_NULL(tensor);
 
   // Fill the host tensors for non weighted parameters.
@@ -417,10 +417,6 @@ void GraphScheduler::Clear() {
   graph_output_to_actor_.clear();
   front_node_to_actor_.clear();
   copy_actors_.clear();
-
-  // Delete the thread pool.
-  delete thread_pool_;
-  thread_pool_ = nullptr;
 }
 
 void GraphScheduler::Initialize() {
@@ -434,16 +430,15 @@ void GraphScheduler::Initialize() {
   }
   init_ = true;
 
-  auto actorMgr = ActorMgr::GetActorMgrRef();
-  MS_EXCEPTION_IF_NULL(actorMgr);
-  actorMgr->Initialize();
-
   // Create the thread pool of actor runtime and Set the OMP_NUM_THREADS env.
   size_t actor_thread_num = 0;
   size_t OMP_thread_num = 0;
   ComputeThreadNums(&actor_thread_num, &OMP_thread_num);
-  thread_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num);
-  MS_EXCEPTION_IF_NULL(thread_pool_);
+
+  auto actor_manager = ActorMgr::GetActorMgrRef();
+  MS_EXCEPTION_IF_NULL(actor_manager);
+  actor_manager->Initialize(true, actor_thread_num);
+
   std::string OMP_env = std::to_string(OMP_thread_num);
   (void)common::SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0);
   auto OMP_thread_num_used = common::GetEnv("OMP_NUM_THREADS");
@@ -463,7 +458,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() {
   MS_EXCEPTION_IF_NULL(memory_manager_actor);
   memory_manager_aid_ = memory_manager_actor->GetAID();
   auto base_actor = static_cast<ActorReference>(memory_manager_actor);
-  base_actor->set_thread_pool(thread_pool_);
   // Bind single thread to response to memory alloc and free quickly.
   (void)actorMgr->Spawn(base_actor, false);
 
@@ -472,7 +466,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() {
   MS_EXCEPTION_IF_NULL(recorder_actor);
   recorder_aid_ = &(recorder_actor->GetAID());
   auto base_recorder_actor = static_cast<ActorReference>(recorder_actor);
-  base_recorder_actor->set_thread_pool(thread_pool_);
   (void)actorMgr->Spawn(base_recorder_actor, true);
 
   // Create and schedule debug actor.
@@ -487,7 +480,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() {
     MS_EXCEPTION_IF_NULL(debug_actor);
     debug_aid_ = &(debug_actor->GetAID());
     auto base_debug_actor = static_cast<ActorReference>(debug_actor);
-    base_debug_actor->set_thread_pool(thread_pool_);
     (void)actorMgr->Spawn(base_debug_actor, true);
   }
 }
@@ -561,7 +553,6 @@ void GraphScheduler::Schedule(const ActorSet *actor_set) {
   auto actorMgr = ActorMgr::GetActorMgrRef();
   MS_EXCEPTION_IF_NULL(actorMgr);
   for (auto actor : actors) {
-    actor->set_thread_pool(thread_pool_);
     (void)actorMgr->Spawn(actor);
   }
 }
@@ -687,11 +678,11 @@ void GraphScheduler::PrepareRunOp(const ActorSet *actor_set, const GraphCompiler
   }
 }
 
-void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor,
+void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor,
                                                const ControlNodeParserPtr &control_node_parser,
                                                const std::vector<AnfNodePtr> &origin_parameters,
                                                const std::vector<TensorPtr> &tensors,
-                                               std::vector<TensorPtr> *host_tensors) {
+                                               std::vector<TensorPtr> *const host_tensors) {
   const auto &control_node_parameters = control_node_parser->GetControlNodeParameter();
 
   for (size_t j = 0; j < control_node_parameters.size(); ++j) {
@@ -800,6 +791,10 @@ ActorSetPtr GraphScheduler::Build(const GraphCompilerInfo &graph_compiler_info)
 }
 
 void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_compiler_info) {
+  if (graph_compiler_info.strategy_ == GraphExecutionStrategy::kStep) {
+    return;
+  }
+
   for (const auto &graph : graph_compiler_info.graphs_) {
     MS_EXCEPTION_IF_NULL(graph);
     auto outputs = AnfAlgo::GetAllOutputWithIndex(graph->output());
@@ -808,6 +803,8 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp
       MS_EXCEPTION_IF_NULL(output_kernel);
       auto origin_output_with_index = graph->GetFrontNodeWithIndexByGraphOutput(output_with_index);
       if (origin_output_with_index.first == nullptr) {
+        MS_LOG(WARNING) << "The graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope()
+                        << " with index: " << output_with_index.second << " has no actor.";
         continue;
       }
 
@@ -837,7 +834,9 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp
       MS_EXCEPTION_IF_NULL(actor);
       MS_LOG(INFO) << "Cache the graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope()
                    << " with index: " << output_with_index.second << " to actor:" << actor->GetAID().Name()
-                   << " with index:" << actor_output_index;
+                   << " with index:" << actor_output_index
+                   << ", from front node:" << origin_output_with_index.first->fullname_with_scope()
+                   << " with index: " << origin_output_with_index.second;
       (void)graph_output_to_actor_.emplace(origin_output_with_index, GraphOutputPair(actor, actor_output_index));
     }
   }
@@ -968,7 +967,7 @@ std::vector<DataSourceActorPtr> GraphScheduler::BuildDataSourceActor(const Graph
       InsertActor(device_queue_ds_actor.get());
       (void)data_source_actors.emplace_back(device_queue_ds_actor);
       device_queue_ds_actor->data_kernel_ = *iter;
-      device_queue_ds_actor->kernel_info_ = static_cast<device::KernelInfo *>((*iter)->kernel_info());
+      device_queue_ds_actor->kernel_info_ = dynamic_cast<device::KernelInfo *>((*iter)->kernel_info());
     }
   }
 
@@ -1282,9 +1281,9 @@ std::vector<GatherActorPtr> GraphScheduler::BuildGatherActor(const GraphCompiler
   return gather_actors;
 }
 
-void GraphScheduler::LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info,
-                                   const KernelGraphPtr &graph, KernelWithIndex from_kernel_with_output_idx,
-                                   KernelWithIndex to_kernel_with_input_idx) {
+void GraphScheduler::LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info,
+                                   const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx,
+                                   const KernelWithIndex &to_kernel_with_input_idx) {
   MS_EXCEPTION_IF_NULL(to_actor);
   MS_EXCEPTION_IF_NULL(graph);
 
@@ -2063,7 +2062,7 @@ void GraphScheduler::PrepareInputNodeForSwitchActor(const std::vector<AnfNodePtr
   }
 }
 
-void GraphScheduler::LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *actor_set) {
+void GraphScheduler::LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *const actor_set) {
   PrepareInputNodeForSwitchActor(graph_compiler_info.control_nodes_);
 
   for (const auto &node : graph_compiler_info.control_nodes_) {
@@ -2161,7 +2160,7 @@ void GraphScheduler::LinkArrowByControlNode(const GraphCompilerInfo &graph_compi
   LinkOutputResultArrowForSwitchActor(graph_compiler_info, actor_set);
 }
 
-void GraphScheduler::LinkDataArrowForGatherActor(GatherActor *from_actor, KernelActor *to_actor,
+void GraphScheduler::LinkDataArrowForGatherActor(GatherActor *const from_actor, KernelActor *const to_actor,
                                                  const KernelWithIndex &front_node_with_index,
                                                  const KernelWithIndex &to_node_with_index) {
   MS_EXCEPTION_IF_NULL(from_actor);
@@ -2177,7 +2176,7 @@ void GraphScheduler::LinkDataArrowForGatherActor(GatherActor *from_actor, Kernel
 
 void GraphScheduler::LinkDataArrowByCallInput(const KernelWithIndex &call_node_with_index,
                                               const ControlNodeParserPtr &parser, const FuncGraphPtr &from_func_graph,
-                                              OpActor<DeviceTensor> *to_actor, const size_t to_index) {
+                                              OpActor<DeviceTensor> *const to_actor, const size_t to_index) {
   // Fetch all the funcgraph that call node would call.
   const auto cnode = call_node_with_index.first->cast<CNodePtr>();
   std::vector<FuncGraphPtr> func_graphs = FetchFuncGraphbyCallNode(cnode);
@@ -2233,8 +2232,8 @@ void GraphScheduler::LinkDataArrowForSwitchActor(SwitchActor *from_actor, const
 
 void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info,
                                                 const KernelWithIndex &input_with_index,
-                                                const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *to_actor,
-                                                const size_t to_index) {
+                                                const FuncGraphPtr &from_func_graph,
+                                                OpActor<DeviceTensor> *const to_actor, const size_t to_index) {
   const auto &parameters = graph_compiler_info.origin_parameters_order_;
   const auto &front_to_backend_parameter = graph_compiler_info.control_node_parser_->front_to_backend_parameters_;
   const auto &input_node = input_with_index.first;
@@ -2314,7 +2313,8 @@ void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_c
   }
 }
 
-void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor) {
+void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info,
+                                                 SwitchActor *const actor) {
   // Link switch input.
   const auto &inputs = actor->input_nodes_;
   for (size_t i = 0; i < inputs.size(); ++i) {
@@ -2342,13 +2342,14 @@ void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_
     auto to_actor = dynamic_cast<GatherActor *>(actor_name_to_actor_[gather_name]);
     for (size_t j = 0; j < actor->branch_inputs_pos_[i].size(); ++j) {
       auto pos = actor->branch_inputs_pos_[i][j];
-      auto op_arrow = std::make_shared<DataArrow>(pos, to_actor->GetAID(), j);
+      auto to_actor_index = j;
+      auto op_arrow = std::make_shared<DataArrow>(pos, to_actor->GetAID(), to_actor_index);
       (void)actor->output_branch_arrows_[i].emplace_back(op_arrow);
     }
   }
 }
 
-void GraphScheduler::LinkControlArrowForGatherActor(std::vector<KernelActorPtr> *kernel_actors,
+void GraphScheduler::LinkControlArrowForGatherActor(std::vector<KernelActorPtr> *const kernel_actors,
                                                     const std::vector<KernelGraphPtr> &graphs,
                                                     const ControlNodeParserPtr &parser) {
   // Link control arrow to kernel actor.
@@ -2426,8 +2427,8 @@ void GraphScheduler::LinkControlArrowForGatherActor(std::vector<KernelActorPtr>
   }
 }
 
-void GraphScheduler::LinkControlArrowForSwitchActor(std::vector<SwitchActorPtr> *switch_actors,
-                                                    LoopCountActor *to_actor,
+void GraphScheduler::LinkControlArrowForSwitchActor(std::vector<SwitchActorPtr> *const switch_actors,
+                                                    LoopCountActor *const to_actor,
                                                     const KernelMapPosition &origin_outputs_order) {
   if (to_actor == nullptr || (*switch_actors).empty()) {
     return;
diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.h b/mindspore/ccsrc/runtime/framework/graph_scheduler.h
index 2a149307c09..63c7fc0572a 100644
--- a/mindspore/ccsrc/runtime/framework/graph_scheduler.h
+++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.h
@@ -195,8 +195,9 @@ class GraphScheduler {
   // The processing of actors link statically.
   // 1. The processing of linking data arrows.
   // The gather of linking data arrows of kernel, it will call following functions by the different from actor type.
-  void LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info, const KernelGraphPtr &graph,
-                     KernelWithIndex from_kernel_with_output_idx, KernelWithIndex to_kernel_with_input_idx);
+  void LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info,
+                     const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx,
+                     const KernelWithIndex &to_kernel_with_input_idx);
   // Link data arrows for internal parameter, convert internal parameter to actor by internal parameter cache to link.
   void LinkDataArrowForInternalParameter(const AnfNodePtr &internal_parameter,
                                          const std::vector<AnfNodePtr> &host_parameters, const KernelGraphPtr &graph,
@@ -232,37 +233,38 @@ class GraphScheduler {
   void LinkOutputResultArrowForOutputActor(OutputActor *to_actor, const GraphCompilerInfo &graph_compiler_info);
 
   // 4. The processing of control flow linking.
-  void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *actor_set);
-  void LinkDataArrowForGatherActor(GatherActor *from_actor, KernelActor *to_actor,
+  void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *const actor_set);
+  void LinkDataArrowForGatherActor(GatherActor *const from_actor, KernelActor *const to_actor,
                                    const KernelWithIndex &front_node_with_index,
                                    const KernelWithIndex &to_node_with_index);
-  void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor);
+  void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *const actor);
   // Connect the input of the actor.
   void LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, const KernelWithIndex &input_node,
-                                  const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *to_actor,
+                                  const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *const to_actor,
                                   const size_t to_index);
   // When the input of the actor is a call node, the output of the funcgraph called by the call node needs to be
   // connected.
   void LinkDataArrowByCallInput(const KernelWithIndex &call_node_with_index, const ControlNodeParserPtr &parser,
-                                const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *to_actor,
+                                const FuncGraphPtr &from_func_graph, OpActor<DeviceTensor> *const to_actor,
                                 const size_t to_index);
-  void LinkDataArrowForSwitchActor(SwitchActor *from_actor, const size_t from_index, OpActor<DeviceTensor> *to_actor,
-                                   const size_t to_index, const size_t branch_index = SIZE_MAX);
+  void LinkDataArrowForSwitchActor(SwitchActor *const from_actor, const size_t from_index,
+                                   OpActor<DeviceTensor> *const to_actor, const size_t to_index,
+                                   const size_t branch_index = SIZE_MAX);
 
-  void LinkControlArrowForGatherActor(std::vector<KernelActorPtr> *kernel_actors,
+  void LinkControlArrowForGatherActor(std::vector<KernelActorPtr> *const kernel_actors,
                                       const std::vector<KernelGraphPtr> &graphs, const ControlNodeParserPtr &parser);
 
-  void LinkControlArrowForSwitchActor(std::vector<SwitchActorPtr> *switch_actors, LoopCountActor *to_actor,
+  void LinkControlArrowForSwitchActor(std::vector<SwitchActorPtr> *const switch_actors, LoopCountActor *const to_actor,
                                       const KernelMapPosition &origin_outputs_order);
   // In control flow, there are scenarios where there are multi-branch outputs, and the gather actor needs to
   // send the branch id to the loop count actor.
   void LinkBranchArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info);
   void LinkBranchArrowForGatherActor(const GraphCompilerInfo &graph_compiler_info);
   void LinkOutputResultArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, const ActorSet *actor_set);
-  void PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor,
+  void PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor,
                                  const ControlNodeParserPtr &control_node_parser,
                                  const std::vector<AnfNodePtr> &origin_parameters,
-                                 const std::vector<TensorPtr> &tensors, std::vector<TensorPtr> *host_tensors);
+                                 const std::vector<TensorPtr> &tensors, std::vector<TensorPtr> *const host_tensors);
   // Add input for switch actor. Since part of the input of funcgraph is on call node, these inputs need to be added
   // to switch actor.
   void PrepareInputNodeForSwitchActor(const std::vector<AnfNodePtr> &control_nodes);
@@ -330,8 +332,6 @@ class GraphScheduler {
   const AID *recorder_aid_{nullptr};
   const AID *debug_aid_{nullptr};
 
-  ActorThreadPool *thread_pool_{nullptr};
-
   bool init_{false};
 };
 }  // namespace runtime
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
index 010f2682795..52bf733402b 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc
@@ -57,6 +57,14 @@ void CPUDeviceContext::Initialize() {
   initialized_ = true;
 }
 
+void CPUDeviceContext::Destroy() {
+  // Release memory.
+  if (mem_manager_ != nullptr) {
+    mem_manager_->FreeDeviceMemory();
+    mem_manager_ = nullptr;
+  }
+}
+
 bool CPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
   MS_EXCEPTION_IF_NULL(address);
   MS_EXCEPTION_IF_NULL(mem_manager_);
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
index f7dbdddfa85..7fb859324f5 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h
@@ -35,6 +35,8 @@ class CPUDeviceContext : public DeviceContext {
 
   void Initialize() override;
 
+  void Destroy() override;
+
   bool AllocateMemory(DeviceAddress *const &address, size_t size) const override;
   void FreeMemory(DeviceAddress *const &address) const override;
 
diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
index 14093a5f989..c7b1a706ca1 100644
--- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
+++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc
@@ -17,6 +17,7 @@
 #include "runtime/hardware/cpu/cpu_memory_pool.h"
 #include <string>
 #include "utils/log_adapter.h"
+#include "utils/convert_utils_base.h"
 
 namespace mindspore {
 namespace device {
@@ -42,14 +43,13 @@ size_t GetSystemMemorySize(const std::string &key) {
     std::string line(buf);
     auto title_end_pos = line.find(":");
     auto title = line.substr(0, title_end_pos);
-
     // Get mem size.
     if (title == key) {
       auto mem_size_end_pos = line.find_last_of(" ");
       auto mem_size_begin_pos = line.find_last_of(" ", mem_size_end_pos - 1);
       if ((mem_size_end_pos != std::string::npos) && (mem_size_begin_pos != std::string::npos)) {
         auto mem_size_string = line.substr(mem_size_begin_pos, mem_size_end_pos - mem_size_begin_pos);
-        mem_size = std::atol(mem_size_string.c_str());
+        mem_size = LongToSize(std::atol(mem_size_string.c_str()));
       }
       break;
     }
diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
index 4264cdf6d81..fa92a5aac3f 100644
--- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
+++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc
@@ -152,15 +152,6 @@ void GPUDeviceContext::Destroy() {
     mem_manager_->FreeDeviceMemory();
     mem_manager_ = nullptr;
   }
-
-  // Clean GPU cache kernels which is generated by AKG
-  auto context_ptr = MsContext::GetInstance();
-  MS_EXCEPTION_IF_NULL(context_ptr);
-  if (!(context_ptr->get_param<bool>(MS_CTX_SAVE_GRAPHS_FLAG))) {
-    kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance();
-    MS_EXCEPTION_IF_NULL(bin_map);
-    bin_map->RemoveKernelCache();
-  }
 }
 
 bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const {
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
index 24fb30f82f6..3fef5113bdf 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
+++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc
@@ -26,7 +26,10 @@
 #include "hccl/hcom.h"
 #include "utils/log_adapter.h"
 #include "utils/ms_utils.h"
+#include "utils/ms_context.h"
 #include "runtime/hccl_adapter/converter.h"
+#include "runtime/device/ascend/distribute/ascend_collective.h"
+using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup;
 
 static constexpr const char *kHcclPluginFileName = "libhccl_plugin.so";
 static constexpr const char *kHcclDeployModeEnv = "DEPLOY_MODE";
@@ -75,7 +78,6 @@ void HcclAdapter::InitPlugin() {
   if (plugin_handle_ == nullptr) {
     MS_LOG(EXCEPTION) << "Dlopen " << kHcclPluginFileName << " failed, result = " << GetDlErrorMsg();
   }
-
   init_hcom_graph_adapter_ = DlsymFuncObj(InitHcomGraphAdapter, plugin_handle_);
   finalize_hcom_graph_adapter_ = DlsymFuncObj(FinalizeHcomGraphAdapter, plugin_handle_);
   get_hccl_kernel_info_store_ = DlsymFuncObj(GetHcclKernelInfoStore, plugin_handle_);
@@ -98,7 +100,6 @@ void HcclAdapter::FinalizePlugin() {
   if (plugin_handle_ == nullptr) {
     return;
   }
-
   init_hcom_graph_adapter_ = nullptr;
   finalize_hcom_graph_adapter_ = nullptr;
   get_hccl_kernel_info_store_ = nullptr;
@@ -107,6 +108,10 @@ void HcclAdapter::FinalizePlugin() {
   finalize_hccl_comm_ = nullptr;
   launch_hccl_broadcast_ = nullptr;
   launch_hccl_all_reduce_ = nullptr;
+  launch_hccl_reduce_scatter_ = nullptr;
+  launch_hccl_all_gather_ = nullptr;
+  launch_hccl_send_ = nullptr;
+  launch_hccl_recv_ = nullptr;
   hccl_create_group_ = nullptr;
   hccl_destroy_group_ = nullptr;
   hccl_get_rank_id_ = nullptr;
@@ -119,6 +124,19 @@ void HcclAdapter::FinalizePlugin() {
   plugin_handle_ = nullptr;
 }
 
+bool HcclAdapter::InitHccl() {
+  MS_LOG(INFO) << "Start init hccl adapter.";
+  std::lock_guard<std::mutex> lock(init_mutex_);
+  if (init_flag_) {
+    MS_LOG(INFO) << "Hccl has been inited, skip.";
+    return true;
+  }
+  InitPlugin();
+  init_flag_ = true;
+  MS_LOG(INFO) << "Init hccl adapter success.";
+  return true;
+}
+
 bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) {
   MS_LOG(INFO) << "Start init hccl adapter.";
   std::lock_guard<std::mutex> lock(init_mutex_);
@@ -136,12 +154,10 @@ bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::st
   if (!ret) {
     return false;
   }
-
   ret = InitHcclExec();
   if (!ret) {
     return false;
   }
-
   init_flag_ = true;
   MS_LOG(INFO) << "Init hccl adapter success.";
   return true;
@@ -238,10 +254,69 @@ HcclResult HcclAdapter::HcclBroadcast(void *buf, uint64_t count, HcclDataType da
   return launch_hccl_broadcast_(buf, count, dataType, root, hccl_comm_, stream);
 }
 
-HcclResult HcclAdapter::HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType,
-                                      HcclReduceOp op, aclrtStream stream) const {
+HcclResult HcclAdapter::HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType,
+                                      HcclReduceOp op, aclrtStream stream, const std::string &group) const {
   MS_EXCEPTION_IF_NULL(launch_hccl_all_reduce_);
-  return launch_hccl_all_reduce_(sendBuf, recvBuf, count, dataType, op, hccl_comm_, stream);
+  HcclComm hccl_comm;
+  if (hccl_comm_ != nullptr) {
+    hccl_comm = hccl_comm_;
+  } else {
+    hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group);
+    MS_EXCEPTION_IF_NULL(hccl_comm);
+  }
+  return launch_hccl_all_reduce_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream);
+}
+
+HcclResult HcclAdapter::HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType,
+                                          HcclReduceOp op, aclrtStream stream, const std::string &group) const {
+  MS_EXCEPTION_IF_NULL(launch_hccl_reduce_scatter_);
+  HcclComm hccl_comm;
+  if (hccl_comm_ != nullptr) {
+    hccl_comm = hccl_comm_;
+  } else {
+    hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group);
+    MS_EXCEPTION_IF_NULL(hccl_comm);
+  }
+  return launch_hccl_reduce_scatter_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream);
+}
+
+HcclResult HcclAdapter::HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType,
+                                      aclrtStream stream, const std::string &group) const {
+  MS_EXCEPTION_IF_NULL(launch_hccl_all_gather_);
+  HcclComm hccl_comm;
+  if (hccl_comm_ != nullptr) {
+    hccl_comm = hccl_comm_;
+  } else {
+    hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group);
+    MS_EXCEPTION_IF_NULL(hccl_comm);
+  }
+  return launch_hccl_all_gather_(send_buf, recv_buf, count, dataType, hccl_comm, stream);
+}
+
+HcclResult HcclAdapter::HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank,
+                                 aclrtStream stream, const std::string &group) const {
+  MS_EXCEPTION_IF_NULL(launch_hccl_send_);
+  HcclComm hccl_comm;
+  if (hccl_comm_ != nullptr) {
+    hccl_comm = hccl_comm_;
+  } else {
+    hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group);
+    MS_EXCEPTION_IF_NULL(hccl_comm);
+  }
+  return launch_hccl_send_(send_buf, count, dataType, destRank, hccl_comm, stream);
+}
+
+HcclResult HcclAdapter::HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank,
+                                 aclrtStream stream, const std::string &group) const {
+  MS_EXCEPTION_IF_NULL(launch_hccl_recv_);
+  HcclComm hccl_comm;
+  if (hccl_comm_ != nullptr) {
+    hccl_comm = hccl_comm_;
+  } else {
+    hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group);
+    MS_EXCEPTION_IF_NULL(hccl_comm);
+  }
+  return launch_hccl_recv_(recv_buf, count, dataType, srcRank, hccl_comm, stream);
 }
 
 bool HcclAdapter::InitKernelInfoStore(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) {
@@ -338,6 +413,12 @@ bool HcclAdapter::InitHcclComm(std::string_view rank_id, std::string_view rank_f
 
 bool HcclAdapter::FinalizeHcclComm() {
   MS_LOG(INFO) << "Start finalize hccl comm.";
+  auto context_ptr = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(context_ptr);
+  auto task_sink = context_ptr->get_param<bool>(MS_CTX_ENABLE_TASK_SINK);
+  if (!task_sink) {
+    HcclCollectiveGroup::instance().DestroyCommGroup();
+  }
   if (hccl_comm_ == nullptr) {
     return true;
   }
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
index f3c39937405..2ed8685d9fd 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
+++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h
@@ -43,6 +43,7 @@ class HcclAdapter {
 
   // common
   bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file);
+  bool InitHccl();
   bool FinalizeHccl();
 
   HcclResult HcclCreateGroup(const std::string &group, uint32_t rank_num, uint32_t *rank_ids) const;
@@ -58,8 +59,16 @@ class HcclAdapter {
 
   // for single op
   HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, aclrtStream stream) const;
-  HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
-                           aclrtStream stream) const;
+  HcclResult HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
+                           aclrtStream stream, const std::string &group = "") const;
+  HcclResult HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, aclrtStream stream,
+                           const std::string &group = "") const;
+  HcclResult HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op,
+                               aclrtStream stream, const std::string &group = "") const;
+  HcclResult HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank, aclrtStream stream,
+                      const std::string &group = "") const;
+  HcclResult HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank, aclrtStream stream,
+                      const std::string &group = "") const;
 
   // for enqueue op
   HcclResult HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const;
@@ -91,6 +100,10 @@ class HcclAdapter {
   HcclCommDestroyFunObj finalize_hccl_comm_ = nullptr;
   HcclBroadcastFunObj launch_hccl_broadcast_ = nullptr;
   HcclAllReduceFunObj launch_hccl_all_reduce_ = nullptr;
+  HcclReduceScatterFunObj launch_hccl_reduce_scatter_ = nullptr;
+  HcclAllGatherFunObj launch_hccl_all_gather_ = nullptr;
+  HcclSendFunObj launch_hccl_send_ = nullptr;
+  HcclRecvFunObj launch_hccl_recv_ = nullptr;
 
   HcomCreateGroupFunObj hccl_create_group_ = nullptr;
   HcomDestroyGroupFunObj hccl_destroy_group_ = nullptr;
diff --git a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
index a4b5fa3b0ae..82e0156abe5 100644
--- a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
+++ b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h
@@ -47,6 +47,12 @@ PLUGIN_METHOD(GetAllKernelBuilder, void, OpsKernelBuilderMap *);
 
 ORIGIN_METHOD(HcclBroadcast, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
 ORIGIN_METHOD(HcclAllReduce, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm, aclrtStream);
+ORIGIN_METHOD(HcclReduceScatter, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm,
+              aclrtStream);
+ORIGIN_METHOD(HcclAllGather, HcclResult, void *, void *, uint64_t, HcclDataType, HcclComm, aclrtStream);
+ORIGIN_METHOD(HcclSend, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
+ORIGIN_METHOD(HcclRecv, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream);
+
 ORIGIN_METHOD(HcclCommInitClusterInfo, HcclResult, const char *, uint32_t, HcclComm *);
 ORIGIN_METHOD(HcclCommDestroy, HcclResult, HcclComm);
 ORIGIN_METHOD(HcomCreateGroup, HcclResult, const char *, uint32_t, uint32_t *);
diff --git a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
index 53626814add..4ee7217e8a9 100644
--- a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
+++ b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc
@@ -138,6 +138,7 @@ class IrExportBuilder {
   mind_ir::NodeProto *last_node_{nullptr};
   std::list<FuncGraphPtr> todo_;
   std::map<AnfNodePtr, size_t> node_index_map_;
+  std::set<std::string> nodeName_;
   size_t node_index_{0};
   size_t shape_index_{0};
 };
@@ -145,16 +146,7 @@ class IrExportBuilder {
 using IrExporterPtr = std::shared_ptr<IrExporter>;
 
 std::string IrExporter::GetDumpString(const FuncGraphPtr &func_graph) {
-  if ((builder_ == nullptr) || (func_graph == nullptr)) {
-    MS_LOG(EXCEPTION) << "Input params is null.";
-  }
-
-  // Export model info
-  builder_->BuildModelInfo();
-
-  // Export model and return string
-  builder_->BuildModel(func_graph);
-
+  (void)GetDumpProto(func_graph);
   return builder_->GetProtoString(func_graph);
 }
 
@@ -168,7 +160,6 @@ mind_ir::ModelProto IrExporter::GetDumpProto(const FuncGraphPtr &func_graph, boo
 
   // Export model and return string
   builder_->BuildModel(func_graph, save_tensor_data);
-
   return builder_->Model();
 }
 
@@ -191,16 +182,34 @@ void IrExportBuilder::BuildModel(const FuncGraphPtr &func_graph, bool save_tenso
   graph_proto->set_bprop_hash(func_graph->bprop_hash());
   ResetNodeIndex();
   todo_.clear();
-  todo_.push_back(func_graph);
+  nodeName_.clear();
+  // Build the main funcGraph
+  nodeName_.insert(func_graph->ToString());
+  BuildFuncGraph(func_graph, graph_proto, save_tensor_data);
+  std::set<FuncGraphPtr> graphVisited;
+  graphVisited.insert(func_graph);
   while (!todo_.empty()) {
     FuncGraphPtr fg = todo_.back();
     todo_.pop_back();
-    BuildFuncGraph(fg, graph_proto, save_tensor_data);
+    if (graphVisited.count(fg) > 0) {
+      continue;
+    }
+    if (nodeName_.count(fg->ToString()) > 0) {
+      MS_LOG(EXCEPTION) << "There is a duplicate name: " << fg->ToString();
+    }
+    nodeName_.insert(fg->ToString());
+    graphVisited.insert(fg);
+    auto graph = model_.add_functions();
+    BuildFuncGraph(fg, graph, save_tensor_data);
   }
+  // Release resource
+  nodeName_.clear();
 }
 
 void IrExportBuilder::BuildFuncGraph(const FuncGraphPtr &func_graph, mind_ir::GraphProto *const graph_proto,
                                      bool save_tensor_data) {
+  // Export funcGraph name.
+  graph_proto->set_name(func_graph->ToString());
   // Export parameters
   // 1. parameters should be mapped to ValueInfoProto
   // 2. parameters with default value should be mapped to Initializer
@@ -232,6 +241,10 @@ void IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, mind_ir::G
       input_proto->set_name(param_name);
       SetValueInfoProto(param, input_proto);
     }
+    if (nodeName_.count(param_name) > 0) {
+      MS_LOG(EXCEPTION) << "parameter name is duplicate:" << param_name;
+    }
+    nodeName_.insert(param_name);
   }
 }
 
@@ -383,9 +396,13 @@ std::string IrExportBuilder::GetOpTypeName(const AnfNodePtr &node) {
   } else if (IsValueNode<FuncGraph>(node)) {
     FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(node);
     todo_.push_back(fg);
-    type_name = fg->ToString();
+    type_name = "REF::" + fg->ToString();
   } else if (node->isa<CNode>() || node->isa<Parameter>()) {
-    type_name = node->ToString();
+    auto nodeName = GetUniqueNodeName(node);
+    type_name = "REF::" + nodeName;
+    if (nodeName_.count(nodeName) == 0) {
+      MS_LOG(EXCEPTION) << "There is not the name: " << nodeName;
+    }
   } else {
     MS_LOG(EXCEPTION) << "Need to support op type: " << node->type_name();
   }
@@ -424,6 +441,9 @@ void IrExportBuilder::SetShapeToNodeProto(const TypePtr &type, const BaseShapePt
       tensor_proto->set_data_type(mind_ir::TensorProto_DataType_UINT64);
       tensor_proto->add_dims(1);
     }
+  } else if (type->isa<Function>()) {
+    attr_proto->set_type(mind_ir::AttributeProto_AttributeType_GRAPH);
+    *seq_string += type->type_name() + ",";
   } else if (type->isa<String>() || type->isa<UMonadType>() || type->isa<IOMonadType>()) {
     *seq_string += type->type_name() + ",";
   } else {
@@ -468,6 +488,10 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *cons
   // Build cnode
   mind_ir::NodeProto *node_proto = graph_proto->add_node();
   std::string output_name = GetUniqueNodeName(node);
+  if (nodeName_.count(output_name) > 0) {
+    MS_LOG(EXCEPTION) << "There is a duplicate name: " << output_name;
+  }
+  nodeName_.insert(output_name);
   node_proto->add_output(output_name);
   node_proto->set_name(output_name);
   node_proto->set_domain(node->fullname_with_scope());
@@ -475,7 +499,9 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *cons
   std::string type_name = GetOpTypeName(op);
   node_proto->set_op_type(type_name);
   last_node_ = node_proto;
+  // Maybe Tensor or Function or nullptr
   SetShapeToNodeProto(node, node_proto);
+
   (void)std::for_each(input_names.begin(), input_names.end(),
                       [&node_proto](const string &name) { node_proto->add_input(name); });
 
@@ -490,13 +516,17 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *cons
       CheckAndConvertUtils::ConvertAttrValueInExport(type_name, attr.first, &attr_value);
       SetValueToAttributeProto(attr_value, attr_proto);
     }
-  } else {
-    MS_LOG(EXCEPTION) << "Need to support op type: " << op->type_name();
   }
 }
 
 std::string IrExportBuilder::BuildInputNode(const AnfNodePtr &node, mind_ir::GraphProto *const graph_proto) {
   std::string node_name = GetUniqueNodeName(node);
+  // FuncGraph will be added to functions and the input name is the function name.
+  if (IsValueNode<FuncGraph>(node)) {
+    FuncGraphPtr fg = GetValueNode<FuncGraphPtr>(node);
+    todo_.push_back(fg);
+    return fg->ToString();
+  }
   if (node->isa<ValueNode>()) {
     // When node input is a ValueNode, need to create a Constant Node
     mind_ir::NodeProto *node_proto = graph_proto->add_node();
@@ -539,7 +569,12 @@ std::string IrExportBuilder::GetNodeName(const AnfNodePtr &node) {
   if ((node != nullptr) && (node->func_graph() != nullptr)) {
     node_name = node->func_graph()->ToString() + ":";
   }
-  node_name += node->ToString();
+  if (node->isa<ValueNode>()) {
+    // Needn't value
+    node_name += node->AnfNode::ToString();
+  } else {
+    node_name += node->ToString();
+  }
   MS_LOG(DEBUG) << "GetNodeName: " << node_name;
   return node_name;
 }
diff --git a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
index dfd09a79356..d44baf4c70b 100644
--- a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
+++ b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc
@@ -29,6 +29,11 @@
 
 namespace mindspore {
 const int ONNX_VERSION = 11;
+const int kZeroNum = 0;
+const int kOneNum = 1;
+const int kTwoNum = 2;
+const int kThreeNum = 3;
+const int kFourNum = 4;
 enum OpMergeMode {
   OP_MERGE_UNDEFINED = 0,            // undefined behavior
   OP_MERGE_IGNORE = 1,               // indicate an input op merged into other op in compute node list
@@ -36,6 +41,7 @@ enum OpMergeMode {
   OP_MERGE_GEMM = 3,                 // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm`
   OP_MERGE_BATCH_NORM = 4,           // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization`
   OP_MERGE_MAXPOOL_WITH_ARGMAX = 5,  // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool`
+  OP_MERGE_LAYER_NORM = 6,           // indicate `MindSpore LayerNorm(x)[0]` --> `ONNX MeanVarianceNormalization`
 };
 
 struct OpMergedInfo {
@@ -99,6 +105,9 @@ void SetAttrTupleValueToProto(const ValuePtr &value, onnx::AttributeProto_Attrib
         attr_proto->add_ints(GetValue<int64_t>((*tuple_ptr)[i]));
       }
       break;
+    case onnx::AttributeProto_AttributeType_INT:
+      attr_proto->set_i(GetValue<int64_t>((*tuple_ptr)[beg_idx]));
+      break;
     case onnx::AttributeProto_AttributeType_FLOATS:
       for (size_t i = beg_idx; i < tuple_ptr->size(); ++i) {
         attr_proto->add_floats(GetValue<float>((*tuple_ptr)[i]));
@@ -266,25 +275,39 @@ OPERATOR_ONNX_CONVERT_DEFINE(RealDiv, Div, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(ReduceSum, ReduceSum, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Sub, Sub, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Maximum, Max, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(Minimum, Min, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Transpose, Transpose, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(StridedSlice, Slice, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Exp, Exp, OpNameInfo())
-OPERATOR_ONNX_CONVERT_DEFINE(ResizeNearestNeighbor, Resize, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Softplus, Softplus, OpNameInfo())
 OPERATOR_ONNX_CONVERT_DEFINE(Tanh, Tanh, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(Abs, Abs, OpNameInfo())
+
+// MindSpore Softmax axis(int, Tuple)
+OPERATOR_ONNX_CONVERT_DEFINE(Softmax, Softmax,
+                             OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT,
+                                               SetAttrTupleValueToProto<0>))
+
+// MindSpore LogSoftmax axis(int)
+OPERATOR_ONNX_CONVERT_DEFINE(LogSoftmax, LogSoftmax,
+                             OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT,
+                                               SetAttrValueToProto<Int64Imm>))
+
+OPERATOR_ONNX_CONVERT_DEFINE(Softsign, Softsign, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(Sqrt, Sqrt, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(Equal, Equal, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(Floor, Floor, OpNameInfo())
+OPERATOR_ONNX_CONVERT_DEFINE(ACos, Acos, OpNameInfo())
 
 #define OP_CONVERT_FUNCTION_NAME(name) GetOpOnnxConvertInfo_##name
 
 void RegisterOpConverters(const std::function<void(OpNameInfo &&)> &fn) {
   fn(OP_CONVERT_FUNCTION_NAME(Add)());
   fn(OP_CONVERT_FUNCTION_NAME(Mul)());
-
   fn(OP_CONVERT_FUNCTION_NAME(ReLU)());
   fn(OP_CONVERT_FUNCTION_NAME(Sigmoid)());
-
   fn(OP_CONVERT_FUNCTION_NAME(Conv2D)());
   fn(OP_CONVERT_FUNCTION_NAME(Argmax)());
-
   fn(OP_CONVERT_FUNCTION_NAME(Flatten)());
   fn(OP_CONVERT_FUNCTION_NAME(MaxPool)());
   fn(OP_CONVERT_FUNCTION_NAME(MaxPoolWithArgmax)());
@@ -293,16 +316,24 @@ void RegisterOpConverters(const std::function<void(OpNameInfo &&)> &fn) {
   fn(OP_CONVERT_FUNCTION_NAME(Squeeze)());
   fn(OP_CONVERT_FUNCTION_NAME(BatchNorm)());
   fn(OP_CONVERT_FUNCTION_NAME(MatMul)());
-
   fn(OP_CONVERT_FUNCTION_NAME(MakeTuple)());
   fn(OP_CONVERT_FUNCTION_NAME(RealDiv)());
   fn(OP_CONVERT_FUNCTION_NAME(BiasAdd)());
   fn(OP_CONVERT_FUNCTION_NAME(Sub)());
   fn(OP_CONVERT_FUNCTION_NAME(Maximum)());
+  fn(OP_CONVERT_FUNCTION_NAME(Minimum)());
   fn(OP_CONVERT_FUNCTION_NAME(Exp)());
-  fn(OP_CONVERT_FUNCTION_NAME(ResizeNearestNeighbor)());
+
   fn(OP_CONVERT_FUNCTION_NAME(Softplus)());
   fn(OP_CONVERT_FUNCTION_NAME(Tanh)());
+  fn(OP_CONVERT_FUNCTION_NAME(Softmax)());
+  fn(OP_CONVERT_FUNCTION_NAME(LogSoftmax)());
+  fn(OP_CONVERT_FUNCTION_NAME(Abs)());
+  fn(OP_CONVERT_FUNCTION_NAME(Softsign)());
+  fn(OP_CONVERT_FUNCTION_NAME(Sqrt)());
+  fn(OP_CONVERT_FUNCTION_NAME(Equal)());
+  fn(OP_CONVERT_FUNCTION_NAME(Floor)());
+  fn(OP_CONVERT_FUNCTION_NAME(ACos)());
 }
 
 class OpConvertRegistry {
@@ -367,6 +398,12 @@ class OnnxExporter {
                               std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                        std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+  void ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                            std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+  void ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+  void ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
+                      onnx::GraphProto *graph_proto);
   void ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node,
                         std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimCast(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
@@ -383,7 +420,6 @@ class OnnxExporter {
                         std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node,
                           std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
-
   void ExportMergeConv(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
                        onnx::GraphProto *graph_proto);
   void ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
@@ -392,6 +428,8 @@ class OnnxExporter {
                             std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
   void ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                     std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
+  void ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                            std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *graph_proto);
 
   void ExportOutput(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
                     onnx::GraphProto *graph_proto);
@@ -400,6 +438,16 @@ class OnnxExporter {
 
   void ConvertTupleToTensor(const ValuePtr &value, onnx::TensorProto *tensor_proto);
   void SetNodeAttribute(const ValuePtr &value, onnx::NodeProto *node_proto);
+  void SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output,
+                                       onnx::AttributeProto *const attr_proto, onnx::TensorProto *const tensor_proto,
+                                       std::string tensor_name, float float_data);
+  void SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
+                                std::string input_x, std::string input_y);
+  void SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
+                                std::string input);
+
+  void SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input,
+                            onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type);
 
   size_t AllocateNodeIndex() { return ++onnx_node_index_; }
 
@@ -526,6 +574,9 @@ void OnnxExporter::SetValueInfoType(const AnfNodePtr &node, onnx::ValueInfoProto
     for (const auto &dim : dims) {
       type_proto->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim);
     }
+    if (dims.empty()) {
+      type_proto->mutable_tensor_type()->mutable_shape();
+    }
   }
 }
 
@@ -593,6 +644,12 @@ void OnnxExporter::MatchAndMark(const FuncGraphPtr &func_graph, const std::vecto
       op_merged_infos[cnode].mode = OP_MERGE_MAXPOOL_WITH_ARGMAX;
       op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE;
       op_merged_infos[cnode->input(1)].referred_count -= 1;
+    } else if (cnode->IsApply(prim::kPrimTupleGetItem) &&
+               IsPrimitiveCNode(cnode->input(1), std::make_shared<Primitive>("LayerNorm")) &&
+               GetInt64Value(cnode->input(2)) == 0) {
+      op_merged_infos[cnode].mode = OP_MERGE_LAYER_NORM;
+      op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE;
+      op_merged_infos[cnode->input(1)].referred_count -= 1;
     }
   }
 }
@@ -612,6 +669,7 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
   MatchAndMark(func_graph, nodes, &op_merged_infos);
   int count = -1;
   for (const AnfNodePtr &node : nodes) {
+    // skip when MakeTuple + UpdateState
     count++;
     if (!node->isa<CNode>()) {
       continue;
@@ -623,9 +681,8 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
         i++;
       }
       auto nextCNode = nodes[i]->cast<CNodePtr>();
-      const int INDEX = 2;
       if (nextCNode->IsApply(prim::kPrimUpdateState) &&
-          IsPrimitiveCNode(nextCNode->input(INDEX), std::make_shared<Primitive>("MakeTuple"))) {
+          IsPrimitiveCNode(nextCNode->input(kTwoNum), std::make_shared<Primitive>("MakeTuple"))) {
         continue;
       }
     }
@@ -644,6 +701,18 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
       ExportOutput(func_graph, cnode, node_map_ptr, graph_proto);
       continue;
     }
+    if (cnode->IsApply(prim::kPrimExpandDims)) {
+      ExportPrimExpandDims(func_graph, cnode, node_map_ptr, graph_proto);
+      continue;
+    }
+    if (cnode->IsApply(prim::kPrimBatchMatMul)) {
+      ExportPrimBatchMatMul(func_graph, cnode, node_map_ptr, graph_proto);
+      continue;
+    }
+    if (cnode->IsApply(prim::kPrimGeLU)) {
+      ExportPrimGeLU(func_graph, cnode, node_map_ptr, graph_proto);
+      continue;
+    }
     switch (merged_info.mode) {
       case OP_MERGE_CONV:
         ExportMergeConv(func_graph, cnode, node_map_ptr, graph_proto);
@@ -657,6 +726,9 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
       case OP_MERGE_MAXPOOL_WITH_ARGMAX:
         ExportMergeMaxPoolWithArgmax(func_graph, cnode, node_map_ptr, graph_proto);
         break;
+      case OP_MERGE_LAYER_NORM:
+        ExportMergeLayerNorm(func_graph, cnode, node_map_ptr, graph_proto);
+        break;
       default:
         ExportCNode(func_graph, cnode, node_map_ptr, graph_proto);
         break;
@@ -666,20 +738,21 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map<AnfNodeP
 
 void OnnxExporter::ExportPrimReshape(const FuncGraphPtr &, const CNodePtr &node,
                                      std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto input_shape = node->input(2);
+  auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_shape = node->input(kTwoNum);
   std::string name_shape;
   if (input_shape->isa<ValueNode>()) {
     auto const_node_idx = AllocateNodeIndex();
     (*node_map_ptr)[input_shape] = const_node_idx;
     onnx::NodeProto *node_proto = graph_proto->add_node();
     name_shape = std::to_string(const_node_idx);
-    node_proto->add_output(name_shape);
+    auto name = prim::kPrimReshape->name();
 
+    node_proto->set_name(name_shape + name);
+    node_proto->add_output(name_shape);
     node_proto->set_op_type("Constant");
     onnx::AttributeProto *attr_proto = node_proto->add_attribute();
     attr_proto->set_name("value");
-
     attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
     ConvertTupleToTensor(dyn_cast<ValueNode>(input_shape)->value(), attr_proto->mutable_t());
   } else {
@@ -698,8 +771,8 @@ void OnnxExporter::ExportPrimReshape(const FuncGraphPtr &, const CNodePtr &node,
 
 void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node,
                                     std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto input_axis = node->input(2);
+  auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_axis = node->input(kTwoNum);
 
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
@@ -708,6 +781,7 @@ void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node,
   if (node->IsApply(prim::kPrimReduceSum)) {
     name = prim::kPrimReduceSum->name();
   }
+  node_proto->set_name(std::to_string(node_idx) + name);
   node_proto->set_op_type(name);
   node_proto->add_output(std::to_string(node_idx));
   node_proto->add_input(input_data);
@@ -735,14 +809,14 @@ void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node,
 void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                        std::map<AnfNodePtr, size_t> *node_map_ptr,
                                        onnx::GraphProto *const graph_proto) {
-  auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  const int PERM_INDEX = 2;
-  auto input_perm = node->input(PERM_INDEX);
-
+  auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_perm = node->input(kTwoNum);
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
   onnx::NodeProto *node_proto = graph_proto->add_node();
   auto name = prim::kPrimTranspose->name();
+
+  node_proto->set_name(std::to_string(node_idx) + name);
   node_proto->set_op_type(name);
   node_proto->add_output(std::to_string(node_idx));
   node_proto->add_input(input_data);
@@ -771,9 +845,8 @@ void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNo
 void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                           std::map<AnfNodePtr, size_t> *node_map_ptr,
                                           onnx::GraphProto *const graph_proto) {
-  auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  const int BEGIN_INDEX = 2;
-  auto begin = node->input(BEGIN_INDEX);
+  auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto begin = node->input(kTwoNum);
   auto name = prim::kPrimStridedSlice->name();
   std::string name_begin;
   if (begin->isa<ValueNode>()) {
@@ -785,7 +858,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
 
     node_proto->set_op_type("Constant");
     onnx::AttributeProto *attr_proto = node_proto->add_attribute();
-    attr_proto->set_name("starts");
+    attr_proto->set_name("value");
 
     attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
     ConvertTupleToTensor(dyn_cast<ValueNode>(begin)->value(), attr_proto->mutable_t());
@@ -794,8 +867,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
                       << "Need to insert op convert variable from tuple to tensor for " << name;
   }
 
-  const int END_INDEX = 3;
-  auto end = node->input(END_INDEX);
+  auto end = node->input(kThreeNum);
   std::string name_end;
   if (end->isa<ValueNode>()) {
     auto const_node_idx = AllocateNodeIndex();
@@ -806,7 +878,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
 
     node_proto->set_op_type("Constant");
     onnx::AttributeProto *attr_proto = node_proto->add_attribute();
-    attr_proto->set_name("ends");
+    attr_proto->set_name("value");
 
     attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
     ConvertTupleToTensor(dyn_cast<ValueNode>(end)->value(), attr_proto->mutable_t());
@@ -832,12 +904,11 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
   node_proto_axes->add_output(name_axes);
   node_proto_axes->set_op_type("Constant");
   onnx::AttributeProto *attr_proto_axes = node_proto_axes->add_attribute();
-  attr_proto_axes->set_name("axes");
+  attr_proto_axes->set_name("value");
   attr_proto_axes->set_type(onnx::AttributeProto_AttributeType_TENSOR);
   ConvertTupleToTensor(dyn_cast<ValueNode>(axes)->value(), attr_proto_axes->mutable_t());
 
-  const int STRIDES_INDEX = 4;
-  auto strides = node->input(STRIDES_INDEX);
+  auto strides = node->input(kFourNum);
   std::string name_strides;
   if (strides->isa<ValueNode>()) {
     auto const_node_idx = AllocateNodeIndex();
@@ -848,7 +919,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
 
     node_proto->set_op_type("Constant");
     onnx::AttributeProto *attr_proto_steps = node_proto->add_attribute();
-    attr_proto_steps->set_name("steps");
+    attr_proto_steps->set_name("value");
     attr_proto_steps->set_type(onnx::AttributeProto_AttributeType_TENSOR);
     ConvertTupleToTensor(dyn_cast<ValueNode>(strides)->value(), attr_proto_steps->mutable_t());
   } else {
@@ -871,18 +942,17 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const
 void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                                    std::map<AnfNodePtr, size_t> *node_map_ptr,
                                                    onnx::GraphProto *const graph_proto) {
-  auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto x_shape = dyn_cast<abstract::Shape>(node->input(1)->Shape());
+  auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto x_shape = dyn_cast<abstract::Shape>(node->input(kOneNum)->Shape());
 
-  AnfNodePtr op = node->input(0);
+  AnfNodePtr op = node->input(kZeroNum);
   auto op_value = dyn_cast<ValueNode>(op);
   auto prim = dyn_cast<Primitive>(op_value->value());
   std::vector<int64_t> resize_size;
 
   auto tuple_ptr = dyn_cast<ValueTuple>(prim->GetAttr("size"));
 
-  const int NUM = 2;
-  for (size_t i = 0; i < x_shape->shape().size() - NUM; i++) {
+  for (size_t i = 0; i < x_shape->shape().size() - kTwoNum; i++) {
     resize_size.push_back(x_shape->shape()[i]);
   }
   for (size_t i = 0; i < tuple_ptr->size(); i++) {
@@ -900,7 +970,7 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap
   node_proto_size->add_output(name_size);
   node_proto_size->set_op_type("Constant");
   onnx::AttributeProto *attr_proto = node_proto_size->add_attribute();
-  attr_proto->set_name("sizes");
+  attr_proto->set_name("value");
   attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
   ConvertTupleToTensor(resize_size_ptr, attr_proto->mutable_t());
 
@@ -929,6 +999,293 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap
   node_proto->add_input(name_size);
 }
 
+// MindSpore ExpandDims -> ONNX Reshape
+void OnnxExporter::ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                        std::map<AnfNodePtr, size_t> *node_map_ptr,
+                                        onnx::GraphProto *const graph_proto) {
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto axis = GetInt64Value(node->input(kTwoNum));
+  auto x_shape = dyn_cast<abstract::Shape>(node->input(kOneNum)->Shape());
+  auto name = prim::kPrimExpandDims->name();
+
+  std::vector<int64_t> new_shape;
+  for (size_t i = 0; i < x_shape->shape().size(); i++) {
+    new_shape.push_back(x_shape->shape()[i]);
+  }
+  if (axis < 0) {
+    axis = axis + 1 + x_shape->shape().size();
+  }
+  new_shape.insert(new_shape.begin() + axis, kOneNum);
+  auto new_shape_value = MakeValue<std::vector<int64_t>>(new_shape);
+  auto shape = NewValueNode(new_shape_value)->cast<AnfNodePtr>();
+  std::string name_shape;
+
+  if (shape->isa<ValueNode>()) {
+    auto const_node_idx = AllocateNodeIndex();
+    (*node_map_ptr)[shape] = const_node_idx;
+    onnx::NodeProto *node_proto = graph_proto->add_node();
+    name_shape = std::to_string(const_node_idx);
+    node_proto->add_output(name_shape);
+    node_proto->set_op_type("Constant");
+    onnx::AttributeProto *attr_proto = node_proto->add_attribute();
+    attr_proto->set_name("value");
+    attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
+    ConvertTupleToTensor(dyn_cast<ValueNode>(shape)->value(), attr_proto->mutable_t());
+  } else {
+    name_shape = GetNodeInputName(shape, node_map_ptr, graph_proto);
+    MS_LOG(EXCEPTION) << "Need to insert op convert variable from tuple to tensor for " << name;
+  }
+
+  auto node_idx = AllocateNodeIndex();
+  (*node_map_ptr)[node] = node_idx;
+  onnx::NodeProto *node_proto = graph_proto->add_node();
+  node_proto->set_op_type("Reshape");
+  node_proto->add_output(std::to_string(node_idx));
+  node_proto->add_input(input_x);
+  node_proto->add_input(name_shape);
+}
+
+// MindSpore BatchMatMul -> ONNX Transpose + MatMul
+void OnnxExporter::ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                         std::map<AnfNodePtr, size_t> *node_map_ptr,
+                                         onnx::GraphProto *const graph_proto) {
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_y = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto);
+
+  AnfNodePtr batchmatmul_op = node->input(kZeroNum);
+  auto op_value = dyn_cast<ValueNode>(batchmatmul_op);
+  auto prim = dyn_cast<Primitive>(op_value->value());
+  auto transpose_a = GetValue<bool>(prim->GetAttr("transpose_a"));
+  auto transpose_b = GetValue<bool>(prim->GetAttr("transpose_b"));
+  std::string transpose_input_x_name = "";
+  std::string transpose_input_y_name = "";
+
+  if (transpose_a) {
+    auto input_x_shape = dyn_cast<abstract::Shape>(node->input(kOneNum)->Shape());
+    // Add Transpose node after input_x of BatchMatMul
+    auto transpose_input_x_index = AllocateNodeIndex();
+    onnx::NodeProto *transpose_inputx_node_proto = graph_proto->add_node();
+    transpose_inputx_node_proto->add_input(input_x);
+    transpose_inputx_node_proto->add_output(std::to_string(transpose_input_x_index));
+    transpose_inputx_node_proto->set_op_type(prim::kPrimTranspose->name());
+    onnx::AttributeProto *attr_proto = transpose_inputx_node_proto->add_attribute();
+    attr_proto->set_name("perm");
+    attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS);
+    for (size_t i = 0; i < input_x_shape->shape().size() - kTwoNum; i++) {
+      attr_proto->add_ints(i);
+    }
+    attr_proto->add_ints(input_x_shape->shape().size() - kOneNum);
+    attr_proto->add_ints(input_x_shape->shape().size() - kTwoNum);
+    transpose_input_x_name = std::to_string(transpose_input_x_index);
+  }
+  if (transpose_b) {
+    auto input_y_shape = dyn_cast<abstract::Shape>(node->input(kTwoNum)->Shape());
+    // Add Transpose node after input_y of BatchMatMul
+    auto transpose_input_y_index = AllocateNodeIndex();
+    onnx::NodeProto *transpose_inputy_node_proto = graph_proto->add_node();
+    transpose_inputy_node_proto->add_input(input_y);
+    transpose_inputy_node_proto->add_output(std::to_string(transpose_input_y_index));
+    transpose_inputy_node_proto->set_op_type(prim::kPrimTranspose->name());
+    onnx::AttributeProto *attr_proto = transpose_inputy_node_proto->add_attribute();
+    attr_proto->set_name("perm");
+    attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS);
+    for (size_t i = 0; i < input_y_shape->shape().size() - kTwoNum; i++) {
+      attr_proto->add_ints(i);
+    }
+    attr_proto->add_ints(input_y_shape->shape().size() - kOneNum);
+    attr_proto->add_ints(input_y_shape->shape().size() - kTwoNum);
+    transpose_input_y_name = std::to_string(transpose_input_y_index);
+  }
+
+  auto node_idx = AllocateNodeIndex();
+  (*node_map_ptr)[node] = node_idx;
+  onnx::NodeProto *node_proto = graph_proto->add_node();
+  node_proto->set_op_type("MatMul");
+  node_proto->add_output(std::to_string(node_idx));
+  node_proto->set_name(std::to_string(node_idx) + "MatMul");
+  if (transpose_a) {
+    node_proto->add_input(transpose_input_x_name);
+  } else {
+    node_proto->add_input(input_x);
+  }
+  if (transpose_b) {
+    node_proto->add_input(transpose_input_y_name);
+  } else {
+    node_proto->add_input(input_y);
+  }
+}
+
+void OnnxExporter::SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output,
+                                                   onnx::AttributeProto *const attr_proto,
+                                                   onnx::TensorProto *const tensor_proto, std::string tensor_name,
+                                                   float float_data) {
+  node_proto->set_op_type("Constant");
+  node_proto->add_output(output);
+
+  attr_proto->set_name("value");
+  attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
+
+  tensor_proto->set_name(tensor_name);
+  tensor_proto->add_dims(static_cast<::google::protobuf::int64>(kOneNum));
+  tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32));
+  tensor_proto->add_float_data(float_data);
+}
+
+void OnnxExporter::SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input,
+                                        onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type) {
+  node_proto->set_op_type(prim::kPrimCast->name());
+  node_proto->add_output(output);
+  node_proto->add_input(input);
+
+  attr_proto->set_name("to");
+  attr_proto->set_type(onnx::AttributeProto_AttributeType_INT);
+  attr_proto->set_i(i_type);
+}
+
+void OnnxExporter::SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
+                                            std::string input_x, std::string input_y) {
+  node_proto->add_output(output);
+  node_proto->set_op_type(op_type);
+  node_proto->add_input(input_x);
+  node_proto->add_input(input_y);
+}
+
+void OnnxExporter::SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type,
+                                            std::string input) {
+  node_proto->add_output(output);
+  node_proto->set_op_type(op_type);
+  node_proto->add_input(input);
+}
+
+// MindSpore GeLU -> ONNX 0.5 * X * (1.0 + tanh((sqrt(2/pi) * (x + 0.044715 * pow(x, 3)))))
+void OnnxExporter::ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                  std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_x_node = node->input(kOneNum);
+  auto dtype = input_x_node->Type();
+  auto elem_type = dyn_cast<TensorType>(dtype)->element()->type_id();
+  auto pre_cast_node_idx = 0;
+
+  // if type is float16, add cast node cast float16 to float32
+  if (elem_type == kNumberTypeFloat16) {
+    pre_cast_node_idx = AllocateNodeIndex();
+    onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node();
+    onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute();
+    SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), input_x, pre_cast_attr_proto,
+                         onnx::TensorProto_DataType_FLOAT);
+  }
+
+  // Add Pow node
+  // Add input exponent node for Pow node
+  auto exp_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *exp_node_proto = graph_proto->add_node();
+  onnx::AttributeProto *exp_attr_proto = exp_node_proto->add_attribute();
+  onnx::TensorProto *exp_tensor_proto = exp_attr_proto->mutable_t();
+  SetConstantNodeProtoInfoForGeLU(exp_node_proto, std::to_string(exp_node_idx), exp_attr_proto, exp_tensor_proto,
+                                  "exponent", 3.0);
+  // Add pow node
+  auto pow_idx = AllocateNodeIndex();
+  auto pow_name = std::to_string(pow_idx);
+  onnx::NodeProto *pow_node_proto = graph_proto->add_node();
+  pow_node_proto->set_op_type("Pow");
+  pow_node_proto->add_output(pow_name);
+  if (elem_type == kNumberTypeFloat16) {
+    pow_node_proto->add_input(std::to_string(pre_cast_node_idx));
+  } else {
+    pow_node_proto->add_input(input_x);
+  }
+  pow_node_proto->add_input(std::to_string(exp_node_idx));
+
+  // Add first Mul node
+  // Add input node for first Mul node
+  auto fmul_input_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *fmul_input_node_proto = graph_proto->add_node();
+  onnx::AttributeProto *fmul_input_attr_proto = fmul_input_node_proto->add_attribute();
+  onnx::TensorProto *fmul_input_tensor_proto = fmul_input_attr_proto->mutable_t();
+  SetConstantNodeProtoInfoForGeLU(fmul_input_node_proto, std::to_string(fmul_input_node_idx), fmul_input_attr_proto,
+                                  fmul_input_tensor_proto, "input_y_for_mul", 0.044715);
+  // Add first Mul Node
+  auto fmul_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *fmul_node_proto = graph_proto->add_node();
+  SetTwoInputNodeProtoInfo(fmul_node_proto, fmul_name, "Mul", pow_name, std::to_string(fmul_input_node_idx));
+
+  // Add first Add node
+  auto fadd_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *fadd_node_proto = graph_proto->add_node();
+  if (elem_type == kNumberTypeFloat16) {
+    fadd_node_proto->add_input(std::to_string(pre_cast_node_idx));
+  } else {
+    fadd_node_proto->add_input(input_x);
+  }
+  SetOneInputNodeProtoInfo(fadd_node_proto, fadd_name, "Add", fmul_name);
+
+  // Add second Mul node
+  // Add input node for second Mul node
+  auto smul_input_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *smul_input_node_proto = graph_proto->add_node();
+  onnx::AttributeProto *smul_input_attr_proto = smul_input_node_proto->add_attribute();
+  onnx::TensorProto *smul_input_tensor_proto = smul_input_attr_proto->mutable_t();
+  SetConstantNodeProtoInfoForGeLU(smul_input_node_proto, std::to_string(smul_input_node_idx), smul_input_attr_proto,
+                                  smul_input_tensor_proto, "input_y_for_smul", 0.79788456);
+  // Add second Mul Node
+  auto smul_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *smul_node_proto = graph_proto->add_node();
+  SetTwoInputNodeProtoInfo(smul_node_proto, smul_name, "Mul", fadd_name, std::to_string(smul_input_node_idx));
+
+  // Add tanh node
+  auto tanh_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *tanh_node_proto = graph_proto->add_node();
+  SetOneInputNodeProtoInfo(tanh_node_proto, tanh_name, "Tanh", smul_name);
+
+  // Add second Add node
+  // Add input node for second add node
+  auto sadd_input_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *sadd_input_node_proto = graph_proto->add_node();
+  onnx::AttributeProto *sadd_input_attr_proto = sadd_input_node_proto->add_attribute();
+  onnx::TensorProto *sadd_input_tensor_proto = sadd_input_attr_proto->mutable_t();
+  SetConstantNodeProtoInfoForGeLU(sadd_input_node_proto, std::to_string(sadd_input_node_idx), sadd_input_attr_proto,
+                                  sadd_input_tensor_proto, "input_y_for_sadd", 1.0);
+  // Add second Add node
+  auto sadd_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *sadd_node_proto = graph_proto->add_node();
+  SetTwoInputNodeProtoInfo(sadd_node_proto, sadd_name, "Add", tanh_name, std::to_string(sadd_input_node_idx));
+
+  // Add third Mul node
+  // Add input node for third Mul node
+  auto tmul_input_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *tmul_input_node_proto = graph_proto->add_node();
+  onnx::AttributeProto *tmul_input_attr_proto = tmul_input_node_proto->add_attribute();
+  onnx::TensorProto *tmul_input_tensor_proto = tmul_input_attr_proto->mutable_t();
+  SetConstantNodeProtoInfoForGeLU(tmul_input_node_proto, std::to_string(tmul_input_node_idx), tmul_input_attr_proto,
+                                  tmul_input_tensor_proto, "input_y_for_tmul", 0.5);
+  // Add third Mul Node
+  auto tmul_name = std::to_string(AllocateNodeIndex());
+  onnx::NodeProto *tmul_node_proto = graph_proto->add_node();
+  SetTwoInputNodeProtoInfo(tmul_node_proto, tmul_name, "Mul", sadd_name, std::to_string(tmul_input_node_idx));
+
+  // Add fourth Mul Node
+  auto fomul_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *fomul_node_proto = graph_proto->add_node();
+  if (elem_type == kNumberTypeFloat16) {
+    fomul_node_proto->add_input(std::to_string(pre_cast_node_idx));
+  } else {
+    fomul_node_proto->add_input(input_x);
+  }
+  SetOneInputNodeProtoInfo(fomul_node_proto, std::to_string(fomul_node_idx), "Mul", tmul_name);
+
+  // if type is float16, add cast node cast output node from float16 to float32
+  if (elem_type == kNumberTypeFloat16) {
+    auto aft_cast_node_idx = AllocateNodeIndex();
+    (*node_map_ptr)[node] = aft_cast_node_idx;
+    onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node();
+    onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute();
+    SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx), std::to_string(fomul_node_idx),
+                         aft_cast_attr_proto, onnx::TensorProto_DataType_FLOAT16);
+  } else {
+    (*node_map_ptr)[node] = fomul_node_idx;
+  }
+}
+
 void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                     std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
   auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
@@ -936,10 +1293,10 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP
   (*node_map_ptr)[node] = node_idx;
   onnx::NodeProto *node_proto = graph_proto->add_node();
 
-  AnfNodePtr op = node->input(0);
+  AnfNodePtr op = node->input(kZeroNum);
   auto op_value = dyn_cast<ValueNode>(op);
   auto prim = dyn_cast<Primitive>(op_value->value());
-  auto input_node = node->input(1)->cast<CNodePtr>();
+  auto input_node = node->input(kOneNum)->cast<CNodePtr>();
 
   if (input_node->IsApply(prim::kPrimMakeTuple)) {
     node_proto->set_op_type("ConcatFromSequence");
@@ -957,8 +1314,8 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP
 
 void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node,
                                   std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto input_type = node->input(2);
+  auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_type = node->input(kTwoNum);
 
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
@@ -982,16 +1339,16 @@ void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node,
 
 void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node,
                                    std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto input_slope = GetNodeInputName(node->input(2), node_map_ptr, graph_proto);
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_slope = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto);
 
-  auto x_shape = dyn_cast<abstract::Shape>(node->input(1)->Shape());
-  auto slope_shape = dyn_cast<abstract::Shape>(node->input(2)->Shape());
+  auto x_shape = dyn_cast<abstract::Shape>(node->input(kOneNum)->Shape());
+  auto slope_shape = dyn_cast<abstract::Shape>(node->input(kTwoNum)->Shape());
   MS_EXCEPTION_IF_NULL(x_shape);
   MS_EXCEPTION_IF_NULL(slope_shape);
 
   // format of x is NCHW, input format is NCHW, if length of input_slope is 1, insert Unsqueeze [1,2]
-  if (x_shape->shape().size() == 4 && slope_shape->shape().size() == 1) {
+  if (x_shape->shape().size() == kFourNum && slope_shape->shape().size() == kOneNum) {
     auto node_idx = AllocateNodeIndex();
     onnx::NodeProto *node_proto = graph_proto->add_node();
     node_proto->set_op_type("Unsqueeze");
@@ -1000,8 +1357,8 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node,
     onnx::AttributeProto *attr_proto = node_proto->add_attribute();
     attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS);
     attr_proto->set_name("axes");
-    attr_proto->add_ints(1);
-    attr_proto->add_ints(2);
+    attr_proto->add_ints(kOneNum);
+    attr_proto->add_ints(kTwoNum);
 
     node_proto->add_input(input_slope);
     input_slope = std::to_string(node_idx);
@@ -1018,7 +1375,7 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node,
 
 void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node,
                                    std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
   onnx::NodeProto *node_proto = graph_proto->add_node();
@@ -1038,16 +1395,16 @@ void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node,
 void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePtr &node,
                                              std::map<AnfNodePtr, size_t> *node_map_ptr,
                                              onnx::GraphProto *const graph_proto) {
-  auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto input_w = GetNodeInputName(node->input(2), node_map_ptr, graph_proto);
-  auto x_shape = dyn_cast<abstract::Shape>(node->input(1)->Shape());
-  auto w_shape = dyn_cast<abstract::Shape>(node->input(2)->Shape());
+  auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto input_w = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto);
+  auto x_shape = dyn_cast<abstract::Shape>(node->input(kOneNum)->Shape());
+  auto w_shape = dyn_cast<abstract::Shape>(node->input(kTwoNum)->Shape());
   MS_EXCEPTION_IF_NULL(x_shape);
   MS_EXCEPTION_IF_NULL(w_shape);
-  if (x_shape->shape().size() != 4 || w_shape->shape().size() != 4) {
+  if (x_shape->shape().size() != kFourNum || w_shape->shape().size() != kFourNum) {
     MS_LOG(EXCEPTION) << "DepthwiseConv2d input shape should be 4d.";
   }
-  if (w_shape->shape()[0] != 1 && w_shape->shape()[1] != 1) {
+  if (w_shape->shape()[kZeroNum] != kOneNum && w_shape->shape()[kOneNum] != kOneNum) {
     MS_LOG(EXCEPTION) << "DepthwiseConv2d weight shape[0] != 1 and shape[1] != 1, cannot reshape";
   }
   // create w_shape constant node
@@ -1128,8 +1485,8 @@ void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePt
 
 void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                   std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto multiples = node->input(2);
+  auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto multiples = node->input(kTwoNum);
   std::string name_multiples;
   if (multiples->isa<ValueNode>()) {
     auto const_node_idx = AllocateNodeIndex();
@@ -1137,11 +1494,9 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr
     onnx::NodeProto *node_proto = graph_proto->add_node();
     name_multiples = std::to_string(const_node_idx);
     node_proto->add_output(name_multiples);
-
     node_proto->set_op_type("Constant");
     onnx::AttributeProto *attr_proto = node_proto->add_attribute();
-    attr_proto->set_name("repeat");
-
+    attr_proto->set_name("value");
     attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
     ConvertTupleToTensor(dyn_cast<ValueNode>(multiples)->value(), attr_proto->mutable_t());
   } else {
@@ -1160,7 +1515,7 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr
 
 void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                     std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
+  auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
   std::string name_exponent;
   auto const_node_idx = AllocateNodeIndex();
   onnx::NodeProto *node_proto_exp = graph_proto->add_node();
@@ -1169,12 +1524,13 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP
 
   node_proto_exp->set_op_type("Constant");
   onnx::AttributeProto *attr_proto = node_proto_exp->add_attribute();
+  attr_proto->set_name("value");
   attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
   onnx::TensorProto *tensor_proto = attr_proto->mutable_t();
   tensor_proto->set_name("exponent");
   tensor_proto->add_dims(static_cast<::google::protobuf::int64>(1));
-  tensor_proto->set_data_type(onnx::TensorProto_DataType_INT64);
-  tensor_proto->add_int64_data(2);
+  tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32));
+  tensor_proto->add_float_data(2.0);
 
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
@@ -1187,10 +1543,9 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP
 
 void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                       std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto);
-  auto name_indices = GetNodeInputName(node->input(2), node_map_ptr, graph_proto);
-  auto axis = node->input(3)->cast<ValueNodePtr>()->value();
-
+  auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto);
+  auto name_indices = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto);
+  auto axis = node->input(kThreeNum)->cast<ValueNodePtr>()->value();
   auto node_idx = AllocateNodeIndex();
   (*node_map_ptr)[node] = node_idx;
   onnx::NodeProto *node_proto = graph_proto->add_node();
@@ -1199,6 +1554,7 @@ void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNod
   node_proto->add_input(name_x);
   node_proto->add_input(name_indices);
   onnx::AttributeProto *attr_proto = node_proto->add_attribute();
+  attr_proto->set_name("axis");
   attr_proto->set_type(onnx::AttributeProto_AttributeType_INT);
   attr_proto->set_i(static_cast<::google::protobuf::int64>(dyn_cast<Int64Imm>(axis)->value()));
 }
@@ -1209,11 +1565,9 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n
   if (node->IsApply(prim::kPrimReshape)) {
     return ExportPrimReshape(func_graph, node, node_map_ptr, graph_proto);
   }
-
   if (node->IsApply(prim::kPrimReduceMean) || node->IsApply(prim::kPrimReduceSum)) {
     return ExportPrimReduce(func_graph, node, node_map_ptr, graph_proto);
   }
-
   if (node->IsApply(prim::kPrimTranspose)) {
     return ExportPrimTranspose(func_graph, node, node_map_ptr, graph_proto);
   }
@@ -1257,7 +1611,7 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n
     return ExportPrimSquare(func_graph, node, node_map_ptr, graph_proto);
   }
 
-  // MindSpore GatherV2(x, indices, axis) --> ONNX Pow(x, indices)
+  // MindSpore GatherV2(x, indices, axis) --> ONNX Gather(x, indices)
   if (node->IsApply(prim::kPrimGather)) {
     return ExportPrimGatherV2(func_graph, node, node_map_ptr, graph_proto);
   }
@@ -1267,7 +1621,7 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n
     MS_LOG(EXCEPTION) << "Inputs of apply node is empty";
   }
 
-  AnfNodePtr op = inputs[0];
+  AnfNodePtr op = inputs[kZeroNum];
   std::vector<AnfNodePtr> op_inputs;
   // first process node input 1,2,..., since when node input is a ValueNode, here need to create a Constant Operator
   for (size_t i = 1; i < inputs.size(); i++) {
@@ -1296,15 +1650,23 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::map<AnfNodePtr,
     MS_LOG(EXCEPTION) << "Can not find key " << prim->name() << " in convert map. "
                       << "Exporting " << prim->name() << " operator is not yet supported.";
   }
+  // Get input first, because input maybe valuenode which need create constant node
+  std::vector<std::string> input_list;
+  for (const auto &input : inputs) {
+    auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto);
+    input_list.push_back(input_name);
+  }
+
   const OpNameInfo &op_convert_info = op_iter->second;
   auto node_idx = AllocateNodeIndex();
   onnx::NodeProto *node_proto = graph_proto->add_node();
+  node_proto->set_name(std::to_string(node_idx) + op_convert_info.onnx_type());
   node_proto->add_output(std::to_string(node_idx));
   node_proto->set_op_type(op_convert_info.onnx_type());
 
   // Set inputs
-  for (const auto &input : inputs) {
-    auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto);
+  for (const auto &input_name : input_list) {
+    // auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto);
     node_proto->add_input(input_name);
   }
 
@@ -1327,24 +1689,24 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::map<AnfNodePtr,
 
 void OnnxExporter::ExportMergeConv(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                    std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto conv_node = dyn_cast<CNode>(node->input(1));
-  auto input_x = conv_node->input(1);  // conv input x
-  auto input_w = conv_node->input(2);  // conv weight(filter)
-  auto input_b = node->input(2);       // conv bias
+  auto conv_node = dyn_cast<CNode>(node->input(kOneNum));
+  auto input_x = conv_node->input(kOneNum);  // conv input x
+  auto input_w = conv_node->input(kTwoNum);  // conv weight(filter)
+  auto input_b = node->input(kTwoNum);       // conv bias
 
-  PrimitivePtr prim_conv = dyn_cast<Primitive>((dyn_cast<ValueNode>(conv_node->input(0)))->value());
+  PrimitivePtr prim_conv = dyn_cast<Primitive>((dyn_cast<ValueNode>(conv_node->input(kZeroNum)))->value());
   std::vector<AnfNodePtr> inputs{input_x, input_w, input_b};
   (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_conv, inputs, graph_proto);
 }
 
 void OnnxExporter::ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                    std::map<AnfNodePtr, size_t> *node_map_ptr, onnx::GraphProto *const graph_proto) {
-  auto matmul_node = dyn_cast<CNode>(node->input(1));
-  auto input_x = matmul_node->input(1);  // matmul input x
-  auto input_y = matmul_node->input(2);  // matmul input y
-  auto input_b = node->input(2);         // matmul bias
+  auto matmul_node = dyn_cast<CNode>(node->input(kOneNum));
+  auto input_x = matmul_node->input(kOneNum);  // matmul input x
+  auto input_y = matmul_node->input(kTwoNum);  // matmul input y
+  auto input_b = node->input(kTwoNum);         // matmul bias
 
-  PrimitivePtr prim_matmul = dyn_cast<Primitive>((dyn_cast<ValueNode>(matmul_node->input(0)))->value());
+  PrimitivePtr prim_matmul = dyn_cast<Primitive>((dyn_cast<ValueNode>(matmul_node->input(kZeroNum)))->value());
   std::vector<AnfNodePtr> inputs{input_x, input_y, input_b};
   (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_matmul, inputs, graph_proto);
 }
@@ -1352,9 +1714,9 @@ void OnnxExporter::ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePt
 void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                         std::map<AnfNodePtr, size_t> *node_map_ptr,
                                         onnx::GraphProto *const graph_proto) {
-  auto batch_norm_node = dyn_cast<CNode>(node->input(1));
+  auto batch_norm_node = dyn_cast<CNode>(node->input(kOneNum));
 
-  PrimitivePtr prim_batch_norm = dyn_cast<Primitive>((dyn_cast<ValueNode>(batch_norm_node->input(0)))->value());
+  PrimitivePtr prim_batch_norm = dyn_cast<Primitive>((dyn_cast<ValueNode>(batch_norm_node->input(kZeroNum)))->value());
   std::vector<AnfNodePtr> inputs;
   for (size_t i = 1; i < batch_norm_node->inputs().size(); i++) {
     inputs.push_back(batch_norm_node->input(i));
@@ -1365,10 +1727,10 @@ void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CN
 void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node,
                                                 std::map<AnfNodePtr, size_t> *node_map_ptr,
                                                 onnx::GraphProto *const graph_proto) {
-  auto maxpool_with_argmax_node = dyn_cast<CNode>(node->input(1));
+  auto maxpool_with_argmax_node = dyn_cast<CNode>(node->input(kOneNum));
 
   PrimitivePtr prim_maxpool_with_argmax =
-    dyn_cast<Primitive>((dyn_cast<ValueNode>(maxpool_with_argmax_node->input(0)))->value());
+    dyn_cast<Primitive>((dyn_cast<ValueNode>(maxpool_with_argmax_node->input(kZeroNum)))->value());
   std::vector<AnfNodePtr> inputs;
   for (size_t i = 1; i < maxpool_with_argmax_node->inputs().size(); i++) {
     inputs.push_back(maxpool_with_argmax_node->input(i));
@@ -1376,9 +1738,132 @@ void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph,
   (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_maxpool_with_argmax, inputs, graph_proto);
 }
 
+// LayerNorm(N, C1, H, W) --> reshape(1, C2, 1, W) + MeanVarianceNormalization + reshape(N, C1, H, W)
+void OnnxExporter::ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node,
+                                        std::map<AnfNodePtr, size_t> *node_map_ptr,
+                                        onnx::GraphProto *const graph_proto) {
+  auto LayerNormNode = dyn_cast<CNode>(node->input(kOneNum));
+  auto layernorm_input_x = GetNodeInputName(LayerNormNode->input(kOneNum), node_map_ptr, graph_proto);
+  auto layernorm_input_gamma = GetNodeInputName(LayerNormNode->input(kTwoNum), node_map_ptr, graph_proto);
+  auto layernorm_input_beta = GetNodeInputName(LayerNormNode->input(kThreeNum), node_map_ptr, graph_proto);
+
+  auto layernorm_input_x_node = LayerNormNode->input(kOneNum);
+  auto dtype = layernorm_input_x_node->Type();
+  auto elem_type = dyn_cast<TensorType>(dtype)->element()->type_id();
+  auto pre_cast_node_idx = 0;
+
+  // if type is float16, add cast node cast type from float16 to float32
+  if (elem_type == kNumberTypeFloat16) {
+    pre_cast_node_idx = AllocateNodeIndex();
+    onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node();
+    onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute();
+    SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), layernorm_input_x, pre_cast_attr_proto,
+                         onnx::TensorProto_DataType_FLOAT);
+  }
+
+  // reshape before MeanVarianceNormalization
+  auto input_shape = dyn_cast<abstract::Shape>(LayerNormNode->input(kOneNum)->Shape());
+  std::vector<int64_t> new_input_shape;
+  int64_t n_shape = 1;
+  int64_t c_shape = 1;
+  int64_t h_shape = 1;
+  size_t input_shape_size = input_shape->shape().size();
+  for (size_t i = 0; i < input_shape_size - 1; i++) {
+    c_shape = c_shape * input_shape->shape()[i];
+  }
+  new_input_shape.push_back(n_shape);
+  new_input_shape.push_back(c_shape);
+  new_input_shape.push_back(h_shape);
+  new_input_shape.push_back(input_shape->shape()[input_shape_size - kOneNum]);
+
+  // Add shape node for reshape(before MeanVarianceNormalization)
+  auto new_shape_value = MakeValue<std::vector<int64_t>>(new_input_shape);
+  auto shape_node = NewValueNode(new_shape_value)->cast<AnfNodePtr>();
+  auto shape_node_idx = AllocateNodeIndex();
+
+  // (*node_map_ptr)[shape_node] = shape_node_idx;
+  onnx::NodeProto *shape_node_proto = graph_proto->add_node();
+  shape_node_proto->add_output(std::to_string(shape_node_idx));
+  shape_node_proto->set_op_type("Constant");
+  onnx::AttributeProto *shape_attr_proto = shape_node_proto->add_attribute();
+  shape_attr_proto->set_name("value");
+  shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
+  ConvertTupleToTensor(dyn_cast<ValueNode>(shape_node)->value(), shape_attr_proto->mutable_t());
+
+  // Add reshape node before MeanVarianceNormalization
+  auto pre_reshape_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *pre_reshape_node_proto = graph_proto->add_node();
+  pre_reshape_node_proto->set_op_type("Reshape");
+  pre_reshape_node_proto->add_output(std::to_string(pre_reshape_node_idx));
+  if (elem_type == kNumberTypeFloat16) {
+    pre_reshape_node_proto->add_input(std::to_string(pre_cast_node_idx));
+  } else {
+    pre_reshape_node_proto->add_input(layernorm_input_x);
+  }
+  pre_reshape_node_proto->add_input(std::to_string(shape_node_idx));
+
+  // MeanVarianceNormalization
+  auto meanvariancenormal_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *meanvariancenormal_node_proto = graph_proto->add_node();
+  meanvariancenormal_node_proto->set_op_type("MeanVarianceNormalization");
+  meanvariancenormal_node_proto->add_output(std::to_string(meanvariancenormal_node_idx));
+  meanvariancenormal_node_proto->add_input(std::to_string(pre_reshape_node_idx));
+
+  // if cast type from float16 to float32, add cast node cast type from float32 to float16
+  auto aft_cast_node_idx = 0;
+  if (elem_type == kNumberTypeFloat16) {
+    aft_cast_node_idx = AllocateNodeIndex();
+    onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node();
+    onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute();
+    SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx),
+                         std::to_string(meanvariancenormal_node_idx), aft_cast_attr_proto,
+                         onnx::TensorProto_DataType_FLOAT16);
+  }
+
+  // Add mul and add node
+  auto mul_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *mul_node_proto = graph_proto->add_node();
+  mul_node_proto->set_op_type("Mul");
+  if (elem_type == kNumberTypeFloat16) {
+    mul_node_proto->add_input(std::to_string(aft_cast_node_idx));
+  } else {
+    mul_node_proto->add_input(std::to_string(meanvariancenormal_node_idx));
+  }
+  mul_node_proto->add_input(layernorm_input_gamma);
+  mul_node_proto->add_output(std::to_string(mul_node_idx));
+
+  // add beta
+  auto add_node_idx = AllocateNodeIndex();
+  onnx::NodeProto *add_node_proto = graph_proto->add_node();
+  SetTwoInputNodeProtoInfo(add_node_proto, std::to_string(add_node_idx), "Add", std::to_string(mul_node_idx),
+                           layernorm_input_beta);
+
+  // reshape after MeanVarianceNormalization
+  // Add shape node for reshape(after MeanVarianceNormalization)
+  auto output_shape_value = MakeValue<std::vector<int64_t>>(input_shape->shape());
+  auto output_shape_node = NewValueNode(output_shape_value)->cast<AnfNodePtr>();
+  auto output_shape_node_idx = AllocateNodeIndex();
+
+  onnx::NodeProto *output_shape_node_proto = graph_proto->add_node();
+  output_shape_node_proto->add_output(std::to_string(output_shape_node_idx));
+  output_shape_node_proto->set_op_type("Constant");
+  onnx::AttributeProto *output_shape_attr_proto = output_shape_node_proto->add_attribute();
+  output_shape_attr_proto->set_name("value");
+  output_shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR);
+  ConvertTupleToTensor(dyn_cast<ValueNode>(output_shape_node)->value(), output_shape_attr_proto->mutable_t());
+  // Add reshape node after MeanVarianceNormalization
+  auto aft_reshape_node_idx = AllocateNodeIndex();
+  (*node_map_ptr)[node] = aft_reshape_node_idx;
+  onnx::NodeProto *aft_reshape_node_proto = graph_proto->add_node();
+  aft_reshape_node_proto->set_op_type("Reshape");
+  aft_reshape_node_proto->add_output(std::to_string(aft_reshape_node_idx));
+  aft_reshape_node_proto->add_input(std::to_string(add_node_idx));
+  aft_reshape_node_proto->add_input(std::to_string(output_shape_node_idx));
+}
+
 void OnnxExporter::ExportOutput(const FuncGraphPtr &, const CNodePtr &node, std::map<AnfNodePtr, size_t> *node_map_ptr,
                                 onnx::GraphProto *const graph_proto) {
-  if (node->inputs().size() != 2) {
+  if (node->inputs().size() != kTwoNum) {
     MS_LOG(EXCEPTION) << "Number of inputs of return node is not equal to 2.";
   }
   AnfNodePtr arg = node->input(1);
@@ -1416,7 +1901,6 @@ std::string OnnxExporter::GetNodeInputName(const AnfNodePtr &orig_node, std::map
 
     onnx::NodeProto *node_proto = graph_proto->add_node();
     node_proto->add_output(node_name);
-
     SetNodeAttribute(node->cast<ValueNodePtr>()->value(), node_proto);
 
     return node_name;
diff --git a/mindspore/ccsrc/transform/graph_ir/convert.h b/mindspore/ccsrc/transform/graph_ir/convert.h
index 00bde36780d..504bda92482 100644
--- a/mindspore/ccsrc/transform/graph_ir/convert.h
+++ b/mindspore/ccsrc/transform/graph_ir/convert.h
@@ -78,7 +78,8 @@ class DfGraphConvertor {
   void DrawComputeGraph(const std::string &name) {
     std::ofstream fout(name);
     if (!fout.is_open()) {
-      MS_LOG(ERROR) << "Open file '" << name << "' failed!";
+      MS_LOG(ERROR) << "Open file '" << name << "' failed!"
+                    << " Errno:" << errno << " ErrInfo:" << strerror(errno);
       return;
     }
     fout << compute_sout_.str();
@@ -87,7 +88,8 @@ class DfGraphConvertor {
   void DrawInitGraph(const std::string &name) {
     std::ofstream fout(name);
     if (!fout.is_open()) {
-      MS_LOG(ERROR) << "Open file '" << name << "' failed!";
+      MS_LOG(ERROR) << "Open file '" << name << "' failed!"
+                    << " Errno:" << errno << " ErrInfo:" << strerror(errno);
       return;
     }
     fout << init_sout_.str();
@@ -96,7 +98,8 @@ class DfGraphConvertor {
   void DrawSaveCheckpointGraph(const std::string &name) {
     std::ofstream fout(name);
     if (!fout.is_open()) {
-      MS_LOG(ERROR) << "Open file '" << name << "' failed!";
+      MS_LOG(ERROR) << "Open file '" << name << "' failed!"
+                    << " Errno:" << errno << " ErrInfo:" << strerror(errno);
       return;
     }
     fout << checkpoint_sout_.str();
diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc
index b43049a8b3b..b522c010357 100644
--- a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc
+++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc
@@ -184,6 +184,7 @@ void GraphKernelFlags::RegisterFlags(std::map<std::string, std::string> *flag_ma
   reg.AddFlag("enable_stitch_fusion", &enable_stitch_fusion, opt_level == OptLevel_3);
   reg.AddFlag("enable_recompute_fusion", &enable_recompute_fusion, opt_level >= OptLevel_2);
   reg.AddFlag("enable_parallel_fusion", &enable_parallel_fusion, opt_level == OptLevel_3);
+  reg.AddFlag("enable_low_precision", &enable_low_precision);
 
   // Integer flags
   reg.AddFlag("online_tuning", &online_tuning);
@@ -211,6 +212,7 @@ std::string GraphKernelFlags::DumpAllFlags() const {
   json["enable_stitch_fusion"] = enable_stitch_fusion;
   json["enable_recompute_fusion"] = enable_recompute_fusion;
   json["enable_parallel_fusion"] = enable_parallel_fusion;
+  json["enable_low_precision"] = enable_low_precision;
 
   json["opt_level"] = opt_level;
   json["online_tuning"] = online_tuning;
diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.h b/mindspore/ccsrc/utils/context/graph_kernel_flags.h
index 6be617452f2..7691609853f 100644
--- a/mindspore/ccsrc/utils/context/graph_kernel_flags.h
+++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.h
@@ -79,6 +79,13 @@ class GraphKernelFlags {
    */
   bool enable_parallel_fusion;
 
+  /**
+   * Enable low precision in data transferring between graph kernel and computing in graph kernel
+   * in graph kernel.
+   * Experimental feature, enabled by the enable_low_precision flag
+   */
+  bool enable_low_precision;
+
   /**
    * Optimization level, value from 0 to 3.
    * 0: Disable GraphKernel
diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h
index 7e884d52645..a4983b310a4 100644
--- a/mindspore/ccsrc/utils/utils.h
+++ b/mindspore/ccsrc/utils/utils.h
@@ -203,6 +203,7 @@ constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt";
 constexpr auto kStridedReadOpName = "StridedRead";
 constexpr auto kStridedWriteOpName = "StridedWrite";
 constexpr auto kFusedAdamWeightDecayName = "FusedAdamWeightDecay";
+constexpr auto kAdamWeightDecayName = "AdamWeightDecay";
 constexpr auto kFusedAdamName = "FusedAdam";
 constexpr auto kFusedSparseAdamName = "FusedSparseAdam";
 constexpr auto kFusedMatMulBiasAddName = "FusedMatMulBiasAdd";
@@ -322,6 +323,7 @@ constexpr auto kAttrInputNames = "input_names";
 constexpr auto kAttrIsAICPUKernel = "is_AICPU_kernel";
 constexpr auto kIsBackendCast = "is_backed_cast";
 constexpr auto kAttrOutputNames = "output_names";
+constexpr auto kAttrAsync = "async";
 constexpr auto kAttrVisited = "visited";
 constexpr auto kAttrShape = "shape";
 constexpr auto kAttrMomentum = "momentum";
@@ -333,6 +335,7 @@ constexpr auto kAttrDataShape = "data_shape";
 constexpr auto kAttrFormat = "format";
 constexpr auto kAttrReshapeType = "reshape_type";
 constexpr auto kAttrAxis = "axis";
+constexpr auto kAttrAxes = "axes";
 constexpr auto kAttrKeepDims = "keep_dims";
 constexpr auto kAttrShapeGamma = "shape_gamma";
 constexpr auto kAttrPerm = "perm";
@@ -589,6 +592,7 @@ const std::set<std::string> kOptOperatorSet = {kMomentumOpName,
                                                kAdamApplyOneWithDecayOpName,
                                                kAdamApplyOneWithDecayAssignOpName,
                                                kFusedAdamWeightDecayName,
+                                               kAdamWeightDecayName,
                                                kFusedAdamName,
                                                kFusedSparseAdamName,
                                                kFusedMulApplyMomentumOpName,
@@ -628,6 +632,10 @@ const std::set<std::string> k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0,
                                             kOpFormat_NDHWC, kOpFormat_DHWCN,    kOpFormat_DHWNC};
 
 const std::set<std::string> DynamicShapeConstInputToAttr = {
+  kCastOpName,       kExpandDimsOpName, kReshapeOpName,   kEmbeddingLookupOpName, kTransposeOpName, kReduceMinOpName,
+  kReduceMeanOpName, kReduceMaxOpName,  kReduceAllOpName, kReduceAnyOpName,       kConcatOpName};
+
+const std::set<std::string> DynamicShapeConstInputToAttrGPU = {
   kCastOpName,      kExpandDimsOpName, kReshapeOpName,   kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName,
   kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName,       kReduceAnyOpName, kConcatOpName};
 
diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc
index 374685aa085..4a363b65cbb 100644
--- a/mindspore/ccsrc/vm/transform.cc
+++ b/mindspore/ccsrc/vm/transform.cc
@@ -388,6 +388,13 @@ int64_t CompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) {
   MS_LOG(DEBUG) << "Call:" << Ref(fn) << ", " << height_ << ", " << (size - 1);
   AddInst(Instruction::kCall, Ref(fn));
   Ret(static_cast<int64_t>(size - 1));
+
+  for (size_t i = size - 1; i > 0; i--) {
+    const auto iter = slots_.find(inputs[i]);
+    if (iter != slots_.end() && iter->second >= height_) {
+      slots_.erase(inputs[i]);
+    }
+  }
   return RET_SUCCESS;
 }
 
diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py
index 496c94e4148..018ebaf5190 100644
--- a/mindspore/common/parameter.py
+++ b/mindspore/common/parameter.py
@@ -136,7 +136,6 @@ class Parameter(Tensor_):
 
     def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True):
         self.param_info = ParamInfo()
-        self.init_param_info = True
         self.init_in_server = False
         self.cache_enable = False
         self.name = name
@@ -152,6 +151,7 @@ class Parameter(Tensor_):
         self.is_param_ps = False
         self.push_weight_to_server = False
         self.pull_weight_from_server = False
+        self.requires_aggr = True
         self._cast_type = None
         self._unique = False
         self.is_in_parallel = _is_in_parallel_mode()
@@ -236,18 +236,22 @@ class Parameter(Tensor_):
         self.init_in_server = init_in_server
         self.param_info.init_in_server = init_in_server
 
-    def set_param_fl(self, push_to_server=False, pull_from_server=False):
+    def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True):
         """
         Set the way of parameter and server interaction.
 
         Args:
             push_to_server (bool): Whether the parameter should be pushed to server. Default: False.
             pull_from_server (bool): Whether the parameter should be pulled from server. Default: False.
+            requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True.
         """
         if push_to_server:
             self.push_weight_to_server = True
         if pull_from_server:
             self.pull_weight_from_server = True
+        if not requires_aggr:
+            self.requires_aggr = False
+            self.param_info.requires_aggr = False
 
     @property
     def inited_param(self):
@@ -376,6 +380,7 @@ class Parameter(Tensor_):
         x.is_param_ps = self.is_param_ps
         x.init_in_server = self.init_in_server
         x.cache_enable = self.cache_enable
+        x.requires_aggr = self.requires_aggr
         if self.cache_shape:
             x.cache_shape = self.cache_shape
         if init != 'same':
@@ -581,11 +586,6 @@ class Parameter(Tensor_):
         obj.sliced = set_sliced
         return obj
 
-    def __del__(self):
-        if hasattr(self, "init_param_info"):
-            if self.init_param_info is True and context.get_context("mode") == context.GRAPH_MODE:
-                self.param_info = None
-
 
 class ParameterTuple(tuple):
     """
diff --git a/mindspore/common/seed.py b/mindspore/common/seed.py
index 7839cbe1cc5..84157a2771c 100644
--- a/mindspore/common/seed.py
+++ b/mindspore/common/seed.py
@@ -59,10 +59,8 @@ def set_seed(seed):
     Examples:
         >>> import numpy as np
         >>> import mindspore.ops as ops
-        >>> from mindspore import Tensor
-        >>> from mindspore.common import set_seed
+        >>> from mindspore import Tensor, set_seed, Parameter
         >>> from mindspore.common.initializer import initializer
-        >>> from mindspore.common.parameter import Parameter
         >>>
         >>> # Note: (1) Please make sure the code is running in PYNATIVE MODE;
         >>> # (2) Because Composite-level ops need parameters to be Tensors, for below examples,
diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py
index 80a32b93a89..12b11905f87 100644
--- a/mindspore/common/tensor.py
+++ b/mindspore/common/tensor.py
@@ -40,13 +40,13 @@ class Tensor(Tensor_):
         input_data (Union[Tensor, float, int, bool, tuple, list, numpy.ndarray]): Input data of the tensor.
         dtype (:class:`mindspore.dtype`): Input data should be None, bool or numeric type defined in `mindspore.dtype`.
             The argument is used to define the data type of the output tensor. If it is None, the data type of the
-            output tensor will be as same as the `input_data`. Default: None.
+            output tensor will be the same as the `input_data`. Default: None.
         shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of
             output. If `input_data` is available, `shape` doesn't need to be set. Default: None.
         init (Initializer): the information of init data.
             'init' is used for delayed initialization in parallel mode. Usually, it is not recommended to use
             'init' interface to initialize parameters in other conditions. If 'init' interface is used to initialize
-            parameters, the `Tensor.init_data` API need to be called to convert `Tensor` to the actual data.
+            parameters, the `Tensor.init_data` API needs to be called to convert `Tensor` to the actual data.
 
     Outputs:
         Tensor. If `dtype` and `shape` are not set, return a tensor with the same dtype and shape as `input_data`.
@@ -425,12 +425,12 @@ class Tensor(Tensor_):
 
         Args:
             axis (Union[None, int, tuple(int)): Dimensions of reduction,
-                when axis is None or empty tuple, reduce all dimensions. Default: ().
+                when the axis is None or empty tuple, reduce all dimensions. Default: ().
             keep_dims (bool): Whether to keep the reduced dimensions. Default: False.
 
         Returns:
             Tensor, if all array elements along the given axis evaluate to True, its value is True,
-            otherwise its value is False. If axis is None or empty tuple, reduce all dimensions.
+            otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions.
 
         Supported Platforms:
             ``Ascend`` ``GPU`` ``CPU``
@@ -454,12 +454,12 @@ class Tensor(Tensor_):
 
         Args:
             axis (Union[None, int, tuple(int)): Dimensions of reduction,
-                when axis is None or empty tuple, reduce all dimensions. Default: ().
+                when the axis is None or empty tuple, reduce all dimensions. Default: ().
             keep_dims (bool): Whether to keep the reduced dimensions. Default: False.
 
         Returns:
             Tensor, if any array element along the given axis evaluates to True, its value is True,
-            otherwise its value is False. If axis is None or empty tuple, reduce all dimensions.
+            otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions.
 
         Supported Platforms:
             ``Ascend`` ``GPU`` ``CPU``
@@ -536,7 +536,7 @@ class Tensor(Tensor_):
 
         Args:
             axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction,
-                when axis is None or empty tuple, reduce all dimensions. Default: ().
+                when the axis is None or empty tuple, reduce all dimensions. Default: ().
             keep_dims (bool): Whether to keep the reduced dimensions. Default: False.
 
         Returns:
@@ -772,7 +772,7 @@ class Tensor(Tensor_):
 
     def astype(self, dtype, copy=True):
         """
-        Return a copy of the tensor, casted to a specified type.
+        Return a copy of the tensor, cast to a specified type.
 
         Args:
             dtype (Union[:class:`mindspore.dtype`, str]): Designated tensor dtype, can be in format
@@ -818,7 +818,7 @@ class Tensor(Tensor_):
             shape as self.shape with the dimension along axis removed.
 
         Raises:
-            ValueError: if axis is out of range.
+            ValueError: if the axis is out of range.
 
         Supported Platforms:
             ``Ascend`` ``GPU`` ``CPU``
@@ -852,7 +852,7 @@ class Tensor(Tensor_):
             shape as self.shape with the dimension along axis removed.
 
         Raises:
-            ValueError: if axis is out of range.
+            ValueError: if the axis is out of range.
 
         Supported Platforms:
             ``Ascend`` ``GPU`` ``CPU``
@@ -890,7 +890,7 @@ class Tensor(Tensor_):
                 In that case, :class:`float32` is used. Default: None.
 
         Raises:
-            ValueError: if axis is out of range.
+            ValueError: if the axis is out of range.
 
         Returns:
             Tensor.
@@ -1024,7 +1024,7 @@ class Tensor(Tensor_):
                 is passed, initial must also be provided. Default: True.
 
         Returns:
-            Tensor or scalar, minimum of input tensor. If axis is None, the result is a scalar
+            Tensor or scalar, minimum of input tensor. If the axis is None, the result is a scalar
             value. If `axis` is given, the result is an array of dimension ``self.ndim - 1``.
 
         Raises:
@@ -1533,7 +1533,7 @@ class Tensor(Tensor_):
 
         Args:
             choices (Union[tuple, list, Tensor]): Choice arrays. `a` and all of the `choices` must
-                be broadcastable to the same shape. If `choices` is itself an array, then
+                be broadcasted to the same shape. If `choices` is itself an array, then
                 its outermost dimension (i.e., the one corresponding to ``choices.shape[0]``)
                 is taken as defining the “sequence”.
             mode (‘raise’, ‘wrap’, ‘clip’, optional): Specifies how indices outside
@@ -1764,8 +1764,8 @@ class Tensor(Tensor_):
         Args:
             axis (Union[None, int, tuple(int)]): Axis or axes along which a sum is performed. Default: None.
                 If None, sum all of the elements of the input array.
-                If axis is negative it counts from the last to the first axis.
-                If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple
+                If the axis is negative, it counts from the last to the first axis.
+                If the axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple
                 instead of a single axis or all the axes as before.
             dtype (:class:`mindspore.dtype`, optional): defaults to None. Overrides the dtype of the
                 output Tensor.
@@ -1778,7 +1778,7 @@ class Tensor(Tensor_):
 
         Returns:
             Tensor. A tensor with the same shape as input, with the specified axis removed.
-            If input tensor is a 0-d array, or if axis is None, a scalar is returned.
+            If input tensor is a 0-d array, or if the axis is None, a scalar is returned.
 
         Raises:
             TypeError: If input is not array_like, or `axis` is not int or tuple of ints,
@@ -1798,7 +1798,8 @@ class Tensor(Tensor_):
             >>> print(input_x.sum(axis=1))
             [10. 35.]
         """
-        dtype = self.dtype if dtype is None else dtype
+        input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self
+        dtype = input_x.dtype if dtype is None else dtype
         if not isinstance(keepdims, int):
             raise TypeError(f"integer argument expected, but got {type(keepdims)}")
         if initial is not None and not isinstance(initial, (int, float, bool)):
@@ -1808,7 +1809,9 @@ class Tensor(Tensor_):
         else:
             axis = validator.check_and_canonicalize_axes(axis, self.ndim)
 
-        input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self
+        if not validator.check_type_support(input_x.dtype, 'GPU',
+                                            (mstype.float64, mstype.float32, mstype.float16)):
+            input_x = input_x.astype(mstype.float32)
         if 0 in self.shape:
             input_x = tensor_operator_registry.get('make_tensor')([0], self.dtype)
         res = tensor_operator_registry.get('sum')(bool(keepdims))(input_x, axis)
@@ -1830,7 +1833,7 @@ class Tensor(Tensor_):
             Tensor, has the same shape as input tensor except along the given axis.
 
         Raises:
-            ValueError: if axis is out of range.
+            ValueError: if the axis is out of range.
             TypeError: if arguments have types not specified above.
 
         Supported Platforms:
diff --git a/mindspore/context.py b/mindspore/context.py
index bcccdd44bf4..c6262fd4e79 100644
--- a/mindspore/context.py
+++ b/mindspore/context.py
@@ -353,11 +353,11 @@ def set_auto_parallel_context(**kwargs):
 
     Note:
         Attribute name is required for setting attributes.
-        If a program has tasks with different parallel modes, then before setting new parallel mode for the
-        next task, interface mindspore.context.reset_auto_parallel_context() needs to be called to reset
+        If a program has tasks on different parallel modes, before setting a new parallel mode for the
+        next task, interface mindspore.context.reset_auto_parallel_context() should be called to reset
         the configuration.
-        Setting or changing parallel modes must be called before any creating Initializer, otherwise,
-        RuntimeError may be raised when compiling the network.
+        Setting or changing parallel modes must be called before creating any Initializer, otherwise,
+        it may have RuntimeError when compiling the network.
 
     Some configurations are parallel mode specific, see the below table for details:
 
@@ -410,7 +410,7 @@ def set_auto_parallel_context(**kwargs):
         strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: ''
         strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: ''
         full_batch (bool): If you load whole batch datasets in auto_parallel mode, this parameter
-                       should be set with True. Default: False.
+                       should be set as True. Default: False.
         enable_parallel_optimizer (bool): This is a developing feature, which shards the weight update computation for
                        data parallel training in the benefit of time and memory saving. Currently, auto and semi auto
                        parallel mode support all optimizers in both Ascend and GPU. Data parallel mode only supports
@@ -419,7 +419,7 @@ def set_auto_parallel_context(**kwargs):
                        and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP. No Default, if it is not set, the fusion is closed.
         pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how
                         the devices are distributed alone the pipeline. The total devices will be divided into
-                        'pipeline_stags' stages. This currently could only be used when
+                        'pipeline_stags' stages. Currently this could only be used when
                         parallel mode semi_auto_parallel is enabled. Default: 1.
         grad_accumulation_step (int): Set the accumulation steps of gradients in auto and semi auto parallel mode.
                         This should be a positive int. Default: 1.
@@ -520,14 +520,14 @@ def set_context(**kwargs):
     Set context for running environment.
 
     Context should be configured before running your program. If there is no configuration,
-    it will automatic acquisition according to device target by default. GRAPH_MODE or
+    it will be automatically obtained according to the device target by default. GRAPH_MODE or
     PYNATIVE_MODE can be set by `mode` attribute and both modes support all backends, default
     mode is GRAPH_MODE.
 
-    When the `save_graphs` attribute is set to True, attribute of `save_graphs_path` is used to set the
+    When the `save_graphs` attribute is set as True, attribute of `save_graphs_path` is used to set the
     intermediate compilation graph storage path. By default, the graphs are saved in the current directory.
     For other configurations and arguments, please refer to the corresponding module
-    description, the configuration is optional and can be enabled when needed.
+    description. Additionally, the configuration is optional and can be enabled when needed.
 
     Note:
         Attribute name is required for setting attributes.
@@ -579,7 +579,7 @@ def set_context(**kwargs):
               equivalently by setting opt_level greater than 0.
             - dump_as_text: dump detail info as text files. Default: false.
 
-            More options can be referred from the implementation code.
+            More options can refer to the implementation code.
             These options can also be set by environment variable `MS_GRAPH_KERNEL_FLAGS`, without modifying
             network source code. For example, `export MS_GRAPH_KERNEL_FLAGS="--opt_level=2 --dump_as_text"`.
         reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True.
@@ -597,15 +597,15 @@ def set_context(**kwargs):
         profiling_options (str): Set profiling collection options, operators can profiling data here.
             The values of profiling collection options are as follows, supporting the collection of multiple data.
 
-            - output: the saving the path of the profiling collection result file. The directory spectified by this
-              parameter needs to be created in advance on the training environment (container or host side) and ensure
+            - output: The saving path of the profiling collection result. The directory specified by this
+              parameter should be created in advance in the training environment (container or host side) and ensure
               that the running user configured during installation has read and write permissions.It supports the
               configuration of absolute or relative paths(relative to the current path when executing the command line).
               The absolute path configuration starts with '/', for example:/home/data/output.
-              The relative path configuration directly starts with the directory name,for example:output.
+              The relative path configuration starts with the directory name,for example:output.
 
             - training_trace: collect iterative trajectory data, that is, the training task and software information of
-              the AI software stack, to achieve performance analysis of the training task, focusing on data
+              the AI software stack, to realize performance analysis of the training task, focusing on data
               enhancement, forward and backward calculation, gradient aggregation update and other related data.
               The value is on/off.
 
@@ -640,11 +640,11 @@ def set_context(**kwargs):
         max_device_memory (str): Sets the maximum memory available for devices.
             Currently, it is only supported on GPU. The format is "xxGB". Default: "1024GB".
         print_file_path (str): The path of saving print data. If this parameter is set, print data is saved to
-            a file by default, and turns off printing to the screen. If the file already exists, add a timestamp
+            a file by default, and turns off printing to the screen. If the file exists already, add a timestamp
             suffix to the file. Default: ''.
         enable_sparse (bool): Whether to enable sparsity feature. Default: False.
             For details of sparsity and sparse tensor, please check
-            `<https://www.mindspore.cn/docs/programming_guide/zh-CN/master/tensor.html>`_.
+            `<https://www.mindspore.cn/doc/programming_guide/zh-CN/master/tensor.html>`_.
         max_call_depth (int): Specify the maximum depth of function call. Must be positive integer. Default: 1000.
         env_config_path (str): Config path for DFX.
         auto_tune_mode (str): The mode of auto tune when op building, get the best tiling performance,
@@ -652,7 +652,7 @@ def set_context(**kwargs):
             RL: rl_tune;
             GA: ga_tune;
             RL,GA: rl_tune/ga_tune(Automatic selection).
-            - rl_tune: Reinforecement Learning tune.
+            - rl_tune: Reinforcement Learning tune.
             - ga_tune: Genetic Algorithm tune.
         grad_for_scalar (bool): Whether to get gradient for scalar. If set, the gradient of scalar input parameter
             can be calculated. Now, only part of the scalar operators support this calculation. Default: False.
@@ -660,8 +660,8 @@ def set_context(**kwargs):
             This is an experimental prototype that is subject to change and/or deletion.
         load_compile_cache (bool): Whether to use the cache of the graph compiled by frontend.
             When it is true, the graph compilation will skip the frontend compilation process. It means that
-            you should make sure the network has not been changed since the last execution. Currently we have
-            not support automatic checking the changes yet. Default: False.
+            you should make sure the network has not been changed since the last execution. By now, we have
+            not support automatically checking the changes yet. Default: False.
             This is an experimental prototype that is subject to change and/or deletion.
 
     Raises:
@@ -715,7 +715,7 @@ def set_context(**kwargs):
 def get_context(attr_key):
     """
     Get context attribute value according to the input key.
-    If some attribute are not set, it will be automatically obtained.
+    If some attributes are not set, they will be automatically obtained.
 
     Args:
         attr_key (str): The key of the attribute.
diff --git a/mindspore/core/abstract/abstract_value.cc b/mindspore/core/abstract/abstract_value.cc
index e6c81dc8268..4f93df83a16 100644
--- a/mindspore/core/abstract/abstract_value.cc
+++ b/mindspore/core/abstract/abstract_value.cc
@@ -271,10 +271,14 @@ const AbstractBasePtr AbstractSequeue::operator[](const std::size_t &dim) const
 
 std::string AbstractSequeue::ToString() const {
   std::ostringstream buffer;
-  int64_t i = 0;
+  size_t i = 0;
+  size_t size = elements_.size();
   for (const auto &ele : elements_) {
     MS_EXCEPTION_IF_NULL(ele);
-    buffer << "element[" << i << "]: " << ele->ToString() << ",";
+    buffer << "element[" << i << "]: " << ele->ToString();
+    if (i < size - 1) {
+      buffer << ", ";
+    }
     i++;
   }
   return buffer.str();
diff --git a/mindspore/core/abstract/analysis_context.cc b/mindspore/core/abstract/analysis_context.cc
index 99facd66845..561fa777a43 100644
--- a/mindspore/core/abstract/analysis_context.cc
+++ b/mindspore/core/abstract/analysis_context.cc
@@ -23,6 +23,7 @@
 
 namespace mindspore {
 namespace abstract {
+std::list<AnalysisContextPtr> AnalysisContext::all_context_;
 AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph,
                                                const AbstractBasePtrList &args_spec_list) {
   // Find func graph's parent and its parent context firstly.
@@ -56,7 +57,7 @@ AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph,
   }
 
   // Create a new context for the func graph and its specific arguments.
-  AnalysisContextPtr new_context = std::make_shared<AnalysisContext>(parent_context, func_graph, args_spec_list);
+  AnalysisContextPtr new_context = CreateContext(parent_context, func_graph, args_spec_list);
   // To avoid cycle-reference, use weak_ptr here.
   auto weak_new_context = std::weak_ptr<AnalysisContext>(new_context);
   new_context->extant_context_cache_[func_graph] = weak_new_context;
@@ -102,7 +103,7 @@ AnalysisContextPtr AnalysisContext::FindOwnOrParentContext(const FuncGraphPtr &f
 }
 
 AnalysisContextPtr AnalysisContext::DummyContext() {
-  AnalysisContextPtr dummy_context = std::make_shared<AnalysisContext>(nullptr, nullptr, AbstractBasePtrList());
+  AnalysisContextPtr dummy_context = CreateContext(nullptr, nullptr, AbstractBasePtrList());
   dummy_context->extant_context_cache_[nullptr] = std::weak_ptr<AnalysisContext>(dummy_context);
   return dummy_context;
 }
@@ -112,7 +113,7 @@ bool AnalysisContext::IsDummyContext() {
 }
 
 const AnalysisContextPtr kDummyAnalysisContext =
-  std::make_shared<AnalysisContext>(nullptr, nullptr, AbstractBasePtrList());
+  AnalysisContext::CreateContext(nullptr, nullptr, AbstractBasePtrList());
 
 bool AnalysisContext::operator==(const AnalysisContext &other) const {
   if (func_graph_ != other.func_graph_) {
@@ -174,7 +175,7 @@ AnalysisContextPtr AnalysisContext::SpecializeKey() const {
                          }
                          return arg;
                        });
-  AnalysisContextPtr context_new = std::make_shared<AnalysisContext>(nullptr, func_graph_, args_broad_shp);
+  AnalysisContextPtr context_new = CreateContext(nullptr, func_graph_, args_broad_shp);
   context_new->parent_ = parent_;
   return context_new;
 }
@@ -209,5 +210,23 @@ std::string AnalysisContext::ToString() const {
   buffer << "}";
   return buffer.str();
 }
+
+void AnalysisContext::ClearContext() {
+  for (auto &item : all_context_) {
+    item->parent_ = nullptr;
+    item->func_graph_ = nullptr;
+    item->args_spec_list_.clear();
+    item->extant_context_cache_.clear();
+    item->children_cache_.clear();
+  }
+  all_context_.clear();
+}
+
+AnalysisContextPtr AnalysisContext::CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg,
+                                                  const AbstractBasePtrList &args_spec_list) {
+  auto context = std::make_shared<AnalysisContext>(parent, fg, args_spec_list);
+  all_context_.emplace_back(context);
+  return context;
+}
 }  // namespace abstract
 }  // namespace mindspore
diff --git a/mindspore/core/abstract/analysis_context.h b/mindspore/core/abstract/analysis_context.h
index e097888ebc7..926697b5759 100644
--- a/mindspore/core/abstract/analysis_context.h
+++ b/mindspore/core/abstract/analysis_context.h
@@ -22,6 +22,7 @@
 #include <memory>
 #include <string>
 #include <unordered_map>
+#include <list>
 
 #include "abstract/abstract_value.h"
 #include "ir/meta_func_graph.h"
@@ -42,7 +43,6 @@ class AnalysisContext {
       extant_context_cache_ = parent_->extant_context_cache_;
     }
   }
-
   ~AnalysisContext() = default;
 
   // Extend this context with values for another graph.
@@ -59,6 +59,9 @@ class AnalysisContext {
   std::string ToString() const;
   AnalysisContextPtr SpecializeKey() const;
   AbstractBasePtrList args_spec_list() { return args_spec_list_; }
+  static void ClearContext();
+  static AnalysisContextPtr CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg,
+                                          const AbstractBasePtrList &args_spec_list);
 
  private:
   AnalysisContextPtr parent_;
@@ -70,6 +73,11 @@ class AnalysisContext {
   // Record all created child contexts from this context.
   // Like: key: [func_graph & arguments], value: [child_context]
   std::unordered_map<FuncGraphPtr, ArgsSpecToAnalysisContextMap> children_cache_;
+
+  // There may may be shared_ptr loop like:
+  // FuncGraphAbstactClosur->AnalysisContext->children_cache_->ArgsSpec->FuncGraphAbstactClosur.
+  // For break the loop, using all_context_ to clear context_.
+  static std::list<AnalysisContextPtr> all_context_;
 };
 
 struct ContextHasher {
diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc
index 4b5aefeac1a..9c72ad800f2 100644
--- a/mindspore/core/abstract/prim_arrays.cc
+++ b/mindspore/core/abstract/prim_arrays.cc
@@ -140,7 +140,7 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p
 
   auto shape = input->shape();
   MS_EXCEPTION_IF_NULL(shape);
-  if (shape->shape().empty()) {
+  if (shape->shape().size() != 1) {
     MS_LOG(EXCEPTION) << "Rank of " << op_name << "'s input must be 1.";
   }
   ShapeVector ids_shape = {Shape::SHP_ANY};
diff --git a/mindspore/core/abstract/prim_structures.cc b/mindspore/core/abstract/prim_structures.cc
index a94311edd40..fd429717c0e 100644
--- a/mindspore/core/abstract/prim_structures.cc
+++ b/mindspore/core/abstract/prim_structures.cc
@@ -318,8 +318,11 @@ AbstractBasePtr InferImplListAppend(const AnalysisEnginePtr &, const PrimitivePt
   const std::string op_name = primitive->name();
   CheckArgsSize(op_name, args_spec_list, 2);
   AbstractListPtr list = CheckArg<AbstractList>(op_name, args_spec_list, 0);
-  (void)AbstractJoin(list->elements());
-  return list;
+  AbstractBasePtr item = dyn_cast<AbstractBase>(args_spec_list[1]);
+  MS_EXCEPTION_IF_NULL(item);
+  auto new_list = AbstractBasePtrList(list->elements());
+  new_list.emplace_back(item);
+  return std::make_shared<AbstractList>(new_list);
 }
 
 AbstractBasePtr InferImplTupleLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
diff --git a/mindspore/core/abstract/primitive_infer_map.cc b/mindspore/core/abstract/primitive_infer_map.cc
index ce46a71137f..f56fbd7ba80 100644
--- a/mindspore/core/abstract/primitive_infer_map.cc
+++ b/mindspore/core/abstract/primitive_infer_map.cc
@@ -31,13 +31,16 @@
 #include "ops/mul.h"
 #include "ops/sub.h"
 #include "ops/strided_slice.h"
+#include "ops/reduce_sum.h"
 #include "abstract/abstract_function.h"
 #include "abstract/infer_functions.h"
+#include "utils/ms_context.h"
 #include "ops/tile.h"
 
 namespace mindspore {
 namespace abstract {
 std::vector<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
+  const auto kReduceSum = prim::kPrimReduceSum->name();
   const auto kUnsortedSegmentSum = prim::kPrimUnsortedSegmentSum->name();
   const auto kUnsortedSegmentMin = prim::kPrimUnsortedSegmentMin->name();
   const auto kUnsortedSegmentMax = prim::kPrimUnsortedSegmentMax->name();
@@ -49,6 +52,13 @@ std::vector<int64_t> GetDependsFormMap(const CNodePtr &cnode) {
     {kUnsortedSegmentSum, {2}}, {kUnsortedSegmentMin, {2}}, {kUnsortedSegmentMax, {2}}, {kGather, {2}},
     {kGatherV2, {2}},           {kDynamicShape, {0}},       {kRange, {0, 1, 2}},
   };
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  auto device = ms_context->get_param<std::string>(MS_CTX_DEVICE_TARGET);
+  if (device == kAscendDevice) {
+    dynamic_shape_depends.insert({kReduceSum, {1}});
+  }
+
   MS_EXCEPTION_IF_NULL(cnode);
   if (cnode->inputs().empty()) {
     MS_LOG(EXCEPTION) << "Invalid inputs";
@@ -194,7 +204,7 @@ PrimitiveEvalImplMap &GetPrimitiveToBackendEvalImplMap() {
     {prim::kPrimNotEqual, {ops::NotEqualInfer, nullptr, true}},
     {prim::kPrimLog, {ops::LogInfer, nullptr, true}},
     {prim::kPrimReciprocal, {ops::ReciprocalInfer, nullptr, true}},
-    {prim::kPrimReduceSum, {InferImplReduceFunc, nullptr, true}},
+    {prim::kPrimReduceSum, {ops::ReduceSumInfer, nullptr, true}},
     {prim::kPrimReduceMean, {InferImplReduceFunc, nullptr, true}},
     {prim::kPrimReduceAll, {InferImplReduceFunc, nullptr, true}},
     {prim::kPrimReduceAny, {InferImplReduceFunc, nullptr, true}},
diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h
index ab7c128ffbf..9ad67236fc6 100644
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -312,6 +312,8 @@ inline const PrimitivePtr kPrimBinaryCrossEntropy = std::make_shared<Primitive>(
 inline const PrimitivePtr kPrimBinaryCrossEntropyGrad = std::make_shared<Primitive>("BinaryCrossEntropyGrad");
 inline const PrimitivePtr kPrimSmoothL1Loss = std::make_shared<Primitive>("SmoothL1Loss");
 inline const PrimitivePtr kPrimSmoothL1LossGrad = std::make_shared<Primitive>("SmoothL1LossGrad");
+inline const PrimitivePtr kPrimSoftMarginLoss = std::make_shared<Primitive>("SoftMarginLoss");
+inline const PrimitivePtr kPrimSoftMarginLossGrad = std::make_shared<Primitive>("SoftMarginLossGrad");
 inline const PrimitivePtr kPrimSoftmaxCrossEntropyWithLogits =
   std::make_shared<Primitive>("SoftmaxCrossEntropyWithLogits");
 inline const PrimitivePtr kPrimSigmoidCrossEntropyWithLogits =
@@ -346,6 +348,7 @@ inline const PrimitivePtr kPrimRelu6 = std::make_shared<Primitive>(kReLU6);
 inline const PrimitivePtr kPrimReluV2 = std::make_shared<Primitive>(kReLUV2);
 inline const PrimitivePtr kPrimPRelu = std::make_shared<Primitive>("PReLU");
 inline const PrimitivePtr kPrimSoftplus = std::make_shared<Primitive>("Softplus");
+inline const PrimitivePtr kPrimSoftplusGrad = std::make_shared<Primitive>("SoftplusGrad");
 inline const PrimitivePtr kPrimZeros = std::make_shared<Primitive>("Zeros");
 inline const PrimitivePtr kPrimZerosLike = std::make_shared<Primitive>(kZerosLike);
 inline const PrimitivePtr kPrimOnesLike = std::make_shared<Primitive>(kOnesLike);
@@ -375,6 +378,8 @@ inline const PrimitivePtr kSquareSumV1 = std::make_shared<Primitive>("SquareSumV
 inline const PrimitivePtr kFusedMulAdd = std::make_shared<Primitive>("FusedMulAdd");
 inline const PrimitivePtr kPrimSoftShrink = std::make_shared<Primitive>("SoftShrink");
 inline const PrimitivePtr kPrimSoftShrinkGrad = std::make_shared<Primitive>("SoftShrinkGrad");
+inline const PrimitivePtr kPrimHShrink = std::make_shared<Primitive>("HShrink");
+inline const PrimitivePtr kPrimHShrinkGrad = std::make_shared<Primitive>("HShrinkGrad");
 
 // Comm ops
 inline const PrimitivePtr kPrimMirror = std::make_shared<Primitive>("_MirrorOperator");
@@ -472,6 +477,7 @@ inline const PrimitivePtr kPrimSqrtGrad = std::make_shared<Primitive>("SqrtGrad"
 inline const PrimitivePtr kPrimReciprocal = std::make_shared<Primitive>(kReciprocal);
 inline const PrimitivePtr kPrimExpandDims = std::make_shared<Primitive>("ExpandDims");
 inline const PrimitivePtr kPrimAbs = std::make_shared<Primitive>("Abs");
+inline const PrimitivePtr kPrimAbsGrad = std::make_shared<Primitive>("AbsGrad");
 inline const PrimitivePtr kPrimRint = std::make_shared<Primitive>("Rint");
 inline const PrimitivePtr kPrimRound = std::make_shared<Primitive>("Round");
 inline const PrimitivePtr kPrimExp = std::make_shared<Primitive>(kExp);
@@ -487,6 +493,8 @@ inline const PrimitivePtr kPrimACos = std::make_shared<Primitive>("ACos");
 inline const PrimitivePtr kPrimAsinGrad = std::make_shared<Primitive>("AsinGrad");
 inline const PrimitivePtr kPrimACosGrad = std::make_shared<Primitive>("ACosGrad");
 inline const PrimitivePtr kPrimAtanGrad = std::make_shared<Primitive>("AtanGrad");
+inline const PrimitivePtr kPrimAsinhGrad = std::make_shared<Primitive>("AsinhGrad");
+inline const PrimitivePtr kPrimAcoshGrad = std::make_shared<Primitive>("AcoshGrad");
 inline const PrimitivePtr kPrimFloorMod = std::make_shared<Primitive>("FloorMod");
 inline const PrimitivePtr kPrimWhere = std::make_shared<Primitive>("Where");
 inline const PrimitivePtr kPrimIdentityMath = std::make_shared<Primitive>("Identity", kSideEffectPropagate);
@@ -554,7 +562,9 @@ inline const PrimitivePtr kPrimPriorBox = std::make_shared<Primitive>("PriorBox"
 inline const PrimitivePtr kPrimQuantDTypeCast = std::make_shared<Primitive>("QuantDTypeCast");
 inline const PrimitivePtr kPrimWhile = std::make_shared<Primitive>("While");
 inline const PrimitivePtr kPrimPull = std::make_shared<Primitive>("Pull");
+inline const PrimitivePtr kPrimPush = std::make_shared<Primitive>("Push");
 inline const PrimitivePtr kPrimNPUAllocFloatStatus = std::make_shared<Primitive>("NPUAllocFloatStatus");
+inline const PrimitivePtr kPyFunc = std::make_shared<Primitive>("PyFunc");
 
 // Structures
 inline const PrimitivePtr kPrimMakeList = std::make_shared<Primitive>("make_list");
diff --git a/mindspore/core/ir/param_info.h b/mindspore/core/ir/param_info.h
index cba7dbc4071..490218c8cf0 100644
--- a/mindspore/core/ir/param_info.h
+++ b/mindspore/core/ir/param_info.h
@@ -72,6 +72,7 @@ class ParamInfo {
     this->be_cloned_ = true;
     this->be_cloned_index_.push_back(index);
     clone->init_in_server_ = this->init_in_server_;
+    clone->requires_aggr_ = this->requires_aggr_;
     clone->ClearParameter();
     return clone;
   }
@@ -91,6 +92,9 @@ class ParamInfo {
   void set_parameter(const ParameterPtr &parameter) { parameter_ = parameter; }
   void ClearParameter() { parameter_ = nullptr; }
 
+  bool requires_aggr() const { return requires_aggr_; }
+  void set_requires_aggr(bool requires_aggr) { requires_aggr_ = requires_aggr; }
+
  private:
   std::string name_{"Parameter"};
   bool requires_grad_{true};
@@ -105,6 +109,7 @@ class ParamInfo {
   bool cache_enable_{false};
   std::vector<int64_t> cache_shape_;
   ParameterPtr parameter_{nullptr};
+  bool requires_aggr_{true};
 };
 }  // namespace mindspore
 #endif  // MINDSPORE_CORE_IR_PARAM_INFO_H_
diff --git a/mindspore/core/load_mindir/anf_model_parser.cc b/mindspore/core/load_mindir/anf_model_parser.cc
index 68c1bbd0e8b..c38868d0d42 100644
--- a/mindspore/core/load_mindir/anf_model_parser.cc
+++ b/mindspore/core/load_mindir/anf_model_parser.cc
@@ -635,14 +635,12 @@ bool MSANFModelParser::ObtainValueNodeInMonadForm(const std::string &value_node_
                                                   const mind_ir::AttributeProto &attr_proto) {
   const std::string &ref_attr_name = attr_proto.ref_attr_name();
   if (ref_attr_name.find("UMonad") != std::string::npos) {
-    const ValuePtr kUMonad = std::make_shared<UMonad>();
     auto monad_abs = kUMonad->ToAbstract();
     auto new_value_node = NewValueNode(kUMonad);
     MS_EXCEPTION_IF_NULL(new_value_node);
     new_value_node->set_abstract(monad_abs);
     anfnode_build_map_[value_node_name] = new_value_node;
   } else if (ref_attr_name.find("IOMonad") != std::string::npos) {
-    const ValuePtr kIOMonad = std::make_shared<IOMonad>();
     auto monad_abs = kIOMonad->ToAbstract();
     auto new_value_node = NewValueNode(kIOMonad);
     MS_EXCEPTION_IF_NULL(new_value_node);
@@ -768,17 +766,22 @@ std::unordered_map<std::string, abstract::AbstractBasePtr> MSANFModelParser::Get
   return kv;
 }
 
-CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph,
-                                                  const mind_ir::NodeProto &node_proto) {
-  MS_EXCEPTION_IF_NULL(outputFuncGraph);
-  if (!node_proto.has_op_type()) {
-    MS_LOG(ERROR) << "Get CNode op_type failed!";
-    return nullptr;
-  }
-  const std::string &node_name = node_proto.output(0);
-  const std::string &fullname_with_scope = node_proto.domain();
+AnfNodePtr MSANFModelParser::BuildOperatorNode(const mind_ir::NodeProto &node_proto) {
+  const std::string kOperatorTypeFlag = std::string("REF::");
+  const size_t kOpTypeFlagSize = kOperatorTypeFlag.length();
   const std::string &node_type = node_proto.op_type();
+  MS_LOG(DEBUG) << "Process Operator :" << node_type;
+  // Operator maybe CNode,FuncGraph or Parameter.
 
+  if (node_type.size() > kOpTypeFlagSize && node_type.substr(0, kOpTypeFlagSize) == kOperatorTypeFlag) {
+    auto it = anfnode_build_map_.find(node_type.substr(kOpTypeFlagSize));
+    if (it != anfnode_build_map_.end()) {
+      return it->second;
+    }
+    MS_LOG(EXCEPTION) << "Can't find the ref:" << node_type;
+  }
+
+  // Operator is  primitive.
   std::shared_ptr<Primitive> prim;
   auto op_primc_fns = ops::OpPrimCRegister::GetInstance().GetPrimCMap();
   if (op_primc_fns.find(node_type) != op_primc_fns.end()) {
@@ -794,52 +797,65 @@ CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFunc
     }
   }
   MS_EXCEPTION_IF_NULL(prim);
+  for (int i = 0; i < node_proto.attribute_size(); ++i) {
+    const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i);
+    // CNode abstract
+    if (attr_proto.ref_attr_name().find("shape:") != string::npos) {
+      continue;
+    }
+    if (!GetAttrValueForCNode(prim, attr_proto)) {
+      MS_LOG(EXCEPTION) << "Parser prim: " << node_type << " attributes error : " << attr_proto.DebugString();
+    }
+  }
+  prim->set_attr("is_load", MakeValue(true));
+  return std::make_shared<ValueNode>(prim);
+}
+
+// Set CNode abstract.
+void MSANFModelParser::SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr) {
+  const std::string &node_type = node_proto.op_type();
+  // Handle control flow operator.
+  auto operatorPtr = cnode_ptr->input(0);
+  // Set abstract of switch(c,f,t),switchLayer(c,tup) and
+  // partial(func,args) to null
+  auto prim = GetValueNode<PrimitivePtr>(operatorPtr);
+  if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim) ||
+      IsPrimitiveEquals(prim::kPrimPartial, prim)) {
+    cnode_ptr->set_abstract(nullptr);
+    return;
+  }
+  // Set abstract of switch(c,f,t)() to null
+  prim = GetCNodePrimitive(operatorPtr);
+  if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) {
+    cnode_ptr->set_abstract(nullptr);
+    return;
+  }
 
   std::unordered_map<std::string, abstract::AbstractBasePtr> kv;
   string shape_ref_attr_name;
+
   for (int i = 0; i < node_proto.attribute_size(); ++i) {
     const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i);
     if (attr_proto.ref_attr_name().find("shape:") != string::npos) {
       shape_ref_attr_name = attr_proto.ref_attr_name();
       kv = GetAbstractForCNode(attr_proto);
-      continue;
-    }
-
-    if (!GetAttrValueForCNode(prim, attr_proto)) {
-      MS_LOG(ERROR) << "Get CNode attr failed!";
-      return nullptr;
+      break;
     }
   }
 
-  std::vector<AnfNodePtr> inputs;
-  inputs.clear();
-  for (int i = 0; i < node_proto.input_size(); ++i) {
-    const std::string &input_name = node_proto.input(i);
-    if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) {
-      MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed";
-      return nullptr;
-    }
-
-    inputs.push_back(anfnode_build_map_[input_name]);
-  }
-  prim->set_attr("is_load", MakeValue(true));
-  CNodePtr cnode_ptr;
-  cnode_ptr = outputFuncGraph->NewCNode(prim, inputs);
-  MS_EXCEPTION_IF_NULL(cnode_ptr);
-
+  // Because there is not context in unit test,
+  // abstract->broaden() is replaced by abstract->set_value(kAnyValue).
   if (kv.size() == 0) {
     if (node_type == "UpdateState") {
-      const ValuePtr kUMonad = std::make_shared<UMonad>();
-      auto monad_abs = kUMonad->ToAbstract();
-      cnode_ptr->set_abstract(monad_abs);
+      cnode_ptr->set_abstract(kUMonad->ToAbstract());
     } else if (node_type == "Depend") {
-      const ValuePtr kBool = std::make_shared<BoolImm>(true);
       cnode_ptr->set_abstract(kBool->ToAbstract());
     } else {
       AbstractBasePtrList elem;
       for (size_t index = 1; index < cnode_ptr->inputs().size(); ++index) {
         auto abs = cnode_ptr->input(index)->abstract();
         if (abs != nullptr) {
+          abs->set_value(kAnyValue);
           elem.push_back(abs);
         }
       }
@@ -849,22 +865,56 @@ CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFunc
     }
   } else if (kv.size() == 1) {
     std::unordered_map<std::string, abstract::AbstractBasePtr>::iterator iter = kv.begin();
-    cnode_ptr->set_abstract(iter->second);
+    if (iter->second != nullptr) {
+      iter->second->set_value(kAnyValue);
+      cnode_ptr->set_abstract(iter->second);
+    }
   } else {
     auto abstract = ParserAttrShape(shape_ref_attr_name, kv);
     if (abstract == nullptr) {
+      cnode_ptr->set_abstract(nullptr);
       MS_LOG(ERROR) << "Node's attribute is nullptr.";
+    } else {
+      abstract->set_value(kAnyValue);
+      cnode_ptr->set_abstract(abstract);
+    }
+  }
+}
+
+CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph,
+                                                  const mind_ir::NodeProto &node_proto) {
+  MS_EXCEPTION_IF_NULL(outputFuncGraph);
+  if (!node_proto.has_op_type()) {
+    MS_LOG(ERROR) << "Get CNode op_type failed!";
+    return nullptr;
+  }
+  const std::string &node_name = node_proto.output(0);
+  MS_LOG(DEBUG) << "Process CNode: " << node_name;
+  // Build inputs.
+  std::vector<AnfNodePtr> inputs;
+  inputs.push_back(BuildOperatorNode(node_proto));
+  for (int i = 0; i < node_proto.input_size(); ++i) {
+    const std::string &input_name = node_proto.input(i);
+    if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) {
+      MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed";
       return nullptr;
     }
-    cnode_ptr->set_abstract(abstract);
+    inputs.push_back(anfnode_build_map_[input_name]);
   }
 
+  CNodePtr cnode_ptr = outputFuncGraph->NewCNode(inputs);
+  MS_EXCEPTION_IF_NULL(cnode_ptr);
+  SetCNodeAbastract(node_proto, cnode_ptr);
+
+  const std::string &fullname_with_scope = node_proto.domain();
   string debug_info_name = ParseCNodeName(node_name);
   auto debug_info_ptr = std::make_shared<NodeDebugInfo>(debug_info_name);
   cnode_ptr->set_debug_info(debug_info_ptr);
   cnode_ptr->set_fullname_with_scope(fullname_with_scope);
   cnode_ptr->set_load_flag(true);
-
+  if (anfnode_build_map_.count(node_name) > 0) {
+    MS_LOG(EXCEPTION) << "Duplicate CNode name: " << node_name;
+  }
   anfnode_build_map_[node_name] = cnode_ptr;
   return cnode_ptr;
 }
@@ -992,11 +1042,41 @@ FuncGraphPtr MSANFModelParser::Parse(const mind_ir::ModelProto &model_proto) {
     MS_LOG(ERROR) << "Parse configuration info for pb file failed!";
   }
   const mind_ir::GraphProto &graphBuild = model_proto.graph();
+
+  // Forward declare FuncGraph name
+  // Compatible with the previous proto.
+  if (graphBuild.has_name()) {
+    anfnode_build_map_[graphBuild.name()] = std::make_shared<ValueNode>(dstGraph);
+  }
+  for (int i = 0; i < model_proto.functions_size(); ++i) {
+    FuncGraphPtr graph = std::make_shared<FuncGraph>();
+    const auto &graph_proto = model_proto.functions(i);
+    if (!graph_proto.has_name()) {
+      MS_LOG(EXCEPTION) << "The function has not a name. Please export mindIR again. ";
+    }
+    if (anfnode_build_map_.count(graph_proto.name()) > 0) {
+      MS_LOG(EXCEPTION) << "There is a duplication function graph name: " << graph_proto.name();
+    }
+    anfnode_build_map_[graph_proto.name()] = std::make_shared<ValueNode>(graph);
+  }
+
+  // Parser the proto.
   if (!BuildFuncGraph(dstGraph, graphBuild)) {
     MS_LOG(ERROR) << "Build funcgraph failed!";
     return nullptr;
   }
-  MS_LOG(INFO) << "Parse pb to build FuncGraph Success!";
+  MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success! " << graphBuild.name();
+  for (int i = 0; i < model_proto.functions_size(); ++i) {
+    const auto &graph_proto = model_proto.functions(i);
+    FuncGraphPtr graph = GetValueNode<FuncGraphPtr>(anfnode_build_map_[graph_proto.name()]);
+    if (!BuildFuncGraph(graph, graph_proto)) {
+      MS_LOG(ERROR) << "Build funcgraph failed!";
+      return nullptr;
+    }
+    MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success! " << graph_proto.name();
+  }
+  // Release resource
+  anfnode_build_map_.clear();
   return dstGraph;
 }
 }  // namespace mindspore
diff --git a/mindspore/core/load_mindir/anf_model_parser.h b/mindspore/core/load_mindir/anf_model_parser.h
index 4d7ce1adecb..dffc78deeff 100644
--- a/mindspore/core/load_mindir/anf_model_parser.h
+++ b/mindspore/core/load_mindir/anf_model_parser.h
@@ -62,6 +62,8 @@ class MSANFModelParser {
   ValuePtr ObtainCNodeAttrInSingleScalarForm(const mind_ir::AttributeProto &attr_proto);
   bool ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, const mind_ir::AttributeProto &attr_proto);
   bool BuildValueNodeForFuncGraph(const mind_ir::NodeProto &node_proto);
+  AnfNodePtr BuildOperatorNode(const mind_ir::NodeProto &node_proto);
+  void SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr);
   bool ObtainValueNodeInTensorForm(const string &value_node_name, const mind_ir::TensorProto &attr_tensor);
   bool ObtainValueNodeInTupleTensorForm(const string &value_node_name, const mind_ir::AttributeProto &attr_proto);
   bool GetAttrValueForValueNode(const std::string &value_node_name, const mind_ir::AttributeProto &attr_tensor);
diff --git a/mindspore/core/load_mindir/load_model.cc b/mindspore/core/load_mindir/load_model.cc
index 62574ee7db2..afc37e9ad45 100644
--- a/mindspore/core/load_mindir/load_model.cc
+++ b/mindspore/core/load_mindir/load_model.cc
@@ -92,7 +92,7 @@ bool get_all_files(const std::string &dir_in, std::vector<std::string> *files) {
     return false;
   }
   DIR *open_dir = opendir(dir_in.c_str());
-  if (NULL == open_dir) {
+  if (open_dir == NULL) {
     MS_LOG(EXCEPTION) << "open dir " << dir_in.c_str() << " failed";
   }
   dirent *p = nullptr;
@@ -217,7 +217,7 @@ std::shared_ptr<FuncGraph> LoadMindIR(const std::string &file_name, bool is_lite
       return nullptr;
     }
     abs_path[path_len] = '\0';
-    snprintf(abs_path + path_len, sizeof(abs_path), "variables");
+    snprintf(abs_path + path_len, sizeof(abs_path) - path_len, "variables");
     std::ifstream ifs(abs_path);
     if (ifs.good()) {
       MS_LOG(DEBUG) << "MindIR file has variables path, load parameter into graph.";
diff --git a/mindspore/core/mindrt/src/actor/actormgr.cc b/mindspore/core/mindrt/src/actor/actormgr.cc
index 686942aecc0..4c28eea3de4 100644
--- a/mindspore/core/mindrt/src/actor/actormgr.cc
+++ b/mindspore/core/mindrt/src/actor/actormgr.cc
@@ -46,6 +46,30 @@ ActorMgr::ActorMgr() : actors(), procotols(), urls() {
 
 ActorMgr::~ActorMgr() {}
 
+void ActorMgr::Initialize(bool use_inner_pool, size_t thread_num) {
+  bool expected = false;
+  if (!initialized_.compare_exchange_strong(expected, true)) {
+    MS_LOG(DEBUG) << "Actor Manager has been initialized before";
+    return;
+  }
+  // create inner thread pool only when specified use_inner_pool
+  if (use_inner_pool) {
+    inner_pool_ = ActorThreadPool::CreateThreadPool(thread_num);
+  }
+}
+
+void ActorMgr::SetActorReady(const ActorReference &actor) const {
+  // use inner thread pool or actor thread pool created externally
+  // priority to use actor thread pool
+  ActorThreadPool *pool = actor->pool_ ? actor->pool_ : inner_pool_;
+  if (pool == nullptr) {
+    MS_LOG(ERROR) << "ThreadPool is nullptr, " << actor->pool_ << ", " << inner_pool_
+                  << ", actor: " << actor->GetAID().Name();
+    return;
+  }
+  pool->PushActorToQueue(actor.get());
+}
+
 const std::string ActorMgr::GetUrl(const std::string &protocol) {
   auto it = procotols.find(protocol);
   if (it != procotols.end()) {
@@ -109,6 +133,10 @@ void ActorMgr::Finalize() {
     MS_LOG(INFO) << "finalize IOMgr=" << mgrIt->first.c_str();
     mgrIt->second->Finish();
   }
+
+  // delete actor thread pool if use_inner_pool
+  delete inner_pool_;
+  inner_pool_ = nullptr;
   MS_LOG(INFO) << "mindrt IOMGRS finish exiting.";
 }
 
@@ -171,7 +199,7 @@ int ActorMgr::Send(const AID &to, std::unique_ptr<MessageBase> &&msg, bool remot
   }
 }
 
-AID ActorMgr::Spawn(ActorReference &actor, bool shareThread, bool start) {
+AID ActorMgr::Spawn(const ActorReference &actor, bool shareThread, bool start) {
   actorsMutex.lock();
   if (actors.find(actor->GetAID().Name()) != actors.end()) {
     actorsMutex.unlock();
diff --git a/mindspore/core/mindrt/src/actor/actormgr.h b/mindspore/core/mindrt/src/actor/actormgr.h
index c4273b821b7..967b77a0b3e 100644
--- a/mindspore/core/mindrt/src/actor/actormgr.h
+++ b/mindspore/core/mindrt/src/actor/actormgr.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H
 #define MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H
 
+#include <atomic>
 #include <set>
 #include <utility>
 #include <map>
@@ -51,28 +52,24 @@ class ActorMgr {
   ~ActorMgr();
 
   void Finalize();
-  void Initialize() {}
+  // initialize actor manager resource, do not create inner thread pool by default
+  void Initialize(bool use_inner_pool = false, size_t thread_num = 1);
+
   void RemoveActor(const std::string &name);
   ActorBase *GetActor(const AID &id);
   const std::string GetUrl(const std::string &protocol = "tcp");
   void AddUrl(const std::string &protocol, const std::string &url);
   void AddIOMgr(const std::string &protocol, const std::shared_ptr<IOMgr> &ioMgr);
   int Send(const AID &to, std::unique_ptr<MessageBase> &&msg, bool remoteLink = false, bool isExactNotRemote = false);
-  AID Spawn(ActorReference &actor, bool shareThread = true, bool start = true);
+  AID Spawn(const ActorReference &actor, bool shareThread = true, bool start = true);
   void Terminate(const AID &id);
   void TerminateAll();
   void Wait(const AID &pid);
   inline const std::string &GetDelegate() const { return delegate; }
 
   inline void SetDelegate(const std::string &d) { delegate = d; }
-  inline void SetActorReady(std::shared_ptr<ActorBase> &actor) const {
-    auto pool = actor->pool_;
-    if (pool == nullptr) {
-      MS_LOG(ERROR) << "ThreadPool is nullptr, actor: " << actor->GetAID().Name();
-      return;
-    }
-    pool->PushActorToQueue(actor.get());
-  }
+
+  void SetActorReady(const ActorReference &actor) const;
   void SetActorStatus(const AID &pid, bool start);
 
  private:
@@ -83,6 +80,13 @@ class ActorMgr {
       return false;
     }
   }
+  // in order to avoid being initialized many times
+  std::atomic_bool initialized_{false};
+
+  // actor manager support running on inner thread pool,
+  // or running on other thread pool created independently externally
+  ActorThreadPool *inner_pool_{nullptr};
+
   // Map of all local spawned and running processes.
   std::map<std::string, ActorReference> actors;
 #ifndef MS_COMPILE_IOS
diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.cc b/mindspore/core/mindrt/src/thread/actor_threadpool.cc
index 2427a84da48..58966fca13b 100644
--- a/mindspore/core/mindrt/src/thread/actor_threadpool.cc
+++ b/mindspore/core/mindrt/src/thread/actor_threadpool.cc
@@ -13,7 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#ifndef _MSC_VER
+#include <sched.h>
+#include <unistd.h>
+#endif
 #include "thread/actor_threadpool.h"
 #include "thread/core_affinity.h"
 
@@ -26,6 +29,7 @@ void ActorWorker::CreateThread(ActorThreadPool *pool) {
 }
 
 void ActorWorker::RunWithSpin() {
+  SetAffinity();
 #if !defined(__APPLE__) && !defined(SUPPORT_MSVC)
   static std::atomic_int index = {0};
   pthread_setname_np(pthread_self(), ("ActorThread_" + std::to_string(index++)).c_str());
@@ -116,7 +120,7 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) {
     actor_queue_.push(actor);
 #endif
   }
-  THREAD_INFO("actor[%s] enqueue success", actor->GetAID().Name().c_str());
+  THREAD_DEBUG("actor[%s] enqueue success", actor->GetAID().Name().c_str());
   // active one idle actor thread if exist
   for (size_t i = 0; i < actor_thread_num_; ++i) {
     auto worker = reinterpret_cast<ActorWorker *>(workers_[i]);
@@ -126,11 +130,13 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) {
   }
 }
 
-int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num) {
+int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector<int> &core_list) {
 #ifdef USE_HQUEUE
   actor_queue_.Init(MAX_READY_ACTOR_NR);
 #endif
-
+#ifdef BIND_CORE
+  affinity_->SetCoreId(core_list);
+#endif
   size_t core_num = std::thread::hardware_concurrency();
   THREAD_INFO("ThreadInfo, Actor: [%zu], All: [%zu], CoreNum: [%zu]", actor_thread_num, all_thread_num, core_num);
   actor_thread_num_ = actor_thread_num < core_num ? actor_thread_num : core_num;
@@ -142,27 +148,56 @@ int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_nu
     std::lock_guard<std::mutex> _l(pool_mutex_);
     auto worker = new (std::nothrow) ActorWorker();
     THREAD_ERROR_IF_NULL(worker);
+#ifdef BIND_CORE
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    if (core_list.size() > 0) {
+      CPU_SET(core_list[workers_.size() % core_list.size()], &mask);
+    }
+    worker->set_mask(mask);
+#endif
     worker->CreateThread(this);
     workers_.push_back(worker);
     THREAD_INFO("create actor thread[%zu]", i);
   }
   size_t kernel_thread_num = all_thread_num - actor_thread_num_;
   if (kernel_thread_num > 0) {
-    return ThreadPool::CreateThreads(kernel_thread_num);
+    return ThreadPool::CreateThreads(kernel_thread_num, core_list);
   }
   return THREAD_OK;
 }
 
-ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num) {
+ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode) {
   ActorThreadPool *pool = new (std::nothrow) ActorThreadPool();
   if (pool == nullptr) {
     return nullptr;
   }
-  int ret = pool->CreateThreads(actor_thread_num, all_thread_num);
+  int ret;
+  std::vector<int> core_list;
+#ifdef BIND_CORE
+  ret = pool->InitAffinityInfo();
   if (ret != THREAD_OK) {
     delete pool;
     return nullptr;
   }
+  core_list = pool->affinity_->GetCoreId(all_thread_num, bind_mode);
+#endif  // BIND_CORE
+  ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list);
+  if (ret != THREAD_OK) {
+    delete pool;
+    return nullptr;
+  }
+
+  return pool;
+}
+
+ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num,
+                                                   const std::vector<int> &core_list) {
+  ActorThreadPool *pool = new (std::nothrow) ActorThreadPool();
+  if (pool == nullptr) {
+    return nullptr;
+  }
+  int ret;
 #ifdef BIND_CORE
   ret = pool->InitAffinityInfo();
   if (ret != THREAD_OK) {
@@ -170,6 +205,12 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size
     return nullptr;
   }
 #endif  // BIND_CORE
+  ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list);
+  if (ret != THREAD_OK) {
+    delete pool;
+    return nullptr;
+  }
+
   return pool;
 }
 
@@ -178,7 +219,7 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t thread_num) {
   if (pool == nullptr) {
     return nullptr;
   }
-  int ret = pool->CreateThreads(thread_num, thread_num);
+  int ret = pool->CreateThreads(thread_num, thread_num, {});
   if (ret != THREAD_OK) {
     delete pool;
     return nullptr;
diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.h b/mindspore/core/mindrt/src/thread/actor_threadpool.h
index b588844388c..bb4bc4f57ba 100644
--- a/mindspore/core/mindrt/src/thread/actor_threadpool.h
+++ b/mindspore/core/mindrt/src/thread/actor_threadpool.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_CORE_MINDRT_RUNTIME_ACTOR_THREADPOOL_H_
 
 #include <queue>
+#include <vector>
 #include <mutex>
 #include <atomic>
 #include <condition_variable>
@@ -43,7 +44,10 @@ class ActorWorker : public Worker {
 class ActorThreadPool : public ThreadPool {
  public:
   // create ThreadPool that contains actor thread and kernel thread
-  static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num);
+  static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode);
+
+  static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num,
+                                           const std::vector<int> &core_list);
   // create ThreadPool that contains only actor thread
   static ActorThreadPool *CreateThreadPool(size_t thread_num);
   ~ActorThreadPool() override;
@@ -53,7 +57,7 @@ class ActorThreadPool : public ThreadPool {
 
  private:
   ActorThreadPool() {}
-  int CreateThreads(size_t actor_thread_num, size_t all_thread_num);
+  int CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector<int> &core_list);
   size_t actor_thread_num_{0};
 
   std::mutex actor_mutex_;
diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc
index 72417f018c7..f24f0d613cd 100644
--- a/mindspore/core/mindrt/src/thread/core_affinity.cc
+++ b/mindspore/core/mindrt/src/thread/core_affinity.cc
@@ -248,21 +248,31 @@ int CoreAffinity::InitHardwareCoreInfo() {
   return THREAD_OK;
 }
 
-int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
+std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) {
+  std::vector<int> bind_id;
   if (core_num_ != sorted_id_.size()) {
     THREAD_ERROR("init sorted core id failed");
-    return THREAD_ERROR;
+    return bind_id;
   }
-  bind_id_.clear();
   if (bind_mode == Power_Higher || bind_mode == Power_NoBind) {
     for (size_t i = 0; i < thread_num; ++i) {
-      bind_id_.push_back(sorted_id_[i % core_num_]);
+      bind_id.push_back(sorted_id_[i % core_num_]);
     }
   } else if (bind_mode == Power_Middle) {
     for (size_t i = 0; i < thread_num; ++i) {
-      bind_id_.push_back(sorted_id_[(i + higher_num_) % core_num_]);
+      bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]);
     }
   } else {
+    return bind_id;
+  }
+  return bind_id;
+}
+void CoreAffinity::SetCoreId(const std::vector<int> &core_list) { bind_id_ = core_list; }
+
+int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
+  bind_id_.clear();
+  bind_id_ = GetCoreId(thread_num, bind_mode);
+  if (bind_id_.empty()) {
     return THREAD_ERROR;
   }
   return THREAD_OK;
diff --git a/mindspore/core/mindrt/src/thread/core_affinity.h b/mindspore/core/mindrt/src/thread/core_affinity.h
index 6dc3aae44ae..7138e41d131 100644
--- a/mindspore/core/mindrt/src/thread/core_affinity.h
+++ b/mindspore/core/mindrt/src/thread/core_affinity.h
@@ -43,6 +43,8 @@ class CoreAffinity {
   int BindThreads(const std::vector<Worker *> &workers, const std::vector<int> &core_list);
   int BindThreads(const std::vector<Worker *> &workers, BindMode bind_mode);
   int BindProcess(BindMode bind_mode) const;
+  std::vector<int> GetCoreId(size_t thread_num, BindMode bind_mode);
+  void SetCoreId(const std::vector<int> &core_list);
 
  private:
 #ifdef BIND_CORE
diff --git a/mindspore/core/mindrt/src/thread/threadlog.h b/mindspore/core/mindrt/src/thread/threadlog.h
index 5318fa9d899..8594d852daa 100644
--- a/mindspore/core/mindrt/src/thread/threadlog.h
+++ b/mindspore/core/mindrt/src/thread/threadlog.h
@@ -20,14 +20,23 @@
 namespace mindspore {
 #ifdef THREAD_POOL_DEBUG
 #include <stdio.h>
+#define THREAD_DEBUG(content, args...) \
+  { printf("[DEBUG] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
 #define THREAD_INFO(content, args...) \
   { printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
 #define THREAD_ERROR(content, args...) \
   { printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
 #else
+#define THREAD_DEBUG(content, ...)
 #define THREAD_INFO(content, ...)
+#if defined(__ANDROID__)
+#include <android/log.h>
+#define THREAD_ERROR(content, args...) \
+  { __android_log_print(ANDROID_LOG_ERROR, "MS_LITE", "%s|%d: " #content "\r\n", __func__, __LINE__, ##args); }
+#else
 #define THREAD_ERROR(content, ...)
 #endif
+#endif
 
 #define THREAD_ERROR_IF_NULL(ptr) \
   do {                            \
diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc
index 1690e91f291..fec5dedfd8f 100644
--- a/mindspore/core/mindrt/src/thread/threadpool.cc
+++ b/mindspore/core/mindrt/src/thread/threadpool.cc
@@ -13,7 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#ifndef _MSC_VER
+#include <sched.h>
+#include <unistd.h>
+#endif
 #include "thread/threadpool.h"
 #include "thread/core_affinity.h"
 
@@ -31,7 +34,28 @@ Worker::~Worker() {
 
 void Worker::CreateThread() { thread_ = std::thread(&Worker::Run, this); }
 
+void Worker::SetAffinity() {
+#ifdef BIND_CORE
+#ifdef __ANDROID__
+  int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_);
+  if (ret != THREAD_OK) {
+    THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", gettid(), errno);
+  }
+  return;
+#else
+#if !defined(__APPLE__) && !defined(SUPPORT_MSVC)
+  int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask_);
+  if (ret != THREAD_OK) {
+    THREAD_ERROR("bind thread %lu to cpu failed. ERROR %d", pthread_self(), errno);
+  }
+  return;
+#endif
+#endif
+#endif
+}
+
 void Worker::Run() {
+  SetAffinity();
 #if !defined(__APPLE__) && !defined(SUPPORT_MSVC)
   static std::atomic_int index = {0};
   pthread_setname_np(pthread_self(), ("KernelThread_" + std::to_string(index++)).c_str());
@@ -105,7 +129,7 @@ ThreadPool::~ThreadPool() {
   THREAD_INFO("destruct success");
 }
 
-int ThreadPool::CreateThreads(size_t thread_num) {
+int ThreadPool::CreateThreads(size_t thread_num, const std::vector<int> &core_list) {
   size_t core_num = std::thread::hardware_concurrency();
   thread_num = thread_num < core_num ? thread_num : core_num;
   THREAD_INFO("ThreadInfo, Num: [%zu], CoreNum: [%zu]", thread_num, core_num);
@@ -117,6 +141,14 @@ int ThreadPool::CreateThreads(size_t thread_num) {
   for (size_t i = 0; i < thread_num; ++i) {
     auto worker = new (std::nothrow) Worker();
     THREAD_ERROR_IF_NULL(worker);
+#ifdef BIND_CORE
+    cpu_set_t mask;
+    CPU_ZERO(&mask);
+    if (core_list.size() > 0) {
+      CPU_SET(core_list[workers_.size() % core_list.size()], &mask);
+    }
+    worker->set_mask(mask);
+#endif
     worker->CreateThread();
     workers_.push_back(worker);
     THREAD_INFO("create kernel thread[%zu]", i);
@@ -127,7 +159,7 @@ int ThreadPool::CreateThreads(size_t thread_num) {
 int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) const {
   // distribute task to the KernelThread and the idle ActorThread,
   // if the task num is greater than the KernelThread num
-  THREAD_INFO("launch: %d", task_num);
+  THREAD_DEBUG("launch: %d", task_num);
   Task task = {func, content};
 
   DistributeTask(&task, task_num);
@@ -266,12 +298,12 @@ int ThreadPool::SetProcessAffinity(BindMode bind_mode) const {
 #endif  // BIND_CORE
 }
 
-ThreadPool *ThreadPool::CreateThreadPool(size_t thread_num) {
+ThreadPool *ThreadPool::CreateThreadPool(size_t thread_num, const std::vector<int> &core_list) {
   ThreadPool *pool = new (std::nothrow) ThreadPool();
   if (pool == nullptr) {
     return nullptr;
   }
-  int ret = pool->CreateThreads(thread_num);
+  int ret = pool->CreateThreads(thread_num, core_list);
   if (ret != THREAD_OK) {
     delete pool;
     return nullptr;
diff --git a/mindspore/core/mindrt/src/thread/threadpool.h b/mindspore/core/mindrt/src/thread/threadpool.h
index f6b478391ac..4db2c8e4aea 100644
--- a/mindspore/core/mindrt/src/thread/threadpool.h
+++ b/mindspore/core/mindrt/src/thread/threadpool.h
@@ -73,16 +73,21 @@ class Worker {
 
   std::thread::id thread_id() const { return thread_.get_id(); }
 #ifdef BIND_CORE
+  void set_mask(const cpu_set_t &mask) { mask_ = mask; }
   pthread_t handle() { return thread_.native_handle(); }
 #endif
 
  protected:
+  void SetAffinity();
   void Run();
   void YieldAndDeactive();
   void WaitUntilActive();
 
   bool alive_{true};
   std::thread thread_;
+#ifdef BIND_CORE
+  cpu_set_t mask_;
+#endif
   std::atomic_int status_{kThreadBusy};
 
   std::mutex mutex_;
@@ -98,7 +103,7 @@ class Worker {
 
 class ThreadPool {
  public:
-  static ThreadPool *CreateThreadPool(size_t thread_num);
+  static ThreadPool *CreateThreadPool(size_t thread_num, const std::vector<int> &core_list = {});
   virtual ~ThreadPool();
 
   size_t thread_num() const { return workers_.size(); }
@@ -112,7 +117,7 @@ class ThreadPool {
  protected:
   ThreadPool() = default;
 
-  int CreateThreads(size_t thread_num);
+  int CreateThreads(size_t thread_num, const std::vector<int> &core_list);
 
   int InitAffinityInfo();
 
diff --git a/mindspore/core/ops/apply_momentum.cc b/mindspore/core/ops/apply_momentum.cc
index 1d1c38c319f..888081700fc 100644
--- a/mindspore/core/ops/apply_momentum.cc
+++ b/mindspore/core/ops/apply_momentum.cc
@@ -63,6 +63,9 @@ AbstractBasePtr ApplyMomentumInfer(const abstract::AnalysisEnginePtr &, const Pr
   auto prim_name = primitive->name();
   (void)CheckAndConvertUtils::CheckInteger("apply_momentum_infer", SizeToLong(input_args.size()), kEqual, 5, prim_name);
 
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   // Infer shape
   auto v_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
 
diff --git a/mindspore/core/ops/arg_min.cc b/mindspore/core/ops/arg_min.cc
index 532a2f9b6e6..ae92481a448 100644
--- a/mindspore/core/ops/arg_min.cc
+++ b/mindspore/core/ops/arg_min.cc
@@ -42,6 +42,7 @@ AbstractBasePtr ArgMinInfer(const abstract::AnalysisEnginePtr &, const Primitive
 
   // Infer shape
   auto axis = GetValue<int64_t>(primitive->GetAttr(kAxis));
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   auto x_rank = SizeToLong(x_shape.size());
   CheckAndConvertUtils::CheckInRange<int64_t>("axis", axis, kIncludeLeft, {-x_rank, x_rank}, prim_name);
diff --git a/mindspore/core/ops/asin.cc b/mindspore/core/ops/asin.cc
index fb78967c815..dfdcabec6e1 100644
--- a/mindspore/core/ops/asin.cc
+++ b/mindspore/core/ops/asin.cc
@@ -32,6 +32,7 @@ AbstractBasePtr AsinInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   auto infer_shape = std::make_shared<abstract::Shape>(x_shape);
 
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   // Infer Type
   auto dtype = input_args[0]->BuildType();
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32, kInt32};
diff --git a/mindspore/core/ops/assert.cc b/mindspore/core/ops/assert.cc
index 1900e484c09..22755b87630 100644
--- a/mindspore/core/ops/assert.cc
+++ b/mindspore/core/ops/assert.cc
@@ -38,6 +38,9 @@ AbstractBasePtr AssertInfer(const abstract::AnalysisEnginePtr &, const Primitive
                             const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto op_name = primitive->name();
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   TypePtr condition;
   if (!(input_args[0]->BuildType()->type_id() == kObjectTypeTensorType)) {
     auto condition_values = GetValue<std::vector<bool>>(input_args[0]->BuildValue());
diff --git a/mindspore/core/ops/batch_to_space_nd.cc b/mindspore/core/ops/batch_to_space_nd.cc
index 2ba2a24a106..ffb6e66e6a6 100644
--- a/mindspore/core/ops/batch_to_space_nd.cc
+++ b/mindspore/core/ops/batch_to_space_nd.cc
@@ -92,7 +92,7 @@ std::vector<int64_t> BatchToSpaceND::get_block_shape() const {
   return GetValue<std::vector<int64_t>>(value_ptr);
 }
 
-void BatchToSpaceND::Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> crops) {
+void BatchToSpaceND::Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> crops) {
   this->set_crops(crops);
   this->set_block_shape(block_shape);
 }
diff --git a/mindspore/core/ops/batch_to_space_nd.h b/mindspore/core/ops/batch_to_space_nd.h
index 3a745b5f42e..99df67a6ba7 100644
--- a/mindspore/core/ops/batch_to_space_nd.h
+++ b/mindspore/core/ops/batch_to_space_nd.h
@@ -33,7 +33,7 @@ class BatchToSpaceND : public PrimitiveC {
   BatchToSpaceND() : PrimitiveC(kNameBatchToSpaceND) {}
   ~BatchToSpaceND() = default;
   MS_DECLARE_PARENT(BatchToSpaceND, PrimitiveC);
-  void Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> crops);
+  void Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> crops);
   void set_crops(std::vector<std::vector<int64_t>> crops);
   void set_block_shape(std::vector<int64_t> block_shape);
   std::vector<int64_t> get_block_shape() const;
diff --git a/mindspore/core/ops/conv2d.cc b/mindspore/core/ops/conv2d.cc
index c579f0ce6bb..07c493a5840 100644
--- a/mindspore/core/ops/conv2d.cc
+++ b/mindspore/core/ops/conv2d.cc
@@ -144,6 +144,9 @@ void Conv2DPadFunction(std::vector<int64_t> *output_hw, std::vector<int64_t> *pa
 abstract::ShapePtr Conv2dInferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto prim_name = primitive->name();
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   auto x_shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape());
   auto w_shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[1]->BuildShape());
   auto x_shape = x_shape_map[kShape];
diff --git a/mindspore/core/ops/cos.cc b/mindspore/core/ops/cos.cc
index 845261b3f6e..be4e80b1b62 100644
--- a/mindspore/core/ops/cos.cc
+++ b/mindspore/core/ops/cos.cc
@@ -32,7 +32,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 }
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
-  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   std::map<std::string, TypePtr> types;
diff --git a/mindspore/core/ops/fake_quant_with_min_max_vars.cc b/mindspore/core/ops/fake_quant_with_min_max_vars.cc
index 6c5fa3e8fd0..21ffb6c4dd7 100644
--- a/mindspore/core/ops/fake_quant_with_min_max_vars.cc
+++ b/mindspore/core/ops/fake_quant_with_min_max_vars.cc
@@ -47,7 +47,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
-  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr arg) { return arg == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   std::map<std::string, TypePtr> types;
diff --git a/mindspore/core/ops/grad/hshrink_grad.h b/mindspore/core/ops/grad/hshrink_grad.h
index 45e92b79b33..210b8b47965 100644
--- a/mindspore/core/ops/grad/hshrink_grad.h
+++ b/mindspore/core/ops/grad/hshrink_grad.h
@@ -25,7 +25,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameHShrinkGrad = "HShrinkGrad";
-class MS_CORE_API HShrinkGrad : public PrimitiveC {
+class HShrinkGrad : public PrimitiveC {
  public:
   HShrinkGrad() : PrimitiveC(kNameHShrinkGrad) { InitIOName({"gradients", "features"}, {"backprops"}); }
   ~HShrinkGrad() = default;
diff --git a/mindspore/core/ops/grad/soft_margin_loss_grad.h b/mindspore/core/ops/grad/soft_margin_loss_grad.h
index e5a47350ab1..152ff646fe6 100644
--- a/mindspore/core/ops/grad/soft_margin_loss_grad.h
+++ b/mindspore/core/ops/grad/soft_margin_loss_grad.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameSoftMarginLossGrad = "SoftMarginLossGrad";
-class MS_CORE_API SoftMarginLossGrad : public PrimitiveC {
+class SoftMarginLossGrad : public PrimitiveC {
  public:
   SoftMarginLossGrad() : PrimitiveC(kNameSoftMarginLossGrad) { InitIOName({"predict", "label", "dout"}, {"gradient"}); }
   ~SoftMarginLossGrad() = default;
diff --git a/mindspore/core/ops/hshrink.h b/mindspore/core/ops/hshrink.h
index 5bff01a8319..582e8847dea 100644
--- a/mindspore/core/ops/hshrink.h
+++ b/mindspore/core/ops/hshrink.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameHShrink = "HShrink";
-class MS_CORE_API HShrink : public PrimitiveC {
+class HShrink : public PrimitiveC {
  public:
   HShrink() : PrimitiveC(kNameHShrink) { InitIOName({"input_x"}, {"output"}); }
   ~HShrink() = default;
diff --git a/mindspore/core/ops/logical_not.cc b/mindspore/core/ops/logical_not.cc
index cc215908fbc..5b71d133ee9 100644
--- a/mindspore/core/ops/logical_not.cc
+++ b/mindspore/core/ops/logical_not.cc
@@ -32,6 +32,7 @@ abstract::ShapePtr LogicalNotInferShape(const PrimitivePtr &primitive, const std
 TypePtr LogicalNotInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(prim);
   auto op_name = prim->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto infer_dtype = input_args[0]->BuildType();
   std::set<TypePtr> local_bool = {kBool};
   return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, local_bool, op_name);
diff --git a/mindspore/core/ops/lrn.cc b/mindspore/core/ops/lrn.cc
index d7025310d65..d4eadbd1360 100644
--- a/mindspore/core/ops/lrn.cc
+++ b/mindspore/core/ops/lrn.cc
@@ -86,10 +86,11 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
-  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   std::map<std::string, TypePtr> types;
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   types.emplace("x", input_args[0]->BuildType());
   return CheckAndConvertUtils::CheckTensorTypeSame(types, valid_types, prim->name());
 }
diff --git a/mindspore/core/ops/max_pool.cc b/mindspore/core/ops/max_pool.cc
index c7e1618c459..4583fe0a196 100644
--- a/mindspore/core/ops/max_pool.cc
+++ b/mindspore/core/ops/max_pool.cc
@@ -82,6 +82,7 @@ namespace {
 abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto op_name = primitive->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto in_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->GetShapeTrack())[kShape];
   auto format = Format(GetValue<int64_t>(primitive->GetAttr(kFormat)));
   if (format == NHWC) {
@@ -123,7 +124,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 }
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
-  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   auto input_type = input_args[0]->BuildType();
diff --git a/mindspore/core/ops/ones_like.cc b/mindspore/core/ops/ones_like.cc
index 90e07ae6e88..d2b85398c0d 100644
--- a/mindspore/core/ops/ones_like.cc
+++ b/mindspore/core/ops/ones_like.cc
@@ -34,7 +34,9 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 }
 
 TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(primitive);
   auto op_name = primitive->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto infer_type = input_args[0]->BuildType();
   auto valid_type = common_valid_types;
   valid_type.insert(kBool);
diff --git a/mindspore/core/ops/pack.cc b/mindspore/core/ops/pack.cc
index 6bb6ce9a577..08965c86c18 100644
--- a/mindspore/core/ops/pack.cc
+++ b/mindspore/core/ops/pack.cc
@@ -58,6 +58,7 @@ AbstractBasePtr PackInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt
   MS_EXCEPTION_IF_NULL(primitive);
   auto prim_name = primitive->name();
 
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto x_shapes = input_args[0]->BuildShape()->cast<abstract::TupleShapePtr>()->shape();
   auto x_types = input_args[0]->BuildType()->cast<TuplePtr>()->elements();
   auto all_shape = _get_pack_shape(x_shapes, x_types, GetValue<int64_t>(primitive->GetAttr(kAxis)), prim_name);
diff --git a/mindspore/core/ops/rank.cc b/mindspore/core/ops/rank.cc
index b969ec1fd05..1bf9569ccc3 100644
--- a/mindspore/core/ops/rank.cc
+++ b/mindspore/core/ops/rank.cc
@@ -22,6 +22,7 @@ namespace {
 TypePtr RankInferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(prim);
   auto op_name = prim->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto infer_dtype = input_args[0]->BuildType();
   CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, {kTensorType}, op_name);
   return kTypeNone;
diff --git a/mindspore/core/ops/reduce_sum.cc b/mindspore/core/ops/reduce_sum.cc
index ec4e5f7ee1f..7bf3912b729 100644
--- a/mindspore/core/ops/reduce_sum.cc
+++ b/mindspore/core/ops/reduce_sum.cc
@@ -15,12 +15,178 @@
  */
 
 #include <memory>
+#include <algorithm>
 
 #include "ops/reduce_sum.h"
 #include "ops/op_utils.h"
 
 namespace mindspore {
 namespace ops {
-REGISTER_PRIMITIVE_C(kNameReduceSum, ReduceSum);
+namespace {
+int64_t InferImplReduceFuncCheckAxis(const int64_t &axis, const size_t dim) {
+  int64_t dim_ = static_cast<int64_t>(dim);
+  if (axis < -dim_ || axis >= dim_) {
+    MS_LOG(EXCEPTION) << "axis should be in [" << -dim_ << ", " << dim_ << "). But got axis = " << axis;
+  }
+  int64_t ret_axis = axis;
+  if (axis >= -dim_ && axis < 0) {
+    ret_axis += dim_;
+  }
+  return ret_axis;
+}
+
+void InferImplReduceFuncCalShape(ShapeVector *shape, const ShapeVector &x_shape, const ValuePtr &axis,
+                                 bool keep_dims_value) {
+  if (axis->isa<ValueTuple>() || axis->isa<ValueList>()) {
+    auto axis_ptr_list =
+      axis->isa<ValueTuple>() ? axis->cast<ValueTuplePtr>()->value() : axis->cast<ValueListPtr>()->value();
+    if (!axis_ptr_list.size()) {
+      if (keep_dims_value) (void)shape->insert(shape->end(), x_shape.size(), 1);
+    } else {
+      (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end());
+      ValuePtrList axis_items = axis_ptr_list;
+      ValuePtrList::iterator it;
+      if (keep_dims_value) {
+        for (it = axis_items.begin(); it != axis_items.end(); ++it) {
+          auto axis_value = GetValue<int64_t>(*it);
+          shape->at(axis_value) = 1;
+        }
+      } else {
+        std::vector<int64_t> axis_value_list;
+        for (it = axis_items.begin(); it != axis_items.end(); ++it) {
+          auto axis_value = GetValue<int64_t>(*it);
+          auto axis_positive_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size());
+          axis_value_list.push_back(axis_positive_value);
+        }
+        std::sort(axis_value_list.begin(), axis_value_list.end());
+        std::vector<int64_t>::reverse_iterator it_re;
+        for (it_re = axis_value_list.rbegin(); it_re != axis_value_list.rend(); ++it_re) {
+          (void)shape->erase(shape->begin() + *it_re);
+        }
+      }
+    }
+  } else if (axis->isa<Int32Imm>() || axis->isa<Int64Imm>()) {
+    (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end());
+    int64_t axis_value = GetValue<int64_t>(axis);
+    axis_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size());
+    if (keep_dims_value) {
+      shape->at(axis_value) = 1;
+    } else {
+      (void)shape->erase(shape->begin() + axis_value);
+    }
+  } else {
+    MS_LOG(EXCEPTION) << "Axis should be one of types: [int/tuple/list].";
+  }
+  return;
+}
+
+abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(primitive);
+  auto shape_ptr = CheckAndConvertUtils::GetTensorInputShape("ReduceSum", input_args, 0);
+  auto input_shape = shape_ptr->shape();
+  auto input_min_shape = shape_ptr->min_shape();
+  auto input_max_shape = shape_ptr->max_shape();
+  auto keep_dimis_value_ptr = primitive->GetAttr(kKeepDims);
+  MS_EXCEPTION_IF_NULL(keep_dimis_value_ptr);
+  if (!keep_dimis_value_ptr->isa<BoolImm>()) {
+    MS_LOG(EXCEPTION) << "Keep_dims should be Bool.";
+  }
+  bool keep_dims = GetValue<bool>(keep_dimis_value_ptr);
+  ShapeVector out_shape = {};
+  ShapeVector out_min_shape = {};
+  ShapeVector out_max_shape = {};
+  int64_t max_v;
+  if (shape_ptr->IsDynamic()) {
+    max_v = *max_element(input_max_shape.begin(), input_max_shape.end());
+  } else {
+    max_v = *max_element(input_shape.begin(), input_shape.end());
+  }
+  const int64_t input_num_ascend = 2;
+  if (input_args.size() == input_num_ascend && input_args[1]->isa<abstract::AbstractTensor>() &&
+      input_args[1]->BuildValue()->isa<AnyValue>()) {
+    auto axis_tensor = input_args[1]->cast<abstract::AbstractTensorPtr>();
+    auto axis_shape = axis_tensor->shape()->shape();
+    if (axis_shape.size() == 1 && axis_shape[0] == -1 && !keep_dims) {
+      out_shape.push_back(-2);
+      for (size_t i = 0; i < input_shape.size(); ++i) {
+        out_min_shape.push_back(1);
+        out_max_shape.push_back(max_v);
+      }
+    } else if (!keep_dims) {
+      for (size_t i = 0; i < input_shape.size() - axis_shape.size(); ++i) {
+        out_shape.push_back(-1);
+        out_min_shape.push_back(1);
+        out_max_shape.push_back(max_v);
+      }
+    } else {
+      for (size_t i = 0; i < input_shape.size(); ++i) {
+        out_shape.push_back(-1);
+        out_min_shape.push_back(1);
+        out_max_shape.push_back(max_v);
+      }
+    }
+    return std::make_shared<abstract::Shape>(out_shape, out_min_shape, out_max_shape);
+  } else {
+    ValuePtr axis_value;
+    ValuePtr axis_ptr;
+    if (input_args.size() == input_num_ascend) {
+      axis_ptr = input_args[1]->BuildValue();
+    } else {
+      axis_ptr = primitive->GetAttr("axis");
+    }
+    MS_EXCEPTION_IF_NULL(axis_ptr);
+    if (axis_ptr->isa<tensor::Tensor>()) {
+      MS_LOG(ERROR) << "Tensor with value";
+      auto axis_type = input_args[1]->BuildType();
+      MS_EXCEPTION_IF_NULL(axis_type);
+      auto axis_type_id = axis_type->cast<TensorTypePtr>();
+      MS_EXCEPTION_IF_NULL(axis_type_id);
+      auto axis_tensor = axis_ptr->cast<tensor::TensorPtr>();
+      MS_EXCEPTION_IF_NULL(axis_tensor);
+      size_t data_size = LongToSize(axis_tensor->DataSize());
+      std::vector<ValuePtr> value_list;
+      if (axis_type_id->element()->type_id() == kNumberTypeInt32) {
+        auto shape_data = reinterpret_cast<int *>(axis_tensor->data_c());
+        MS_EXCEPTION_IF_NULL(shape_data);
+        for (size_t i = 0; i < data_size; i++) {
+          value_list.push_back(MakeValue(static_cast<int64_t>(*shape_data)));
+          ++shape_data;
+        }
+      } else {
+        auto shape_data2 = reinterpret_cast<int64_t *>(axis_tensor->data_c());
+        for (size_t i = 0; i < data_size; i++) {
+          value_list.push_back(MakeValue(static_cast<int64_t>(*shape_data2)));
+          ++shape_data2;
+        }
+      }
+      axis_value = std::make_shared<ValueTuple>(value_list);
+    } else {
+      axis_value = axis_ptr;
+    }
+    InferImplReduceFuncCalShape(&out_shape, input_shape, axis_value, keep_dims);
+
+    if (!input_min_shape.empty() && !input_max_shape.empty()) {
+      ShapeVector shape_min = {};
+      ShapeVector shape_max = {};
+      InferImplReduceFuncCalShape(&shape_min, input_min_shape, axis_value, keep_dims);
+      InferImplReduceFuncCalShape(&shape_max, input_max_shape, axis_value, keep_dims);
+      return std::make_shared<abstract::Shape>(out_shape, shape_min, shape_max);
+    }
+    return std::make_shared<abstract::Shape>(out_shape);
+  }
+}
+
+TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(prim);
+  return CheckAndConvertUtils::CheckTensorTypeValid("x dtype", input_args[0]->BuildType(), common_valid_types,
+                                                    "ReduceSum");
+}
+}  // namespace
+
+AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                               const std::vector<AbstractBasePtr> &input_args) {
+  CheckAndConvertUtils::CheckInteger("input size", input_args.size(), kGreaterEqual, 1, primitive->name());
+  return abstract::MakeAbstract(InferShape(primitive, input_args), InferType(primitive, input_args));
+}
 }  // namespace ops
 }  // namespace mindspore
diff --git a/mindspore/core/ops/reduce_sum.h b/mindspore/core/ops/reduce_sum.h
index ec9e4a499df..3c67e181b79 100644
--- a/mindspore/core/ops/reduce_sum.h
+++ b/mindspore/core/ops/reduce_sum.h
@@ -29,11 +29,13 @@ namespace ops {
 constexpr auto kNameReduceSum = "ReduceSum";
 class ReduceSum : public Reduce {
  public:
-  ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"input_x", "axis"}, {"y"}); }
+  ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"x", "axis"}, {"y"}); }
   ~ReduceSum() = default;
   MS_DECLARE_PARENT(ReduceSum, Reduce);
   void Init() {}
 };
+AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                               const std::vector<AbstractBasePtr> &input_args);
 }  // namespace ops
 }  // namespace mindspore
 
diff --git a/mindspore/core/ops/round.cc b/mindspore/core/ops/round.cc
index fb1d345a2e3..a8c4a59e9b9 100644
--- a/mindspore/core/ops/round.cc
+++ b/mindspore/core/ops/round.cc
@@ -28,6 +28,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 }
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto infer_type = input_args[0]->BuildType();
   return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_type, common_valid_types, prim->name());
 }
diff --git a/mindspore/core/ops/scatter_nd_update.h b/mindspore/core/ops/scatter_nd_update.h
index 03b42cc86b7..5909f0ef48a 100644
--- a/mindspore/core/ops/scatter_nd_update.h
+++ b/mindspore/core/ops/scatter_nd_update.h
@@ -26,7 +26,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameScatterNdUpdate = "ScatterNdUpdate";
-class MS_CORE_API ScatterNdUpdate : public PrimitiveC {
+class ScatterNdUpdate : public PrimitiveC {
  public:
   ScatterNdUpdate() : PrimitiveC(kNameScatterNdUpdate) { InitIOName({"input_x", "indices", "update"}, {"output"}); }
   ~ScatterNdUpdate() = default;
diff --git a/mindspore/core/ops/soft_margin_loss.h b/mindspore/core/ops/soft_margin_loss.h
index 53f63fa38be..e670d99dc51 100644
--- a/mindspore/core/ops/soft_margin_loss.h
+++ b/mindspore/core/ops/soft_margin_loss.h
@@ -28,7 +28,7 @@
 namespace mindspore {
 namespace ops {
 constexpr auto kNameSoftMarginLoss = "SoftMarginLoss";
-class MS_CORE_API SoftMarginLoss : public PrimitiveC {
+class SoftMarginLoss : public PrimitiveC {
  public:
   SoftMarginLoss() : PrimitiveC(kNameSoftMarginLoss) { InitIOName({"predict", "label"}, {"loss"}); }
   ~SoftMarginLoss() = default;
diff --git a/mindspore/core/ops/space_to_batch_nd.cc b/mindspore/core/ops/space_to_batch_nd.cc
index 98efcf0a5b6..91d27235405 100644
--- a/mindspore/core/ops/space_to_batch_nd.cc
+++ b/mindspore/core/ops/space_to_batch_nd.cc
@@ -89,7 +89,7 @@ std::vector<int64_t> SpaceToBatchND::get_block_shape() const {
   return GetValue<std::vector<int64_t>>(GetAttr(kBlockShape));
 }
 
-void SpaceToBatchND::Init(std::vector<int64_t> block_shape, std::vector<std::vector<int64_t>> paddings) {
+void SpaceToBatchND::Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings) {
   this->set_paddings(paddings);
   this->set_block_shape(block_shape);
 }
diff --git a/mindspore/core/ops/space_to_batch_nd.h b/mindspore/core/ops/space_to_batch_nd.h
index 8ca02e35fe4..dafd345d262 100644
--- a/mindspore/core/ops/space_to_batch_nd.h
+++ b/mindspore/core/ops/space_to_batch_nd.h
@@ -33,7 +33,7 @@ class SpaceToBatchND : public PrimitiveC {
   SpaceToBatchND() : PrimitiveC(kNameSpaceToBatchND) {}
   ~SpaceToBatchND() = default;
   MS_DECLARE_PARENT(SpaceToBatchND, PrimitiveC);
-  void Init(std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings);
+  void Init(const std::vector<int64_t> block_shape, const std::vector<std::vector<int64_t>> paddings);
   void set_paddings(const std::vector<std::vector<int64_t>> paddings);
   void set_block_shape(std::vector<int64_t> block_shape);
   std::vector<int64_t> get_block_shape() const;
diff --git a/mindspore/core/ops/squeeze.cc b/mindspore/core/ops/squeeze.cc
index fd0139d8599..f144611cd7a 100644
--- a/mindspore/core/ops/squeeze.cc
+++ b/mindspore/core/ops/squeeze.cc
@@ -54,7 +54,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector<A
 }
 
 TypePtr InferType(const PrimitivePtr &prim, const std::vector<AbstractBasePtr> &input_args) {
-  if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) {
+  if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) {
     MS_LOG(EXCEPTION) << "nullptr";
   }
   return input_args[0]->BuildType();
diff --git a/mindspore/core/ops/stack.cc b/mindspore/core/ops/stack.cc
index 9740a757ed7..fe47c844b49 100644
--- a/mindspore/core/ops/stack.cc
+++ b/mindspore/core/ops/stack.cc
@@ -28,6 +28,9 @@ abstract::AbstractBasePtr StackInfer(const PrimitivePtr &primitive, const std::v
   if (input_args.size() < 1) {
     MS_LOG(ERROR) << "Invalid input size " << input_args.size();
   }
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   for (int64_t i = 1; i < SizeToLong(input_args.size()); ++i) {
     auto input_shape_tmp = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[i]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/strided_slice.cc b/mindspore/core/ops/strided_slice.cc
index bc7d459e41e..0292bd4d0a7 100644
--- a/mindspore/core/ops/strided_slice.cc
+++ b/mindspore/core/ops/strided_slice.cc
@@ -28,6 +28,20 @@
 namespace mindspore {
 namespace ops {
 namespace {
+std::vector<int64_t> TenToTwo(int64_t num) {
+  std::vector<int64_t> output;
+  if (num == 0) {
+    output.push_back(0);
+    return output;
+  }
+  while (num) {
+    output.push_back(num % 2);
+    num /= 2;
+  }
+
+  return output;
+}
+
 void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &x_shape,
                         const std::vector<int64_t> &begin_v, const std::vector<int64_t> &end_v,
                         const std::vector<int64_t> &strides_v, std::vector<int64_t> *infer_shape, size_t i, size_t j,
@@ -40,10 +54,11 @@ void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector<int64_t
   MS_EXCEPTION_IF_NULL(strided_slice_prim);
   size_t x_rank = x_shape.size();
   size_t slice_len = begin_v.size();
-  std::vector<int64_t> begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask());
-  std::vector<int64_t> end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask());
-  std::vector<int64_t> new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask());
-  std::vector<int64_t> shrink_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask());
+  std::vector<int64_t> begin_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kBeginMask)));
+  std::vector<int64_t> end_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEndMask)));
+  std::vector<int64_t> ellipsis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEllipsisMask)));
+  std::vector<int64_t> new_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kNewAxisMask)));
+  std::vector<int64_t> shrink_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kShrinkAxisMask)));
   (void)CheckAndConvertUtils::CheckInteger("infer", SizeToLong(new_axis_pos.size()), kGreaterEqual,
                                            SizeToLong(slice_len), primitive->name());
 
@@ -105,10 +120,12 @@ const std::vector<int64_t> CheckAndGetValidStrides(const AbstractBasePtr &stride
 
 std::vector<int64_t> ComputeInferShape(const PrimitivePtr &primitive, const std::vector<int64_t> &begin_v,
                                        const std::vector<int64_t> &end_v, const std::vector<int64_t> &x_shape,
-                                       const std::vector<int64_t> &strides_v, const std::vector<int64_t> &begin_pos,
-                                       const std::vector<int64_t> &shrink_axis_pos, const std::vector<int64_t> &end_pos,
-                                       const std::vector<int64_t> &new_axis_pos,
-                                       const std::vector<int64_t> &ellipsis_pos) {
+                                       const std::vector<int64_t> &strides_v) {
+  std::vector<int64_t> begin_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kBeginMask)));
+  std::vector<int64_t> end_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEndMask)));
+  std::vector<int64_t> ellipsis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kEllipsisMask)));
+  std::vector<int64_t> new_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kNewAxisMask)));
+  std::vector<int64_t> shrink_axis_pos = TenToTwo(GetValue<int64_t>(primitive->GetAttr(kShrinkAxisMask)));
   size_t i = 0;
   size_t j = 0;
   int64_t start;
@@ -171,8 +188,6 @@ std::vector<int64_t> ComputeInferShape(const PrimitivePtr &primitive, const std:
 abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
                                           const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
-  auto strided_slice_prim = primitive->cast<PrimStridedSlicePtr>();
-  MS_EXCEPTION_IF_NULL(strided_slice_prim);
   auto tuple_begin_v = input_args[1]->cast<abstract::AbstractTuplePtr>();
   MS_EXCEPTION_IF_NULL(tuple_begin_v);
   auto temp_begin_v = tuple_begin_v->BuildValue();
@@ -189,20 +204,12 @@ abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive,
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   auto min_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMinShape];
   auto max_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMaxShape];
-  std::vector<int64_t> begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask());
-  std::vector<int64_t> end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask());
-  std::vector<int64_t> ellipsis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_ellipsis_mask());
-  std::vector<int64_t> new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask());
-  std::vector<int64_t> shrink_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask());
-  auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v, begin_pos, shrink_axis_pos,
-                                        end_pos, new_axis_pos, ellipsis_pos);
+  auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v);
   if (min_shape.empty() || max_shape.empty()) {
     return std::make_shared<abstract::Shape>(ret_in_shape);
   }
-  auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v, begin_pos, shrink_axis_pos,
-                                         end_pos, new_axis_pos, ellipsis_pos);
-  auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v, begin_pos, shrink_axis_pos,
-                                         end_pos, new_axis_pos, ellipsis_pos);
+  auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v);
+  auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v);
   return std::make_shared<abstract::Shape>(ret_in_shape, ret_min_shape, ret_max_shape);
 }
 
@@ -267,20 +274,6 @@ void StridedSlice::Init(const int64_t begin_mask, const int64_t end_mask, const
   this->set_shrink_axis_mask(shrink_axis_mask);
 }
 
-std::vector<int64_t> StridedSlice::TenToTwo(int64_t num) {
-  std::vector<int64_t> output;
-  if (num == 0) {
-    output.push_back(0);
-    return output;
-  }
-  while (num) {
-    output.push_back(num % 2);
-    num /= 2;
-  }
-
-  return output;
-}
-
 int64_t StridedSlice::compute_slicing_length(int64_t start_pos, int64_t end_pos, int64_t strides, int64_t x_dim) const {
   int64_t slicing_length = 0;
   if (strides > 0) {
diff --git a/mindspore/core/ops/topk.cc b/mindspore/core/ops/topk.cc
index e861dc7d4e5..c1fa50e0c62 100644
--- a/mindspore/core/ops/topk.cc
+++ b/mindspore/core/ops/topk.cc
@@ -35,6 +35,9 @@ AbstractBasePtr TopKInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt
   (void)CheckAndConvertUtils::CheckInteger("top_k_infer", SizeToLong(input_args.size()), kEqual, 2, prim_name);
 
   // Infer dtype
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   auto output1_type = kInt32;
   const std::set<TypePtr> valid_types = {kFloat16, kFloat32};
   auto output0_type =
diff --git a/mindspore/core/ops/unpack.cc b/mindspore/core/ops/unpack.cc
index 2a7a19a7667..faf02a802be 100644
--- a/mindspore/core/ops/unpack.cc
+++ b/mindspore/core/ops/unpack.cc
@@ -26,6 +26,7 @@ AbstractBasePtr UnpackInfer(const abstract::AnalysisEnginePtr &, const Primitive
                             const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto prim_name = primitive->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   CheckAndConvertUtils::CheckSubClass("x", input_args[0]->BuildType(), {TypeIdToType(kObjectTypeTensorType)},
                                       prim_name);
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/unsorted_segment_sum.cc b/mindspore/core/ops/unsorted_segment_sum.cc
index 8a6f914fd54..8b84c30759b 100644
--- a/mindspore/core/ops/unsorted_segment_sum.cc
+++ b/mindspore/core/ops/unsorted_segment_sum.cc
@@ -31,6 +31,9 @@ AbstractBasePtr UnsortedSegmentSumInfer(const abstract::AnalysisEnginePtr &, con
   auto prim_name = primitive->name();
 
   // Infer type
+  for (const auto &item : input_args) {
+    MS_EXCEPTION_IF_NULL(item);
+  }
   auto x_type = input_args[0]->BuildType()->cast<TensorTypePtr>()->element();
   // Infer shape
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
diff --git a/mindspore/core/ops/unstack.cc b/mindspore/core/ops/unstack.cc
index 29f4a8eca2d..01159252aef 100644
--- a/mindspore/core/ops/unstack.cc
+++ b/mindspore/core/ops/unstack.cc
@@ -25,6 +25,7 @@ AbstractBasePtr UnstackInfer(const abstract::AnalysisEnginePtr &, const Primitiv
                              const std::vector<AbstractBasePtr> &input_args) {
   MS_EXCEPTION_IF_NULL(primitive);
   auto prim_name = primitive->name();
+  MS_EXCEPTION_IF_NULL(input_args[0]);
   auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
   int64_t dim = x_shape.size();
   int64_t axis = GetValue<int64_t>(primitive->GetAttr(kAxis));
diff --git a/mindspore/core/proto/mind_ir.proto b/mindspore/core/proto/mind_ir.proto
index cd6182b9e15..8d9c9ecc434 100644
--- a/mindspore/core/proto/mind_ir.proto
+++ b/mindspore/core/proto/mind_ir.proto
@@ -23,6 +23,9 @@ message AttributeProto {
     TENSOR = 17;
     GRAPH = 18;
     TENSORS = 19;
+    TUPLE = 20;        // tuple
+    LIST = 21;         // list
+    DICT = 22;         // dictionary
   }
   optional string name = 1;
   optional float f = 2;
@@ -40,6 +43,8 @@ message AttributeProto {
   optional string doc_string = 14;
   optional string ref_attr_name = 15;
   optional AttributeType type = 16;
+  repeated AttributeProto values = 17;          // tuple, list,dict of value
+  optional AttributeType type_val = 18;         // type type info
 }
 
 
@@ -70,6 +75,7 @@ message ModelProto {
   optional string model_version = 5;
   optional string doc_string = 6;
   optional GraphProto graph = 7;
+  repeated GraphProto functions = 8; // all the graphs without the main graph.
 }
 
 
diff --git a/mindspore/core/utils/check_convert_utils.cc b/mindspore/core/utils/check_convert_utils.cc
index e5553cf2ab1..6be7796aebc 100644
--- a/mindspore/core/utils/check_convert_utils.cc
+++ b/mindspore/core/utils/check_convert_utils.cc
@@ -175,6 +175,21 @@ void CheckAndConvertUtils::GetPadModEnumValue(const ValuePtr &value, int64_t *en
   }
 }
 
+void CheckAndConvertUtils::GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value) {
+  MS_EXCEPTION_IF_NULL(value);
+  if (value->isa<StringImm>()) {
+    auto attr_value_str = GetValue<std::string>(value);
+
+    std::map<std::string, int64_t> pad_map = ReductionToEnumMap;
+    if (pad_map.find(attr_value_str) == pad_map.end()) {
+      MS_LOG(EXCEPTION) << "Invalid pad mode " << attr_value_str << " use pad, valid or same";
+    }
+    *enum_value = pad_map[attr_value_str];
+  } else {
+    *enum_value = GetValue<int64_t>(value);
+  }
+}
+
 AttrConverterPair CheckAndConvertUtils::GetAttrConvertPair(const std::string &op_type, const std::string &attr_name) {
   AttrConverterPair attr_pair;
   if (op_type.empty() || attr_name.empty()) {
diff --git a/mindspore/core/utils/check_convert_utils.h b/mindspore/core/utils/check_convert_utils.h
index ac7aa08a8c0..6e8820c0402 100644
--- a/mindspore/core/utils/check_convert_utils.h
+++ b/mindspore/core/utils/check_convert_utils.h
@@ -297,6 +297,7 @@ class CheckAndConvertUtils {
   static AttrConverterPair GetAttrConvertPair(const std::string &op_type, const std::string &attr_name);
   static bool GetDataFormatEnumValue(const ValuePtr &value, int64_t *enum_value);
   static void GetPadModEnumValue(const ValuePtr &value, int64_t *enum_value, bool is_upper = false);
+  static void GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value);
   static bool CheckIrAttrtoOpAttr(const std::string &op_type, const std::string &attr_name, ValuePtr *const value);
   static void CheckSummaryParam(const AbstractBasePtr &name, const AbstractBasePtr &value,
                                 const std::string &class_name);
diff --git a/mindspore/core/utils/log_adapter.cc b/mindspore/core/utils/log_adapter.cc
index 7358cadbbe5..1bd1c7888fb 100644
--- a/mindspore/core/utils/log_adapter.cc
+++ b/mindspore/core/utils/log_adapter.cc
@@ -437,7 +437,9 @@ void common_log_init(void) {
   if (logtostderr.empty()) {
     FLAGS_logtostderr = true;
   } else if (logtostderr == "0" && mindspore::GetEnv("GLOG_log_dir").empty()) {
-    MS_LOG(EXCEPTION) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0.";
+    MS_LOG(ERROR) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0.";
+    // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed.
+    exit(EXIT_FAILURE);
   }
 
   // default GLOG_stderrthreshold level to WARNING
diff --git a/mindspore/core/utils/parallel_node_check.cc b/mindspore/core/utils/parallel_node_check.cc
index 2259be72856..85a077918c9 100644
--- a/mindspore/core/utils/parallel_node_check.cc
+++ b/mindspore/core/utils/parallel_node_check.cc
@@ -30,7 +30,7 @@ static const std::set<std::string> PARALLEL_BLACK_LIST_ = {prim::kTupleGetItem,
   "get_ref_value", "get_ref_origin", "dot", "im2col", "col2im", "im2col_v1", "state_setitem", "ScalarSummary",
   "ImageSummary", "TensorSummary", "Debug", "HistogramSummary", "col2im_v1", "resolve", "BroadcastGradientArgs",
   "InvertPermutation", "DropoutGenMask", "embed", "create_instance", "RefToEmbed",
-  "stop_gradient", "UpdateState", "Load", "Switch"};
+  "stop_gradient", "UpdateState", "Load", "Switch", "Print"};
 static const std::set<PrimitivePtr> ALLGATHER_NODE_LIST_ = {prim::kPrimAllGather, prim::kPrimMiniStepAllGather,
                                                             prim::kPrimMicroStepAllGather};
 static const std::set<PrimitivePtr> TRIVIAL_NODE_LIST_ = {prim::kPrimCast, prim::kPrimDepend};
diff --git a/mindspore/core/utils/trace_info.h b/mindspore/core/utils/trace_info.h
index e9b29c7b478..22f7252d141 100644
--- a/mindspore/core/utils/trace_info.h
+++ b/mindspore/core/utils/trace_info.h
@@ -430,6 +430,14 @@ class TraceOpt : public TraceInfo {
   ~TraceOpt() override = default;
   TraceInfoPtr clone() override { return std::make_shared<TraceOpt>(*shared_from_base<TraceOpt>()); }
 };
+
+class TraceListComp : public TraceInfo {
+ public:
+  explicit TraceListComp(const DebugInfoPtr &info) : TraceInfo(info, "ListComp", "G-") {}
+  MS_DECLARE_PARENT(TraceListComp, TraceInfo);
+  ~TraceListComp() override = default;
+  TraceInfoPtr clone() override { return std::make_shared<TraceListComp>(*shared_from_base<TraceListComp>()); }
+};
 }  // namespace mindspore
 
 #endif  // MINDSPORE_CORE_UTILS_TRACE_INFO_H_
diff --git a/mindspore/dataset/audio/transforms.py b/mindspore/dataset/audio/transforms.py
index f6f97ac0e95..aff46d944f4 100644
--- a/mindspore/dataset/audio/transforms.py
+++ b/mindspore/dataset/audio/transforms.py
@@ -20,7 +20,9 @@ to improve their training models.
 import mindspore._c_dataengine as cde
 import numpy as np
 from ..transforms.c_transforms import TensorOperation
-from .validators import check_band_biquad
+from .utils import ScaleType
+from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \
+    check_bandreject_biquad, check_bass_biquad, check_time_stretch
 
 
 class AudioTensorOperation(TensorOperation):
@@ -40,6 +42,94 @@ class AudioTensorOperation(TensorOperation):
             "AudioTensorOperation has to implement parse() method.")
 
 
+class AllpassBiquad(AudioTensorOperation):
+    """
+    Design two-pole all-pass filter for audio waveform of dimension of `(..., time)`
+
+        Args:
+            sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
+                the value must be greater than 0 .
+            central_freq (float): central frequency (in Hz),
+                the value must be greater than 0 .
+            Q(float, optional): Quality factor,https://en.wikipedia.org/wiki/Q_factor,
+                Range: (0, 1] (Default=0.707).
+
+        Examples:
+            >>> import mindspore.dataset.audio.transforms as audio
+            >>> import numpy as np
+
+            >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]])
+            >>> allpasspass_biquad_op = audio.AllpassBiquad(44100, 200.0)
+            >>> waveform_filtered = allpass_biquad_op(waveform)
+
+        References:
+            https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF
+    """
+    @check_allpass_biquad
+    def __init__(self, sample_rate, central_freq, Q=0.707):
+        self.sample_rate = sample_rate
+        self.central_freq = central_freq
+        self.Q = Q
+
+    def parse(self):
+        return cde.AllpassBiquadOperation(self.sample_rate, self.central_freq, self.Q)
+
+
+DE_C_SCALETYPE_TYPE = {ScaleType.MAGNITUDE: cde.ScaleType.DE_SCALETYPE_MAGNITUDE,
+                       ScaleType.POWER: cde.ScaleType.DE_SCALETYPE_POWER}
+
+
+class AmplitudeToDB(AudioTensorOperation):
+    """
+    Converts the input tensor from amplitude/power scale to decibel scale.
+
+    Args:
+        stype (ScaleType, optional): Scale of the input tensor. (Default="ScaleType.POWER").
+        It can be any of [ScaleType.MAGNITUDE, ScaleType.POWER].
+        ref_value (float, optional): Param for generate db_multiplier.
+        amin (float, optional): Lower bound to clamp the input waveform.
+        top_db (float, optional): Minimum cut-off decibels. The range of values is non-negative. Commonly set at 80.
+            (Default=80.0)
+    Examples:
+        >>> channel = 1
+        >>> n_fft = 400
+        >>> n_frame = 30
+        >>> specrogram = np.random.random([channel, n_fft//2+1, n_frame])
+        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=specrogram, column_names=["audio"])
+        >>> transforms = [audio.AmplitudeToDB(stype=ScaleType.POWER)]
+        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"])
+    """
+
+    @ check_amplitude_to_db
+    def __init__(self, stype=ScaleType.POWER, ref_value=1.0, amin=1e-10, top_db=80.0):
+        self.stype = stype
+        self.ref_value = ref_value
+        self.amin = amin
+        self.top_db = top_db
+
+    def parse(self):
+        return cde.AmplitudeToDBOperation(DE_C_SCALETYPE_TYPE[self.stype], self.ref_value, self.amin, self.top_db)
+
+
+class Angle(AudioTensorOperation):
+    """
+    Calculate the angle of the complex number sequence of shape (..., 2).
+    The first dimension represents the real part while the second represents the imaginary.
+    Args:
+
+    Examples:
+        >>> import mindspore.dataset.audio.transforms as audio
+        >>> import numpy as np
+
+        >>> input_complex = np.array([[1.43, 5.434], [23.54, 89.38]])
+        >>> angle_op = audio.Angle()
+        >>> angles = angle_op(input_complex)
+    """
+
+    def parse(self):
+        return cde.AngleOperation()
+
+
 class BandBiquad(AudioTensorOperation):
     """
     Design two-pole band filter for audio waveform of dimension of `(..., time)`
@@ -69,3 +159,126 @@ class BandBiquad(AudioTensorOperation):
 
     def parse(self):
         return cde.BandBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.noise)
+
+
+class BandpassBiquad(TensorOperation):
+    """
+    Design two-pole band-pass filter.  Similar to SoX implementation.
+
+    Args:
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
+        central_freq (float): central frequency (in Hz)
+        Q (float, optional): https://en.wikipedia.org/wiki/Q_factor Range: (0,1] (Default=0.707).
+        const_skirt_gain (bool, optional) : If ``True``, uses a constant skirt gain (peak gain = Q).
+            If ``False``, uses a constant 0dB peak gain. (Default: ``False``)
+
+    Examples:
+        >>> import mindspore.dataset.audio.transforms as audio
+        >>> import numpy as np
+
+        >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]])
+        >>> bandpass_biquad_op = audio.BandpassBiquad(44100, 200.0)
+        >>> waveform_filtered = bandpass_biquad_op(waveform)
+    """
+    @check_bandpass_biquad
+    def __init__(self, sample_rate, central_freq, Q=0.707, const_skirt_gain=False):
+        self.sample_rate = sample_rate
+        self.central_freq = central_freq
+        self.Q = Q
+        self.const_skirt_gain = const_skirt_gain
+
+    def parse(self):
+        return cde.BandpassBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.const_skirt_gain)
+
+
+class BandrejectBiquad(AudioTensorOperation):
+    """
+    Design two-pole band filter for audio waveform of dimension of `(..., time)`
+
+    Args:
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz),
+            the value must be greater than 0 .
+        central_freq (float): central frequency (in Hz),
+            the value must be greater than 0 .
+        Q(float, optional): Quality factor,https://en.wikipedia.org/wiki/Q_factor,
+            Range: (0, 1] (Default=0.707).
+
+    Examples:
+        >>> import mindspore.dataset.audio.transforms as audio
+        >>> import numpy as np
+
+        >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]])
+        >>> band_biquad_op = audio.BandBiquad(44100, 200.0)
+        >>> waveform_filtered = band_biquad_op(waveform)
+    """
+
+    @check_bandreject_biquad
+    def __init__(self, sample_rate, central_freq, Q=0.707):
+        self.sample_rate = sample_rate
+        self.central_freq = central_freq
+        self.Q = Q
+
+    def parse(self):
+        return cde.BandrejectBiquadOperation(self.sample_rate, self.central_freq, self.Q)
+
+
+class BassBiquad(AudioTensorOperation):
+    """
+    Design a bass tone-control effect for audio waveform of dimension of `(..., time)`
+
+    Args:
+        sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz)
+        gain (float): desired gain at the boost (or attenuation) in dB.
+        central_freq (float): central frequency (in Hz)(Default=100.0).
+        Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, Range: (0, 1] (Default=0.707).
+
+    Examples:
+        >>> import mindspore.dataset.audio.transforms as audio
+        >>> import numpy as np
+
+        >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]])
+        >>> bass_biquad_op = audio.BassBiquad(44100, 100.0)
+        >>> waveform_filtered = bass_biquad_op(waveform)
+    """
+    @check_bass_biquad
+    def __init__(self, sample_rate, gain, central_freq=100.0, Q=0.707):
+        self.sample_rate = sample_rate
+        self.gain = gain
+        self.central_freq = central_freq
+        self.Q = Q
+
+    def parse(self):
+        return cde.BassBiquadOperation(self.sample_rate, self.gain, self.central_freq, self.Q)
+
+
+class TimeStretch(AudioTensorOperation):
+    """
+    Stretch STFT in time at a given rate, without changing the pitch.
+
+    Args:
+        hop_length (int, optional): Length of hop between STFT windows (default=None).
+        n_freq (int, optional): Number of filter banks form STFT (default=201).
+        fixed_rate (float, optional): Rate to speed up or slow down the input in time (default=None).
+
+    Examples:
+        >>> freq = 44100
+        >>> num_frame = 30
+        >>> def gen():
+        ...     np.random.seed(0)
+        ...     data =  np.random.random([freq, num_frame])
+        ...     yield (np.array(data, dtype=np.float32), )
+        >>> data1 = ds.GeneratorDataset(source=gen, column_names=["multi_dimensional_data"])
+        >>> transforms = [py_audio.TimeStretch()]
+        >>> data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    """
+    @check_time_stretch
+    def __init__(self, hop_length=None, n_freq=201, fixed_rate=None):
+        self.n_freq = n_freq
+        self.fixed_rate = fixed_rate
+
+        n_fft = (n_freq - 1) * 2
+        self.hop_length = hop_length if hop_length is not None else n_fft // 2
+        self.fixed_rate = fixed_rate if fixed_rate is not None else np.nan
+
+    def parse(self):
+        return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate)
diff --git a/mindspore/dataset/audio/utils.py b/mindspore/dataset/audio/utils.py
index 3b1f42579eb..1bf00f2da0d 100644
--- a/mindspore/dataset/audio/utils.py
+++ b/mindspore/dataset/audio/utils.py
@@ -11,11 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# ==============================================================================
 """
 enum for audio ops
 """
-
 from enum import Enum
 
 
diff --git a/mindspore/dataset/audio/validators.py b/mindspore/dataset/audio/validators.py
index da3d4b045f6..ad10b842b68 100644
--- a/mindspore/dataset/audio/validators.py
+++ b/mindspore/dataset/audio/validators.py
@@ -16,8 +16,41 @@
 Validators for TensorOps.
 """
 from functools import wraps
+from mindspore.dataset.core.validator_helpers import check_not_zero, check_int32, check_float32, check_value, \
+    check_value_normalize_std, check_value_ratio, FLOAT_MAX_INTEGER, INT64_MAX, parse_user_args, type_check
+from .utils import ScaleType
 
-from mindspore.dataset.core.validator_helpers import check_not_zero, check_int32, check_float32, check_value_normalize_std, parse_user_args, type_check
+
+def check_amplitude_to_db(method):
+    """Wrapper method to check the parameters of amplitude_to_db."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [stype, ref_value, amin, top_db], _ = parse_user_args(method, *args, **kwargs)
+
+        # type check stype
+        type_check(stype, (ScaleType,), "stype")
+
+        # type check ref_value
+        type_check(ref_value, (int, float), "ref_value")
+        # value check ref_value
+        if not ref_value is None:
+            check_value_ratio(ref_value, (0, FLOAT_MAX_INTEGER), "ref_value")
+
+        # type check amin
+        type_check(amin, (int, float), "amin")
+        # value check amin
+        if not amin is None:
+            check_value_ratio(amin, (0, FLOAT_MAX_INTEGER), "amin")
+
+        # type check top_db
+        type_check(top_db, (int, float), "top_db")
+        # value check top_db
+        if not top_db is None:
+            check_value_ratio(top_db, (0, FLOAT_MAX_INTEGER), "top_db")
+
+        return method(self, *args, **kwargs)
+    return new_method
 
 
 def check_biquad_sample_rate(sample_rate):
@@ -44,6 +77,17 @@ def check_biquad_noise(noise):
     type_check(noise, (bool,), "noise")
 
 
+def check_biquad_const_skirt_gain(const_skirt_gain):
+    """Wrapper method to check the parameters of const_skirt_gain."""
+    type_check(const_skirt_gain, (bool,), "const_skirt_gain")
+
+
+def check_biquad_gain(gain):
+    """Wrapper method to check the parameters of gain."""
+    type_check(gain, (float, int), "gain")
+    check_float32(gain, "gain")
+
+
 def check_band_biquad(method):
     """Wrapper method to check the parameters of BandBiquad."""
 
@@ -58,3 +102,87 @@ def check_band_biquad(method):
         return method(self, *args, **kwargs)
 
     return new_method
+
+
+def check_allpass_biquad(method):
+    """Wrapper method to check the parameters of CutMixBatch."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [sample_rate, central_freq, Q], _ = parse_user_args(
+            method, *args, **kwargs)
+        check_biquad_sample_rate(sample_rate)
+        check_biquad_central_freq(central_freq)
+        check_biquad_Q(Q)
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
+def check_bandpass_biquad(method):
+    """Wrapper method to check the parameters of BandpassBiquad."""
+
+    @ wraps(method)
+    def new_method(self, *args, **kwargs):
+        [sample_rate, central_freq, Q, const_skirt_gain], _ = parse_user_args(
+            method, *args, **kwargs)
+        check_biquad_sample_rate(sample_rate)
+        check_biquad_central_freq(central_freq)
+        check_biquad_Q(Q)
+        check_biquad_const_skirt_gain(const_skirt_gain)
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
+def check_bandreject_biquad(method):
+    """Wrapper method to check the parameters of BandrejectBiquad."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [sample_rate, central_freq, Q], _ = parse_user_args(
+            method, *args, **kwargs)
+        check_biquad_sample_rate(sample_rate)
+        check_biquad_central_freq(central_freq)
+        check_biquad_Q(Q)
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
+def check_bass_biquad(method):
+    """Wrapper method to check the parameters of CutMixBatch."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [sample_rate, gain, central_freq, Q], _ = parse_user_args(
+            method, *args, **kwargs)
+        check_biquad_sample_rate(sample_rate)
+        check_biquad_gain(gain)
+        check_biquad_central_freq(central_freq)
+        check_biquad_Q(Q)
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
+def check_time_stretch(method):
+    """Wrapper method to check the parameters of time_stretch."""
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [hop_length, n_freq, fixed_rate], _ = parse_user_args(method, *args, **kwargs)
+        # type check
+        type_check(hop_length, (int, type(None)), "hop_length")
+        type_check(n_freq, (int,), "n_freq")
+        type_check(fixed_rate, (int, float, type(None)), "fixed_rate")
+
+        # value check
+        if hop_length is not None:
+            check_value(hop_length, (1, INT64_MAX), "hop_length")
+        check_value(n_freq, (1, INT64_MAX), "n_freq")
+        if fixed_rate is not None:
+            check_value_ratio(fixed_rate, (0, FLOAT_MAX_INTEGER), "fixed_rate")
+
+        return method(self, *args, **kwargs)
+
+    return new_method
diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py
index 55591b9d9d2..7ca1696e857 100644
--- a/mindspore/dataset/core/validator_helpers.py
+++ b/mindspore/dataset/core/validator_helpers.py
@@ -210,6 +210,11 @@ def check_2tuple(value, arg_name=""):
         raise ValueError("Value {0} needs to be a 2-tuple.".format(arg_name))
 
 
+def check_int32(value, arg_name=""):
+    type_check(value, (int,), arg_name)
+    check_value(value, [INT32_MIN, INT32_MAX], arg_name)
+
+
 def check_uint8(value, arg_name=""):
     """
     Validates the value of a variable is within the range of uint8.
@@ -246,11 +251,6 @@ def check_pos_uint32(value, arg_name=""):
     check_value(value, [POS_INT_MIN, UINT32_MAX])
 
 
-def check_int32(value, arg_name=""):
-    type_check(value, (int,), arg_name)
-    check_value(value, [INT32_MIN, INT32_MAX], arg_name)
-
-
 def check_pos_int32(value, arg_name=""):
     """
     Validates the value of a variable is within the range of int32.
@@ -482,8 +482,6 @@ def check_filename(path):
     if filename.startswith(' ') or filename.endswith(' '):
         raise ValueError("filename should not start/end with space.")
 
-    return True
-
 
 def check_dir(dataset_dir):
     """
@@ -682,3 +680,4 @@ def check_c_tensor_op(param, param_name):
 def replace_none(value, default):
     """ replaces None with a default value."""
     return value if value is not None else default
+    
\ No newline at end of file
diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py
index 51103dcd204..c445542f630 100644
--- a/mindspore/dataset/engine/__init__.py
+++ b/mindspore/dataset/engine/__init__.py
@@ -33,7 +33,7 @@ from .serializer_deserializer import compare, deserialize, serialize, show
 
 __all__ = ["CelebADataset", "Cifar100Dataset", "Cifar10Dataset", "CLUEDataset", "CocoDataset", "CSVDataset",
            "GeneratorDataset", "GraphData", "ImageFolderDataset", "ManifestDataset", "MindDataset", "MnistDataset",
-           "LibriSpeechDataset",
+           "CmuArcticDataset",
            "NumpySlicesDataset", "PaddedDataset", "TextFileDataset", "TFRecordDataset", "VOCDataset",
            "DistributedSampler", "PKSampler", "RandomSampler", "SequentialSampler", "SubsetRandomSampler",
            "WeightedRandomSampler", "SubsetSampler",
diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py
index 7168200e0ca..eab49e87cbd 100644
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@@ -58,7 +58,7 @@ from .queue import _SharedQueue
 from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \
     check_rename, check_numpyslicesdataset, check_device_send, check_take, check_project, check_imagefolderdataset, \
     check_mnist_cifar_dataset, check_manifestdataset, check_tfrecorddataset, check_vocdataset, check_cocodataset, \
-    check_celebadataset, check_minddataset,check_libri_speech_dataset, check_generatordataset, check_sync_wait, check_zip_dataset, \
+    check_celebadataset, check_minddataset,check_cmu_arctic_dataset, check_generatordataset, check_sync_wait, check_zip_dataset, \
     check_add_column, check_textfiledataset, check_concat, check_random_dataset, check_split, \
     check_bucket_batch_by_length, check_cluedataset, check_save, check_csvdataset, check_paddeddataset, \
     check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send
@@ -4369,19 +4369,19 @@ class Cifar10Dataset(MappableDataset):
         return cde.Cifar10Node(self.dataset_dir, self.usage, self.sampler)
 
 
-class LibriSpeechDataset(MappableDataset):
+class CmuArcticDataset(MappableDataset):
 
-    @check_libri_speech_dataset
+    @check_cmu_arctic_dataset
     def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None,
                  sampler=None, num_shards=None, shard_id=None, cache=None):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
 
         self.dataset_dir = dataset_dir
-        self.usage = replace_none(usage, "test-other")
+        self.usage = replace_none(usage, "aew")
 
     def parse(self, children=None):
-        return cde.LibriSpeechNode(self.dataset_dir, self.usage, self.sampler)
+        return cde.CmuArcticNode(self.dataset_dir, self.usage, self.sampler)
 
 class Cifar100Dataset(MappableDataset):
     """
diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py
index deacd6e2408..0ec39085a28 100644
--- a/mindspore/dataset/engine/serializer_deserializer.py
+++ b/mindspore/dataset/engine/serializer_deserializer.py
@@ -17,12 +17,9 @@ Functions to support dataset serialize and deserialize.
 """
 import json
 import os
-import sys
 
-import mindspore.common.dtype as mstype
 from mindspore import log as logger
 from . import datasets as de
-from ..vision.utils import Inter, Border, ImageBatchFormat
 
 
 def serialize(dataset, json_filepath=""):
@@ -87,15 +84,10 @@ def deserialize(input_dict=None, json_filepath=None):
     """
     data = None
     if input_dict:
-        data = construct_pipeline(input_dict)
+        data = de.DeserializedDataset(input_dict)
 
     if json_filepath:
-        dict_pipeline = dict()
-        real_file_path = os.path.realpath(json_filepath)
-        with open(real_file_path, 'r') as json_file:
-            dict_pipeline = json.load(json_file)
-            data = construct_pipeline(dict_pipeline)
-
+        data = de.DeserializedDataset(json_filepath)
     return data
 
 
@@ -146,341 +138,3 @@ def compare(pipeline1, pipeline2):
     """
 
     return pipeline1.to_json() == pipeline2.to_json()
-
-
-def construct_pipeline(node):
-    """Construct the Python Dataset objects by following the dictionary deserialized from JSON file."""
-    op_type = node.get('op_type')
-    if not op_type:
-        raise ValueError("op_type field in the json file can't be None.")
-
-    # Instantiate Python Dataset object based on the current dictionary element
-    dataset = create_node(node)
-    # Initially it is not connected to any other object.
-    dataset.children = []
-
-    # Construct the children too and add edge between the children and parent.
-    for child in node['children']:
-        dataset.children.append(construct_pipeline(child))
-
-    return dataset
-
-
-def create_node(node):
-    """Parse the key, value in the node dictionary and instantiate the Python Dataset object"""
-    logger.info('creating node: %s', node['op_type'])
-    dataset_op = node['op_type']
-    op_module = "mindspore.dataset"
-
-    # Get the Python class to be instantiated.
-    # Example:
-    #  "op_type": "MapDataset",
-    #  "op_module": "mindspore.dataset.datasets",
-    if node.get("children"):
-        pyclass = getattr(sys.modules[op_module], "Dataset")
-    else:
-        pyclass = getattr(sys.modules[op_module], dataset_op)
-
-    pyobj = None
-    # Find a matching Dataset class and call the constructor with the corresponding args.
-    # When a new Dataset class is introduced, another if clause and parsing code needs to be added.
-    # Dataset Source Ops (in alphabetical order)
-    pyobj = create_dataset_node(pyclass, node, dataset_op)
-    if not pyobj:
-        # Dataset Ops (in alphabetical order)
-        pyobj = create_dataset_operation_node(node, dataset_op)
-
-    return pyobj
-
-
-def create_dataset_node(pyclass, node, dataset_op):
-    """Parse the key, value in the dataset node dictionary and instantiate the Python Dataset object"""
-    pyobj = None
-    if dataset_op == 'CelebADataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), node.get('usage'),
-                        sampler, node.get('decode'), node.get('extensions'), num_samples, node.get('num_shards'),
-                        node.get('shard_id'))
-
-    elif dataset_op == 'Cifar10Dataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'),
-                        node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'Cifar100Dataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'),
-                        node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'ClueDataset':
-        shuffle = to_shuffle_mode(node.get('shuffle'))
-        if isinstance(shuffle, str):
-            shuffle = de.Shuffle(shuffle)
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_files'], node.get('task'),
-                        node.get('usage'), num_samples, node.get('num_parallel_workers'), shuffle,
-                        node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'CocoDataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), num_samples,
-                        node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler,
-                        node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'CSVDataset':
-        shuffle = to_shuffle_mode(node.get('shuffle'))
-        if isinstance(shuffle, str):
-            shuffle = de.Shuffle(shuffle)
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_files'], node.get('field_delim'),
-                        node.get('column_defaults'), node.get('column_names'), num_samples,
-                        node.get('num_parallel_workers'), shuffle,
-                        node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'ImageFolderDataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], num_samples, node.get('num_parallel_workers'),
-                        node.get('shuffle'), sampler, node.get('extensions'),
-                        node.get('class_indexing'), node.get('decode'), node.get('num_shards'),
-                        node.get('shard_id'))
-
-    elif dataset_op == 'ManifestDataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_file'], node['usage'], num_samples,
-                        node.get('num_parallel_workers'), node.get('shuffle'), sampler,
-                        node.get('class_indexing'), node.get('decode'), node.get('num_shards'),
-                        node.get('shard_id'))
-
-    elif dataset_op == 'MnistDataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'),
-                        node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'TextFileDataset':
-        shuffle = to_shuffle_mode(node.get('shuffle'))
-        if isinstance(shuffle, str):
-            shuffle = de.Shuffle(shuffle)
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_files'], num_samples,
-                        node.get('num_parallel_workers'), shuffle,
-                        node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'TFRecordDataset':
-        shuffle = to_shuffle_mode(node.get('shuffle'))
-        if isinstance(shuffle, str):
-            shuffle = de.Shuffle(shuffle)
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_files'], node.get('schema'), node.get('columns_list'),
-                        num_samples, node.get('num_parallel_workers'),
-                        shuffle, node.get('num_shards'), node.get('shard_id'))
-
-    elif dataset_op == 'VOCDataset':
-        sampler = construct_sampler(node.get('sampler'))
-        num_samples = check_and_replace_input(node.get('num_samples'), 0, None)
-        pyobj = pyclass(node['dataset_dir'], node.get('task'), node.get('usage'), node.get('class_indexing'),
-                        num_samples, node.get('num_parallel_workers'), node.get('shuffle'),
-                        node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id'))
-
-    return pyobj
-
-
-def create_dataset_operation_node(node, dataset_op):
-    """Parse the key, value in the dataset operation node dictionary and instantiate the Python Dataset object"""
-    pyobj = None
-    if dataset_op == 'Batch':
-        pyobj = de.Dataset().batch(node['batch_size'], node.get('drop_remainder'))
-
-    elif dataset_op == 'Map':
-        tensor_ops = construct_tensor_ops(node.get('operations'))
-        pyobj = de.Dataset().map(tensor_ops, node.get('input_columns'), node.get('output_columns'),
-                                 node.get('column_order'), node.get('num_parallel_workers'),
-                                 False, None, node.get('callbacks'))
-
-    elif dataset_op == 'Project':
-        pyobj = de.Dataset().project(node['columns'])
-
-    elif dataset_op == 'Rename':
-        pyobj = de.Dataset().rename(node['input_columns'], node['output_columns'])
-
-    elif dataset_op == 'Repeat':
-        pyobj = de.Dataset().repeat(node.get('count'))
-
-    elif dataset_op == 'Shuffle':
-        pyobj = de.Dataset().shuffle(node.get('buffer_size'))
-
-    elif dataset_op == 'Skip':
-        pyobj = de.Dataset().skip(node.get('count'))
-
-    elif dataset_op == 'Take':
-        pyobj = de.Dataset().take(node.get('count'))
-
-    elif dataset_op == 'Transfer':
-        pyobj = de.Dataset().to_device(node.get('send_epoch_end'), node.get('create_data_info_queue'))
-
-    elif dataset_op == 'Zip':
-        # Create ZipDataset instance, giving dummy input dataset that will be overrode in the caller.
-        pyobj = de.ZipDataset((de.Dataset(), de.Dataset()))
-
-    else:
-        raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize().")
-
-    return pyobj
-
-
-def construct_sampler(in_sampler):
-    """Instantiate Sampler object based on the information from dictionary['sampler']"""
-    sampler = None
-    if in_sampler is not None:
-        if "num_samples" in in_sampler:
-            num_samples = check_and_replace_input(in_sampler['num_samples'], 0, None)
-        sampler_name = in_sampler['sampler_name']
-        sampler_module = "mindspore.dataset"
-        sampler_class = getattr(sys.modules[sampler_module], sampler_name)
-        if sampler_name == 'DistributedSampler':
-            sampler = sampler_class(in_sampler['num_shards'], in_sampler['shard_id'], in_sampler.get('shuffle'))
-        elif sampler_name == 'PKSampler':
-            sampler = sampler_class(in_sampler['num_val'], in_sampler.get('num_class'), in_sampler('shuffle'))
-        elif sampler_name == 'RandomSampler':
-            sampler = sampler_class(in_sampler.get('replacement'), num_samples)
-        elif sampler_name == 'SequentialSampler':
-            sampler = sampler_class(in_sampler.get('start_index'), num_samples)
-        elif sampler_name == 'SubsetRandomSampler':
-            sampler = sampler_class(in_sampler['indices'], num_samples)
-        elif sampler_name == 'WeightedRandomSampler':
-            sampler = sampler_class(in_sampler['weights'], num_samples, in_sampler.get('replacement'))
-        else:
-            raise ValueError("Sampler type is unknown: {}.".format(sampler_name))
-    if in_sampler.get("child_sampler"):
-        for child in in_sampler["child_sampler"]:
-            sampler.add_child(construct_sampler(child))
-
-    return sampler
-
-
-def construct_tensor_ops(operations):
-    """Instantiate tensor op object(s) based on the information from dictionary['operations']"""
-    result = []
-    for op in operations:
-        op_name = op.get('tensor_op_name')
-        op_params = op.get('tensor_op_params')
-
-        if op.get('is_python_front_end_op'):  # check if it's a py_transform op
-            raise NotImplementedError("python function is not yet supported by de.deserialize().")
-
-        if op_name == "HwcToChw":
-            op_name = "HWC2CHW"
-        if op_name == "UniformAug":
-            op_name = "UniformAugment"
-        op_module_vis = sys.modules["mindspore.dataset.vision.c_transforms"]
-        op_module_trans = sys.modules["mindspore.dataset.transforms.c_transforms"]
-
-        if hasattr(op_module_vis, op_name):
-            op_class = getattr(op_module_vis, op_name, None)
-        elif hasattr(op_module_trans, op_name):
-            op_class = getattr(op_module_trans, op_name, None)
-        else:
-            raise RuntimeError(op_name + " is not yet supported by deserialize().")
-
-        if op_params is None:  # If no parameter is specified, call it directly
-            result.append(op_class())
-        else:
-            # Input parameter type cast
-            for key, val in op_params.items():
-                if key in ['center', 'fill_value']:
-                    op_params[key] = tuple(val)
-                elif key in ['interpolation', 'resample']:
-                    op_params[key] = Inter(to_interpolation_mode(val))
-                elif key in ['padding_mode']:
-                    op_params[key] = Border(to_border_mode(val))
-                elif key in ['data_type']:
-                    op_params[key] = to_mstype(val)
-                elif key in ['image_batch_format']:
-                    op_params[key] = to_image_batch_format(val)
-                elif key in ['policy']:
-                    op_params[key] = to_policy(val)
-                elif key in ['transform', 'transforms']:
-                    op_params[key] = construct_tensor_ops(val)
-
-            result.append(op_class(**op_params))
-    return result
-
-
-def to_policy(op_list):
-    """ op_list to policy """
-    policy_tensor_ops = []
-    for policy_list in op_list:
-        sub_policy_tensor_ops = []
-        for policy_item in policy_list:
-            sub_policy_tensor_ops.append(
-                (construct_tensor_ops(policy_item.get('tensor_op')), policy_item.get('prob')))
-        policy_tensor_ops.append(sub_policy_tensor_ops)
-    return policy_tensor_ops
-
-
-def to_shuffle_mode(shuffle):
-    """ int to shuffle mode """
-    ret_val = False
-    if shuffle == 2:
-        ret_val = "global"
-    elif shuffle == 1:
-        ret_val = "files"
-    return ret_val
-
-
-def to_interpolation_mode(inter):
-    """ int to interpolation mode """
-    return {
-        0: Inter.LINEAR,
-        1: Inter.NEAREST,
-        2: Inter.CUBIC,
-        3: Inter.AREA
-    }[inter]
-
-
-def to_border_mode(border):
-    """ int to border mode """
-    return {
-        0: Border.CONSTANT,
-        1: Border.EDGE,
-        2: Border.REFLECT,
-        3: Border.SYMMETRIC
-    }[border]
-
-
-def to_mstype(data_type):
-    """ str to mstype """
-    return {
-        "bool": mstype.bool_,
-        "int8": mstype.int8,
-        "int16": mstype.int16,
-        "int32": mstype.int32,
-        "int64": mstype.int64,
-        "uint8": mstype.uint8,
-        "uint16": mstype.uint16,
-        "uint32": mstype.uint32,
-        "uint64": mstype.uint64,
-        "float16": mstype.float16,
-        "float32": mstype.float32,
-        "float64": mstype.float64,
-        "string": mstype.string
-    }[data_type]
-
-
-def to_image_batch_format(image_batch_format):
-    """ int to image batch format """
-    return {
-        0: ImageBatchFormat.NHWC,
-        1: ImageBatchFormat.NCHW
-    }[image_batch_format]
-
-
-def check_and_replace_input(input_value, expect, replace):
-    """ check and replace input arg """
-    return replace if input_value == expect else input_value
diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py
index 083bfa66f01..2db4b32ef16 100644
--- a/mindspore/dataset/engine/validators.py
+++ b/mindspore/dataset/engine/validators.py
@@ -92,8 +92,8 @@ def check_mnist_cifar_dataset(method):
     return new_method
 
 
-def check_libri_speech_dataset(method):
-    """A wrapper that wraps a parameter checker around the original LirbiSpeechDataset."""
+def check_cmu_arctic_dataset(method):
+    """A wrapper that wraps a parameter checker around the original CmuArcticDataset."""
 
     @wraps(method)
     def new_method(self, *args, **kwargs):
@@ -107,7 +107,7 @@ def check_libri_speech_dataset(method):
 
         usage = param_dict.get('usage')
         if usage is not None:
-            check_valid_str(usage, ['dev-clean', 'dev-other', 'test-clean','test-other', 'train-clean-100', 'train-clean-360','train-other-500'], "usage")
+            check_valid_str(usage, ['aew', 'ahw', 'aup', 'awb', 'axb', 'bdl', 'clb', 'eey', 'fem', 'gka', 'jmk', 'ksp', 'ljm', 'lnh', 'rms', 'rxr', 'slp' , 'slt'], "usage")
 
         validate_dataset_param_value(nreq_param_int, param_dict, int)
         validate_dataset_param_value(nreq_param_bool, param_dict, bool)
diff --git a/mindspore/dataset/vision/c_transforms.py b/mindspore/dataset/vision/c_transforms.py
index fd6e1a0c2a5..1f7fb720e82 100644
--- a/mindspore/dataset/vision/c_transforms.py
+++ b/mindspore/dataset/vision/c_transforms.py
@@ -54,7 +54,7 @@ from .validators import check_prob, check_crop, check_center_crop, check_resize_
     check_uniform_augment_cpp, \
     check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \
     check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \
-    check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches
+    check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches, check_adjust_gamma
 from ..transforms.c_transforms import TensorOperation
 
 
@@ -107,6 +107,37 @@ def parse_padding(padding):
     return padding
 
 
+class AdjustGamma(ImageTensorOperation):
+    r"""
+    Apply gamma correction on input image. Input image is expected to be in [..., H, W, C] or [H, W, C] format.
+    .. math::
+        I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma}
+
+    See `Gamma Correction`_ for more details.
+
+    .. _Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction
+
+    Args:
+        gamma (float): Non negative real number.
+            The output image pixel value is exponentially related to the input image pixel value.
+            gamma larger than 1 make the shadows darker,
+            while gamma smaller than 1 make dark regions lighter.
+        gain (float, optional): The constant multiplier (default=1).
+
+    Examples:
+        >>> transforms_list = [c_vision.Decode(), c_vision.AdjustGamma(gamma=10.0, gain=1.0)]
+        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
+        ...                                                 input_columns=["image"])
+    """
+    @check_adjust_gamma
+    def __init__(self, gamma, gain=1):
+        self.gamma = gamma
+        self.gain = gain
+
+    def parse(self):
+        return cde.AdjustGammaOperation(self.gamma, self.gain)
+
+
 class AutoContrast(ImageTensorOperation):
     """
     Apply automatic contrast on input image. This operator calculates histogram of image, reassign cutoff percent
diff --git a/mindspore/dataset/vision/py_transforms.py b/mindspore/dataset/vision/py_transforms.py
index af0ae88bc8e..989d53c7a39 100644
--- a/mindspore/dataset/vision/py_transforms.py
+++ b/mindspore/dataset/vision/py_transforms.py
@@ -31,7 +31,8 @@ from .validators import check_prob, check_center_crop, check_five_crop, check_re
     check_normalize_py, check_normalizepad_py, check_random_crop, check_random_color_adjust, check_random_rotation, \
     check_ten_crop, check_num_channels, check_pad, check_rgb_to_hsv, check_hsv_to_rgb, \
     check_random_perspective, check_random_erasing, check_cutout, check_linear_transform, check_random_affine, \
-    check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr
+    check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr, \
+    check_adjust_gamma
 from .utils import Inter, Border
 from .py_transforms_util import is_pil
 
@@ -1375,7 +1376,6 @@ class RgbToBgr:
         return util.rgb_to_bgrs(rgb_imgs, self.is_hwc)
 
 
-
 class RgbToHsv:
     """
     Convert a NumPy RGB image or a batch of NumPy RGB images to HSV images.
@@ -1525,6 +1525,44 @@ class RandomSharpness:
         return util.random_sharpness(img, self.degrees)
 
 
+class AdjustGamma:
+    """
+    Adjust gamma of the input PIL image.
+
+    Args:
+        gamma (float): Non negative real number, same as gamma in the equation.
+        gain (float, optional): The constant multiplier.
+
+    Examples:
+        >>> from mindspore.dataset.transforms.py_transforms import Compose
+        >>> transforms_list = Compose([py_vision.Decode(),
+        ...                            py_vision.AdjustGamma(),
+        ...                            py_vision.ToTensor()])
+        >>> # apply the transform to dataset through map function
+        >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list,
+        ...                                                 input_columns="image")
+    """
+
+    @check_adjust_gamma
+    def __init__(self, gamma, gain=1.0):
+        self.gamma = gamma
+        self.gain = gain
+        self.random = False
+
+    def __call__(self, img):
+        """
+        Call method.
+
+        Args:
+            img (PIL image): Image to be augmented with AutoContrast.
+
+        Returns:
+            img (PIL image), Augmented image.
+        """
+
+        return util.adjust_gamma(img, self.gamma, self.gain)
+
+
 class AutoContrast:
     """
     Automatically maximize the contrast of the input PIL image.
diff --git a/mindspore/dataset/vision/py_transforms_util.py b/mindspore/dataset/vision/py_transforms_util.py
index 475a4bab9bc..48ed3457837 100644
--- a/mindspore/dataset/vision/py_transforms_util.py
+++ b/mindspore/dataset/vision/py_transforms_util.py
@@ -19,7 +19,6 @@ import math
 import numbers
 import random
 import colorsys
-
 import numpy as np
 from PIL import Image, ImageOps, ImageEnhance, __version__
 
@@ -1243,6 +1242,7 @@ def rgb_to_bgr(np_rgb_img, is_hwc):
         np_bgr_img = np_rgb_img[::-1, :, :]
     return np_bgr_img
 
+
 def rgb_to_bgrs(np_rgb_imgs, is_hwc):
     """
     Convert RGB imgs to BGR imgs.
@@ -1473,6 +1473,32 @@ def random_sharpness(img, degrees):
     return ImageEnhance.Sharpness(img).enhance(v)
 
 
+def adjust_gamma(img, gamma, gain):
+    """
+    Adjust gamma of the input PIL image.
+
+    Args:
+        img (PIL image): Image to be augmented with AdjustGamma.
+        gamma (float): Non negative real number, same as gamma in the equation.
+        gain (float, optional): The constant multiplier.
+
+    Returns:
+        img (PIL image), Augmented image.
+
+    """
+
+    if not is_pil(img):
+        raise TypeError("img should be PIL image. Got {}.".format(type(img)))
+
+    gamma_table = [(255 + 1 - 1e-3) * gain * pow(x / 255., gamma) for x in range(256)]
+    if len(img.split()) == 3:
+        gamma_table = gamma_table * 3
+        img = img.point(gamma_table)
+    elif len(img.split()) == 1:
+        img = img.point(gamma_table)
+    return img
+
+
 def auto_contrast(img, cutoff, ignore):
     """
     Automatically maximize the contrast of the input PIL image.
diff --git a/mindspore/dataset/vision/validators.py b/mindspore/dataset/vision/validators.py
index baecbabce73..d8d7b84385b 100644
--- a/mindspore/dataset/vision/validators.py
+++ b/mindspore/dataset/vision/validators.py
@@ -19,10 +19,10 @@ from functools import wraps
 import numpy as np
 from mindspore._c_dataengine import TensorOp, TensorOperation
 
-from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \
-    check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, parse_user_args, type_check, \
-    type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, check_value_cutoff, check_value_ratio, \
-    check_odd
+from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER, \
+    check_pos_float32, check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, \
+    parse_user_args, type_check, type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, \
+    check_value_cutoff, check_value_ratio, check_odd
 from .utils import Inter, Border, ImageBatchFormat, SliceMode
 
 
@@ -788,6 +788,22 @@ def check_bounding_box_augment_cpp(method):
     return new_method
 
 
+def check_adjust_gamma(method):
+    """Wrapper method to check the parameters of AdjustGamma ops (Python and C++)."""
+
+    @wraps(method)
+    def new_method(self, *args, **kwargs):
+        [gamma, gain], _ = parse_user_args(method, *args, **kwargs)
+        type_check(gamma, (float, int), "gamma")
+        check_value(gamma, (0, FLOAT_MAX_INTEGER))
+        if gain is not None:
+            type_check(gain, (float, int), "gain")
+            check_value(gain, (FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER))
+        return method(self, *args, **kwargs)
+
+    return new_method
+
+
 def check_auto_contrast(method):
     """Wrapper method to check the parameters of AutoContrast ops (Python and C++)."""
 
diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt
index 3731938f1a2..17d306a98de 100644
--- a/mindspore/lite/CMakeLists.txt
+++ b/mindspore/lite/CMakeLists.txt
@@ -33,6 +33,10 @@ option(MSLITE_ENABLE_TESTCASES "enable testcase" off)
 option(MSLITE_ENABLE_NNIE "enable NNIE" off)
 option(MSLITE_COMPILE_NNIE "compile NNIE" off)
 option(MSLITE_ENABLE_HIGH_PERFORMANCE "enable high performance" on)
+option(MSLITE_STRING_KERNEL "enable string kernel" on)
+option(MSLITE_CONTROL_TENSORLIST "enable control and tensorlist" on)
+option(MSLITE_AUTO_PARALLEL "enable automatic parallelism" on)
+option(MSLITE_HUFFMAN_DECODE "enable huffman decode" on)
 
 # Option that can be configured through manually
 option(ENABLE_VERBOSE "" off)
@@ -82,6 +86,32 @@ endif()
 if(DEFINED ENV{MSLITE_ENABLE_HIGH_PERFORMANCE})
     set(MSLITE_ENABLE_HIGH_PERFORMANCE $ENV{MSLITE_ENABLE_HIGH_PERFORMANCE})
 endif()
+if(DEFINED ENV{MSLITE_STRING_KERNEL})
+    set(MSLITE_STRING_KERNEL $ENV{MSLITE_STRING_KERNEL})
+endif()
+if(DEFINED ENV{MSLITE_CONTROL_TENSORLIST})
+    set(MSLITE_CONTROL_TENSORLIST $ENV{MSLITE_CONTROL_TENSORLIST})
+endif()
+if(DEFINED ENV{MSLITE_AUTO_PARALLEL})
+    set(MSLITE_AUTO_PARALLEL $ENV{MSLITE_AUTO_PARALLEL})
+endif()
+if(DEFINED ENV{MSLITE_HUFFMAN_DECODE})
+    set(MSLITE_HUFFMAN_DECODE $ENV{MSLITE_HUFFMAN_DECODE})
+endif()
+
+
+if(MSLITE_STRING_KERNEL)
+    add_compile_definitions(ENABLE_STRING_KERNEL)
+endif()
+if(MSLITE_CONTROL_TENSORLIST)
+    add_compile_definitions(ENABLE_CONTROL_TENSORLIST)
+endif()
+if(MSLITE_AUTO_PARALLEL)
+    add_compile_definitions(ENABLE_AUTO_PARALLEL)
+endif()
+if(MSLITE_HUFFMAN_DECODE)
+    add_compile_definitions(ENABLE_HUFFMAN_DECODE)
+endif()
 
 if(PLATFORM_ARM64)
     if(MSLITE_GPU_BACKEND STREQUAL "")
@@ -191,6 +221,11 @@ else()
     set(CMAKE_CXX_FLAGS "${LITE_COMPILE_FLAGS} -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS} -std=c++17")
     set(CMAKE_CXX_FLAGS_DEBUG "-DDebug -g -fvisibility=default")
 
+    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+        string(REPLACE "-O2" "-O0" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
+        string(REPLACE "-O2" "-O0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    endif()
+
     if(WIN32)
         if(CMAKE_SIZEOF_VOID_P EQUAL 4)
             set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-seh ${CMAKE_SHARED_LINKER_FLAGS}")
@@ -201,6 +236,10 @@ else()
     else()
         set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s ${CMAKE_SHARED_LINKER_FLAGS}")
         set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s -pie ${CMAKE_EXE_LINKER_FLAGS}")
+        if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
+            string(REPLACE "-s " "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}")
+            string(REPLACE "-s " "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
+        endif()
     endif()
 endif()
 
@@ -216,7 +255,10 @@ if(SUPPORT_NPU)
 endif()
 
 add_compile_definitions(NO_DLIB)
-add_compile_options(-fPIC)
+
+if(NOT MSVC)
+    add_compile_options(-fPIC)
+endif()
 
 if(PLATFORM_ARM64)
     set(RUNTIME_COMPONENT_NAME "android-aarch64")
diff --git a/mindspore/lite/OWNERS b/mindspore/lite/OWNERS
index 65b4352238e..e2e7476b4a9 100644
--- a/mindspore/lite/OWNERS
+++ b/mindspore/lite/OWNERS
@@ -1,18 +1,4 @@
 approvers:
-- zhang_xue_tong
+- zhaizhiqiang
 - zhanghaibo5
-- ddwsky
-- HilbertDavid
-- jpc_chenjianping
-- hangangqiang
-- zqstar
-reviewers:
-- yangruoqi713
-- yeyunpeng2020
-- ling_qiao_min
-- mengyuanli
-- zhujingxuan
-- zhanyuan1
-- cjh9368
-- zhaozhenlong
 
diff --git a/mindspore/lite/build_lite.sh b/mindspore/lite/build_lite.sh
index 14f8f14fe01..65cec694837 100755
--- a/mindspore/lite/build_lite.sh
+++ b/mindspore/lite/build_lite.sh
@@ -371,7 +371,7 @@ build_aar() {
 
     cp ${LITE_JAVA_PATH}/java/common/build/libs/mindspore-lite-java-common.jar ${LITE_JAVA_PATH}/java/app/libs
     ${LITE_JAVA_PATH}/java/gradlew clean -p ${LITE_JAVA_PATH}/java/app
-    ${LITE_JAVA_PATH}/java/gradlew build  -p ${LITE_JAVA_PATH}/java/app
+    ${LITE_JAVA_PATH}/java/gradlew assembleRelease  -p ${LITE_JAVA_PATH}/java/app
     ${LITE_JAVA_PATH}/java/gradlew publish -PLITE_VERSION=${VERSION_STR} -p ${LITE_JAVA_PATH}/java/app
 
     cd ${LITE_JAVA_PATH}/java/app/build
diff --git a/mindspore/lite/examples/export_models/models/densenet_train_export.py b/mindspore/lite/examples/export_models/models/densenet_train_export.py
index 14c36475890..ea801e5403a 100644
--- a/mindspore/lite/examples/export_models/models/densenet_train_export.py
+++ b/mindspore/lite/examples/export_models/models/densenet_train_export.py
@@ -21,10 +21,9 @@ from train_utils import save_inout, train_wrap
 import mindspore.common.dtype as mstype
 from mindspore import context, Tensor, nn
 from mindspore.train.serialization import export
+from src.network.densenet import DenseNet121
 #pylint: disable=wrong-import-position
 sys.path.append(os.environ['CLOUD_MODEL_ZOO'] + 'official/cv/densenet121/')
-from src.network.densenet import DenseNet121
-
 
 context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False)
 
diff --git a/mindspore/lite/examples/quick_start_cpp/build.sh b/mindspore/lite/examples/quick_start_cpp/build.sh
index 76f3e1407a7..9e12c9b086d 100644
--- a/mindspore/lite/examples/quick_start_cpp/build.sh
+++ b/mindspore/lite/examples/quick_start_cpp/build.sh
@@ -37,8 +37,8 @@ if [ ! -e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then
   wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL}
 fi
 tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/
-cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib
-cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/
+cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib
+cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/
 cd ${BASEPATH}/build || exit
 cmake ${BASEPATH}
 make
diff --git a/mindspore/lite/examples/quick_start_cpp/main.cc b/mindspore/lite/examples/quick_start_cpp/main.cc
index 5c3585f4a44..3d4bfe509d1 100644
--- a/mindspore/lite/examples/quick_start_cpp/main.cc
+++ b/mindspore/lite/examples/quick_start_cpp/main.cc
@@ -19,10 +19,11 @@
 #include <iostream>
 #include <fstream>
 #include <cstring>
-#include "include/errorcode.h"
-#include "include/model.h"
-#include "include/context.h"
-#include "include/lite_session.h"
+#include <memory>
+#include "include/api/model.h"
+#include "include/api/context.h"
+#include "include/api/status.h"
+#include "include/api/types.h"
 namespace {
 constexpr int kNumPrintOfOutData = 50;
 }
@@ -95,81 +96,19 @@ void GenerateRandomData(int size, void *data, Distribution distribution) {
                         [&distribution, &random_engine]() { return static_cast<T>(distribution(random_engine)); });
 }
 
-int GenerateInputDataWithRandom(std::vector<mindspore::tensor::MSTensor *> inputs) {
+int GenerateInputDataWithRandom(std::vector<mindspore::MSTensor> inputs) {
   for (auto tensor : inputs) {
-    auto input_data = tensor->MutableData();
+    auto input_data = tensor.MutableData();
     if (input_data == nullptr) {
       std::cerr << "MallocData for inTensor failed." << std::endl;
       return -1;
     }
-    GenerateRandomData<float>(tensor->Size(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
+    GenerateRandomData<float>(tensor.DataSize(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
   }
-  return mindspore::lite::RET_OK;
+  return mindspore::kSuccess;
 }
 
-int Run(mindspore::session::LiteSession *session) {
-  auto inputs = session->GetInputs();
-
-  // Generate random data as input data.
-  auto ret = GenerateInputDataWithRandom(inputs);
-  if (ret != mindspore::lite::RET_OK) {
-    std::cerr << "Generate Random Input Data failed." << std::endl;
-    return ret;
-  }
-
-  // Run Inference.
-  ret = session->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
-  }
-
-  // Get Output Tensor Data.
-  auto out_tensors = session->GetOutputs();
-  for (auto tensor : out_tensors) {
-    std::cout << "tensor name is:" << tensor.first << " tensor size is:" << tensor.second->Size()
-              << " tensor elements num is:" << tensor.second->ElementsNum() << std::endl;
-    auto out_data = reinterpret_cast<float *>(tensor.second->MutableData());
-    std::cout << "output data is:";
-    for (int i = 0; i < tensor.second->ElementsNum() && i <= kNumPrintOfOutData; i++) {
-      std::cout << out_data[i] << " ";
-    }
-    std::cout << std::endl;
-  }
-  return mindspore::lite::RET_OK;
-}
-
-mindspore::session::LiteSession *Compile(mindspore::lite::Model *model) {
-  // Create and init context.
-  auto context = std::make_shared<mindspore::lite::Context>();
-  if (context == nullptr) {
-    std::cerr << "New context failed while." << std::endl;
-    return nullptr;
-  }
-
-  // Create the session.
-  mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get());
-  if (session == nullptr) {
-    std::cerr << "CreateSession failed while running." << std::endl;
-    return nullptr;
-  }
-
-  // Compile graph.
-  auto ret = session->CompileGraph(model);
-  if (ret != mindspore::lite::RET_OK) {
-    delete session;
-    std::cerr << "Compile failed while running." << std::endl;
-    return nullptr;
-  }
-
-  // Note: when use model->Free(), the model can not be compiled again.
-  if (model != nullptr) {
-    model->Free();
-  }
-  return session;
-}
-
-int CompileAndRun(int argc, const char **argv) {
+int QuickStart(int argc, const char **argv) {
   if (argc < 2) {
     std::cerr << "Model file must be provided.\n";
     return -1;
@@ -177,7 +116,7 @@ int CompileAndRun(int argc, const char **argv) {
   // Read model file.
   auto model_path = RealPath(argv[1]);
   if (model_path.empty()) {
-    std::cerr << "model path " << argv[1] << " is invalid.";
+    std::cerr << "Model path " << argv[1] << " is invalid.";
     return -1;
   }
   size_t size = 0;
@@ -186,33 +125,74 @@ int CompileAndRun(int argc, const char **argv) {
     std::cerr << "Read model file failed." << std::endl;
     return -1;
   }
-  // Load the .ms model.
-  auto model = mindspore::lite::Model::Import(model_buf, size);
-  delete[](model_buf);
+
+  // Create and init context, add CPU device info
+  auto context = std::make_shared<mindspore::Context>();
+  if (context == nullptr) {
+    delete[](model_buf);
+    std::cerr << "New context failed." << std::endl;
+    return -1;
+  }
+  auto &device_list = context->MutableDeviceInfo();
+  auto device_info = std::make_shared<mindspore::CPUDeviceInfo>();
+  if (device_info == nullptr) {
+    delete[](model_buf);
+    std::cerr << "New CPUDeviceInfo failed." << std::endl;
+    return -1;
+  }
+  device_list.push_back(device_info);
+
+  // Create model
+  auto model = new (std::nothrow) mindspore::Model();
   if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
+    delete[](model_buf);
+    std::cerr << "New Model failed." << std::endl;
     return -1;
   }
-  // Compile MindSpore Lite model.
-  auto session = Compile(model);
-  if (session == nullptr) {
+  // Build model
+  auto build_ret = model->Build(model_buf, size, mindspore::kMindIR, context);
+  delete[](model_buf);
+  if (build_ret != mindspore::kSuccess) {
     delete model;
-    std::cerr << "Create session failed." << std::endl;
+    std::cerr << "Build model failed." << std::endl;
     return -1;
   }
-  // Run inference.
-  auto ret = Run(session);
-  if (ret != mindspore::lite::RET_OK) {
+
+  // Get Input
+  auto inputs = model->GetInputs();
+  // Generate random data as input data.
+  auto ret = GenerateInputDataWithRandom(inputs);
+  if (ret != mindspore::kSuccess) {
     delete model;
-    delete session;
-    std::cerr << "MindSpore Lite run failed." << std::endl;
+    std::cerr << "Generate Random Input Data failed." << std::endl;
     return -1;
   }
-  // Delete model buffer.
+  // Get Output
+  auto outputs = model->GetOutputs();
+
+  // Model Predict
+  auto predict_ret = model->Predict(inputs, &outputs);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Predict error " << ret << std::endl;
+    return ret;
+  }
+
+  // Print Output Tensor Data.
+  for (auto tensor : outputs) {
+    std::cout << "tensor name is:" << tensor.Name() << " tensor size is:" << tensor.DataSize()
+              << " tensor elements num is:" << tensor.ElementNum() << std::endl;
+    auto out_data = reinterpret_cast<const float *>(tensor.Data().get());
+    std::cout << "output data is:";
+    for (int i = 0; i < tensor.ElementNum() && i <= 50; i++) {
+      std::cout << out_data[i] << " ";
+    }
+    std::cout << std::endl;
+  }
+
+  // Delete model.
   delete model;
-  // Delete session buffer.
-  delete session;
-  return mindspore::lite::RET_OK;
+  return mindspore::kSuccess;
 }
 
-int main(int argc, const char **argv) { return CompileAndRun(argc, argv); }
+int main(int argc, const char **argv) { return QuickStart(argc, argv); }
diff --git a/mindspore/lite/examples/runtime_cpp/build.sh b/mindspore/lite/examples/runtime_cpp/build.sh
index 4fafbfc8922..75b9553d11e 100644
--- a/mindspore/lite/examples/runtime_cpp/build.sh
+++ b/mindspore/lite/examples/runtime_cpp/build.sh
@@ -54,7 +54,7 @@ checkopts()
           continue
         elif [[ "X${DEVICE}" == "Xnpu" ]]; then
           MINDSPORE_FILE_NAME="mindspore-lite-${VERSION_STR}-android-aarch64"
-          MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/${MINDSPORE_FILE}"
+          MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/npu/${MINDSPORE_FILE}"
           SUPPORT_NPU="on"
         else
           echo "Unknown DEVICE option ${OPTARG}!"
@@ -89,10 +89,10 @@ if [ ! -e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then
   wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL}
 fi
 tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/
-cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib
-cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/
+cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib
+cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/
 if [[ "X${DEVICE}" == "Xnpu" ]]; then
-    cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib
+    cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib
 fi
 cd ${BASEPATH}/build || exit
 cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
diff --git a/mindspore/lite/examples/runtime_cpp/main.cc b/mindspore/lite/examples/runtime_cpp/main.cc
index 564f16fccf8..cef2f4845a6 100644
--- a/mindspore/lite/examples/runtime_cpp/main.cc
+++ b/mindspore/lite/examples/runtime_cpp/main.cc
@@ -20,11 +20,11 @@
 #include <fstream>
 #include <thread>
 #include <algorithm>
-#include "include/errorcode.h"
-#include "include/model.h"
-#include "include/context.h"
-#include "include/lite_session.h"
-#include "include/version.h"
+#include "include/api/allocator.h"
+#include "include/api/model.h"
+#include "include/api/context.h"
+#include "include/api/types.h"
+#include "include/api/serialization.h"
 
 std::string RealPath(const char *path) {
   const size_t max = 4096;
@@ -99,218 +99,231 @@ void GenerateRandomData(int size, void *data, Distribution distribution) {
                         [&]() { return static_cast<T>(distribution(random_engine)); });
 }
 
-std::shared_ptr<mindspore::lite::Context> CreateCPUContext() {
-  auto context = std::make_shared<mindspore::lite::Context>();
-  if (context == nullptr) {
-    std::cerr << "New context failed while running." << std::endl;
+std::shared_ptr<mindspore::CPUDeviceInfo> CreateCPUDeviceInfo() {
+  auto device_info = std::make_shared<mindspore::CPUDeviceInfo>();
+  if (device_info == nullptr) {
+    std::cerr << "New CPUDeviceInfo failed." << std::endl;
     return nullptr;
   }
-  // Configure the number of worker threads in the thread pool to 2, including the main thread.
-  context->thread_num_ = 2;
-  // CPU device context has default values.
-  auto &cpu_device_info = context->device_list_[0].device_info_.cpu_device_info_;
-  // The large core takes priority in thread and core binding methods. This parameter will work in the BindThread
-  // interface. For specific binding effect, see the "Run Graph" section.
-  cpu_device_info.cpu_bind_mode_ = mindspore::lite::HIGHER_CPU;
   // Use float16 operator as priority.
-  cpu_device_info.enable_float16_ = true;
-  return context;
+  device_info->SetEnableFP16(true);
+  return device_info;
 }
 
-std::shared_ptr<mindspore::lite::Context> CreateGPUContext() {
-  auto context = std::make_shared<mindspore::lite::Context>();
-  if (context == nullptr) {
-    std::cerr << "New context failed while running. " << std::endl;
+std::shared_ptr<mindspore::GPUDeviceInfo> CreateGPUDeviceInfo() {
+  auto device_info = std::make_shared<mindspore::GPUDeviceInfo>();
+  if (device_info == nullptr) {
+    std::cerr << "New GPUDeviceInfo failed." << std::endl;
     return nullptr;
   }
-
-  // If GPU device context is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on
+  // If GPU device info is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on
   // the GPU first, otherwise it will run on the CPU.
-  mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}};
   // GPU use float16 operator as priority.
-  gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true;
-  // The GPU device context needs to be push_back into device_list to work.
-  context->device_list_.push_back(gpu_device_ctx);
-  return context;
+  device_info->SetEnableFP16(true);
+  return device_info;
 }
 
-std::shared_ptr<mindspore::lite::Context> CreateNPUContext() {
-  auto context = std::make_shared<mindspore::lite::Context>();
-  if (context == nullptr) {
-    std::cerr << "New context failed while running. " << std::endl;
+std::shared_ptr<mindspore::KirinNPUDeviceInfo> CreateNPUDeviceInfo() {
+  auto device_info = std::make_shared<mindspore::KirinNPUDeviceInfo>();
+  if (device_info == nullptr) {
+    std::cerr << "New KirinNPUDeviceInfo failed." << std::endl;
     return nullptr;
   }
-  mindspore::lite::DeviceContext npu_device_ctx{mindspore::lite::DT_NPU};
-  npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3;
-  // The NPU device context needs to be push_back into device_list to work.
-  context->device_list_.push_back(npu_device_ctx);
-  return context;
+  device_info->SetFrequency(3);
+  return device_info;
 }
 
-int GetInputsAndSetData(mindspore::session::LiteSession *session) {
-  auto inputs = session->GetInputs();
-
+mindspore::Status GetInputsAndSetData(mindspore::Model *model) {
+  auto inputs = model->GetInputs();
   // The model has only one input tensor.
   auto in_tensor = inputs.front();
   if (in_tensor == nullptr) {
     std::cerr << "Input tensor is nullptr" << std::endl;
-    return -1;
+    return mindspore::kLiteNullptr;
   }
-  auto input_data = in_tensor->MutableData();
+  auto input_data = in_tensor.MutableData();
   if (input_data == nullptr) {
     std::cerr << "MallocData for inTensor failed." << std::endl;
-    return -1;
+    return mindspore::kLiteNullptr;
   }
-  GenerateRandomData<float>(in_tensor->Size(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
-
-  return 0;
+  GenerateRandomData<float>(in_tensor.DataSize(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
+  return mindspore::kSuccess;
 }
 
-int GetInputsByTensorNameAndSetData(mindspore::session::LiteSession *session) {
-  auto in_tensor = session->GetInputsByTensorName("graph_input-173");
+mindspore::Status GetInputsByTensorNameAndSetData(mindspore::Model *model) {
+  auto in_tensor = model->GetInputByTensorName("graph_input-173");
   if (in_tensor == nullptr) {
     std::cerr << "Input tensor is nullptr" << std::endl;
-    return -1;
+    return mindspore::kLiteNullptr;
   }
-  auto input_data = in_tensor->MutableData();
+  auto input_data = in_tensor.MutableData();
   if (input_data == nullptr) {
     std::cerr << "MallocData for inTensor failed." << std::endl;
-    return -1;
+    return mindspore::kLiteNullptr;
   }
-  GenerateRandomData<float>(in_tensor->Size(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
-  return 0;
+  GenerateRandomData<float>(in_tensor.DataSize(), input_data, std::uniform_real_distribution<float>(0.1f, 1.0f));
+  return mindspore::kSuccess;
 }
 
-void GetOutputsByNodeName(mindspore::session::LiteSession *session) {
+void GetOutputsByNodeName(mindspore::Model *model) {
   // model has a output node named output_node_name_0.
-  auto output_vec = session->GetOutputsByNodeName("Softmax-65");
+  auto output_vec = model->GetOutputsByNodeName("Softmax-65");
   // output node named output_node_name_0 has only one output tensor.
   auto out_tensor = output_vec.front();
   if (out_tensor == nullptr) {
     std::cerr << "Output tensor is nullptr" << std::endl;
     return;
   }
-  std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum()
+  std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum()
             << std::endl;
   // The model output data is float 32.
-  if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) {
+  if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) {
     std::cerr << "Output should in float32" << std::endl;
     return;
   }
-  auto out_data = reinterpret_cast<float *>(out_tensor->MutableData());
+  auto out_data = reinterpret_cast<float *>(out_tensor.MutableData());
   if (out_data == nullptr) {
     std::cerr << "Data of out_tensor is nullptr" << std::endl;
     return;
   }
   std::cout << "output data is:";
-  for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) {
+  for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) {
     std::cout << out_data[i] << " ";
   }
   std::cout << std::endl;
 }
 
-void GetOutputByTensorName(mindspore::session::LiteSession *session) {
+void GetOutputByTensorName(mindspore::Model *model) {
   // We can use GetOutputTensorNames method to get all name of output tensor of model which is in order.
-  auto tensor_names = session->GetOutputTensorNames();
-  // Use output tensor name returned by GetOutputTensorNames as key
+  auto tensor_names = model->GetOutputTensorNames();
   for (const auto &tensor_name : tensor_names) {
-    auto out_tensor = session->GetOutputByTensorName(tensor_name);
+    auto out_tensor = model->GetOutputByTensorName(tensor_name);
     if (out_tensor == nullptr) {
       std::cerr << "Output tensor is nullptr" << std::endl;
       return;
     }
-    std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum()
+    std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum()
               << std::endl;
     // The model output data is float 32.
-    if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) {
+    if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) {
       std::cerr << "Output should in float32" << std::endl;
       return;
     }
-    auto out_data = reinterpret_cast<float *>(out_tensor->MutableData());
+    auto out_data = reinterpret_cast<float *>(out_tensor.MutableData());
     if (out_data == nullptr) {
       std::cerr << "Data of out_tensor is nullptr" << std::endl;
       return;
     }
     std::cout << "output data is:";
-    for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) {
+    for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) {
       std::cout << out_data[i] << " ";
     }
     std::cout << std::endl;
   }
 }
 
-void GetOutputs(mindspore::session::LiteSession *session) {
-  auto out_tensors = session->GetOutputs();
+void GetOutputs(mindspore::Model *model) {
+  auto out_tensors = model->GetOutputs();
   for (auto out_tensor : out_tensors) {
-    std::cout << "tensor name is:" << out_tensor.first << " tensor size is:" << out_tensor.second->Size()
-              << " tensor elements num is:" << out_tensor.second->ElementsNum() << std::endl;
+    std::cout << "tensor name is:" << out_tensor.Name() << " tensor size is:" << out_tensor.DataSize()
+              << " tensor elements num is:" << out_tensor.ElementNum() << std::endl;
     // The model output data is float 32.
-    if (out_tensor.second->data_type() != mindspore::TypeId::kNumberTypeFloat32) {
+    if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) {
       std::cerr << "Output should in float32" << std::endl;
       return;
     }
-    auto out_data = reinterpret_cast<float *>(out_tensor.second->MutableData());
+    auto out_data = reinterpret_cast<float *>(out_tensor.MutableData());
     if (out_data == nullptr) {
       std::cerr << "Data of out_tensor is nullptr" << std::endl;
       return;
     }
     std::cout << "output data is:";
-    for (int i = 0; i < out_tensor.second->ElementsNum() && i < 10; i++) {
+    for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) {
       std::cout << out_data[i] << " ";
     }
     std::cout << std::endl;
   }
 }
 
-mindspore::session::LiteSession *CreateSessionAndCompileByModel(mindspore::lite::Model *model) {
-  // Create and init CPU context.
-  // If you need to use GPU or NPU, you can refer to CreateGPUContext() or CreateNPUContext().
-  auto context = CreateCPUContext();
+mindspore::Model *CreateAndBuildModel(char *model_buf, size_t model_size) {
+  // Create and init context, add CPU device info
+  auto context = std::make_shared<mindspore::Context>();
   if (context == nullptr) {
-    std::cerr << "New context failed while." << std::endl;
+    std::cerr << "New context failed." << std::endl;
     return nullptr;
   }
-
-  // Create the session.
-  mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get());
-  if (session == nullptr) {
-    std::cerr << "CreateSession failed while running." << std::endl;
+  auto &device_list = context->MutableDeviceInfo();
+  // If you need to use GPU or NPU, you can refer to CreateGPUDeviceInfo() or CreateNPUDeviceInfo().
+  auto cpu_device_info = CreateCPUDeviceInfo();
+  if (cpu_device_info == nullptr) {
+    std::cerr << "Create CPUDeviceInfo failed." << std::endl;
     return nullptr;
   }
+  device_list.push_back(cpu_device_info);
 
-  // Compile graph.
-  auto ret = session->CompileGraph(model);
-  if (ret != mindspore::lite::RET_OK) {
-    delete session;
-    std::cerr << "Compile failed while running." << std::endl;
+  // Create model
+  auto model = new (std::nothrow) mindspore::Model();
+  if (model == nullptr) {
+    std::cerr << "New Model failed." << std::endl;
     return nullptr;
   }
-
-  return session;
+  // Build model
+  auto build_ret = model->Build(model_buf, model_size, mindspore::kMindIR, context);
+  if (build_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Build model failed." << std::endl;
+    return nullptr;
+  }
+  return model;
 }
 
-mindspore::session::LiteSession *CreateSessionAndCompileByModelBuffer(char *model_buf, size_t size) {
-  auto context = std::make_shared<mindspore::lite::Context>();
+mindspore::Model *CreateAndBuildModelComplicated(char *model_buf, size_t size) {
+  // Create and init context, add CPU device info
+  auto context = std::make_shared<mindspore::Context>();
   if (context == nullptr) {
-    std::cerr << "New context failed while running" << std::endl;
+    std::cerr << "New context failed." << std::endl;
     return nullptr;
   }
-  // Use model buffer and context to create Session.
-  auto session = mindspore::session::LiteSession::CreateSession(model_buf, size, context.get());
-  if (session == nullptr) {
-    std::cerr << "CreateSession failed while running" << std::endl;
+  auto &device_list = context->MutableDeviceInfo();
+  auto cpu_device_info = CreateCPUDeviceInfo();
+  if (cpu_device_info == nullptr) {
+    std::cerr << "Create CPUDeviceInfo failed." << std::endl;
     return nullptr;
   }
-  return session;
+  device_list.push_back(cpu_device_info);
+
+  // Load graph
+  mindspore::Graph graph;
+  auto load_ret = mindspore::Serialization::Load(model_buf, size, mindspore::kMindIR, &graph);
+  if (load_ret != mindspore::kSuccess) {
+    std::cerr << "Load graph failed." << std::endl;
+    return nullptr;
+  }
+
+  // Create model
+  auto model = new (std::nothrow) mindspore::Model();
+  if (model == nullptr) {
+    std::cerr << "New Model failed." << std::endl;
+    return nullptr;
+  }
+  // Build model
+  mindspore::GraphCell graph_cell(graph);
+  auto build_ret = model->Build(graph_cell, context);
+  if (build_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Build model failed." << std::endl;
+    return nullptr;
+  }
+  return model;
 }
 
-int ResizeInputsTensorShape(mindspore::session::LiteSession *session) {
-  auto inputs = session->GetInputs();
-  std::vector<int> resize_shape = {1, 128, 128, 3};
+mindspore::Status ResizeInputsTensorShape(mindspore::Model *model) {
+  auto inputs = model->GetInputs();
+  std::vector<int64_t> resize_shape = {1, 128, 128, 3};
   // Assume the model has only one input,resize input shape to [1, 128, 128, 3]
-  std::vector<std::vector<int>> new_shapes;
+  std::vector<std::vector<int64_t>> new_shapes;
   new_shapes.push_back(resize_shape);
-  return session->Resize(inputs, new_shapes);
+  return model->Resize(inputs, new_shapes);
 }
 
 int Run(const char *model_path) {
@@ -321,47 +334,40 @@ int Run(const char *model_path) {
     std::cerr << "Read model file failed." << std::endl;
     return -1;
   }
-  // Load the .ms model.
-  auto model = mindspore::lite::Model::Import(model_buf, size);
+
+  // Create and Build MindSpore model.
+  auto model = CreateAndBuildModel(model_buf, size);
   delete[](model_buf);
   if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
+    std::cerr << "Create and build model failed." << std::endl;
     return -1;
   }
-  // Compile MindSpore Lite model.
-  auto session = CreateSessionAndCompileByModel(model);
-  if (session == nullptr) {
-    delete model;
-    std::cerr << "Create session failed." << std::endl;
-    return -1;
-  }
-
-  // Note: when use model->Free(), the model can not be compiled again.
-  model->Free();
 
   // Set inputs data.
   // You can also get input through other methods, and you can refer to GetInputsAndSetData()
-  GetInputsByTensorNameAndSetData(session);
-
-  session->BindThread(true);
-  auto ret = session->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
+  auto generate_input_ret = GetInputsByTensorNameAndSetData(model);
+  if (generate_input_ret != mindspore::kSuccess) {
     delete model;
-    delete session;
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
+    std::cerr << "Set input data error " << generate_input_ret << std::endl;
+    return -1;
+  }
+
+  auto inputs = model->GetInputs();
+  auto outputs = model->GetOutputs();
+  auto predict_ret = model->Predict(inputs, &outputs);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Predict error " << predict_ret << std::endl;
+    return -1;
   }
-  session->BindThread(false);
 
   // Get outputs data.
   // You can also get output through other methods,
   // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session);
+  GetOutputsByNodeName(model);
 
-  // Delete model buffer.
+  // Delete model.
   delete model;
-  // Delete session buffer.
-  delete session;
   return 0;
 }
 
@@ -372,57 +378,52 @@ int RunResize(const char *model_path) {
     std::cerr << "Read model file failed." << std::endl;
     return -1;
   }
-  // Load the .ms model.
-  auto model = mindspore::lite::Model::Import(model_buf, size);
+
+  // Create and Build MindSpore model.
+  auto model = CreateAndBuildModel(model_buf, size);
   delete[](model_buf);
   if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
-    return -1;
-  }
-  // Compile MindSpore Lite model.
-  auto session = CreateSessionAndCompileByModel(model);
-  if (session == nullptr) {
-    delete model;
-    std::cerr << "Create session failed." << std::endl;
+    std::cerr << "Create and build model failed." << std::endl;
     return -1;
   }
 
   // Resize inputs tensor shape.
-  auto ret = ResizeInputsTensorShape(session);
-  if (ret != mindspore::lite::RET_OK) {
+  auto resize_ret = ResizeInputsTensorShape(model);
+  if (resize_ret != mindspore::kSuccess) {
     delete model;
-    delete session;
-    std::cerr << "Resize input tensor shape error." << ret << std::endl;
-    return ret;
+    std::cerr << "Resize input tensor shape error." << resize_ret << std::endl;
+    return -1;
   }
 
   // Set inputs data.
   // You can also get input through other methods, and you can refer to GetInputsAndSetData()
-  GetInputsByTensorNameAndSetData(session);
-
-  session->BindThread(true);
-  ret = session->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
+  auto generate_input_ret = GetInputsByTensorNameAndSetData(model);
+  if (generate_input_ret != mindspore::kSuccess) {
     delete model;
-    delete session;
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
+    std::cerr << "Set input data error " << generate_input_ret << std::endl;
+    return -1;
+  }
+
+  auto inputs = model->GetInputs();
+  auto outputs = model->GetOutputs();
+  auto predict_ret = model->Predict(inputs, &outputs);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Predict error " << predict_ret << std::endl;
+    return -1;
   }
-  session->BindThread(false);
 
   // Get outputs data.
   // You can also get output through other methods,
   // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session);
+  GetOutputsByNodeName(model);
 
-  // Delete model buffer.
+  // Delete model.
   delete model;
-  // Delete session buffer.
-  delete session;
   return 0;
 }
 
-int RunCreateSessionSimplified(const char *model_path) {
+int RunCreateModelComplicated(const char *model_path) {
   size_t size = 0;
   char *model_buf = ReadFile(model_path, &size);
   if (model_buf == nullptr) {
@@ -430,86 +431,93 @@ int RunCreateSessionSimplified(const char *model_path) {
     return -1;
   }
 
-  // Compile MindSpore Lite model.
-  auto session = CreateSessionAndCompileByModelBuffer(model_buf, size);
-  if (session == nullptr) {
-    std::cerr << "Create session failed." << std::endl;
-    return -1;
-  }
-
-  // Set inputs data.
-  // You can also get input through other methods, and you can refer to GetInputsAndSetData()
-  GetInputsByTensorNameAndSetData(session);
-
-  session->BindThread(true);
-  auto ret = session->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
-    delete session;
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
-  }
-  session->BindThread(false);
-
-  // Get outputs data.
-  // You can also get output through other methods,
-  // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session);
-
-  // Delete session buffer.
-  delete session;
-  return 0;
-}
-
-int RunSessionParallel(const char *model_path) {
-  size_t size = 0;
-  char *model_buf = ReadFile(model_path, &size);
-  if (model_buf == nullptr) {
-    std::cerr << "Read model file failed." << std::endl;
-    return -1;
-  }
-  // Load the .ms model.
-  auto model = mindspore::lite::Model::Import(model_buf, size);
+  // Create and Build MindSpore model.
+  auto model = CreateAndBuildModelComplicated(model_buf, size);
   delete[](model_buf);
   if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
-    return -1;
-  }
-  // Compile MindSpore Lite model.
-  auto session1 = CreateSessionAndCompileByModel(model);
-  if (session1 == nullptr) {
-    delete model;
-    std::cerr << "Create session failed." << std::endl;
+    std::cerr << "Create and build model failed." << std::endl;
     return -1;
   }
 
-  // Compile MindSpore Lite model.
-  auto session2 = CreateSessionAndCompileByModel(model);
-  if (session2 == nullptr) {
+  // Set inputs data.
+  // You can also get input through other methods, and you can refer to GetInputsAndSetData()
+  auto generate_input_ret = GetInputsByTensorNameAndSetData(model);
+  if (generate_input_ret != mindspore::kSuccess) {
     delete model;
-    std::cerr << "Create session failed." << std::endl;
+    std::cerr << "Set input data error " << generate_input_ret << std::endl;
+    return -1;
+  }
+
+  auto inputs = model->GetInputs();
+  auto outputs = model->GetOutputs();
+  auto predict_ret = model->Predict(inputs, &outputs);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Predict error " << predict_ret << std::endl;
+    return -1;
+  }
+
+  // Get outputs data.
+  // You can also get output through other methods,
+  // and you can refer to GetOutputByTensorName() or GetOutputs().
+  GetOutputsByNodeName(model);
+
+  // Delete model.
+  delete model;
+  return 0;
+}
+
+int RunModelParallel(const char *model_path) {
+  size_t size = 0;
+  char *model_buf = ReadFile(model_path, &size);
+  if (model_buf == nullptr) {
+    std::cerr << "Read model file failed." << std::endl;
+    return -1;
+  }
+
+  // Create and Build MindSpore model.
+  auto model1 = CreateAndBuildModel(model_buf, size);
+  auto model2 = CreateAndBuildModel(model_buf, size);
+  delete[](model_buf);
+  if (model1 == nullptr || model2 == nullptr) {
+    std::cerr << "Create and build model failed." << std::endl;
     return -1;
   }
-  // Note: when use model->Free(), the model can not be compiled again.
-  model->Free();
 
   std::thread thread1([&]() {
-    GetInputsByTensorNameAndSetData(session1);
-    auto status = session1->RunGraph();
-    if (status != 0) {
-      std::cerr << "Inference error " << status << std::endl;
-      return;
+    auto generate_input_ret = GetInputsByTensorNameAndSetData(model1);
+    if (generate_input_ret != mindspore::kSuccess) {
+      std::cerr << "Model1 set input data error " << generate_input_ret << std::endl;
+      return -1;
     }
-    std::cout << "Session1 inference success" << std::endl;
+
+    auto inputs = model1->GetInputs();
+    auto outputs = model1->GetOutputs();
+    auto predict_ret = model1->Predict(inputs, &outputs);
+    if (predict_ret != mindspore::kSuccess) {
+      std::cerr << "Model1 predict error " << predict_ret << std::endl;
+      return -1;
+    }
+    std::cout << "Model1 predict success" << std::endl;
+    return 0;
   });
 
   std::thread thread2([&]() {
-    GetInputsByTensorNameAndSetData(session2);
-    auto status = session2->RunGraph();
-    if (status != 0) {
-      std::cerr << "Inference error " << status << std::endl;
-      return;
+    auto generate_input_ret = GetInputsByTensorNameAndSetData(model2);
+    if (generate_input_ret != mindspore::kSuccess) {
+      std::cerr << "Model2 set input data error " << generate_input_ret << std::endl;
+      return -1;
     }
-    std::cout << "Session2 inference success" << std::endl;
+
+    auto inputs = model2->GetInputs();
+    auto outputs = model2->GetOutputs();
+    auto predict_ret = model2->Predict(inputs, &outputs);
+    if (predict_ret != mindspore::kSuccess) {
+      std::cerr << "Model2 predict error " << predict_ret << std::endl;
+      return -1;
+    }
+    std::cout << "Model2 predict success" << std::endl;
+    return 0;
   });
 
   thread1.join();
@@ -518,17 +526,12 @@ int RunSessionParallel(const char *model_path) {
   // Get outputs data.
   // You can also get output through other methods,
   // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session1);
-  GetOutputsByNodeName(session2);
+  GetOutputsByNodeName(model1);
+  GetOutputsByNodeName(model2);
 
-  // Delete model buffer.
-  if (model != nullptr) {
-    delete model;
-    model = nullptr;
-  }
-  // Delete session buffer.
-  delete session1;
-  delete session2;
+  // Delete model.
+  delete model1;
+  delete model2;
   return 0;
 }
 
@@ -539,93 +542,103 @@ int RunWithSharedMemoryPool(const char *model_path) {
     std::cerr << "Read model file failed." << std::endl;
     return -1;
   }
-  auto model = mindspore::lite::Model::Import(model_buf, size);
-  delete[](model_buf);
-  if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
-    return -1;
-  }
 
-  auto context1 = std::make_shared<mindspore::lite::Context>();
+  auto context1 = std::make_shared<mindspore::Context>();
   if (context1 == nullptr) {
-    delete model;
-    std::cerr << "New context failed while running." << std::endl;
+    std::cerr << "New context failed." << std::endl;
     return -1;
   }
-  auto session1 = mindspore::session::LiteSession::CreateSession(context1.get());
-  if (session1 == nullptr) {
-    delete model;
-    std::cerr << "CreateSession failed while running." << std::endl;
+  auto &device_list1 = context1->MutableDeviceInfo();
+  auto device_info1 = CreateCPUDeviceInfo();
+  if (device_info1 == nullptr) {
+    std::cerr << "Create CPUDeviceInfo failed." << std::endl;
     return -1;
   }
-  auto ret = session1->CompileGraph(model);
-  if (ret != mindspore::lite::RET_OK) {
-    delete model;
-    delete session1;
-    std::cerr << "Compile failed while running." << std::endl;
+  device_list1.push_back(device_info1);
+
+  auto model1 = new (std::nothrow) mindspore::Model();
+  if (model1 == nullptr) {
+    delete[](model_buf);
+    std::cerr << "New Model failed." << std::endl;
+    return -1;
+  }
+  auto build_ret = model1->Build(model_buf, size, mindspore::kMindIR, context1);
+  if (build_ret != mindspore::kSuccess) {
+    delete[](model_buf);
+    delete model1;
+    std::cerr << "Build model failed." << std::endl;
     return -1;
   }
 
-  auto context2 = std::make_shared<mindspore::lite::Context>();
+  auto context2 = std::make_shared<mindspore::Context>();
   if (context2 == nullptr) {
-    delete model;
-    std::cerr << "New  context failed while running." << std::endl;
+    delete[](model_buf);
+    delete model1;
+    std::cerr << "New context failed." << std::endl;
+    return -1;
+  }
+  auto &device_list2 = context2->MutableDeviceInfo();
+  auto device_info2 = CreateCPUDeviceInfo();
+  if (device_info2 == nullptr) {
+    delete[](model_buf);
+    delete model1;
+    std::cerr << "Create CPUDeviceInfo failed." << std::endl;
     return -1;
   }
   // Use the same allocator to share the memory pool.
-  context2->allocator = context1->allocator;
+  device_info2->SetAllocator(device_info1->GetAllocator());
+  device_list2.push_back(device_info2);
 
-  auto session2 = mindspore::session::LiteSession::CreateSession(context2.get());
-  if (session2 == nullptr) {
-    delete model;
-    delete session1;
-    std::cerr << "CreateSession failed while running " << std::endl;
+  auto model2 = new (std::nothrow) mindspore::Model();
+  if (model2 == nullptr) {
+    delete[](model_buf);
+    delete model1;
+    std::cerr << "New Model failed." << std::endl;
     return -1;
   }
-
-  ret = session2->CompileGraph(model);
-  if (ret != mindspore::lite::RET_OK) {
-    delete model;
-    delete session1;
-    delete session2;
-    std::cerr << "Compile failed while running " << std::endl;
+  build_ret = model2->Build(model_buf, size, mindspore::kMindIR, context2);
+  delete[](model_buf);
+  if (build_ret != mindspore::kSuccess) {
+    delete model1;
+    delete model2;
+    std::cerr << "Build model failed." << std::endl;
     return -1;
   }
 
-  // Note: when use model->Free(), the model can not be compiled again.
-  model->Free();
-
   // Set inputs data.
   // You can also get input through other methods, and you can refer to GetInputsAndSetData()
-  GetInputsByTensorNameAndSetData(session1);
-  GetInputsByTensorNameAndSetData(session2);
+  GetInputsByTensorNameAndSetData(model1);
+  GetInputsByTensorNameAndSetData(model2);
 
-  ret = session1->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
+  auto inputs1 = model1->GetInputs();
+  auto outputs1 = model1->GetOutputs();
+  auto predict_ret = model1->Predict(inputs1, &outputs1);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model1;
+    delete model2;
+    std::cerr << "Inference error " << predict_ret << std::endl;
+    return -1;
   }
 
-  ret = session2->RunGraph();
-  if (ret != mindspore::lite::RET_OK) {
-    delete model;
-    delete session1;
-    delete session2;
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
+  auto inputs2 = model2->GetInputs();
+  auto outputs2 = model2->GetOutputs();
+  predict_ret = model2->Predict(inputs2, &outputs2);
+  if (predict_ret != mindspore::kSuccess) {
+    delete model1;
+    delete model2;
+    std::cerr << "Inference error " << predict_ret << std::endl;
+    return -1;
   }
 
   // Get outputs data.
   // You can also get output through other methods,
   // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session1);
-  GetOutputsByNodeName(session2);
+  GetOutputsByNodeName(model1);
+  GetOutputsByNodeName(model2);
 
-  // Delete model buffer.
-  delete model;
-  // Delete session buffer.
-  delete session1;
-  delete session2;
+  // Delete model.
+  delete model1;
+  delete model2;
   return 0;
 }
 
@@ -636,62 +649,56 @@ int RunCallback(const char *model_path) {
     std::cerr << "Read model file failed." << std::endl;
     return -1;
   }
-  // Load the .ms model.
-  auto model = mindspore::lite::Model::Import(model_buf, size);
+
+  // Create and Build MindSpore model.
+  auto model = CreateAndBuildModel(model_buf, size);
   delete[](model_buf);
   if (model == nullptr) {
-    std::cerr << "Import model file failed." << std::endl;
-    return -1;
-  }
-  // Compile MindSpore Lite model.
-  auto session = CreateSessionAndCompileByModel(model);
-  if (session == nullptr) {
     delete model;
-    std::cerr << "Create session failed." << std::endl;
+    std::cerr << "Create model failed." << std::endl;
     return -1;
   }
 
-  // Note: when use model->Free(), the model can not be compiled again.
-  model->Free();
-
   // Set inputs data.
   // You can also get input through other methods, and you can refer to GetInputsAndSetData()
-  GetInputsByTensorNameAndSetData(session);
+  auto generate_input_ret = GetInputsByTensorNameAndSetData(model);
+  if (generate_input_ret != mindspore::kSuccess) {
+    delete model;
+    std::cerr << "Set input data error " << generate_input_ret << std::endl;
+    return -1;
+  }
 
   // Definition of callback function before forwarding operator.
-  auto before_call_back = [](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
-                             const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
-                             const mindspore::CallBackParam &call_param) {
-    std::cout << "Before forwarding " << call_param.node_name << " " << call_param.node_type << std::endl;
+  auto before_call_back = [](const std::vector<mindspore::MSTensor> &before_inputs,
+                             const std::vector<mindspore::MSTensor> &before_outputs,
+                             const mindspore::MSCallBackParam &call_param) {
+    std::cout << "Before forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl;
     return true;
   };
   // Definition of callback function after forwarding operator.
-  auto after_call_back = [](const std::vector<mindspore::tensor::MSTensor *> &after_inputs,
-                            const std::vector<mindspore::tensor::MSTensor *> &after_outputs,
-                            const mindspore::CallBackParam &call_param) {
-    std::cout << "After forwarding " << call_param.node_name << " " << call_param.node_type << std::endl;
+  auto after_call_back = [](const std::vector<mindspore::MSTensor> &after_inputs,
+                            const std::vector<mindspore::MSTensor> &after_outputs,
+                            const mindspore::MSCallBackParam &call_param) {
+    std::cout << "After forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl;
     return true;
   };
 
-  session->BindThread(true);
-  auto ret = session->RunGraph(before_call_back, after_call_back);
-  if (ret != mindspore::lite::RET_OK) {
+  auto inputs = model->GetInputs();
+  auto outputs = model->GetOutputs();
+  auto predict_ret = model->Predict(inputs, &outputs, before_call_back, after_call_back);
+  if (predict_ret != mindspore::kSuccess) {
     delete model;
-    delete session;
-    std::cerr << "Inference error " << ret << std::endl;
-    return ret;
+    std::cerr << "Predict error " << predict_ret << std::endl;
+    return -1;
   }
-  session->BindThread(false);
 
   // Get outputs data.
   // You can also get output through other methods,
   // and you can refer to GetOutputByTensorName() or GetOutputs().
-  GetOutputsByNodeName(session);
+  GetOutputsByNodeName(model);
 
-  // Delete model buffer.
+  // Delete model.
   delete model;
-  // Delete session buffer.
-  delete session;
   return 0;
 }
 
@@ -699,16 +706,15 @@ int main(int argc, const char **argv) {
   if (argc < 3) {
     std::cerr << "Usage: ./runtime_cpp model_path Option" << std::endl;
     std::cerr << "Example: ./runtime_cpp ../model/mobilenetv2.ms 0" << std::endl;
-    std::cerr << "When your Option is 0, you will run MindSpore Lite inference." << std::endl;
-    std::cerr << "When your Option is 1, you will run MindSpore Lite inference with resize." << std::endl;
-    std::cerr << "When your Option is 2, you will run MindSpore Lite inference with CreateSession simplified API."
-              << std::endl;
-    std::cerr << "When your Option is 3, you will run MindSpore Lite inference with session parallel." << std::endl;
-    std::cerr << "When your Option is 4, you will run MindSpore Lite inference with shared memory pool." << std::endl;
-    std::cerr << "When your Option is 5, you will run MindSpore Lite inference with callback." << std::endl;
+    std::cerr << "When your Option is 0, you will run MindSpore Lite predict." << std::endl;
+    std::cerr << "When your Option is 1, you will run MindSpore Lite predict with resize." << std::endl;
+    std::cerr << "When your Option is 2, you will run MindSpore Lite predict with complicated API." << std::endl;
+    std::cerr << "When your Option is 3, you will run MindSpore Lite predict with model parallel." << std::endl;
+    std::cerr << "When your Option is 4, you will run MindSpore Lite predict with shared memory pool." << std::endl;
+    std::cerr << "When your Option is 5, you will run MindSpore Lite predict with callback." << std::endl;
     return -1;
   }
-  std::string version = mindspore::lite::Version();
+  std::string version = mindspore::Version();
   std::cout << "MindSpore Lite Version is " << version << std::endl;
   auto model_path = RealPath(argv[1]);
   if (model_path.empty()) {
@@ -721,9 +727,9 @@ int main(int argc, const char **argv) {
   } else if (strcmp(flag, "1") == 0) {
     return RunResize(model_path.c_str());
   } else if (strcmp(flag, "2") == 0) {
-    return RunCreateSessionSimplified(model_path.c_str());
+    return RunCreateModelComplicated(model_path.c_str());
   } else if (strcmp(flag, "3") == 0) {
-    return RunSessionParallel(model_path.c_str());
+    return RunModelParallel(model_path.c_str());
   } else if (strcmp(flag, "4") == 0) {
     return RunWithSharedMemoryPool(model_path.c_str());
   } else if (strcmp(flag, "5") == 0) {
diff --git a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh
index b34469175e8..66557812f01 100755
--- a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh
+++ b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh
@@ -75,6 +75,10 @@ LD_LIBRARY_PATH=${MSLITE_LINUX}/tools/converter/lib/:${MSLITE_LINUX}/tools/conve
 EXPORT=${EXPORT} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} CONVERTER=${CONVERTER} ./prepare_model.sh $DOCKER || exit 1
 cd ../
 
+if [ "$TARBALL" != "" ]; then
+  rm -rf build
+fi
+
 cd target || exit 1
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib/ 
 java -Djava.library.path=../lib/ -classpath .:./train_lenet_java.jar:../lib/mindspore-lite-java.jar  com.mindspore.lite.train_lenet.Main ../model/lenet_tod.ms $MNIST_DATA_PATH 1
diff --git a/mindspore/lite/include/registry/kernel_interface.h b/mindspore/lite/include/registry/kernel_interface.h
index 0988c3f2395..4ca4d05cb74 100644
--- a/mindspore/lite/include/registry/kernel_interface.h
+++ b/mindspore/lite/include/registry/kernel_interface.h
@@ -27,12 +27,6 @@
 
 namespace mindspore {
 namespace kernel {
-/// \brief CapabilityParam defined performance of op when running.
-struct MS_API CapabilityParam {
-  float exec_time_;   /**< op running time argument */
-  float power_usage_; /**< op power waste argument */
-};
-
 /// \brief KernelInterface defined customized op's interface, such as infershape, and so on.
 class MS_API KernelInterface {
  public:
@@ -50,18 +44,6 @@ class MS_API KernelInterface {
                     const schema::Primitive *primitive) {
     return 0;
   }
-
-  /// \brief Method to get performance of an op when running.
-  ///
-  /// \param[in] tensor_in Define the input tensors of op.
-  /// \param[in] primitive Define the attributes of op.
-  /// \param[in] param Define the contr of performance.
-  ///
-  /// \return STATUS as an error code of inferring, STATUS is defined in errorcode.h.
-  virtual int GetCapability(const std::vector<mindspore::MSTensor> &tensor_in, const schema::Primitive *primitive,
-                            CapabilityParam *param) {
-    return 0;
-  }
 };
 
 /// \brief KernelInterfaceCreator defined a functor to create KernelInterface.
diff --git a/mindspore/lite/include/registry/register_kernel.h b/mindspore/lite/include/registry/register_kernel.h
index 1c521b78352..21289bfd77f 100644
--- a/mindspore/lite/include/registry/register_kernel.h
+++ b/mindspore/lite/include/registry/register_kernel.h
@@ -29,26 +29,6 @@
 
 namespace mindspore {
 namespace kernel {
-/// \brief KernelDesc defined kernel's basic attribute.
-struct MS_API KernelDesc {
-  TypeId data_type;     /**< kernel data type argument */
-  int type;             /**< op type argument */
-  std::string arch;     /**< deviceType argument */
-  std::string provider; /**< user identification argument */
-
-  bool operator<(const KernelDesc &dst) const {
-    if (provider != dst.provider) {
-      return provider < dst.provider;
-    } else if (arch != dst.arch) {
-      return arch < dst.arch;
-    } else if (data_type != dst.data_type) {
-      return data_type < dst.data_type;
-    } else {
-      return type < dst.type;
-    }
-  }
-};
-
 /// \brief CreateKernel Defined a functor to create a kernel.
 ///
 /// \param[in] inputs Define input tensors of kernel.
@@ -87,14 +67,6 @@ class MS_API RegisterKernel {
   /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h.
   static int RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type,
                              const std::string &type, CreateKernel creator);
-
-  /// \brief Static methon to get a kernel's create function.
-  ///
-  /// \param[in] desc Define kernel's basic attribute.
-  /// \param[in] primitive Define the attributes of op.
-  ///
-  /// \return Function pointer to create a kernel.
-  static CreateKernel GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc);
 };
 
 /// \brief KernelReg Defined registration class of kernel.
diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake
index 4e52a61d853..86543f44cdc 100644
--- a/mindspore/lite/micro/cmake/file_list.cmake
+++ b/mindspore/lite/micro/cmake/file_list.cmake
@@ -139,6 +139,7 @@ set(LITE_SRC
         ${LITE_DIR}/src/registry/kernel_interface.cc
         ${LITE_DIR}/src/registry/kernel_interface_registry.cc
         ${LITE_DIR}/src/registry/register_kernel.cc
+        ${LITE_DIR}/src/registry/register_utils.cc
         ${LITE_DIR}/src/registry/register_kernel_impl.cc
         ${LITE_DIR}/src/lite_model.cc
         ${LITE_DIR}/src/ms_tensor.cc
diff --git a/mindspore/lite/micro/coder/generator/component/weight_component.cc b/mindspore/lite/micro/coder/generator/component/weight_component.cc
index ab8fb428bcf..6b377b540e7 100644
--- a/mindspore/lite/micro/coder/generator/component/weight_component.cc
+++ b/mindspore/lite/micro/coder/generator/component/weight_component.cc
@@ -158,5 +158,4 @@ void SaveDataToNet(const std::map<std::string, Tensor *> &saved_weights, const s
   }
   net.close();
 }
-
 }  // namespace mindspore::lite::micro
diff --git a/mindspore/lite/micro/coder/graph.cc b/mindspore/lite/micro/coder/graph.cc
index 1e7a9c7f21c..4044fc4eb3b 100644
--- a/mindspore/lite/micro/coder/graph.cc
+++ b/mindspore/lite/micro/coder/graph.cc
@@ -30,8 +30,11 @@
 
 namespace mindspore::lite::micro {
 CoderGraph::~CoderGraph() {
-  model_->Free();
-  delete model_;
+  if (model_ != nullptr) {
+    model_->Free();
+    delete model_;
+    model_ = nullptr;
+  }
   for (auto &tensor : all_tensors_) {
     delete tensor;
   }
diff --git a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
index 11abe860696..1aed5bb1e21 100644
--- a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc
@@ -147,15 +147,16 @@ void Conv2DInt8Coder::CheckSupportOptimize() {
 }
 
 int Conv2DInt8Coder::InitTmpBuffer() {
+  const size_t kPartial = 2;
   switch (opt_) {
     case Basic:
       buffer_size_ =
-        static_cast<size_t>(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
+        static_cast<size_t>(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
         sizeof(int16_t);
       break;
     case Convolve_1_x_n:
       buffer_size_ =
-        static_cast<size_t>(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
+        static_cast<size_t>(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) *
         sizeof(int16_t);
       break;
     case Convolve_1x1_fast:
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
index b6e86dd4af5..79a52ac0d0a 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc
@@ -38,10 +38,8 @@ int AddNFP32Coder::DoCode(CoderContext *const context) {
           });
   NNaclFp32Serializer code;
   code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num);
-  if (input_tensors_.size() > 2) {
-    for (size_t i = 2; i < input_tensors_.size(); ++i) {
-      code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num);
-    }
+  for (size_t i = 2; i < input_tensors_.size(); ++i) {
+    code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num);
   }
   context->AppendCode(code.str());
   return RET_OK;
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
index 11725e88b71..55f0a6e5023 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc
@@ -53,6 +53,8 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) {
   MS_CHECK_TRUE(input_tensors_.size() == DIMENSION_3D, "inputs size is not equal to three");
   Tensor *mean_tensor = input_tensors_.at(1);
   Tensor *var_tensor = input_tensors_.at(kInputSize1);
+  MS_CHECK_PTR(mean_tensor);
+  MS_CHECK_PTR(var_tensor);
   Collect(context,
           {
             "nnacl/fp32/batchnorm.h",
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
index d179eb0b4d8..b2b689facae 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc
@@ -55,6 +55,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) {
   arithmetic_parameter_->broadcasting_ = false;
   arithmetic_parameter_->ndim_ = dims.size();
   arithmetic_parameter_->activation_type_ = 0;
+  MS_CHECK_TRUE(dims.size() <= DIMENSION_10D, "dims.size() must not be greater than 10!");
   for (size_t i = 0; i < dims.size(); i++) {
     arithmetic_parameter_->in_shape0_[i] = dims[i];
   }
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
index 782e1d961f0..3d974db3259 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc
@@ -23,7 +23,7 @@
 
 namespace mindspore::lite::micro::nnacl {
 int ConvolutionDepthwiseFP32Coder::Prepare(CoderContext *const context) {
-  Conv2DBaseCoder::Init();
+  MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed!");
   MS_CHECK_RET_CODE(InitWeightBias(), "dwconvolution do init weightbais failed");
   conv_param_->thread_num_ = MSMIN(thread_num_, conv_param_->output_h_);
   return RET_OK;
@@ -83,5 +83,4 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) {
   context->AppendCode(code.str());
   return RET_OK;
 }
-
 }  // namespace mindspore::lite::micro::nnacl
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
index b16b0b402b8..c1c223515db 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc
@@ -146,8 +146,8 @@ int ConvolutionWinogradFP32Coder::InitWeightBias() {
   if (input_unit_ == DIMENSION_8D) {
     coef = 0.5f;
   }
-  CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_);
-
+  ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_);
+  MS_CHECK_RET_CODE(ret, "CookToomFilter failed!");
   auto out_channel_size = static_cast<size_t>(out_channel);
   auto weight_data = reinterpret_cast<float *>(filter_tensor_->MutableData());
   ret = WinogradFilterTransform(weight_data, matrix_g, matrix_gt, oc_block);
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc
index ed623b01f32..522629049b9 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc
@@ -28,6 +28,7 @@ int FullConnectionFP32Coder::ReSize() {
   }
   params_->row_ = row;
   params_->col_ = output_tensor_->shape().back();
+  MS_CHECK_TRUE(filter_tensor_->shape().size() >= DIMENSION_2D, "filter_tensor_->shape().size() < DIMENSION_2D");
   params_->deep_ = filter_tensor_->shape().at(1);
   return MatMulFP32BaseCoder::ReSize();
 }
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
index 9e049a31b03..25f044176c6 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc
@@ -30,7 +30,8 @@ int GatherFP32Coder::Prepare(CoderContext *const context) { return RET_OK; }
 int GatherFP32Coder::DoCode(CoderContext *context) {
   Tensor *input0 = input_tensors_.at(0);
   Tensor *input1 = input_tensors_.at(1);
-
+  MS_CHECK_PTR(input0);
+  MS_CHECK_PTR(input1);
   // generate code .h .c
   Collect(context,
           {
@@ -42,8 +43,9 @@ int GatherFP32Coder::DoCode(CoderContext *context) {
 
   NNaclFp32Serializer code;
   std::vector<int> in_shape = input0->shape();
-  int in_rank = in_shape.size();
+  int in_rank = static_cast<int>(in_shape.size());
   int indices_element_size = input1->ElementsNum();
+  MS_CHECK_PTR(parameter_);
   int axis = (reinterpret_cast<GatherParameter *>(parameter_))->axis_;
   MS_CHECK_TRUE(static_cast<int>(in_shape.size()) >= axis, "invalid axis in gather parameter");
   const int limit = in_shape.at(axis);
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
index ce13ba8c52c..13cfc7ac7be 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc
@@ -30,7 +30,9 @@ using mindspore::schema::PrimitiveType_MatMul;
 namespace mindspore::lite::micro::nnacl {
 int MatMulFP32BaseCoder::ReSize() {
   ResizeParameter();
+  MS_CHECK_TRUE(params_->col_align_ != 0, "params_->col_align_ = 0");
   thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_));
+  MS_CHECK_TRUE(thread_count_ != 0, "thread_count_ = 0");
   thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_);
   // can not call Malloc in DoCode,so move this runtime init to final resize
   if (!params_->a_const_) {
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
index fbc1adef9ac..c79bc6dad7f 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc
@@ -24,7 +24,8 @@ using mindspore::schema::PrimitiveType_Softmax;
 
 namespace mindspore::lite::micro::nnacl {
 int SoftMaxFP32Coder::Prepare(CoderContext *const context) {
-  SoftmaxBaseCoder::Init();
+  auto ret = SoftmaxBaseCoder::Init();
+  MS_CHECK_RET_CODE(ret, "SoftmaxBaseCoder::Init() failed!");
   // malloc tmp buffer
   int n_dim = softmax_param_->n_dim_;
   int32_t axis = softmax_param_->axis_;
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc
index 076bbf6c492..4df09b2b9d5 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc
@@ -165,7 +165,7 @@ int Conv2DINT8Coder::InitWeightBias(CoderContext *const context) {
 }
 
 int Conv2DINT8Coder::Prepare(CoderContext *const context) {
-  Conv2DBaseCoder::Init();
+  MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
   CheckSupportOptimize();
   MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!");
   MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed.");
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc
index ee7a7277f19..00bd0993fa6 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc
@@ -24,7 +24,7 @@
 
 namespace mindspore::lite::micro {
 int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) {
-  Conv2DBaseCoder::Init();
+  MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed.");
   // init sliding window param
   MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed.");
   MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init weightbais failed");
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
index fe11a943568..00974b29eaa 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc
@@ -69,7 +69,7 @@ int ReduceInt8Coder::CalculateQuantArgs() {
       QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift);
       qm->left_shift_ = shift < 0 ? -shift : 0;
       qm->right_shift_ = shift > 0 ? shift : 0;
-      mean_multipliers_.push_back(qm);
+      prod_multipliers_.push_back(qm);
     }
   }
 
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
index 24fc4564168..bd9d05dfb94 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h
@@ -30,7 +30,21 @@ class ReduceInt8Coder final : public ReduceBaseCoder {
                   const Model::Node *node, size_t node_index, Target target)
       : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {}
 
-  ~ReduceInt8Coder() override { begin_src_data_ = nullptr; }
+  ~ReduceInt8Coder() override {
+    begin_src_data_ = nullptr;
+    for (auto &arg : mean_multipliers_) {
+      delete arg;
+      arg = nullptr;
+    }
+    for (auto &arg : prod_multipliers_) {
+      delete arg;
+      arg = nullptr;
+    }
+    for (auto &arg : sum_square_multipliers_) {
+      delete arg;
+      arg = nullptr;
+    }
+  }
 
   int Prepare(CoderContext *const context) override;
   int DoCode(CoderContext *const context) override;
diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
index 49727fd4d62..764ebbc8cd5 100644
--- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
+++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc
@@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_Softmax;
 
 namespace mindspore::lite::micro::nnacl {
 int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
-  SoftmaxBaseCoder::Init();
+  MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "Softmax base init failed.");
   std::vector<LiteQuantParam> in_quant_args = input_tensor_->quant_params();
   quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale;
   quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint;
@@ -59,8 +59,7 @@ int SoftMaxInt8Coder::Prepare(CoderContext *const context) {
   sum_data_size_ = inner_size * sizeof(int);
   sum_data_ = static_cast<int *>(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace));
   MS_CHECK_PTR(sum_data_);
-  ReSize();
-  return RET_OK;
+  return ReSize();
 }
 
 int SoftMaxInt8Coder::DoCode(CoderContext *const context) {
diff --git a/mindspore/lite/micro/coder/train.cc b/mindspore/lite/micro/coder/train.cc
index 16f873e01c2..320efe1b66e 100644
--- a/mindspore/lite/micro/coder/train.cc
+++ b/mindspore/lite/micro/coder/train.cc
@@ -55,6 +55,10 @@ std::set<OperatorCoder *> FindInferenceOpcoders(OperatorCoder *edge) {
 }
 
 int Train::TransformGraphForTrain(CoderContext *context, const std::vector<std::unique_ptr<OperatorCoder>> &op_coders) {
+  if (context == nullptr) {
+    MS_LOG(INFO) << "input context invalid";
+    return RET_ERROR;
+  }
   const std::array<int, 6> loss_types = {schema::PrimitiveType_SparseSoftmaxCrossEntropyWithLogits,
                                          schema::PrimitiveType_BinaryCrossEntropy,
                                          schema::PrimitiveType_SmoothL1Loss,
diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c
index bee2c6e35e9..adb59ac25b8 100644
--- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c
+++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c
@@ -20,11 +20,12 @@ extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, in
                                   const int *input_sum, const int *bias);
 extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4,
                                const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
-                               int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
-                               size_t peroc);
+                               const int *multiplier, const int *left_shift, const int *right_shift, int row, int col,
+                               int stride, size_t peroc);
 extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
-                            const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
-                            int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
+                            const int *a_sums, const int *bias, int act_min, int act_max, int out_zp,
+                            const int *multiplier, const int *left_shift, const int *right_shift, size_t stride,
+                            size_t peroc, const int *filter_zp);
 
 #ifdef ENABLE_ARM64
 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
@@ -33,16 +34,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
 }
 
 void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                  int32_t maxi, size_t per_channel) {
+                                  size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                  const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                  int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
   return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
                             output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
 }
 void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                   int32_t maxi, size_t per_channel, int32_t *filter_zp) {
+                                   size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                   const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                   int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
+                                   const int32_t *filter_zp) {
   return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
                          right_shift, stride, per_channel, filter_zp);
 }
diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h
index 40e82acbaba..bc76939aa85 100644
--- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h
+++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h
@@ -29,13 +29,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
                                    const int *input_sum, const int *bias);
 
 void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                  int32_t maxi, size_t per_channel);
+                                  size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                  const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                  int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
 void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                   int32_t maxi, size_t per_channel, int32_t *filter_zp);
+                                   size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                   const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                   int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
+                                   const int32_t *filter_zp);
 #endif
 
 #endif  // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_
diff --git a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c
index 959d03a8a34..f4ffc047bf4 100644
--- a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c
+++ b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c
@@ -35,7 +35,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
   memset(packed_weight_, 0, size);
   RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel);
   /* bias */
-  size = UP_ROUND(output_channel, C2NUM);
+  size = (size_t)UP_ROUND(output_channel, C2NUM);
   int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t));
   if (bias_data_ == NULL) {
     free(packed_weight_);
@@ -43,7 +43,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
   }
   memset(bias_data_, 0, size * sizeof(int32_t));
   if (src_bias != NULL) {
-    memcpy(bias_data_, src_bias, output_channel * sizeof(int32_t));
+    memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t));
   }
 #else
   /* InitWeightBias */
@@ -65,6 +65,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3
   int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t));
   if (bias_data_ == NULL) {
     free(packed_weight_);
+    packed_weight_ = NULL;
     return NNACL_ERR;
   }
   memset(bias_data_, 0, size * sizeof(int32_t));
diff --git a/mindspore/lite/minddata/example/CMakeLists.txt b/mindspore/lite/minddata/example/CMakeLists.txt
index 70b9129e45b..f4403ea5d05 100644
--- a/mindspore/lite/minddata/example/CMakeLists.txt
+++ b/mindspore/lite/minddata/example/CMakeLists.txt
@@ -4,8 +4,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -fPIC -std=c++17")
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare")
 
-set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime")
-set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime/include/dataset")
+set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime")
+set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime/include/dataset")
 
 include_directories(${MS_DIR} ${LITECV_DIR})
 
diff --git a/mindspore/lite/minddata/example/testlitecv.cpp b/mindspore/lite/minddata/example/testlitecv.cpp
index bb67161485a..cd62148ea1d 100644
--- a/mindspore/lite/minddata/example/testlitecv.cpp
+++ b/mindspore/lite/minddata/example/testlitecv.cpp
@@ -58,16 +58,26 @@ int main(int argc, char **argv) {
   auto executor = Execute(decode);
   executor(image, &image);
 
-  LiteMat lite_mat_rgb(image.Shape()[1], image.Shape()[0], image.Shape()[2], const_cast<void *>(image.Data().get()),
-                       LDataType::UINT8);
+  constexpr int32_t image_h = 0;
+  constexpr int32_t image_w = 1;
+  constexpr int32_t image_c = 2;
+  LiteMat lite_mat_rgb(image.Shape()[image_w], image.Shape()[image_h], image.Shape()[image_c],
+                       const_cast<void *>(image.Data().get()), LDataType::UINT8);
   std::cout << "lite_mat_rgb: height=" << lite_mat_rgb.height_ << ", width=" << lite_mat_rgb.width_ << std::endl;
-  LiteMat lite_mat_resize;
 
-  ResizeBilinear(lite_mat_rgb, lite_mat_resize, 256, 256);
+  LiteMat lite_mat_resize;
+  constexpr target_size = 256;
+  ResizeBilinear(lite_mat_rgb, lite_mat_resize, target_size, target_size);
   std::cout << "lite_mat_resize: height=" << lite_mat_resize.height_ << ", width=" << lite_mat_resize.width_
             << std::endl;
 
   LiteMat lite_mat_pad;
-  Pad(lite_mat_resize, lite_mat_pad, 30, 30, 10, 10, PaddBorderType::PADD_BORDER_CONSTANT, 255, 255, 255);
+  constexpr int32_t pad_top = 30;
+  constexpr int32_t pad_bottom = 30;
+  constexpr int32_t pad_left = 10;
+  constexpr int32_t pad_right = 10;
+  constexpr int32_t pad_color = 255;
+  Pad(lite_mat_resize, lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right, PaddBorderType::PADD_BORDER_CONSTANT,
+      pad_color, pad_color, pad_color);
   std::cout << "lite_mat_pad: height=" << lite_mat_pad.height_ << ", width=" << lite_mat_pad.width_ << std::endl;
 }
diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc
index b05007dfd50..cca50a99a64 100644
--- a/mindspore/lite/minddata/wrapper/MDToDApi.cc
+++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc
@@ -269,10 +269,6 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) {
   MS_LOG(INFO) << "Start GetNext [1]" << pMDToDApi;
   // get next row for dataset
   std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
-  if (pMDToDApi->_iter == nullptr) {
-    MS_LOG(ERROR) << "GetNext called with no iteratoe. abort";
-    return -1;
-  }
   // create Execute functions, this replaces Map in Pipeline
 
   bool ret = pMDToDApi->_iter->GetNextRow(&row);
diff --git a/mindspore/lite/minddata/wrapper/album_op_android.cc b/mindspore/lite/minddata/wrapper/album_op_android.cc
index 48d040a96b5..472ce0a1305 100644
--- a/mindspore/lite/minddata/wrapper/album_op_android.cc
+++ b/mindspore/lite/minddata/wrapper/album_op_android.cc
@@ -177,7 +177,7 @@ bool AlbumOp::IsReadColumn(const std::string &column_name) {
   return false;
 }
 
-Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorPtr *tensor) {
   TensorPtr image;
   TensorPtr rotate_tensor;
   std::ifstream fs;
@@ -257,7 +257,7 @@ int AlbumOp::GetOrientation(const std::string &folder_path) {
   return code;
 }
 
-Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   std::vector<std::string> data = json_obj.get<std::vector<std::string>>();
 
   MS_LOG(INFO) << "String array label found: " << data << ".";
@@ -265,7 +265,7 @@ Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t c
   return Status::OK();
 }
 
-Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   std::string data = json_obj;
   // now we iterate over the elements in json
 
@@ -275,7 +275,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_nu
   return Status::OK();
 }
 
-Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   // consider templating this function to handle all ints
   if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
     std::vector<int64_t> data;
@@ -302,7 +302,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_
   return Status::OK();
 }
 
-Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   // consider templating this function to handle all ints
   if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
     std::vector<double> data;
@@ -329,7 +329,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t co
   return Status::OK();
 }
 
-Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor) {
   if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
     RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, tensor));
     return Status::OK();
@@ -341,7 +341,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPt
   return Status::OK();
 }
 
-Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) {
   // hack to get the file name without extension, the 1 is to get rid of the backslash character
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor));
   return Status::OK();
@@ -351,7 +351,7 @@ Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) {
 // So we actually have to check what type we want to fill the tensor with.
 // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to
 // only be float32, seems like a weird limitation to impose
-Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
     double data = json_obj;
     MS_LOG(INFO) << "double found: " << json_obj << ".";
@@ -365,7 +365,7 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num
 }
 
 // Loads a tensor with int value, we have to cast the value to type specified in the schema.
-Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) {
+Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) {
   if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
     int64_t data = json_obj;
     MS_LOG(INFO) << "int64 found: " << json_obj << ".";
diff --git a/mindspore/lite/minddata/wrapper/album_op_android.h b/mindspore/lite/minddata/wrapper/album_op_android.h
index 10d74d073ca..226ba66c9a4 100644
--- a/mindspore/lite/minddata/wrapper/album_op_android.h
+++ b/mindspore/lite/minddata/wrapper/album_op_android.h
@@ -93,62 +93,62 @@ class AlbumOp {
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor);
+  Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load vector of ints to tensor, append tensor to tensor
   /// \param[in] json_obj Json object containing multi-dimensional label
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load vector of floatss to tensor, append tensor to tensor
   /// \param[in] json_obj Json object containing array data
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load string array into a tensor, append tensor to tensor
   /// \param[in] json_obj Json object containing string tensor
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load string into a tensor, append tensor to tensor
   /// \param[in] json_obj Json object containing string tensor
   /// \param[in] col_num Column num in schema
   /// \param[in,out]  Tensor to push to
   /// \return Status The error code returned
-  Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load float value to tensor
   /// \param[in] json_obj Json object containing float
   /// \param[in] col_num Column num in schema
   /// \param[in,out]  Tensor to push to
   /// \return Status The error code returned
-  Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load int value to tensor
   /// \param[in] json_obj Json object containing int
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor);
+  Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load empty tensor to tensor
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor);
+  Status LoadEmptyTensor(int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load id from file name to tensor
   /// \param[in] file The file name to get ID from
   /// \param[in] col_num Column num in schema
   /// \param[in,out] Tensor to push to
   /// \return Status The error code returned
-  Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor);
+  Status LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor);
 
   /// \brief Load a tensor according to a json file
   /// \param[in] row_id_type row_id - id for this tensor row
diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs
index ded169b7171..e1721611ed2 100644
--- a/mindspore/lite/schema/ops.fbs
+++ b/mindspore/lite/schema/ops.fbs
@@ -220,6 +220,7 @@ union PrimitiveType {
     Affine,
     Attention,
     LSTMGrad,
+    ScatterNdUpdate,
 }
 
 table Abs {
@@ -1212,3 +1213,6 @@ table Affine {
 
 table Attention {
 }
+
+table ScatterNdUpdate {
+}
diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt
index 571714c701b..49e33521311 100644
--- a/mindspore/lite/src/CMakeLists.txt
+++ b/mindspore/lite/src/CMakeLists.txt
@@ -76,9 +76,9 @@ set(LITE_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/delegate/delegate.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/ms_tensor.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_model.cc
@@ -87,14 +87,30 @@ set(LITE_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel_util.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc
         )
+if(MSLITE_CONTROL_TENSORLIST)
+    set(LITE_SRC
+            ${LITE_SRC}
+            ${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc
+            )
+endif()
+if(MSLITE_HUFFMAN_DECODE)
+    set(LITE_SRC
+        ${LITE_SRC}
+        ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc
+        )
+endif()
+if(MSLITE_AUTO_PARALLEL)
+    set(LITE_SRC
+            ${LITE_SRC}
+            ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc
+            )
+endif()
 
 file(GLOB KERNEL_REG_SRC ${CMAKE_CURRENT_SOURCE_DIR}/registry/*.cc)
 set(LITE_SRC ${LITE_SRC} ${KERNEL_REG_SRC})
@@ -133,6 +149,7 @@ set(TRAIN_SRC
         ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/train/opt_allocator.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
         )
 if(ENABLE_V0)
diff --git a/mindspore/lite/src/common/dynamic_library_loader.cc b/mindspore/lite/src/common/dynamic_library_loader.cc
index de180f221d3..d27705dfbc2 100644
--- a/mindspore/lite/src/common/dynamic_library_loader.cc
+++ b/mindspore/lite/src/common/dynamic_library_loader.cc
@@ -28,7 +28,7 @@
 
 namespace mindspore {
 namespace lite {
-int DynamicLibraryLoader::Open(std::string lib_path) {
+int DynamicLibraryLoader::Open(const std::string &lib_path) {
   if (handler_ != nullptr) {
     return RET_ERROR;
   }
@@ -46,7 +46,7 @@ int DynamicLibraryLoader::Open(std::string lib_path) {
   return RET_OK;
 }
 
-void *DynamicLibraryLoader::GetFunc(std::string func_name) {
+void *DynamicLibraryLoader::GetFunc(const std::string &func_name) {
 #ifndef _WIN32
   return dlsym(handler_, func_name.c_str());
 #else
diff --git a/mindspore/lite/src/common/dynamic_library_loader.h b/mindspore/lite/src/common/dynamic_library_loader.h
index 2d07dff0fb6..d5771df81f7 100644
--- a/mindspore/lite/src/common/dynamic_library_loader.h
+++ b/mindspore/lite/src/common/dynamic_library_loader.h
@@ -25,8 +25,8 @@ class DynamicLibraryLoader {
  public:
   DynamicLibraryLoader() = default;
   ~DynamicLibraryLoader();
-  int Open(std::string lib_path);
-  void *GetFunc(std::string func_name);
+  int Open(const std::string &lib_path);
+  void *GetFunc(const std::string &func_name);
   int Close();
 
  private:
diff --git a/mindspore/lite/src/common/log_adapter.h b/mindspore/lite/src/common/log_adapter.h
index 4c773102f18..39c6b9fbefb 100644
--- a/mindspore/lite/src/common/log_adapter.h
+++ b/mindspore/lite/src/common/log_adapter.h
@@ -16,6 +16,20 @@
 
 #ifndef MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_
 #define MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_
+namespace mindspore {
+const char *const unsupport_string_tensor_log =
+  "This mindspore-lite library does not support string tensors. Set environment variable MSLITE_STRING_KERNEL to on to "
+  "recompile it.";
+const char *const unsupport_control_tensorlist_log =
+  "This mindspore-lite library does not support control and tensorlist op. Set environment variable "
+  "MSLITE_CONTROL_TENSORLIST to on to recompile it.";
+const char *const unsupport_auto_parallel_log =
+  "The mindspore-lite library does not support auto parallel. Set environment variable MSLITE_AUTO_PARALLEL to on to "
+  "recompile it.";
+const char *const unsupport_huffman_decode_log =
+  "The mindspore-lite library does not support huffman decode. Set environment variable MSLITE_HUFFMAN_DECODE to on to "
+  "recompile it.";
+}  // namespace mindspore
 #ifdef USE_GLOG
 #include "utils/log_adapter.h"
 #else
diff --git a/mindspore/lite/src/common/string_util.cc b/mindspore/lite/src/common/string_util.cc
index 23a781d2d77..a890c7fd506 100644
--- a/mindspore/lite/src/common/string_util.cc
+++ b/mindspore/lite/src/common/string_util.cc
@@ -20,6 +20,7 @@
 
 namespace mindspore {
 namespace lite {
+#ifdef ENABLE_STRING_KERNEL
 std::vector<StringPack> ParseTensorBuffer(Tensor *tensor) {
   if (tensor == nullptr) {
     MS_LOG(ERROR) << "tensor is nullptr.";
@@ -52,10 +53,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector<StringPack> &string_b
     MS_LOG(ERROR) << "tensor is nullptr.";
     return RET_ERROR;
   }
-  int32_t num = string_buffer.size();
+  size_t num = string_buffer.size();
   std::vector<int32_t> offset(num + 1);
   offset[0] = 4 * (num + 2);
-  for (int i = 0; i < num; i++) {
+  for (size_t i = 0; i < num; i++) {
     offset[i + 1] = offset[i] + string_buffer[i].len;
   }
   std::vector<int> shape = {offset[num]};
@@ -71,10 +72,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector<StringPack> &string_b
   char *string_data = reinterpret_cast<char *>(data);
 
   string_info[0] = num;
-  for (int i = 0; i <= num; i++) {
+  for (size_t i = 0; i <= num; i++) {
     string_info[i + 1] = offset[i];
   }
-  for (int i = 0; i < num; i++) {
+  for (size_t i = 0; i < num; i++) {
     memcpy(string_data + offset[i], string_buffer[i].data, string_buffer[i].len);
   }
   return RET_OK;
@@ -85,11 +86,11 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector<std::vector<
     MS_LOG(ERROR) << "tensor is nullptr.";
     return RET_ERROR;
   }
-  int32_t num = string_buffer.size();
+  size_t num = string_buffer.size();
   std::vector<int32_t> offset(num + 1);
   offset[0] = 4 * (num + 2);
   std::vector<int> len(num);
-  for (int i = 0; i < num; i++) {
+  for (size_t i = 0; i < num; i++) {
     len[i] = 0;
     for (int j = 0; j < static_cast<int>(string_buffer[i].size()); j++) {
       len[i] += string_buffer[i][j].len;
@@ -109,10 +110,10 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector<std::vector<
   auto *string_data = reinterpret_cast<char *>(data);
 
   string_info[0] = num;
-  for (int i = 0; i <= num; i++) {
+  for (size_t i = 0; i <= num; i++) {
     string_info[i + 1] = offset[i];
   }
-  for (int i = 0; i < num; i++) {
+  for (size_t i = 0; i < num; i++) {
     auto *dst = string_data + offset[i];
     for (auto string_part : string_buffer[i]) {
       memcpy(dst, string_part.data, string_part.len);
@@ -132,32 +133,6 @@ int GetStringCount(Tensor *tensor) {
   return GetStringCount(tensor->MutableData());
 }
 
-int StringsToMSTensor(const std::vector<std::string> &inputs, tensor::MSTensor *tensor) {
-  if (tensor == nullptr) {
-    return RET_PARAM_INVALID;
-  }
-  std::vector<StringPack> all_pack;
-  for (auto &input : inputs) {
-    StringPack pack = {static_cast<int>(input.length()), input.data()};
-    all_pack.push_back(pack);
-  }
-  return WriteStringsToTensor(static_cast<Tensor *>(tensor), all_pack);
-}
-
-std::vector<std::string> MSTensorToStrings(const tensor::MSTensor *tensor) {
-  if (tensor == nullptr) {
-    return {""};
-  }
-  const void *ptr = static_cast<const Tensor *>(tensor)->data_c();
-  std::vector<StringPack> all_pack = ParseStringBuffer(ptr);
-  std::vector<std::string> result(all_pack.size());
-  std::transform(all_pack.begin(), all_pack.end(), result.begin(), [](StringPack &pack) {
-    std::string str(pack.data, pack.len);
-    return str;
-  });
-  return result;
-}
-
 // Some primes between 2^63 and 2^64
 namespace {
 static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
@@ -302,5 +277,41 @@ uint64_t StringHash64(const char *s, size_t len) {
   return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, HashLen16(v.second, w.second, mul) + x,
                    mul);
 }
+#endif
+int StringsToMSTensor(const std::vector<std::string> &inputs, tensor::MSTensor *tensor) {
+#ifdef ENABLE_STRING_KERNEL
+  if (tensor == nullptr) {
+    return RET_PARAM_INVALID;
+  }
+  std::vector<StringPack> all_pack;
+  for (auto &input : inputs) {
+    StringPack pack = {static_cast<int>(input.length()), input.data()};
+    all_pack.push_back(pack);
+  }
+  return WriteStringsToTensor(static_cast<Tensor *>(tensor), all_pack);
+#else
+  MS_LOG(ERROR) << unsupport_string_tensor_log;
+  return RET_ERROR;
+#endif
+}
+
+std::vector<std::string> MSTensorToStrings(const tensor::MSTensor *tensor) {
+#ifdef ENABLE_STRING_KERNEL
+  if (tensor == nullptr) {
+    return {""};
+  }
+  const void *ptr = static_cast<const Tensor *>(tensor)->data_c();
+  std::vector<StringPack> all_pack = ParseStringBuffer(ptr);
+  std::vector<std::string> result(all_pack.size());
+  std::transform(all_pack.begin(), all_pack.end(), result.begin(), [](StringPack &pack) {
+    std::string str(pack.data, pack.len);
+    return str;
+  });
+  return result;
+#else
+  MS_LOG(ERROR) << unsupport_string_tensor_log;
+  return {""};
+#endif
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/common/string_util.h b/mindspore/lite/src/common/string_util.h
index 8811ff00e2a..52ea90ca23f 100644
--- a/mindspore/lite/src/common/string_util.h
+++ b/mindspore/lite/src/common/string_util.h
@@ -16,7 +16,6 @@
 
 #ifndef MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_
 #define MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_
-
 #include <vector>
 #include <string>
 #include <utility>
@@ -26,6 +25,7 @@
 #include "include/errorcode.h"
 #include "include/lite_utils.h"
 
+#ifdef ENABLE_STRING_KERNEL
 namespace mindspore {
 namespace lite {
 typedef struct StringPack {
@@ -47,9 +47,8 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector<std::vector<
 
 int GetStringCount(const void *data);
 int GetStringCount(Tensor *tensor);
-
 uint64_t StringHash64(const char *s, size_t len);
 }  // namespace lite
 }  // namespace mindspore
-
+#endif
 #endif  // MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_
diff --git a/mindspore/lite/src/common/tensor_util.cc b/mindspore/lite/src/common/tensor_util.cc
index 47181c11b6c..627a6385fed 100644
--- a/mindspore/lite/src/common/tensor_util.cc
+++ b/mindspore/lite/src/common/tensor_util.cc
@@ -70,27 +70,22 @@ void FreeAllTensorC(std::vector<TensorC *> *tensors_in) {
     if (i == nullptr) {
       continue;
     }
+#ifdef ENABLE_CONTROL_TENSORLIST
     if (i->data_type_ == kObjectTypeTensorType) {
       TensorListC *tensorListC = reinterpret_cast<TensorListC *>(i);
       FreeTensorListC(tensorListC);
       tensorListC = nullptr;
     } else {
+#endif
       free(i);
       i = nullptr;
+#ifdef ENABLE_CONTROL_TENSORLIST
     }
+#endif
   }
   tensors_in->clear();
 }
 
-void FreeTensorListC(TensorListC *tensorlist_c) {
-  MS_ASSERT(tensorlist_c != nullptr);
-  if (tensorlist_c->tensors_ != nullptr) {
-    free(tensorlist_c->tensors_);
-    tensorlist_c->tensors_ = nullptr;
-  }
-  free(tensorlist_c);
-}
-
 int Tensor2TensorC(const Tensor *src, TensorC *dst) {
   dst->is_ready_ = src->IsReady();
   dst->format_ = src->format();
@@ -115,6 +110,16 @@ void TensorC2Tensor(const TensorC *src, Tensor *dst) {
   dst->set_shape(std::vector<int>(src->shape_, src->shape_ + src->shape_size_));
 }
 
+#ifdef ENABLE_CONTROL_TENSORLIST
+void FreeTensorListC(TensorListC *tensorlist_c) {
+  MS_ASSERT(tensorlist_c != nullptr);
+  if (tensorlist_c->tensors_ != nullptr) {
+    free(tensorlist_c->tensors_);
+    tensorlist_c->tensors_ = nullptr;
+  }
+  free(tensorlist_c);
+}
+
 int TensorList2TensorListC(TensorList *src, TensorListC *dst) {
   MS_ASSERT(src != nullptr);
   MS_ASSERT(dst != nullptr);
@@ -172,21 +177,23 @@ int TensorListC2TensorList(const TensorListC *src, TensorList *dst) {
   return RET_OK;
 }
 
-int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, int outputs_size,
                                   std::vector<TensorC *> *out_tensor_c) {
   MS_ASSERT(out_tensor_c != nullptr);
   int ret = RET_OK;
-  for (size_t i = 0; i < outputs.size(); i++) {
+  for (int i = 0; i < outputs_size; i++) {
     out_tensor_c->push_back(nullptr);
   }
   return ret;
 }
+#endif
 
 int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *out_tensor_c) {
   MS_ASSERT(out_tensor_c != nullptr);
   MS_ASSERT(parameter != nullptr);
   int ret = RET_OK;
+#ifdef ENABLE_CONTROL_TENSORLIST
   if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor ||
       parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve ||
       parameter->type_ == mindspore::schema::PrimitiveType_TensorListSetItem) {
@@ -199,10 +206,22 @@ int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lit
     out_tensor_c->push_back(reinterpret_cast<TensorC *const>(tensor_list_c));
   } else if (parameter->type_ == mindspore::schema::PrimitiveType_Merge ||
              parameter->type_ == mindspore::schema::PrimitiveType_Switch) {
-    ret = GenerateMergeSwitchOutTensorC(inputs, outputs, out_tensor_c);
+    ret = GenerateMergeSwitchOutTensorC(inputs, static_cast<int>(outputs.size()), out_tensor_c);
   } else {
     ret = OutputTensor2TensorC(outputs, out_tensor_c);
   }
+#else
+  if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor ||
+      parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve ||
+      parameter->type_ == mindspore::schema::PrimitiveType_TensorListSetItem ||
+      parameter->type_ == mindspore::schema::PrimitiveType_Merge ||
+      parameter->type_ == mindspore::schema::PrimitiveType_Switch) {
+    MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+    return RET_ERROR;
+  } else {
+    ret = OutputTensor2TensorC(outputs, out_tensor_c);
+  }
+#endif
   return ret;
 }
 
@@ -212,6 +231,7 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite
   int ret = RET_OK;
   for (auto input : inputs) {
     if (input->data_type() == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
       // Tensor ->TensorList -> TensorListC -> TensorC
       auto *tensor_list = reinterpret_cast<TensorList *>(input);
       auto *tensor_list_c = reinterpret_cast<TensorListC *>(malloc(sizeof(TensorListC)));
@@ -222,10 +242,15 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite
       memset(tensor_list_c, 0, sizeof(TensorListC));
       ret = TensorList2TensorListC(tensor_list, tensor_list_c);
       if (ret != RET_OK) {
+        free(tensor_list_c->tensors_);
         free(tensor_list_c);
         return NNACL_ERR;
       }
       in_tensor_c->push_back(reinterpret_cast<TensorC *>(tensor_list_c));
+#else
+      MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+      return RET_NOT_SUPPORT;
+#endif
     } else {
       // Tensor -> TensorC
       auto *tensor_c = reinterpret_cast<TensorC *>(malloc(sizeof(TensorC)));
diff --git a/mindspore/lite/src/common/tensor_util.h b/mindspore/lite/src/common/tensor_util.h
index 46c63a2044c..07c3996b693 100644
--- a/mindspore/lite/src/common/tensor_util.h
+++ b/mindspore/lite/src/common/tensor_util.h
@@ -30,13 +30,15 @@ namespace lite {
 int InputTensor2TensorC(const std::vector<lite::Tensor *> &tensors_in, std::vector<TensorC *> *tensors_out);
 int OutputTensor2TensorC(const std::vector<lite::Tensor *> &tensors_in, std::vector<TensorC *> *tensors_out);
 void FreeAllTensorC(std::vector<TensorC *> *tensors_in);
-void FreeTensorListC(TensorListC *tensorListC);
 int Tensor2TensorC(const Tensor *src, TensorC *dst);
 void TensorC2Tensor(const TensorC *src, Tensor *dst);
+#ifdef ENABLE_CONTROL_TENSORLIST
+void FreeTensorListC(TensorListC *tensorListC);
 int TensorList2TensorListC(TensorList *src, TensorListC *dst);
 int TensorListC2TensorList(const TensorListC *src, TensorList *dst);
-int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, const std::vector<lite::Tensor *> &outputs,
+int GenerateMergeSwitchOutTensorC(const std::vector<lite::Tensor *> &inputs, int output_size,
                                   std::vector<TensorC *> *out_tensor_c);
+#endif
 int GenerateInTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, std::vector<TensorC *> *in_tensor_c);
 int GenerateOutTensorC(const OpParameter *const parameter, const std::vector<lite::Tensor *> &inputs,
diff --git a/mindspore/lite/src/common/utils.cc b/mindspore/lite/src/common/utils.cc
index c7baee91eb8..6f3d3e11468 100644
--- a/mindspore/lite/src/common/utils.cc
+++ b/mindspore/lite/src/common/utils.cc
@@ -26,26 +26,6 @@
 
 namespace mindspore {
 namespace lite {
-std::vector<std::string> StringSplit(std::string str, const std::string &pattern) {
-  std::vector<std::string> result;
-  if (str.empty()) {
-    return result;
-  }
-  std::string::size_type pos;
-  str += pattern;
-  auto size = str.size();
-
-  for (size_t i = 0; i < size; i++) {
-    pos = str.find(pattern, i);
-    if (pos < size) {
-      std::string s = str.substr(i, pos - i);
-      result.push_back(s);
-      i = pos + pattern.size() - 1;
-    }
-  }
-  return result;
-}
-
 uint64_t GetTimeUs() {
 #ifdef SUPPORT_MSVC
   FILETIME ft;
@@ -71,18 +51,22 @@ std::string RemoveSubStr(const std::string &from, const std::string &sub_str, Re
     MS_LOG(ERROR) << "string is empty";
     return "";
   }
+  if (sub_str.length() > from.length()) {
+    MS_LOG(ERROR) << "sub_str is longer than from";
+    return "";
+  }
   if (mode == PREFIX) {
     if (from.substr(0, sub_str.length()) == sub_str) {
-      result = from.substr(sub_str.size());
+      result = from.substr(sub_str.length());
     }
   } else if (mode == SUFFIX) {
-    if (from.rfind(sub_str) == from.size() - sub_str.size()) {
-      result = from.substr(0, from.size() - sub_str.size());
+    if (from.rfind(sub_str) == from.length() - sub_str.length()) {
+      result = from.substr(0, from.length() - sub_str.length());
     }
   } else {
     size_t index;
     while ((index = result.find(sub_str)) != std::string::npos) {
-      result = result.erase(index, sub_str.size());
+      result = result.erase(index, sub_str.length());
     }
   }
 
@@ -165,6 +149,5 @@ bool IsSupportSDot() {
 #endif
   return status;
 }
-
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/src/common/utils.h b/mindspore/lite/src/common/utils.h
index 2881ed2ab70..aae4ce65eb3 100644
--- a/mindspore/lite/src/common/utils.h
+++ b/mindspore/lite/src/common/utils.h
@@ -37,8 +37,6 @@ enum NodeType {
 
 const int USEC = 1000000;
 const int MSEC = 1000;
-std::vector<std::string> StringSplit(std::string str, const std::string &pattern);
-
 uint64_t GetTimeUs();
 
 bool IsSupportSDot();
@@ -119,7 +117,7 @@ inline std::string GetFileName(const std::string &path) {
   char delim = '/';
 
   size_t i = path.rfind(delim, path.length());
-  if (i != std::string::npos) {
+  if (i != std::string::npos && i + 1 < path.length()) {
     return (path.substr(i + 1, path.length() - i));
   }
 
diff --git a/mindspore/lite/src/cxx_api/model/model_impl.cc b/mindspore/lite/src/cxx_api/model/model_impl.cc
index f22dd3a2f5e..0d69f65649a 100644
--- a/mindspore/lite/src/cxx_api/model/model_impl.cc
+++ b/mindspore/lite/src/cxx_api/model/model_impl.cc
@@ -210,6 +210,7 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
     }
     old_data.push_back(input->data());
     if (input->data_type() == kObjectTypeString) {
+#ifdef ENABLE_STRING_KERNEL
       std::vector<int32_t> shape = TruncateShape(user_input.Shape(), input->data_type(), user_input.DataSize(), false);
       if (shape.empty() && !(user_input.Shape().empty())) {
         ResetTensorData(old_data, input_tensors);
@@ -218,6 +219,10 @@ Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTen
       }
       input->set_shape(shape);
       input->set_data(user_input.MutableData());
+#else
+      MS_LOG(ERROR) << unsupport_string_tensor_log;
+      return kLiteError;
+#endif
     } else {
       if (user_input.MutableData() != input->data()) {
         if (input->Size() != user_input.DataSize()) {
@@ -260,7 +265,6 @@ std::vector<MSTensor> ModelImpl::GetInputs() {
   }
   res.resize(inputs.size());
   for (size_t i = 0; i < inputs.size(); i++) {
-    inputs[i]->MutableData();  // prepare data
     auto impl = std::shared_ptr<MSTensor::Impl>(new (std::nothrow) MSTensor::Impl(inputs[i]));
     if (impl == nullptr || impl->lite_tensor() == nullptr) {
       MS_LOG(ERROR) << "Create tensor failed.";
diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc
index f7f3ff73924..d12ebd02722 100644
--- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc
+++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc
@@ -57,6 +57,7 @@ std::shared_ptr<MSTensor::Impl> MSTensor::Impl::CreateTensorImpl(const std::stri
 
 std::shared_ptr<MSTensor::Impl> MSTensor::Impl::StringsToTensorImpl(const std::string &name,
                                                                     const std::vector<std::string> &str) {
+#ifdef ENABLE_STRING_KERNEL
   auto lite_tensor = new (std::nothrow) lite::Tensor();
   if (lite_tensor == nullptr) {
     MS_LOG(ERROR) << "Failed to allocate lite tensor.";
@@ -78,15 +79,24 @@ std::shared_ptr<MSTensor::Impl> MSTensor::Impl::StringsToTensorImpl(const std::s
   impl->set_own_data(true);
   impl->set_from_session(false);
   return impl;
+#else
+  MS_LOG(ERROR) << unsupport_string_tensor_log;
+  return nullptr;
+#endif
 }
 
 std::vector<std::string> MSTensor::Impl::TensorImplToStrings(const std::shared_ptr<Impl> &impl) {
   std::vector<std::string> empty;
+#ifdef ENABLE_STRING_KERNEL
   auto lite_tensor = impl->lite_tensor();
   if (lite_tensor == nullptr) {
     MS_LOG(ERROR) << "Invalid tensor impl.";
     return empty;
   }
   return lite::MSTensorToStrings(lite_tensor);
+#else
+  MS_LOG(ERROR) << unsupport_string_tensor_log;
+  return empty;
+#endif
 }
 }  // namespace mindspore
diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h
index da1c1659b51..f2f197b41a3 100644
--- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h
+++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h
@@ -204,7 +204,7 @@ class MSTensor::Impl {
     auto lite_quant_params = lite_tensor_->quant_params();
     std::vector<QuantParam> quant_params;
     for (size_t i = 0; i < lite_quant_params.size(); i++) {
-      QuantParam param;
+      QuantParam param{};
       param.bit_num = lite_quant_params[i].bitNum;
       param.scale = lite_quant_params[i].scale;
       param.zero_point = lite_quant_params[i].zeroPoint;
@@ -220,11 +220,11 @@ class MSTensor::Impl {
     }
     std::vector<lite::LiteQuantParam> lite_quant_params;
     for (size_t i = 0; i < quant_params.size(); i++) {
-      lite::LiteQuantParam lite_arg;
-      lite_arg.bitNum = quant_params[i].bit_num;
-      lite_arg.scale = quant_params[i].scale;
-      lite_arg.zeroPoint = quant_params[i].zero_point;
-      lite_quant_params.push_back(lite_arg);
+      lite::LiteQuantParam lite_param{};
+      lite_param.bitNum = quant_params[i].bit_num;
+      lite_param.scale = quant_params[i].scale;
+      lite_param.zeroPoint = quant_params[i].zero_point;
+      lite_quant_params.push_back(lite_param);
     }
     lite_tensor_->set_quant_params(lite_quant_params);
   }
diff --git a/mindspore/lite/src/delegate/npu/npu_delegate.cc b/mindspore/lite/src/delegate/npu/npu_delegate.cc
index 97fc4c936b6..0f5a4dd4632 100644
--- a/mindspore/lite/src/delegate/npu/npu_delegate.cc
+++ b/mindspore/lite/src/delegate/npu/npu_delegate.cc
@@ -206,6 +206,14 @@ int NPUDelegate::Build(DelegateModel *model) {
 }
 
 NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) {
+  if (primitive == nullptr) {
+    MS_LOG(ERROR) << "primitive is NULL!";
+    return nullptr;
+  }
+  if (kernel == nullptr) {
+    MS_LOG(ERROR) << "kernel is NULL!";
+    return nullptr;
+  }
   auto name = kernel->name();
   NPUOp *npu_op = nullptr;
   auto node_type = primitive->value_type();
diff --git a/mindspore/lite/src/delegate/npu/npu_graph.cc b/mindspore/lite/src/delegate/npu/npu_graph.cc
index 3a81a50c533..4a924fbaf9a 100644
--- a/mindspore/lite/src/delegate/npu/npu_graph.cc
+++ b/mindspore/lite/src/delegate/npu/npu_graph.cc
@@ -238,7 +238,7 @@ int NPUGraph::CreateSubgraphFromReadyOps(std::queue<NPUOp *> *valid_in_ops, std:
     if ((*is_searched)[op]) {
       continue;
     }
-    if (valid_in_ops->empty()) {
+    if (!valid_in_ops->empty()) {
       // use BFS to find out connected input ops
       FindConnectedOps(op, ready_ops, &connected_ops, is_searched);
     } else {
diff --git a/mindspore/lite/src/delegate/npu/npu_manager.cc b/mindspore/lite/src/delegate/npu/npu_manager.cc
index d6606d2ed44..413009039c7 100644
--- a/mindspore/lite/src/delegate/npu/npu_manager.cc
+++ b/mindspore/lite/src/delegate/npu/npu_manager.cc
@@ -80,9 +80,9 @@ bool NPUManager::CheckDDKVersion() {
   auto client = std::make_shared<hiai::AiModelMngerClient>();
   if (client->GetVersion() != nullptr) {
     std::string version = client->GetVersion();
-    int ret = CompareVersion(version, "100.320.010.023");
-    if (ret < 0) {
-      MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.010.023";
+    int ret = CompareVersion(version, "100.320.011.018");
+    if (ret <= 0) {
+      MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.011.018";
       return false;
     }
   }
diff --git a/mindspore/lite/src/delegate/npu/op/resize_npu.cc b/mindspore/lite/src/delegate/npu/op/resize_npu.cc
index 6b7d0c9a75d..77a4a1bf9e2 100644
--- a/mindspore/lite/src/delegate/npu/op/resize_npu.cc
+++ b/mindspore/lite/src/delegate/npu/op/resize_npu.cc
@@ -93,6 +93,7 @@ int ResizeNPUOp::Init(const schema::Primitive *primitive, const std::vector<mind
     resize_nearest->set_attr_align_corners(resize_prim->coordinate_transform_mode() ==
                                            schema::CoordinateTransformMode_ALIGN_CORNERS);
     resize_nearest->set_input_size(*out_size_);
+    resize_ = resize_nearest;
   } else {
     MS_LOG(WARNING) << "Unsupported resize method type:" << resize_method_;
     return RET_ERROR;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc
index f81e797efcc..a26c09c2e4a 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc
@@ -21,6 +21,10 @@ namespace mindspore::lite {
 int ActivationTensorRT::IsSupport(const schema::Primitive *primitive,
                                   const std::vector<mindspore::MSTensor> &in_tensors,
                                   const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -62,6 +66,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
 
   activation_layer->setName(op_name_.c_str());
+  activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(activation_layer->getOutput(0));
 
   return RET_OK;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
index 994980e5b29..b156b125dd4 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc
@@ -20,6 +20,10 @@
 namespace mindspore::lite {
 int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() < 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -41,7 +45,6 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     MS_LOG(ERROR) << "concate_op convert failed";
     return RET_ERROR;
   }
-  MS_LOG(INFO) << "in tensort size of concate: " << tensorrt_in_tensors_.size();
   if (tensorrt_in_tensors_.size() != in_tensors_.size()) {
     MS_LOG(ERROR) << "concate_op in tensor is invalid";
     return RET_ERROR;
@@ -64,6 +67,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     concate_layer->setAxis(axis);
   }
   concate_layer->setName(op_name_.c_str());
+  concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(concate_layer->getOutput(0));
 
   return RET_OK;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
index 4cbfbd3f207..649158a5365 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc
@@ -24,6 +24,10 @@ constexpr int BIAS_INDEX = 2;
 int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                    const std::vector<mindspore::MSTensor> &in_tensors,
                                    const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -53,8 +57,12 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
 
+  // transpose weight
+  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
+  nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_);
+
   // conv
-  int nbOutputMaps = conv_op->out_channel();
+  int nbOutputMaps = weight_tensor.Shape()[0];
   if (nbOutputMaps <= 0) {
     MS_LOG(ERROR) << "out_channel is invalid";
     return RET_ERROR;
@@ -67,9 +75,6 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
 
-  // transpose weight
-  nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_);
-
   // bias
   nvinfer1::Weights biasWeights{};
   if (in_tensors_.size() >= INPUT_SIZE3) {
@@ -113,7 +118,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
-
+  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
index 98d62a5eb9b..8b863ba8349 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc
@@ -23,6 +23,10 @@ namespace mindspore::lite {
 int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
                                      const std::vector<mindspore::MSTensor> &in_tensors,
                                      const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -51,8 +55,12 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
 
+  // transpose weight
+  const mindspore::MSTensor &weight_tensor = in_tensors_[1];
+  nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_);
+
   // deconv basic params
-  int nbOutputMaps = deconv_op->out_channel();
+  int nbOutputMaps = weight_tensor.Shape()[0];
   if (nbOutputMaps <= 0) {
     MS_LOG(ERROR) << "out_channel is invalid";
     return RET_ERROR;
@@ -65,9 +73,6 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector<int64_t>(kernel_size->begin(), kernel_size->end()));
 
-  // transpose weight
-  nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_);
-
   // bias
   nvinfer1::Weights biasWeights{};
   if (in_tensors_.size() >= 3) {
@@ -111,7 +116,7 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
-
+  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
index 8f0f2fa2894..2b64aad520c 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc
@@ -21,6 +21,10 @@ namespace mindspore::lite {
 int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
                                    const std::vector<mindspore::MSTensor> &in_tensors,
                                    const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   std::map<schema::PrimitiveType, nvinfer1::ElementWiseOperation> element_wise_ops = {
     {schema::PrimitiveType_AddFusion, nvinfer1::ElementWiseOperation::kSUM},
     {schema::PrimitiveType_PowFusion, nvinfer1::ElementWiseOperation::kPOW},
@@ -61,6 +65,13 @@ int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
     MS_LOG(ERROR) << "invalid output tensort size: " << out_tensors.size();
     return RET_ERROR;
   }
+
+  // if constant tensor is scalar, it needs to know another input tensor's shape to broadcast
+  if (in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) {
+    MS_LOG(ERROR) << "invalid all input tensor shape unknown for: " << op_name_;
+    return RET_ERROR;
+  }
+
   return RET_OK;
 }
 
@@ -69,23 +80,25 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     MS_LOG(ERROR) << "network or input tensor size is invalid";
     return RET_ERROR;
   }
-  // create ITensor from MS scalar
-  if (this->in_tensors_[1].Shape().size() == 0) {
-    nvinfer1::ITensor *scalar_input =
-      lite::ConvertScalarToITensor(network, this->in_tensors_[0].Shape().size(), this->in_tensors_[1].MutableData());
-    if (scalar_input == nullptr) {
-      MS_LOG(ERROR) << "create Itensor from scalar failed";
-      return RET_ERROR;
-    }
-    this->AddInnerInTensors(scalar_input);
-  }
+  first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
   // add elementwise
   if (this->tensorrt_in_tensors_.size() != 2) {
-    MS_LOG(ERROR) << "invalid inner in tensors cnt: " << this->tensorrt_in_tensors_.size();
-    return RET_ERROR;
+    // create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
+    nvinfer1::ITensor *constant_input = nullptr;
+    if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) {
+      constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
+                                                    in_tensors_[1 - first_in_tensor_index_].Data().get());
+    } else {
+      constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
+    }
+    if (constant_input == nullptr) {
+      MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
+      return RET_ERROR;
+    }
+    this->AddInnerInTensors(constant_input);
   }
-  nvinfer1::IElementWiseLayer *cal_layer =
-    network->addElementWise(*tensorrt_in_tensors_[0], *tensorrt_in_tensors_[1], element_wise_op_);
+  nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise(
+    *tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_);
 
   if (cal_layer == nullptr) {
     MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
index a370c80ca5f..c927ab074dd 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h
@@ -35,8 +35,12 @@ class ElementWiseTensorRT : public TensorRTOp {
                 const std::vector<mindspore::MSTensor> &out_tensors) override;
 
  private:
-  nvinfer1::ElementWiseOperation element_wise_op_;
   nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor);
+
+  nvinfer1::ElementWiseOperation element_wise_op_;
+
+  // index of first input MSTensor in the trt input tensor vector
+  size_t first_in_tensor_index_ = 0;
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
diff --git a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
index 410854f0e78..6bdbc2ea740 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc
@@ -22,6 +22,10 @@ constexpr int AXIS_INDEX = 2;
 
 int GatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 3) {
     MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size();
     return RET_ERROR;
@@ -61,6 +65,7 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   gather_layer->setName(op_name_.c_str());
+  gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(gather_layer->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc
index e56a4f3eec8..07a9cf4c7aa 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc
@@ -22,6 +22,10 @@ constexpr int BIAS_INDEX = 2;
 int MatMulTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                               const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -41,16 +45,18 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
 
   auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_);
   matmul_layer->setName(op_name_.c_str());
+  nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0);
 
-  if (in_tensors_.size() == 3) {
+  if (in_tensors_.size() == BIAS_INDEX + 1) {
     auto bias = ConvertTensorWithExpandDims(network, in_tensors_[BIAS_INDEX], in_tensors_[0].Shape().size());
     auto bias_layer = network->addElementWise(*matmul_layer->getOutput(0), *bias, nvinfer1::ElementWiseOperation::kSUM);
     auto bias_layer_name = op_name_ + "_bias";
     bias_layer->setName(bias_layer_name.c_str());
-    this->AddInnerOutTensors(bias_layer->getOutput(0));
-  } else {
-    this->AddInnerOutTensors(matmul_layer->getOutput(0));
+    out_tensor = bias_layer->getOutput(0);
   }
+
+  out_tensor->setName(out_tensors_[0].Name().c_str());
+  this->AddInnerOutTensors(out_tensor);
   return RET_OK;
 }
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc
index d5565765c98..5e1e2e72a66 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc
@@ -23,6 +23,10 @@ namespace mindspore::lite {
 int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                            const std::vector<mindspore::MSTensor> &in_tensors,
                            const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -98,6 +102,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
+  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
 
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc
index 4263755c2fc..3ade0a4834b 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc
@@ -22,6 +22,10 @@ namespace mindspore::lite {
 int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                             const std::vector<mindspore::MSTensor> &in_tensors,
                             const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -89,6 +93,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
+  transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc
index 8be59ee52d3..3cf38700868 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc
@@ -19,6 +19,10 @@
 namespace mindspore::lite {
 int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                               const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   auto reduce_op = primitive->value_as_ReduceFusion();
   if (reduce_op == nullptr) {
     MS_LOG(ERROR) << "convert failed";
diff --git a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc
index b665c65fc7b..f0135bc2ef1 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc
@@ -26,6 +26,10 @@ constexpr int POWER_INDEX = 3;
 
 int ScaleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                              const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 2 && in_tensors.size() != 3 && in_tensors.size() != 4) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is: " << in_tensors.size();
     return RET_ERROR;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc
index 4db3722db10..9e006341215 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc
@@ -19,6 +19,10 @@
 namespace mindspore::lite {
 int ShapeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                              const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
@@ -41,6 +45,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     return RET_ERROR;
   }
   shape_layer->setName(op_name_.c_str());
+  shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
   this->AddInnerOutTensors(shape_layer->getOutput(0));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc
index d5d21cf9270..21b3ae2e66e 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc
@@ -16,19 +16,49 @@
 
 #include "src/delegate/tensorrt/op/shuffle_tensorrt.h"
 #include <vector>
+#include <numeric>
+#include <functional>
 
 namespace mindspore::lite {
 int ShuffleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
-  if ((type_ == schema::PrimitiveType::PrimitiveType_Squeeze ||
-       type_ == schema::PrimitiveType::PrimitiveType_Unsqueeze) &&
-      in_tensors.size() != 1) {
-    MS_LOG(ERROR) << "invalid input tensort size: " << in_tensors.size();
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
     return RET_ERROR;
   }
-  if ((type_ == schema::PrimitiveType::PrimitiveType_Transpose) && in_tensors.size() != 2) {
-    MS_LOG(ERROR) << "invalid input tensort size: " << in_tensors.size();
-    return RET_ERROR;
+  switch (type_) {
+    case schema::PrimitiveType_Flatten:
+    case schema::PrimitiveType_Squeeze:
+    case schema::PrimitiveType_Unsqueeze: {
+      if (in_tensors.size() != 1) {
+        MS_LOG(ERROR) << "Unsupported in_tensors size " << in_tensors.size() << " of "
+                      << schema::EnumNamePrimitiveType(type_);
+        return RET_ERROR;
+      }
+      break;
+    }
+    case schema::PrimitiveType_Reshape: {
+      if (in_tensors.size() != 2) {
+        MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
+        return RET_ERROR;
+      }
+      break;
+    }
+    case schema::PrimitiveType_Transpose: {
+      if (in_tensors.size() != 2) {
+        MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
+        return RET_ERROR;
+      }
+      if (in_tensors[1].Data() == nullptr) {
+        MS_LOG(ERROR) << "Unsupported shape tensor of " << schema::EnumNamePrimitiveType(type_);
+        return RET_ERROR;
+      }
+      break;
+    }
+    default: {
+      MS_LOG(ERROR) << "Unsupported op type:" << schema::EnumNamePrimitiveType(type_);
+      return RET_ERROR;
+    }
   }
   if (out_tensors.size() != 1) {
     MS_LOG(ERROR) << "invalid output tensort size: " << out_tensors.size();
@@ -49,7 +79,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
   }
   shuffle_layer->setName(op_name_.c_str());
 
-  switch (this->type()) {
+  switch (type_) {
     case schema::PrimitiveType_Unsqueeze: {
       int ret = AddUnsqueezeOp(shuffle_layer);
       if (ret != RET_OK) {
@@ -82,6 +112,14 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
       }
       break;
     }
+    case schema::PrimitiveType_Flatten: {
+      int ret = AddFlattenOp(shuffle_layer);
+      if (ret != RET_OK) {
+        MS_LOG(ERROR) << "AddFlattenOp failed.";
+        return ret;
+      }
+      break;
+    }
     default:
       MS_LOG(ERROR) << "Unsupported op type.";
       return RET_ERROR;
@@ -148,7 +186,6 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
   }
 
   nvinfer1::Dims unsqueeze_dims = lite::ConvertCudaDims(unsqueeze_shape);
-  MS_LOG(INFO) << "AddUnsqueezeOp: " << op_name_ << " unsqueeze_dims.nbDims: " << unsqueeze_dims.nbDims;
 
   shuffle_layer->setReshapeDimensions(unsqueeze_dims);
   return shuffle_layer->getOutput(0) == nullptr ? RET_ERROR : RET_OK;
@@ -166,8 +203,8 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
   }
   // perm
   mindspore::MSTensor perm_ternsor = in_tensors_[1];
-  if (perm_ternsor.Data() == nullptr || perm_ternsor.ElementNum() != tensorrt_in_tensors_[0]->getDimensions().nbDims) {
-    MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid.";
+  if (perm_ternsor.Data() == nullptr) {
+    MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid: " << op_name_;
     return RET_ERROR;
   }
   int *perm_data = reinterpret_cast<int *>(perm_ternsor.MutableData());
@@ -180,26 +217,38 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
   shuffle_layer->setFirstTranspose(perm);
   return RET_OK;
 }
+
 int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
-  auto reshape_op = this->op_primitive_->value_as_Reshape();
-  if (reshape_op == nullptr) {
-    MS_LOG(ERROR) << "AddReshapeOp convert failed";
-    return RET_ERROR;
-  }
-  if (in_tensors_.size() != 2) {
-    MS_LOG(ERROR) << "AddReshapeOp size of in tensort needs check: " << in_tensors_.size();
-    return RET_ERROR;
-  }
   mindspore::MSTensor &shape_tensor = in_tensors_[1];
-  nvinfer1::Dims reshape_dims = ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum());
-  int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "invalid dims for reshape " << op_name_;
-    return ret;
+  if (shape_tensor.Data() != nullptr) {
+    // static shuffle layer
+    nvinfer1::Dims reshape_dims = lite::ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum());
+    int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims);
+    if (ret != RET_OK) {
+      MS_LOG(ERROR) << "invalid dims for reshape " << op_name_;
+      return ret;
+    }
+    shuffle_layer->setReshapeDimensions(reshape_dims);
+  } else {
+    if (tensorrt_in_tensors_.size() != 2) {
+      MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
+      return RET_ERROR;
+    }
+    shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]);
   }
-  shuffle_layer->setReshapeDimensions(reshape_dims);
   return RET_OK;
 }
+
+int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) {
+  nvinfer1::Dims flatten_dims;
+  const std::vector<int64_t> &input_shape = in_tensors_[0].Shape();
+  flatten_dims.nbDims = 2;
+  flatten_dims.d[0] = input_shape[0];
+  flatten_dims.d[1] = std::accumulate(input_shape.begin() + 1, input_shape.end(), 1, std::multiplies<int>());
+  shuffle_layer->setReshapeDimensions(flatten_dims);
+  return RET_OK;
+}
+
 int ShuffleTensorRT::InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims) {
   int infer_index = -1;
   int known_cnt = 1;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
index 98d90d9ac2c..e799a7dcaee 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h
@@ -39,6 +39,7 @@ class ShuffleTensorRT : public TensorRTOp {
   int AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer);
+  int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer);
   int InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims);
 };
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc
index 4946fa0b501..a5e172e0dc5 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc
@@ -21,7 +21,11 @@ namespace mindspore::lite {
 int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                              const std::vector<mindspore::MSTensor> &in_tensors,
                              const std::vector<mindspore::MSTensor> &out_tensors) {
-  if (in_tensors.size() != 4 && in_tensors.size() != 5) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
+  if (in_tensors.size() < STRIDE_INDEX + 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
     return RET_ERROR;
   }
@@ -29,8 +33,8 @@ int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
     MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
     return RET_ERROR;
   }
-  if (in_tensors_[1].Data() == nullptr) {
-    MS_LOG(ERROR) << "invalid pad tensor for: " << op_name_;
+  if (in_tensors_[BEGIN_INDEX].Data() == nullptr || in_tensors_[STRIDE_INDEX].Data() == nullptr) {
+    MS_LOG(ERROR) << "invalid pad or stride tensor for: " << op_name_;
     return RET_ERROR;
   }
   return RET_OK;
@@ -42,9 +46,8 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
     MS_LOG(ERROR) << "convert StridedSlice failed: " << op_name_;
     return RET_ERROR;
   }
-  const mindspore::MSTensor &begin = in_tensors_[1];
-  // mindspore::MSTensor &end = in_tensors_[2];
-  const mindspore::MSTensor &stride = in_tensors_[3];
+  const mindspore::MSTensor &begin = in_tensors_[BEGIN_INDEX];
+  const mindspore::MSTensor &stride = in_tensors_[STRIDE_INDEX];
 
   nvinfer1::Dims start_dims = lite::ConvertCudaDims(begin.Data().get(), begin.ElementNum());
   nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape());
diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h
index 7bedfaf2adf..856f4d50712 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h
@@ -20,6 +20,8 @@
 #include "src/delegate/tensorrt/op/tensorrt_op.h"
 
 namespace mindspore::lite {
+constexpr int BEGIN_INDEX = 1;
+constexpr int STRIDE_INDEX = 3;
 class SliceTensorRT : public TensorRTOp {
  public:
   SliceTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
diff --git a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc
index 6f3d418fd34..e65508276f7 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc
@@ -19,6 +19,10 @@
 namespace mindspore::lite {
 int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                                const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) {
     with_log_ = true;
     auto softmax_op = primitive->value_as_LogSoftmax();
diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc
index 5acc69ef559..4f7b3ca8164 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc
@@ -42,4 +42,15 @@ void TensorRTOp::set_out_ops(const std::vector<TensorRTOp *> &out_ops) { this->o
 const std::vector<TensorRTOp *> &TensorRTOp::in_ops() const { return this->in_ops_; }
 
 const std::vector<TensorRTOp *> &TensorRTOp::out_ops() const { return this->out_ops_; }
+
+bool TensorRTOp::IsShapeKnown() {
+  if (this->in_tensors_[0].Shape().size() == 0) {
+    return false;
+  } else {
+    if (this->in_tensors_[0].Shape()[0] == -1) {
+      return false;
+    }
+  }
+  return true;
+}
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h
index 91e73de901f..9cc77218988 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h
+++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h
@@ -75,6 +75,8 @@ class TensorRTOp {
   const std::vector<TensorRTOp *> &out_ops() const;
 
  protected:
+  bool IsShapeKnown();
+
   std::vector<nvinfer1::ILayer *> layers_;
 
   const schema::Primitive *op_primitive_;
diff --git a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc
index 4549a8f5498..c5f59da7825 100644
--- a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc
+++ b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc
@@ -19,6 +19,10 @@
 namespace mindspore::lite {
 int UnaryTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                              const std::vector<mindspore::MSTensor> &out_tensors) {
+  if (!IsShapeKnown()) {
+    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
+    return RET_ERROR;
+  }
   if (in_tensors.size() != 1) {
     MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
   }
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc
index 4965a6c1059..e295c34ef3f 100644
--- a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc
+++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc
@@ -69,9 +69,6 @@ int TensorRTDelegate::Init() {
   op_func_lists_.clear();
   op_func_lists_ = {
     {schema::PrimitiveType_Activation, GetTensorRTOp<ActivationTensorRT>},
-    {schema::PrimitiveType_Unsqueeze, GetTensorRTOp<ShuffleTensorRT>},
-    {schema::PrimitiveType_Squeeze, GetTensorRTOp<ShuffleTensorRT>},
-    {schema::PrimitiveType_Reshape, GetTensorRTOp<ShuffleTensorRT>},
     {schema::PrimitiveType_Concat, GetTensorRTOp<ConcateTensorRT>},
     {schema::PrimitiveType_Conv2DFusion, GetTensorRTOp<ConvolutionTensorRT>},
     {schema::PrimitiveType_Conv2dTransposeFusion, GetTensorRTOp<DeconvolutionTensorRT>},
@@ -81,14 +78,20 @@ int TensorRTDelegate::Init() {
     {schema::PrimitiveType_AddFusion, GetTensorRTOp<ElementWiseTensorRT>},
     {schema::PrimitiveType_MulFusion, GetTensorRTOp<ElementWiseTensorRT>},
     {schema::PrimitiveType_Eltwise, GetTensorRTOp<ElementWiseTensorRT>},
-    {schema::PrimitiveType_Transpose, GetTensorRTOp<ShuffleTensorRT>},
-    {schema::PrimitiveType_ReduceFusion, GetTensorRTOp<ReduceTensorRT>},
-    {schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
+    {schema::PrimitiveType_Gather, GetTensorRTOp<GatherTensorRT>},
     {schema::PrimitiveType_MatMul, GetTensorRTOp<MatMulTensorRT>},
-    {schema::PrimitiveType_ScaleFusion, GetTensorRTOp<ScaleTensorRT>},
-    {schema::PrimitiveType_StridedSlice, GetTensorRTOp<SliceTensorRT>},
     {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp<PoolTensorRT>},
     {schema::PrimitiveType_PadFusion, GetTensorRTOp<PadTensorRT>},
+    {schema::PrimitiveType_ReduceFusion, GetTensorRTOp<ReduceTensorRT>},
+    {schema::PrimitiveType_ScaleFusion, GetTensorRTOp<ScaleTensorRT>},
+    {schema::PrimitiveType_StridedSlice, GetTensorRTOp<SliceTensorRT>},
+    {schema::PrimitiveType_Shape, GetTensorRTOp<ShapeTensorRT>},
+    {schema::PrimitiveType_Unsqueeze, GetTensorRTOp<ShuffleTensorRT>},
+    {schema::PrimitiveType_Squeeze, GetTensorRTOp<ShuffleTensorRT>},
+    {schema::PrimitiveType_Reshape, GetTensorRTOp<ShuffleTensorRT>},
+    {schema::PrimitiveType_Transpose, GetTensorRTOp<ShuffleTensorRT>},
+    {schema::PrimitiveType_Flatten, GetTensorRTOp<ShuffleTensorRT>},
+    {schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
   };
   return RET_OK;
 }
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc
index 1c3ce666941..2be96a83f27 100644
--- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc
+++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc
@@ -158,6 +158,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
             return RET_ERROR;
           }
           trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor);
+          MS_LOG(INFO) << "auto convert constant tensor for: " << cur_op->GetOpName();
           cur_op->AddInnerInTensors(trt_tensor);
         }
       } else {
@@ -178,6 +179,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
       for (size_t index = 0; index < out_op->outputs().size(); index++) {
         if (out_op->outputs()[index] == out_tensor) {
           out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str());
+          MS_LOG(INFO) << "markOutput for: " << out_tensor.Name();
           this->network_->markOutput(*out_op->GetInnerOutTensor()[index]);
         }
       }
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h
index 80ed386df7d..cd9163112c2 100644
--- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h
+++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h
@@ -37,8 +37,10 @@ class TensorRTSubGraph : public kernel::Kernel {
     trt_specific_weight_nodes_ = {
       schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_ReduceFusion, schema::PrimitiveType_Transpose,
       schema::PrimitiveType_Gather,       schema::PrimitiveType_Reshape,      schema::PrimitiveType_PowFusion,
-      schema::PrimitiveType_DivFusion,    schema::PrimitiveType_MatMul,       schema::PrimitiveType_ScaleFusion,
-      schema::PrimitiveType_MulFusion,    schema::PrimitiveType_StridedSlice, schema::PrimitiveType_PadFusion};
+      schema::PrimitiveType_AddFusion,    schema::PrimitiveType_DivFusion,    schema::PrimitiveType_SubFusion,
+      schema::PrimitiveType_MatMul,       schema::PrimitiveType_PowFusion,    schema::PrimitiveType_Eltwise,
+      schema::PrimitiveType_ScaleFusion,  schema::PrimitiveType_MulFusion,    schema::PrimitiveType_StridedSlice,
+      schema::PrimitiveType_PadFusion};
   }
 
   ~TensorRTSubGraph() override;
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc
index 230c35c829d..52ea5952adb 100644
--- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc
+++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc
@@ -108,7 +108,7 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network,
   return constant_tensor->getOutput(0);
 }
 
-nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value) {
+nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value) {
   nvinfer1::Dims dims = ConvertCudaDims(1, shape_size);
   nvinfer1::Weights weights{nvinfer1::DataType::kFLOAT, value, 1};
   nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h
index aacaed8534a..ae0a583faee 100644
--- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h
+++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h
@@ -51,7 +51,7 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network,
 nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network,
                                                const mindspore::MSTensor &ms_tensor, size_t expand_shape_size);
 
-nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value);
+nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value);
 
 nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float **pack_weight);
 
diff --git a/mindspore/lite/src/huffman_decode.h b/mindspore/lite/src/huffman_decode.h
index 37c000cf792..be5e6e37431 100644
--- a/mindspore/lite/src/huffman_decode.h
+++ b/mindspore/lite/src/huffman_decode.h
@@ -76,5 +76,4 @@ class HuffmanDecode {
 
 }  // namespace lite
 }  // namespace mindspore
-
 #endif  // MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_
diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc
index 7d4a1492fbd..b225d6b2970 100644
--- a/mindspore/lite/src/inner_context.cc
+++ b/mindspore/lite/src/inner_context.cc
@@ -72,16 +72,21 @@ int InnerContext::Init() {
   }
   if (this->thread_pool_ == nullptr && this->IsCpuEnabled()) {
     int actor_parallel_thread = this->enable_parallel_ ? kDefaultParallelNum : 1;
-    thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_);
-    if (thread_pool_ == nullptr) {
-      MS_LOG(ERROR) << "Create ThreadPool failed";
-      return RET_NULL_PTR;
-    }
+
     if (this->affinity_core_list_.empty()) {
-      thread_pool_->SetCpuAffinity(
-        static_cast<BindMode>(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_));
+      auto bind_mode = static_cast<BindMode>(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_);
+      thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, bind_mode);
+      if (thread_pool_ == nullptr) {
+        MS_LOG(ERROR) << "Create ThreadPool failed";
+        return RET_NULL_PTR;
+      }
     } else {
-      thread_pool_->SetCpuAffinity(this->affinity_core_list_);
+      thread_pool_ =
+        ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, this->affinity_core_list_);
+      if (thread_pool_ == nullptr) {
+        MS_LOG(ERROR) << "Create ThreadPool failed";
+        return RET_NULL_PTR;
+      }
     }
   }
   if (this->allocator == nullptr) {
@@ -115,7 +120,6 @@ int InnerContext::Init() {
 
 InnerContext::~InnerContext() {
   if (this->thread_pool_ != nullptr) {
-    thread_pool_->SetCpuAffinity(static_cast<BindMode>(NO_BIND));
     delete thread_pool_;
     this->thread_pool_ = nullptr;
   }
@@ -126,7 +130,7 @@ int InnerContext::IsValid() const {
     MS_LOG(ERROR) << "Device list is empty.";
     return RET_NOT_SUPPORT;
   }
-  if (this->device_list_.size() > 2) {
+  if (this->device_list_.size() > kMaxDeviceNums) {
     MS_LOG(ERROR) << "Not support device list more than 2.";
     return RET_NOT_SUPPORT;
   }
diff --git a/mindspore/lite/src/inner_kernel.cc b/mindspore/lite/src/inner_kernel.cc
index e9473c760fa..7d590d66385 100644
--- a/mindspore/lite/src/inner_kernel.cc
+++ b/mindspore/lite/src/inner_kernel.cc
@@ -71,7 +71,42 @@ int InnerKernel::PreProcess() {
       MS_LOG(ERROR) << "MallocData failed";
       return ret;
     }
+    output->ResetRefCount();
   }
   return RET_OK;
 }
+
+int InnerKernel::Execute() {
+  auto ret = PreProcess();
+  if (lite::RET_OK != ret) {
+    MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
+    return ret;
+  }
+
+  // Support ZeroShape
+  size_t zero_shape_num = 0;
+  for (auto tensor : this->out_tensors()) {
+    for (size_t i = 0; i < tensor->shape().size(); i++) {
+      if (tensor->shape()[i] == 0) {
+        zero_shape_num++;
+        break;
+      }
+    }
+  }
+
+  if (zero_shape_num != this->out_tensors().size()) {
+    ret = Run();
+    if (lite::RET_OK != ret) {
+      MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
+      return ret;
+    }
+  }
+
+  ret = PostProcess();
+  if (lite::RET_OK != ret) {
+    MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name();
+    return ret;
+  }
+  return lite::RET_OK;
+}
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/inner_kernel.h b/mindspore/lite/src/inner_kernel.h
index 93c490544be..8f41a07b260 100644
--- a/mindspore/lite/src/inner_kernel.h
+++ b/mindspore/lite/src/inner_kernel.h
@@ -52,39 +52,7 @@ class InnerKernel : public Kernel {
     }
   }
 
-  int Execute() override {
-    auto ret = PreProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name();
-      return ret;
-    }
-
-    // Support ZeroShape
-    size_t zero_shape_num = 0;
-    for (auto tensor : this->out_tensors()) {
-      for (size_t i = 0; i < tensor->shape().size(); i++) {
-        if (tensor->shape()[i] == 0) {
-          zero_shape_num++;
-          break;
-        }
-      }
-    }
-
-    if (zero_shape_num != this->out_tensors().size()) {
-      auto ret = Run();
-      if (lite::RET_OK != ret) {
-        MS_LOG(ERROR) << "run kernel failed, name: " << this->name();
-        return ret;
-      }
-    }
-
-    ret = PostProcess();
-    if (lite::RET_OK != ret) {
-      MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name();
-      return ret;
-    }
-    return lite::RET_OK;
-  }
+  int Execute() override;
 
   // called while compiling graph
   int Prepare() override { return mindspore::lite::RET_OK; }
@@ -94,14 +62,7 @@ class InnerKernel : public Kernel {
   // called before Run
   virtual int PreProcess();
   // called after Run
-  virtual int PostProcess() {
-    for (auto *output : this->out_tensors()) {
-      MS_ASSERT(output != nullptr);
-      output->ResetRefCount();
-    }
-
-    return FreeInWorkTensor();
-  }
+  virtual int PostProcess() { return FreeInWorkTensor(); }
 
   virtual int FreeInWorkTensor() const {
     for (auto &in_tensor : this->in_tensors()) {
@@ -164,14 +125,14 @@ class InnerKernel : public Kernel {
 
   void set_in_tensors(const std::vector<lite::Tensor *> &in_tensors) { this->in_tensors_ = in_tensors; }
 
-  virtual void set_in_tensor(lite::Tensor *in_tensor, int index) {
+  virtual void set_in_tensor(lite::Tensor *in_tensor, size_t index) {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
   }
 
   void set_out_tensors(const std::vector<lite::Tensor *> &out_tensors) { this->out_tensors_ = out_tensors; }
 
-  virtual void set_out_tensor(lite::Tensor *out_tensor, int index) {
+  virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
   }
diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc
index 54f4d9799b3..5da2327ceff 100644
--- a/mindspore/lite/src/kernel_registry.cc
+++ b/mindspore/lite/src/kernel_registry.cc
@@ -18,6 +18,7 @@
 #include <memory>
 #include "include/errorcode.h"
 #include "include/registry/register_kernel.h"
+#include "src/registry/register_utils.h"
 #include "src/ops/populate/populate_register.h"
 #include "src/common/version_manager.h"
 #include "nnacl/pooling_parameter.h"
@@ -50,21 +51,21 @@ void KernelKeyToKernelDesc(const KernelKey &key, kernel::KernelDesc *desc) {
 }
 }  // namespace
 
-KernelRegistry *KernelRegistry::GetInstance() {
-  static KernelRegistry instance;
-
-  std::unique_lock<std::mutex> malloc_creator_array(instance.lock_);
-  if (instance.creator_arrays_ == nullptr) {
-    instance.creator_arrays_ = reinterpret_cast<KernelCreator *>(malloc(array_size_ * sizeof(KernelCreator)));
-    if (instance.creator_arrays_ == nullptr) {
-      return nullptr;
+void KernelRegistry::CreatorArraysInit() {
+  std::unique_lock<std::mutex> malloc_creator_array(lock_);
+  if (creator_arrays_ == nullptr) {
+    creator_arrays_ = reinterpret_cast<KernelCreator *>(malloc(array_size_ * sizeof(KernelCreator)));
+    if (creator_arrays_ != nullptr) {
+      memset(creator_arrays_, 0, array_size_ * sizeof(KernelCreator));
     }
-    memset(instance.creator_arrays_, 0, array_size_ * sizeof(KernelCreator));
   }
-  return &instance;
+  return;
 }
 
-int KernelRegistry::Init() { return RET_OK; }
+KernelRegistry *KernelRegistry::GetInstance() {
+  static KernelRegistry instance;
+  return &instance;
+}
 
 kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) {
   if (desc.provider == kBuiltin) {
@@ -74,7 +75,9 @@ kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) {
                     << desc.type;
       return nullptr;
     }
-    return creator_arrays_[index];
+    if (creator_arrays_ != nullptr) {
+      return creator_arrays_[index];
+    }
   }
   MS_LOG(ERROR) << "Call wrong interface!provider: " << desc.provider;
   return nullptr;
@@ -89,16 +92,20 @@ int KernelRegistry::GetCreatorFuncIndex(const kernel::KernelKey desc) {
 }
 
 void KernelRegistry::RegKernel(const KernelKey desc, const kernel::KernelCreator creator) {
+  CreatorArraysInit();
   int index = GetCreatorFuncIndex(desc);
   if (index >= array_size_ || index < 0) {
     MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type" << desc.data_type << ",op type "
                   << desc.type;
     return;
   }
-  creator_arrays_[index] = creator;
+  if (creator_arrays_ != nullptr) {
+    creator_arrays_[index] = creator;
+  }
 }
 
 void KernelRegistry::RegKernel(KERNEL_ARCH arch, TypeId data_type, int op_type, kernel::KernelCreator creator) {
+  CreatorArraysInit();
   KernelKey desc = {arch, data_type, op_type};
   int index = GetCreatorFuncIndex(desc);
   if (index >= array_size_ || index < 0) {
@@ -106,11 +113,11 @@ void KernelRegistry::RegKernel(KERNEL_ARCH arch, TypeId data_type, int op_type,
                   << desc.type;
     return;
   }
-  creator_arrays_[index] = creator;
+  if (creator_arrays_ != nullptr) {
+    creator_arrays_[index] = creator;
+  }
 }
 
-bool KernelRegistry::Merge(const std::unordered_map<KernelKey, KernelCreator> &new_creators) { return false; }
-
 KernelRegistry::~KernelRegistry() {
   KernelRegistry *instance = GetInstance();
   std::unique_lock<std::mutex> malloc_creator_array(instance->lock_);
@@ -132,7 +139,7 @@ int KernelRegistry::GetCustomKernel(const std::vector<Tensor *> &in_tensors, con
   MS_ASSERT(kernel != nullptr);
   kernel::KernelDesc desc;
   KernelKeyToKernelDesc(key, &desc);
-  CreateKernel creator = kernel::RegisterKernel::GetCreator(static_cast<const schema::Primitive *>(primitive), &desc);
+  CreateKernel creator = kernel::RegisterUtils::GetCreator(static_cast<const schema::Primitive *>(primitive), &desc);
   if (creator == nullptr) {
     return RET_NOT_SUPPORT;
   }
diff --git a/mindspore/lite/src/kernel_registry.h b/mindspore/lite/src/kernel_registry.h
index 9015caf81a4..af480d3b844 100644
--- a/mindspore/lite/src/kernel_registry.h
+++ b/mindspore/lite/src/kernel_registry.h
@@ -37,12 +37,10 @@ class KernelRegistry {
   virtual ~KernelRegistry();
 
   static KernelRegistry *GetInstance();
-  static int Init();
   virtual kernel::KernelCreator GetCreator(const kernel::KernelKey &desc);
   int GetCreatorFuncIndex(kernel::KernelKey desc);
   void RegKernel(kernel::KernelKey desc, kernel::KernelCreator creator);
   void RegKernel(kernel::KERNEL_ARCH arch, TypeId data_type, int type, kernel::KernelCreator creator);
-  bool Merge(const std::unordered_map<kernel::KernelKey, kernel::KernelCreator> &newCreators);
   bool SupportKernel(const kernel::KernelKey &key);
   int GetKernel(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                 const InnerContext *ctx, const mindspore::Context *ms_ctx, const kernel::KernelKey &key,
@@ -58,6 +56,9 @@ class KernelRegistry {
   static const int array_size_{device_type_length_ * data_type_length_ * op_type_length_};
   kernel::KernelCreator *creator_arrays_ = nullptr;
 
+ private:
+  void CreatorArraysInit();
+
  private:
   std::mutex lock_;
 };
diff --git a/mindspore/lite/src/lite_kernel.cc b/mindspore/lite/src/lite_kernel.cc
index 926a94f3bfd..db1ad97e1d0 100644
--- a/mindspore/lite/src/lite_kernel.cc
+++ b/mindspore/lite/src/lite_kernel.cc
@@ -38,15 +38,18 @@ bool LiteKernel::IsReady(const std::vector<lite::Tensor *> &scope_tensors) {
   });
 }
 
-void LiteKernel::InitOutTensorInitRefCount() {
+void LiteKernel::InitOutTensorInitRefCount(const std::vector<LiteKernel *> *mask_kernels) {
   for (auto *tensor : this->out_tensors()) {
     MS_ASSERT(tensor != nullptr);
     size_t init_ref_count = 0;
     for (auto *post_kernel : this->out_kernels_) {
-      auto &post_in_tensors = post_kernel->in_tensors();
-      init_ref_count +=
-        std::count_if(post_in_tensors.begin(), post_in_tensors.end(),
-                      [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; });
+      if ((mask_kernels == nullptr) ||
+          std::find(mask_kernels->begin(), mask_kernels->end(), post_kernel) != mask_kernels->end()) {
+        auto &post_in_tensors = post_kernel->in_tensors();
+        init_ref_count += std::count_if(
+          post_in_tensors.begin(), post_in_tensors.end(),
+          [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; });
+      }
     }
     tensor->set_init_ref_count(init_ref_count);
   }
diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h
index 55456c46c05..b539849d81f 100644
--- a/mindspore/lite/src/lite_kernel.h
+++ b/mindspore/lite/src/lite_kernel.h
@@ -238,7 +238,7 @@ class LiteKernel {
     }
   }
 
-  void set_in_tensor(lite::Tensor *in_tensor, int index) {
+  void set_in_tensor(lite::Tensor *in_tensor, size_t index) {
     MS_ASSERT(kernel_ != nullptr);
     if (desc_.provider == kBuiltin) {
       std::static_pointer_cast<InnerKernel>(kernel_)->set_in_tensor(in_tensor, index);
@@ -264,7 +264,7 @@ class LiteKernel {
     }
   }
 
-  virtual void set_out_tensor(lite::Tensor *out_tensor, int index) {
+  virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) {
     MS_ASSERT(kernel_ != nullptr);
     if (desc_.provider == kBuiltin) {
       std::static_pointer_cast<InnerKernel>(kernel_)->set_out_tensor(out_tensor, index);
@@ -327,7 +327,7 @@ class LiteKernel {
 
   virtual bool IsReady(const std::vector<lite::Tensor *> &in_tensor);
 
-  virtual void InitOutTensorInitRefCount();
+  virtual void InitOutTensorInitRefCount(const std::vector<LiteKernel *> *mask_kernels = nullptr);
 
   KernelKey desc() const { return desc_; }
 
@@ -353,7 +353,7 @@ class LiteKernel {
   mutable std::vector<lite::Tensor *> mutable_out_tensors_;
   bool is_model_output_ = false;
   SubGraphType subgraph_type_ = kNotSubGraph;
-  const lite::InnerContext *context_;
+  const lite::InnerContext *context_ = nullptr;
 };
 
 typedef InnerKernel *(*KernelCreator)(const std::vector<lite::Tensor *> &inputs,
@@ -378,4 +378,4 @@ kernel::InnerKernel *LiteKernelCreator(const std::vector<lite::Tensor *> &inputs
 }
 }  // namespace mindspore::kernel
 
-#endif  // MINDSPORE_LITE_SRC_INNER_KERNEL_H_
+#endif  // MINDSPORE_LITE_SRC_LITE_KERNEL_H_
diff --git a/mindspore/lite/src/lite_kernel_util.cc b/mindspore/lite/src/lite_kernel_util.cc
index d3b2df08187..0fac1ba5903 100644
--- a/mindspore/lite/src/lite_kernel_util.cc
+++ b/mindspore/lite/src/lite_kernel_util.cc
@@ -190,12 +190,13 @@ int LiteKernelUtil::TopologicalSortKernels(std::vector<kernel::LiteKernel *> *ke
 
 void LiteKernelUtil::InitTensorInitRefCount(const std::vector<kernel::LiteKernel *> &kernels) {
   for (auto *kernel : kernels) {
-    kernel->InitOutTensorInitRefCount();
+    kernel->InitOutTensorInitRefCount(&kernels);
   }
 }
 
 int LiteKernelUtil::SetInput(const LiteKernel &kernelMod, const std::vector<lite::Tensor *> &inputs) { return -1; }
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
   if (kernel->desc().delegate != nullptr) {
     return false;
@@ -214,6 +215,7 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) {
 
   return false;
 }
+#endif
 
 kernel::LiteKernel *LiteKernelUtil::GetInputsSpecificNode(const kernel::LiteKernel *kernel,
                                                           const schema::PrimitiveType &primitive_type) {
diff --git a/mindspore/lite/src/lite_kernel_util.h b/mindspore/lite/src/lite_kernel_util.h
index 74db835b68c..0a8bc2ddde4 100644
--- a/mindspore/lite/src/lite_kernel_util.h
+++ b/mindspore/lite/src/lite_kernel_util.h
@@ -37,7 +37,9 @@ class LiteKernelUtil {
 
   static int SetInput(const LiteKernel &kernelMod, const std::vector<lite::Tensor *> &inputs);
 
+#ifdef ENABLE_CONTROL_TENSORLIST
   static bool IsSwitchCall(kernel::LiteKernel *kernel);
+#endif
 
   static kernel::LiteKernel *GetInputsSpecificNode(const kernel::LiteKernel *kernel,
                                                    const schema::PrimitiveType &primitive_type);
diff --git a/mindspore/lite/src/lite_mindrt.cc b/mindspore/lite/src/lite_mindrt.cc
index ab1e87f5517..6c7bfffad4c 100644
--- a/mindspore/lite/src/lite_mindrt.cc
+++ b/mindspore/lite/src/lite_mindrt.cc
@@ -86,6 +86,7 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
       if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) {
         old_tensor->set_data_type(kernel_->desc().data_type);
       }
+#ifdef ENABLE_CONTROL_TENSORLIST
       if (old_tensor->data_type() == kObjectTypeTensorType) {
         auto old_tensorlist = reinterpret_cast<TensorList *>(old_tensor);
         if (old_tensorlist->tensors_data_type() == kNumberTypeFloat16 ||
@@ -93,6 +94,8 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
           old_tensorlist->set_tensors_data_type(kernel_->desc().data_type);
         }
       }
+#endif
+      old_tensor->set_allocator(kernel_->Context()->allocator);
       continue;
     }
 
@@ -102,10 +105,12 @@ void LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *ac
     }
 
     Tensor *new_tensor = new Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category());
-    new_tensor->set_allocator(old_tensor->allocator()); /* GPU use opencl allocator */
-    if (new_tensor->allocator() == nullptr && kernel_->subgraph_type() == kernel::kCpuFP16SubGraph) {
+    new_tensor->set_allocator(old_tensor->allocator());
+    if (new_tensor->allocator() == nullptr && kernel_->Context() != nullptr &&
+        kernel_->desc().arch != kernel::kDelegate) {
       new_tensor->set_allocator(kernel_->Context()->allocator);
     }
+
     new_tensor->set_tensor_name(kernel_->name() + "_duplicate_" + old_tensor->tensor_name());
     for (LiteQuantParam quant : old_tensor->quant_params()) {
       new_tensor->AddQuantParam(quant);
@@ -187,6 +192,7 @@ int LiteOpActor::CompileArrowThroughOutputKernels() {
   return RET_OK;
 }
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 int LiteOpActor::CompileArrowThroughPartialCall() {
   if (kernel_->desc().delegate != nullptr) {
     MS_LOG(INFO) << "kernel is delegate subgraph kernel.";
@@ -225,10 +231,13 @@ int LiteOpActor::CompileArrowThroughPartialCall() {
   subgraph_kernel->DropNode(call_node_);
   return RET_OK;
 }
+#endif
 
 int LiteOpActor::CompileArrow() {
+  int ret;
   output_data_arrows_.clear();
-  int ret = CompileArrowThroughPartialCall();
+#ifdef ENABLE_CONTROL_TENSORLIST
+  ret = CompileArrowThroughPartialCall();
   if (ret != RET_OK) {
     output_data_arrows_.clear();
     MS_LOG(ERROR) << "CompileArrowThroughPartialCall failed.";
@@ -238,6 +247,7 @@ int LiteOpActor::CompileArrow() {
     MS_LOG(INFO) << "CompileArrowThroughPartialCall done.";
     return RET_OK;
   }
+#endif
   ret = CompileArrowThroughOutputKernels();
   if (ret != RET_OK) {
     output_data_arrows_.clear();
@@ -263,6 +273,87 @@ void LiteOpActor::MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor) {
   src_tensor->DecRefCount();
 }
 
+void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) {
+  if (src_tensor == dst_tensor) {
+    MS_LOG(INFO) << "no need to move.";
+    return;
+  }
+  MS_ASSERT(src_tensor->allocator() != nullptr);
+#ifdef ENABLE_CONTROL_TENSORLIST
+  if (src_tensor->data_type() == kObjectTypeTensorType) {
+    MoveTensorListInputData(reinterpret_cast<TensorList *>(dst_tensor), reinterpret_cast<TensorList *>(src_tensor));
+  } else {
+    MoveTensorInputData(dst_tensor, src_tensor);
+  }
+#else
+  MoveTensorInputData(dst_tensor, src_tensor);
+#endif
+  return;
+}
+
+void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) {
+  dst_tensor->set_data(src_tensor->data());
+  dst_tensor->set_own_data(false);
+}
+
+int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
+  int ret = RET_OK;
+#ifdef ENABLE_CONTROL_TENSORLIST
+  if (src->data_type() != kObjectTypeTensorType) {
+    ret = CastTensorInputData(dst, src);
+  } else {
+    ret = CastTensorListInputData(reinterpret_cast<TensorList *>(dst), reinterpret_cast<TensorList *>(src));
+  }
+#else
+  ret = CastTensorInputData(dst, src);
+#endif
+  src->DecRefCount();
+  return ret;
+}
+
+bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) {
+  if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType &&
+      dst_tensor->data_type() != src_tensor->data_type()) {
+    return true;
+  }
+#ifdef ENABLE_CONTROL_TENSORLIST
+  if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType &&
+      reinterpret_cast<TensorList *>(dst_tensor)->tensors_data_type() !=
+        reinterpret_cast<TensorList *>(src_tensor)->tensors_data_type()) {
+    return true;
+  }
+#endif
+  return false;
+}
+
+int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) {
+  dst->MallocData();
+  dst->ResetRefCount();
+#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
+  if (dst->shape() != src->shape()) {
+    MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs "
+                  << "src tensor: " << src->tensor_name() << " shape: " << src->shape();
+    return RET_PARAM_INVALID;
+  }
+  auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */
+  auto src_data = src->MutableData();
+  auto src_nums_size = src->ElementsNum();
+  auto dst_data_type = static_cast<int>(dst->data_type());
+  auto src_data_type = static_cast<int>(src->data_type());
+  if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) {
+    Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_);
+  } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) {
+    Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_);
+  } else {
+    MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type;
+    return RET_NOT_SUPPORT;
+  }
+  return RET_OK;
+#endif
+  return RET_ERROR;
+}
+
+#ifdef ENABLE_CONTROL_TENSORLIST
 void LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) {
   MS_ASSERT(src_tensorlist != nullptr);
   MS_ASSERT(dst_tensorlist != nullptr);
@@ -302,77 +393,6 @@ void LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList
   }
 }
 
-void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) {
-  if (src_tensor == dst_tensor) {
-    MS_LOG(INFO) << "no need to move.";
-    return;
-  }
-  MS_ASSERT(src_tensor->allocator() != nullptr);
-
-  if (src_tensor->data_type() == kObjectTypeTensorType) {
-    MoveTensorListInputData(reinterpret_cast<TensorList *>(dst_tensor), reinterpret_cast<TensorList *>(src_tensor));
-  } else {
-    MoveTensorInputData(dst_tensor, src_tensor);
-  }
-  return;
-}
-
-void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) {
-  dst_tensor->set_data(src_tensor->data());
-  dst_tensor->set_own_data(false);
-}
-
-int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) {
-  int ret = RET_OK;
-  if (src->data_type() != kObjectTypeTensorType) {
-    ret = CastTensorInputData(dst, src);
-  } else {
-    ret = CastTensorListInputData(reinterpret_cast<TensorList *>(dst), reinterpret_cast<TensorList *>(src));
-  }
-  src->DecRefCount();
-  return ret;
-}
-
-bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) {
-  if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType &&
-      dst_tensor->data_type() != src_tensor->data_type()) {
-    return true;
-  }
-  if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType &&
-      reinterpret_cast<TensorList *>(dst_tensor)->tensors_data_type() !=
-        reinterpret_cast<TensorList *>(src_tensor)->tensors_data_type()) {
-    return true;
-  }
-  return false;
-}
-
-int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) {
-  dst->MallocData();
-  dst->ResetRefCount();
-#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
-  if (dst->shape() != src->shape()) {
-    MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs "
-                  << "src tensor: " << src->tensor_name() << " shape: " << src->shape();
-    return RET_PARAM_INVALID;
-  }
-  auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */
-  auto src_data = src->MutableData();
-  auto src_nums_size = src->ElementsNum();
-  auto dst_data_type = static_cast<int>(dst->data_type());
-  auto src_data_type = static_cast<int>(src->data_type());
-  if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) {
-    Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_);
-  } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) {
-    Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_);
-  } else {
-    MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type;
-    return RET_NOT_SUPPORT;
-  }
-  return RET_OK;
-#endif
-  return RET_ERROR;
-}
-
 int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) {
   MS_ASSERT(src_tensorlist != nullptr);
   MS_ASSERT(dst_tensorlist != nullptr);
@@ -399,87 +419,6 @@ int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList
   return RET_OK;
 }
 
-void LiteOpActor::SetInputShape() {
-  for (size_t i = 0; i < inputs_data_.size(); ++i) {
-    auto &input_tensor = kernel_->in_tensors()[i];
-    if (input_tensor->shape() == inputs_data_[i]->shape()) {
-      continue;
-    }
-    MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()["
-                  << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal.";
-    MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name();
-
-    if (input_tensor->data_type() == kObjectTypeTensorType) {
-      auto input_tensorlist = reinterpret_cast<TensorList *>(input_tensor);
-      auto input_data_tensorlist = reinterpret_cast<TensorList *>(inputs_data_[i]);
-      input_tensorlist->FreeTensorListData();
-      input_tensorlist->set_element_shape(input_data_tensorlist->element_shape());
-      input_tensorlist->set_shape(input_data_tensorlist->shape());
-      std::vector<std::vector<int>> tensor_shape{};
-      std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(),
-                     std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); });
-      input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape);
-    } else {
-      input_tensor->set_shape(inputs_data_[i]->shape());
-      input_tensor->set_format(inputs_data_[i]->format());
-    }
-  }
-}
-
-int LiteOpActor::InitInputData() {
-  SetInputShape();
-
-  for (size_t i = 0; i < inputs_data_.size(); ++i) {
-    auto dst_tensor = kernel_->in_tensors()[i];
-    auto src_tensor = inputs_data_[i];
-    if (dst_tensor->init_ref_count() == 0) {
-      src_tensor->DecRefCount();
-      continue;
-    }
-
-    if (NeedCastData(dst_tensor, src_tensor)) {
-      CastInputData(dst_tensor, src_tensor);
-      continue;
-    }
-
-    /* same data-type  */
-    if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) {
-      // delegate graph kernel output tensor
-      SetInputData(dst_tensor, src_tensor);
-    } else {
-      MoveInputData(dst_tensor, src_tensor);
-    }
-  }
-  return RET_OK;
-}
-
-void LiteOpActor::AsyncOutput(OpContext<Tensor> *context) {
-  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
-    auto data = outputs_data_.at(i);
-    Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor<Tensor>::RunOpData, data.get(), context);
-  }
-}
-
-void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); }
-
-void LiteOpActor::SetOutputData(OpContext<Tensor> *context) {
-  for (auto index : results_index_) {
-    context->SetResult(index, RET_OK);
-  }
-}
-
-int LiteOpActor::PrepareOutputData() {
-  outputs_data_.resize(output_data_arrows_.size());
-  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
-    auto &arrow = output_data_arrows_[i];
-    auto data =
-      std::make_shared<OpData<Tensor>>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_),
-                                       static_cast<int>(arrow->to_input_index_));
-    outputs_data_.at(i) = data;
-  }
-  return RET_OK;
-}
-
 int LiteSwitchOpActor::CompileTrueBranchArrow() {
   if (true_partial_node_ == nullptr) {
     MS_LOG(ERROR) << "true_partial_node_ is nullptr.";
@@ -719,6 +658,91 @@ void LiteSwitchOpActor::RunOpData(OpData<Tensor> *inputs, OpContext<Tensor> *con
   }
 }
 
+#endif
+
+void LiteOpActor::SetInputShape() {
+  for (size_t i = 0; i < inputs_data_.size(); ++i) {
+    auto &input_tensor = kernel_->in_tensors()[i];
+    if (input_tensor->shape() == inputs_data_[i]->shape()) {
+      continue;
+    }
+    MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()["
+                  << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal.";
+    MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name();
+
+    if (input_tensor->data_type() == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
+      auto input_tensorlist = reinterpret_cast<TensorList *>(input_tensor);
+      auto input_data_tensorlist = reinterpret_cast<TensorList *>(inputs_data_[i]);
+      input_tensorlist->FreeTensorListData();
+      input_tensorlist->set_element_shape(input_data_tensorlist->element_shape());
+      input_tensorlist->set_shape(input_data_tensorlist->shape());
+      std::vector<std::vector<int>> tensor_shape{};
+      std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(),
+                     std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); });
+      input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape);
+#endif
+    } else {
+      input_tensor->set_shape(inputs_data_[i]->shape());
+      input_tensor->set_format(inputs_data_[i]->format());
+    }
+  }
+}
+
+int LiteOpActor::InitInputData() {
+  SetInputShape();
+
+  for (size_t i = 0; i < inputs_data_.size(); ++i) {
+    auto dst_tensor = kernel_->in_tensors()[i];
+    auto src_tensor = inputs_data_[i];
+    if (dst_tensor->init_ref_count() == 0) {
+      src_tensor->DecRefCount();
+      continue;
+    }
+
+    if (NeedCastData(dst_tensor, src_tensor)) {
+      CastInputData(dst_tensor, src_tensor);
+      continue;
+    }
+
+    /* same data-type  */
+    if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) {
+      // delegate graph kernel output tensor
+      SetInputData(dst_tensor, src_tensor);
+    } else {
+      MoveInputData(dst_tensor, src_tensor);
+    }
+  }
+  return RET_OK;
+}
+
+void LiteOpActor::AsyncOutput(OpContext<Tensor> *context) {
+  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
+    auto data = outputs_data_.at(i);
+    Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor<Tensor>::RunOpData, data.get(), context);
+  }
+}
+
+void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); }
+
+void LiteOpActor::SetOutputData(OpContext<Tensor> *context) {
+  for (auto index : results_index_) {
+    context->SetResult(index, RET_OK);
+  }
+}
+
+int LiteOpActor::PrepareOutputData() {
+  outputs_data_.resize(output_data_arrows_.size());
+  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
+    auto &arrow = output_data_arrows_[i];
+    auto data =
+      std::make_shared<OpData<Tensor>>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_),
+                                       static_cast<int>(arrow->to_input_index_));
+    outputs_data_.at(i) = data;
+  }
+  return RET_OK;
+}
+
 std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::LiteKernel *> &kernels,
                                                         const lite::InnerContext *ctx) {
   std::vector<std::shared_ptr<LiteOpActor>> actors;
@@ -730,8 +754,8 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
   }
   for (auto &kernel : kernels) {
     /* make subgraph name (actor name) unique */
-    kernel->set_name(kernel->name() + to_string(actor_count++));
-
+    kernel->set_name(kernel->name() + "_" + to_string(actor_count++));
+#ifdef ENABLE_CONTROL_TENSORLIST
     if ((kernel::LiteKernelUtil::IsSwitchCall(kernel))) {
       auto switch_actor = std::make_shared<LiteSwitchOpActor>(kernel);
       if (switch_actor == nullptr) {
@@ -743,6 +767,7 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
       subgraph_name_AID_map[kernel] = switch_actor->GetAID();
       actors.push_back(switch_actor);
     } else {
+#endif
       auto actor = std::make_shared<LiteOpActor>(kernel);
       if (actor == nullptr) {
         MS_LOG(ERROR) << "create LiteOpActor failed: " << kernel->name();
@@ -752,7 +777,9 @@ std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel
       actor->set_thread_pool(thread_pool);
       subgraph_name_AID_map[kernel] = actor->GetAID();
       actors.push_back(actor);
+#ifdef ENABLE_CONTROL_TENSORLIST
     }
+#endif
   }
 
   for (auto &actor : actors) {
diff --git a/mindspore/lite/src/lite_mindrt.h b/mindspore/lite/src/lite_mindrt.h
index 2edd9ce8455..3111015153f 100644
--- a/mindspore/lite/src/lite_mindrt.h
+++ b/mindspore/lite/src/lite_mindrt.h
@@ -95,13 +95,15 @@ class LiteOpActor : public OpActor<lite::Tensor> {
  private:
   void IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *actors);
   void MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor);
-  void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor);
   void MoveInputData(Tensor *dst_tensor, Tensor *src_tensor);
   void SetInputData(Tensor *dst_tensor, Tensor *src_tensor);
   int CastInputData(Tensor *dst_tensor, Tensor *src_tensor);
   bool NeedCastData(Tensor *dst_tensor, Tensor *src_tensor);
   int CastTensorInputData(Tensor *dst_tensor, Tensor *src_tensor);
+#ifdef ENABLE_CONTROL_TENSORLIST
+  void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor);
   int CastTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor);
+#endif
 
  private:
   kernel::LiteKernel *partial_node_ = nullptr;
@@ -111,6 +113,7 @@ class LiteOpActor : public OpActor<lite::Tensor> {
 #endif
 };
 
+#ifdef ENABLE_CONTROL_TENSORLIST
 class LiteSwitchOpActor : public LiteOpActor {
  public:
   explicit LiteSwitchOpActor(kernel::LiteKernel *kernel) : LiteOpActor(kernel) {}
@@ -146,6 +149,7 @@ class LiteSwitchOpActor : public LiteOpActor {
   std::vector<OpDataPtr<Tensor>> true_branch_outputs_data_;
   std::vector<OpDataPtr<Tensor>> false_branch_outputs_data_;
 };
+#endif
 
 int MindrtInit();
 void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &);
diff --git a/mindspore/lite/src/lite_model.cc b/mindspore/lite/src/lite_model.cc
index 7471ef92735..3f28ebf6186 100644
--- a/mindspore/lite/src/lite_model.cc
+++ b/mindspore/lite/src/lite_model.cc
@@ -479,5 +479,4 @@ int Model::Export(Model *model, const char *filename) {
   return chmod(filename, S_IRUSR);
 #endif
 }
-
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc
index 403626ac2a8..f026ffedd20 100644
--- a/mindspore/lite/src/lite_session.cc
+++ b/mindspore/lite/src/lite_session.cc
@@ -68,11 +68,16 @@ int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
   // huffman code and bit pack are not assumed to be performed at same time
   STATUS ret = RET_ERROR;
   if (src_tensor.enableHuffmanCode()) {
+#ifdef ENABLE_HUFFMAN_DECODE
     ret = WeightDecoder::DecodeHuffmanCode(src_tensor, dst_tensor);
     if (ret != RET_OK && ret != RET_NO_CHANGE) {
       MS_LOG(ERROR) << "Decode huffman code failed: " << ret;
       return ret;
     }
+#else
+    MS_LOG(ERROR) << unsupport_huffman_decode_log;
+    return RET_ERROR;
+#endif
   } else if (need_bit_unpack) {
     ret = WeightDecoder::UnPackToInt(src_tensor, dst_tensor);
     if (ret != RET_OK && ret != RET_NO_CHANGE) {
@@ -123,11 +128,16 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde
   MS_ASSERT(dst_tensor != nullptr);
   if (src_tensor->data() != nullptr && src_tensor->data()->size() > 0) {
     if (dst_tensor->data_type() == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
       auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
       if (tensor_list->Decode(reinterpret_cast<const int *>(src_tensor->data()->data())) != RET_OK) {
         MS_LOG(ERROR) << "Decode tensorlist data failed";
         return RET_ERROR;
       }
+#else
+      MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+      return RET_NOT_SUPPORT;
+#endif
     } else {
       auto ret = DecompressTensor(*src_tensor, dst_tensor);
       if (ret == RET_NO_CHANGE) {
@@ -159,6 +169,7 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
   }
   lite::Tensor *dst_tensor = nullptr;
   if (TypeId(src_tensor.dataType()) == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
     dst_tensor = new (std::nothrow) TensorList(shape, std::vector<int>(), src_category);
     // set tensor list datatype
     auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
@@ -166,6 +177,9 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
       auto tensor_data_type = TypeId(reinterpret_cast<const int *>(src_tensor.data()->data())[0]);
       tensor_list->set_tensors_data_type(tensor_data_type);
     }
+#else
+    MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+#endif
   } else {
     dst_tensor = new (std::nothrow)
       Tensor(TypeId(src_tensor.dataType()), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
@@ -689,12 +703,6 @@ int LiteSession::Init(const Context *context) {
       return RET_ERROR;
     }
   }
-  ret = KernelRegistry::GetInstance()->Init();
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "KernelRegistry Init Failed.";
-    is_running_.store(false);
-    return ret;
-  }
   ret = InitGPURuntime();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init GPU runtime failed.";
diff --git a/mindspore/lite/src/ops/CMakeLists.txt b/mindspore/lite/src/ops/CMakeLists.txt
index 465d5296fcc..05b1a731ac9 100644
--- a/mindspore/lite/src/ops/CMakeLists.txt
+++ b/mindspore/lite/src/ops/CMakeLists.txt
@@ -4,9 +4,45 @@ file(GLOB OPS_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/populate/*.cc
     )
+if(MSLITE_STRING_KERNEL)
+    file(GLOB OPS_SRC_STRING
+            ${CMAKE_CURRENT_SOURCE_DIR}/populate/string/*.cc
+            )
+    set(OPS_SRC
+            ${OPS_SRC}
+            ${OPS_SRC_STRING}
+            )
+endif()
+if(MSLITE_CONTROL_TENSORLIST)
+    file(GLOB OPS_SRC_CONTROL_TENSORLIST
+            ${CMAKE_CURRENT_SOURCE_DIR}/populate/control/*.cc
+            )
+    set(OPS_SRC
+            ${OPS_SRC}
+            ${OPS_SRC_CONTROL_TENSORLIST}
+            )
+endif()
 if(ENABLE_V0)
     file(GLOB_RECURSE COMPAT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/compat/*.cc)
     file(GLOB OPS_SRC_V0 ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/*.cc)
+    if(MSLITE_STRING_KERNEL)
+        file(GLOB OPS_SRC_STRING_V0
+                ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/string/*.cc
+                )
+        set(OPS_SRC_V0
+                ${OPS_SRC_V0}
+                ${OPS_SRC_STRING_V0}
+                )
+    endif()
+    if(MSLITE_CONTROL_TENSORLIST)
+        file(GLOB OPS_SRC_CONTROL_TENSORLIST_V0
+                ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/control/*.cc
+                )
+        set(OPS_SRC_V0
+                ${OPS_SRC_V0}
+                ${OPS_SRC_CONTROL_TENSORLIST_V0}
+                )
+    endif()
     set(OPS_SRC ${OPS_SRC} ${COMPAT_SRC} ${OPS_SRC_V0})
 endif()
 
diff --git a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc
index eb97b279922..4232f59f7c5 100644
--- a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc
+++ b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc
@@ -21,7 +21,7 @@ namespace mindspore {
 namespace lite {
 int TransferExpandDimsAttr(Model::Node *node, std::vector<schema::Tensor *> *dst_tensors,
                            std::vector<char *> *const tensor_bufs) {
-  if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
+  if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
     MS_LOG(ERROR) << "the parameter of this function is nullptr.";
     return RET_ERROR;
   }
diff --git a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc
index efbf3019e86..a2e794e2099 100644
--- a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc
+++ b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc
@@ -21,7 +21,7 @@ namespace mindspore {
 namespace lite {
 int TransferSliceAttr(Model::Node *node, std::vector<schema::Tensor *> *dst_tensors,
                       std::vector<char *> *const tensor_bufs) {
-  if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
+  if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
     MS_LOG(ERROR) << "the parameter of this function is nullptr.";
     return RET_ERROR;
   }
diff --git a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc
index 69471b4147a..04ce2dc057c 100644
--- a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc
+++ b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc
@@ -28,7 +28,7 @@ int TransferStridedSliceAttr(Model::Node *node, std::vector<schema::Tensor *> *d
   dst_tensors->clear();
   auto prim = reinterpret_cast<const schema::v0::Primitive *>(node->primitive_);
   MS_ASSERT(prim != nullptr);
-  int inputs_size = node->input_indices_.size();
+  int inputs_size = static_cast<int>(node->input_indices_.size());
 
   auto param = prim->value_as_StridedSlice();
   if (param == nullptr) {
diff --git a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc
index 3785abc2a32..02bb1ce567b 100644
--- a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc
+++ b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc
@@ -21,7 +21,7 @@ namespace mindspore {
 namespace lite {
 int TransferTopkAttr(Model::Node *node, std::vector<schema::Tensor *> *dst_tensors,
                      std::vector<char *> *const tensor_bufs) {
-  if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
+  if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) {
     MS_LOG(ERROR) << "the parameter of this function is nullptr.";
     return RET_ERROR;
   }
diff --git a/mindspore/lite/src/ops/ops_def.cc b/mindspore/lite/src/ops/ops_def.cc
index b64ca1619fb..46c264b86c9 100644
--- a/mindspore/lite/src/ops/ops_def.cc
+++ b/mindspore/lite/src/ops/ops_def.cc
@@ -220,6 +220,7 @@ OP_TYPE(TensorArrayWrite)
 OP_TYPE(Affine)
 OP_TYPE(Attention)
 OP_TYPE(LSTMGrad)
+OP_TYPE(ScatterNdUpdate)
 OP_TYPE_DEF_END(PrimitiveType)
 
 OP_SCHEMA_DEF(Abs)
@@ -1212,3 +1213,6 @@ OP_SCHEMA_DEF_END(Affine)
 
 OP_SCHEMA_DEF(Attention)
 OP_SCHEMA_DEF_END(Attention)
+
+OP_SCHEMA_DEF(ScatterNdUpdate)
+OP_SCHEMA_DEF_END(ScatterNdUpdate)
diff --git a/mindspore/lite/src/ops/ops_func_declare.h b/mindspore/lite/src/ops/ops_func_declare.h
index a2dee794b4e..da54b2dc899 100644
--- a/mindspore/lite/src/ops/ops_func_declare.h
+++ b/mindspore/lite/src/ops/ops_func_declare.h
@@ -131,6 +131,7 @@
 #include "ops/rsqrt.h"
 #include "ops/scale.h"
 #include "ops/scatter_nd.h"
+#include "ops/scatter_nd_update.h"
 #include "ops/select.h"
 #include "ops/sgd.h"
 #include "ops/shape.h"
@@ -462,6 +463,7 @@ FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayRead)
 FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayWrite)
 FUNC_MSOP2SCHEMAOP_DECLARE(Affine)
 FUNC_MSOP2SCHEMAOP_DECLARE(Attention)
+FUNC_MSOP2SCHEMAOP_DECLARE(ScatterNdUpdate)
 #endif
 }  // namespace mindspore::lite::ops
 #else
diff --git a/mindspore/lite/src/ops/ops_utils.cc b/mindspore/lite/src/ops/ops_utils.cc
index 10a23304de7..90f57a89bb5 100644
--- a/mindspore/lite/src/ops/ops_utils.cc
+++ b/mindspore/lite/src/ops/ops_utils.cc
@@ -809,6 +809,11 @@ std::unique_ptr<schema::PrimitiveT> AttentionPrimitiveCreator(const AnfNodePtr &
   return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr;
 }
 
+std::unique_ptr<schema::PrimitiveT> ScatterNdUpdatePrimitiveCreator(const AnfNodePtr &node) {
+  auto ms_primc = GetValueNode<std::shared_ptr<mindspore::ops::ScatterNdUpdate>>(node);
+  return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr;
+}
+
 RegistryMSOps g_absPrimitiveCreatorRegistry("Abs", AbsPrimitiveCreator);
 RegistryMSOps g_absGradPrimitiveCreatorRegistry("AbsGrad", AbsGradPrimitiveCreator);
 RegistryMSOps g_activationPrimitiveCreatorRegistry("Activation", ActivationPrimitiveCreator);
@@ -1034,6 +1039,7 @@ RegistryMSOps g_TensorArrayReadCreatorRegistry("TensorArrayRead", TensorArrayRea
 RegistryMSOps g_TensorArrayWriteCreatorRegistry("TensorArrayWrite", TensorArrayWritePrimitiveCreator);
 RegistryMSOps g_AffineCreatorRegistry("Affine", AffinePrimitiveCreator);
 RegistryMSOps g_AttentionCreatorRegistry("Attention", AttentionPrimitiveCreator);
+RegistryMSOps g_ScatterNdUpdateCreatorRegistry("ScatterNdUpdate", ScatterNdUpdatePrimitiveCreator);
 
 std::unique_ptr<schema::PrimitiveT> CustomPrimitiveCreator(const AnfNodePtr &node) {
   auto ms_primc = GetValueNode<std::shared_ptr<mindspore::ops::Custom>>(node);
diff --git a/mindspore/lite/src/ops/populate/adder_populate.cc b/mindspore/lite/src/ops/populate/adder_populate.cc
index 284b632448b..5b41e4f5ae7 100644
--- a/mindspore/lite/src/ops/populate/adder_populate.cc
+++ b/mindspore/lite/src/ops/populate/adder_populate.cc
@@ -53,8 +53,8 @@ OpParameter *PopulateAdderParameter(const void *prim) {
   param->stride_w_ = static_cast<int>(*(stride->begin() + 1));
   param->pad_u_ = static_cast<int>(*(pad_list->begin()));
   param->pad_d_ = static_cast<int>(*(pad_list->begin() + 1));
-  param->pad_l_ = static_cast<int>(*(pad_list->begin() + 2));
-  param->pad_r_ = static_cast<int>(*(pad_list->begin() + 3));
+  param->pad_l_ = static_cast<int>(*(pad_list->begin() + kOffsetTwo));
+  param->pad_r_ = static_cast<int>(*(pad_list->begin() + kOffsetThree));
   param->dilation_h_ = static_cast<int>(*(dilation->begin()));
   param->dilation_w_ = static_cast<int>(*(dilation->begin() + 1));
   param->input_channel_ = static_cast<int>(value->in_channel());
diff --git a/mindspore/lite/src/ops/populate/conv2d_populate.cc b/mindspore/lite/src/ops/populate/conv2d_populate.cc
index ab61ea062c8..ceec07cb670 100644
--- a/mindspore/lite/src/ops/populate/conv2d_populate.cc
+++ b/mindspore/lite/src/ops/populate/conv2d_populate.cc
@@ -20,7 +20,6 @@ using mindspore::schema::PrimitiveType_Conv2DFusion;
 
 namespace mindspore {
 namespace lite {
-constexpr auto kMinShapeSize = 2;
 OpParameter *PopulateConvParameter(const void *prim) {
   auto primitive = static_cast<const schema::Primitive *>(prim);
   MS_ASSERT(primitive != nullptr);
@@ -47,7 +46,8 @@ OpParameter *PopulateConvParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) {
+  if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo ||
+      dilation->size() < kMinShapeSizeTwo) {
     MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size()
                   << ", stride size: " << stride->size() << ", dilation size: " << dilation->size();
     free(param);
@@ -68,7 +68,7 @@ OpParameter *PopulateConvParameter(const void *prim) {
     default:
       param->pad_mode_ = Pad_pad;
   }
-  if (pad_list == nullptr || pad_list->size() < 4) {
+  if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) {
     param->pad_u_ = 0;
     param->pad_d_ = 0;
     param->pad_l_ = 0;
@@ -76,8 +76,8 @@ OpParameter *PopulateConvParameter(const void *prim) {
   } else {
     param->pad_u_ = static_cast<int>(*(pad_list->begin()));
     param->pad_d_ = static_cast<int>(*(pad_list->begin() + 1));
-    param->pad_l_ = static_cast<int>(*(pad_list->begin() + 2));
-    param->pad_r_ = static_cast<int>(*(pad_list->begin() + 3));
+    param->pad_l_ = static_cast<int>(*(pad_list->begin() + kOffsetTwo));
+    param->pad_r_ = static_cast<int>(*(pad_list->begin() + kOffsetThree));
   }
   param->dilation_h_ = static_cast<int>(*(dilation->begin()));
   param->dilation_w_ = static_cast<int>(*(dilation->begin() + 1));
diff --git a/mindspore/lite/src/ops/populate/deconv2d_populate.cc b/mindspore/lite/src/ops/populate/deconv2d_populate.cc
index 1e6a8328f12..f9bd06890e2 100644
--- a/mindspore/lite/src/ops/populate/deconv2d_populate.cc
+++ b/mindspore/lite/src/ops/populate/deconv2d_populate.cc
@@ -20,7 +20,6 @@ using mindspore::schema::PrimitiveType_Conv2dTransposeFusion;
 
 namespace mindspore {
 namespace lite {
-constexpr auto kMinShapeSize = 2;
 OpParameter *PopulateDeconvParameter(const void *prim) {
   auto primitive = static_cast<const schema::Primitive *>(prim);
   MS_ASSERT(primitive != nullptr);
@@ -48,7 +47,8 @@ OpParameter *PopulateDeconvParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) {
+  if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo ||
+      dilation->size() < kMinShapeSizeTwo) {
     MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size()
                   << ", stride size: " << stride->size() << ", dilation size: " << dilation->size()
                   << ", output_paddings size:" << output_paddings->size();
@@ -72,7 +72,7 @@ OpParameter *PopulateDeconvParameter(const void *prim) {
     default:
       param->pad_mode_ = Pad_pad;
   }
-  if (pad_list == nullptr || pad_list->size() < 4) {
+  if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) {
     param->pad_u_ = 0;
     param->pad_d_ = 0;
     param->pad_l_ = 0;
@@ -80,8 +80,8 @@ OpParameter *PopulateDeconvParameter(const void *prim) {
   } else {
     param->pad_u_ = static_cast<int>(*(pad_list->begin()));
     param->pad_d_ = static_cast<int>(*(pad_list->begin() + 1));
-    param->pad_l_ = static_cast<int>(*(pad_list->begin() + 2));
-    param->pad_r_ = static_cast<int>(*(pad_list->begin() + 3));
+    param->pad_l_ = static_cast<int>(*(pad_list->begin() + kOffsetTwo));
+    param->pad_r_ = static_cast<int>(*(pad_list->begin() + kOffsetThree));
   }
   param->dilation_h_ = static_cast<int>(*(dilation->begin()));
   param->dilation_w_ = static_cast<int>(*(dilation->begin() + 1));
diff --git a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc
index e8526010db0..9ff37e4082b 100644
--- a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc
+++ b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc
@@ -19,7 +19,6 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess;
 
 namespace mindspore {
 namespace lite {
-constexpr auto kScaleMinSize = 4;
 OpParameter *PopulateDetectionPostProcessParameter(const void *prim) {
   auto primitive = static_cast<const schema::Primitive *>(prim);
   MS_ASSERT(primitive != nullptr);
@@ -43,15 +42,15 @@ OpParameter *PopulateDetectionPostProcessParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  if (scale->size() < kScaleMinSize) {
+  if (scale->size() < kMinShapeSizeFour) {
     MS_LOG(ERROR) << "Invalid scale shape size " << scale->size();
     free(param);
     return nullptr;
   }
   param->h_scale_ = *(scale->begin());
   param->w_scale_ = *(scale->begin() + 1);
-  param->x_scale_ = *(scale->begin() + 2);
-  param->y_scale_ = *(scale->begin() + 3);
+  param->x_scale_ = *(scale->begin() + kOffsetTwo);
+  param->y_scale_ = *(scale->begin() + kOffsetThree);
   param->nms_iou_threshold_ = value->nms_iou_threshold();
   param->nms_score_threshold_ = value->nms_score_threshold();
   param->max_detections_ = value->max_detections();
diff --git a/mindspore/lite/src/ops/populate/pooling_populate.cc b/mindspore/lite/src/ops/populate/pooling_populate.cc
index 29adecdecaa..8b2933aa85b 100644
--- a/mindspore/lite/src/ops/populate/pooling_populate.cc
+++ b/mindspore/lite/src/ops/populate/pooling_populate.cc
@@ -20,10 +20,6 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion;
 
 namespace mindspore {
 namespace lite {
-constexpr size_t kMinShapeSize = 2;
-constexpr size_t kMinPadSize = 4;
-constexpr int kOffsetTwo = 2;
-constexpr int kOffsetThree = 3;
 OpParameter *PopulateAvgPoolParameter(const void *primitive) {
   auto pooling_prim = static_cast<const schema::Primitive *>(primitive);
   MS_ASSERT(pooling_prim != nullptr);
@@ -44,7 +40,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) {
   param->pool_mode_ = PoolMode_AvgPool;
   param->global_ = value->global();
   auto strides = value->strides();
-  if (strides == nullptr || strides->size() < kMinShapeSize) {
+  if (strides == nullptr || strides->size() < kMinShapeSizeTwo) {
     MS_LOG(ERROR) << "strides is invalid!";
     free(param);
     return nullptr;
@@ -52,7 +48,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) {
   param->stride_w_ = static_cast<int>(*(strides->begin() + 1));
   param->stride_h_ = static_cast<int>(*(strides->begin()));
   auto pad = value->pad();
-  if (pad != nullptr && pad->size() >= kMinPadSize) {
+  if (pad != nullptr && pad->size() >= kMinShapeSizeFour) {
     param->pad_u_ = static_cast<int>(*(pad->begin()));
     param->pad_d_ = static_cast<int>(*(pad->begin() + 1));
     param->pad_l_ = static_cast<int>(*(pad->begin() + kOffsetTwo));
@@ -60,7 +56,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) {
   }
   if (!param->global_) {
     auto kernel_size = value->kernel_size();
-    if (kernel_size == nullptr || kernel_size->size() < kMinShapeSize) {
+    if (kernel_size == nullptr || kernel_size->size() < kMinShapeSizeTwo) {
       MS_LOG(ERROR) << "kernel_size is invalid";
       free(param);
       return nullptr;
@@ -126,8 +122,8 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) {
   if (!param->global_) {
     auto kernel_size = value->kernel_size();
     auto strides = value->strides();
-    if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSize ||
-        strides->size() < kMinShapeSize) {
+    if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSizeTwo ||
+        strides->size() < kMinShapeSizeTwo) {
       MS_LOG(ERROR) << "kernel_size or strides is invalid";
       free(param);
       return nullptr;
@@ -137,7 +133,7 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) {
     param->stride_w_ = static_cast<int>(*(strides->begin() + 1));
     param->stride_h_ = static_cast<int>(*(strides->begin()));
     auto pad = value->pad();
-    if (pad != nullptr && pad->size() >= kMinPadSize) {
+    if (pad != nullptr && pad->size() >= kMinShapeSizeFour) {
       param->pad_u_ = static_cast<int>(*(pad->begin()));
       param->pad_d_ = static_cast<int>(*(pad->begin() + 1));
       param->pad_l_ = static_cast<int>(*(pad->begin() + kOffsetTwo));
diff --git a/mindspore/lite/src/ops/populate/populate_register.h b/mindspore/lite/src/ops/populate/populate_register.h
index 0537156cee2..1f248395f99 100644
--- a/mindspore/lite/src/ops/populate/populate_register.h
+++ b/mindspore/lite/src/ops/populate/populate_register.h
@@ -27,6 +27,10 @@
 
 namespace mindspore {
 namespace lite {
+constexpr int kOffsetTwo = 2;
+constexpr int kOffsetThree = 3;
+constexpr size_t kMinShapeSizeTwo = 2;
+constexpr size_t kMinShapeSizeFour = 4;
 typedef OpParameter *(*ParameterGen)(const void *prim);
 
 class PopulateRegistry {
diff --git a/mindspore/lite/src/ops/populate/prior_box_populate.cc b/mindspore/lite/src/ops/populate/prior_box_populate.cc
index c16d21cbe2c..f23ab7364d4 100644
--- a/mindspore/lite/src/ops/populate/prior_box_populate.cc
+++ b/mindspore/lite/src/ops/populate/prior_box_populate.cc
@@ -47,7 +47,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  param->min_sizes_size = min_sizes->size();
+  param->min_sizes_size = static_cast<int32_t>(min_sizes->size());
   memcpy(param->min_sizes, min_sizes->data(), min_sizes->size() * sizeof(int32_t));
 
   auto max_sizes = value->max_sizes();
@@ -61,7 +61,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  param->max_sizes_size = max_sizes->size();
+  param->max_sizes_size = static_cast<int32_t>(max_sizes->size());
   memcpy(param->max_sizes, max_sizes->data(), max_sizes->size() * sizeof(int32_t));
 
   auto aspect_ratios = value->aspect_ratios();
@@ -76,7 +76,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) {
     free(param);
     return nullptr;
   }
-  param->aspect_ratios_size = aspect_ratios->size();
+  param->aspect_ratios_size = static_cast<int32_t>(aspect_ratios->size());
   memcpy(param->aspect_ratios, aspect_ratios->data(), aspect_ratios->size() * sizeof(float));
 
   auto variances = value->variances();
diff --git a/mindspore/lite/src/ops/populate/split_populate.cc b/mindspore/lite/src/ops/populate/split_populate.cc
index b2f9b9603c3..c93a42f6dcd 100644
--- a/mindspore/lite/src/ops/populate/split_populate.cc
+++ b/mindspore/lite/src/ops/populate/split_populate.cc
@@ -37,20 +37,20 @@ OpParameter *PopulateSplitParameter(const void *prim) {
 
   param->op_parameter_.type_ = primitive->value_type();
   param->num_split_ = value->output_num();
-  if (param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int)) || param->num_split_ < 0) {
-    MS_LOG(ERROR) << "The value of param->num_split_ is too big";
+  if (param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int)) || param->num_split_ <= 0) {
+    MS_LOG(ERROR) << "The value of param->num_split_ is not correct";
     free(param);
     return nullptr;
   }
 
   /* free split_sizes_ in split op base */
-  param->split_sizes_ = reinterpret_cast<int *>(malloc(param->num_split_ * sizeof(int)));
+  param->split_sizes_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(param->num_split_) * sizeof(int)));
   if (param->split_sizes_ == nullptr) {
     MS_LOG(ERROR) << "malloc param split_sizes_ error";
     free(param);
     return nullptr;
   }
-  memset(param->split_sizes_, 0, param->num_split_ * sizeof(int));
+  memset(param->split_sizes_, 0, static_cast<size_t>(param->num_split_) * sizeof(int));
   auto split_sizes_vector_ = value->size_splits();
   if (split_sizes_vector_ != nullptr && split_sizes_vector_->size() <= static_cast<uint32_t>(param->num_split_)) {
     int i = 0;
diff --git a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc
index 3f14f6832d1..d96635c063c 100644
--- a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc
+++ b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc
@@ -37,18 +37,19 @@ OpParameter *PopulateSplitParameter(const void *prim) {
   memset(split_param, 0, sizeof(SplitParameter));
   split_param->op_parameter_.type_ = schema::PrimitiveType_Split;
   split_param->num_split_ = split_prim->numberSplit();
-  if (split_param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
-    MS_LOG(ERROR) << "The value of split_param->num_split_ is too big";
+  if (split_param->num_split_ > std::numeric_limits<int>::max() / static_cast<int>(sizeof(int)) ||
+      split_param->num_split_ <= 0) {
+    MS_LOG(ERROR) << "The value of split_param->num_split_ is out of range.";
     free(split_param);
     return nullptr;
   }
-  int *split_sizes = reinterpret_cast<int *>(malloc(split_param->num_split_ * sizeof(int)));
+  int *split_sizes = reinterpret_cast<int *>(malloc(static_cast<size_t>(split_param->num_split_) * sizeof(int)));
   if (split_sizes == nullptr) {
     MS_LOG(ERROR) << "malloc split size of SplitParameter failed.";
     free(split_param);
     return nullptr;
   }
-  memset(split_sizes, 0, split_param->num_split_ * sizeof(int));
+  memset(split_sizes, 0, static_cast<size_t>(split_param->num_split_) * sizeof(int));
   split_param->split_sizes_ = split_sizes;
   auto split_sizes_vector_ = split_prim->sizeSplits();
   if (split_sizes_vector_ != nullptr) {
diff --git a/mindspore/lite/src/registry/register_kernel.cc b/mindspore/lite/src/registry/register_kernel.cc
index 2bdf48c9249..07743fa677e 100644
--- a/mindspore/lite/src/registry/register_kernel.cc
+++ b/mindspore/lite/src/registry/register_kernel.cc
@@ -29,9 +29,5 @@ int RegisterKernel::RegKernel(const std::string &arch, const std::string &provid
                               CreateKernel creator) {
   return lite::RegistryKernelImpl::GetInstance()->RegKernel(arch, provider, data_type, op_type, creator);
 }
-
-CreateKernel RegisterKernel::GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc) {
-  return lite::RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc);
-}
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/lite/src/registry/register_kernel_impl.h b/mindspore/lite/src/registry/register_kernel_impl.h
index 1fbe6c58b66..508ccd6fb6b 100644
--- a/mindspore/lite/src/registry/register_kernel_impl.h
+++ b/mindspore/lite/src/registry/register_kernel_impl.h
@@ -24,6 +24,7 @@
 #include <vector>
 #include <set>
 #include "include/registry/register_kernel.h"
+#include "src/registry/register_utils.h"
 
 using mindspore::schema::PrimitiveType_MAX;
 using mindspore::schema::PrimitiveType_MIN;
diff --git a/mindspore/lite/src/registry/register_utils.cc b/mindspore/lite/src/registry/register_utils.cc
new file mode 100644
index 00000000000..b6b0231927a
--- /dev/null
+++ b/mindspore/lite/src/registry/register_utils.cc
@@ -0,0 +1,25 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/registry/register_utils.h"
+#include "src/registry/register_kernel_impl.h"
+
+namespace mindspore {
+namespace kernel {
+CreateKernel RegisterUtils::GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc) {
+  return lite::RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc);
+}
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/lite/src/registry/register_utils.h b/mindspore/lite/src/registry/register_utils.h
new file mode 100644
index 00000000000..2a0a9746eca
--- /dev/null
+++ b/mindspore/lite/src/registry/register_utils.h
@@ -0,0 +1,59 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_
+#define MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_
+#include <string>
+#include "include/registry/register_kernel.h"
+#include "schema/model_generated.h"
+#include "ir/dtype/type_id.h"
+
+namespace mindspore {
+namespace kernel {
+/// \brief KernelDesc defined kernel's basic attribute.
+struct KernelDesc {
+  TypeId data_type;     /**< kernel data type argument */
+  int type;             /**< op type argument */
+  std::string arch;     /**< deviceType argument */
+  std::string provider; /**< user identification argument */
+
+  bool operator<(const KernelDesc &dst) const {
+    if (provider != dst.provider) {
+      return provider < dst.provider;
+    } else if (arch != dst.arch) {
+      return arch < dst.arch;
+    } else if (data_type != dst.data_type) {
+      return data_type < dst.data_type;
+    } else {
+      return type < dst.type;
+    }
+  }
+};
+
+/// \brief RegisterKernel Defined registration of kernel.
+class RegisterUtils {
+ public:
+  /// \brief Static methon to get a kernel's create function.
+  ///
+  /// \param[in] desc Define kernel's basic attribute.
+  /// \param[in] primitive Define the attributes of op.
+  ///
+  /// \return Function pointer to create a kernel.
+  static CreateKernel GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc);
+};
+}  // namespace kernel
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
index 3507e3dcb01..dbc917a4d40 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc
@@ -108,12 +108,15 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi
   }
   if (*image == nullptr) {
     delete *buffer;
+    *buffer = nullptr;
     MS_LOG(ERROR) << "Create OpenCL Image2D failed! (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
     return nullptr;
   }
   if (ret != CL_SUCCESS) {
     delete *buffer;
     delete *image;
+    *buffer = nullptr;
+    *image = nullptr;
     MS_LOG(ERROR) << "Create OpenCL Image2D  (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")";
     return nullptr;
   }
@@ -125,6 +128,8 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi
     if (host_ptr == nullptr) {
       delete *buffer;
       delete *image;
+      *buffer = nullptr;
+      *image = nullptr;
       MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr;
       return nullptr;
     }
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
index c47847c5998..4bac5664132 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc
@@ -210,6 +210,7 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
 #endif
   if (context_ == nullptr || ret != CL_SUCCESS) {
     delete device_;
+    device_ = nullptr;
     MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -218,6 +219,8 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
   if (default_command_queue_ == nullptr || ret != CL_SUCCESS) {
     delete device_;
     delete context_;
+    device_ = nullptr;
+    context_ = nullptr;
     MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -227,6 +230,9 @@ int OpenCLRuntime::InitQueue(std::vector<cl::Platform> *platforms) {
     delete device_;
     delete context_;
     delete default_command_queue_;
+    device_ = nullptr;
+    context_ = nullptr;
+    default_command_queue_ = nullptr;
     MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret);
     return RET_ERROR;
   }
@@ -291,6 +297,10 @@ int OpenCLRuntime::Init() {
     delete context_;
     delete default_command_queue_;
     delete profiling_command_queue_;
+    device_ = nullptr;
+    context_ = nullptr;
+    default_command_queue_ = nullptr;
+    profiling_command_queue_ = nullptr;
     MS_LOG(ERROR) << "Command OpenCL allocator failed!";
     return RET_ERROR;
   }
@@ -305,7 +315,9 @@ int OpenCLRuntime::Uninit() {
   if (init_state_ != InitSuccess) {
     return RET_OK;
   }
-  StoreCache();
+  if (StoreCache() != RET_OK) {
+    MS_LOG(ERROR) << "StoreCache failed!";
+  }
   program_map_.clear();
   delete default_command_queue_;
   delete profiling_command_queue_;
@@ -574,12 +586,15 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size,
 
 int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const {
   if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) {
-    return RET_OK;
+    return RET_ERROR;
   }
   if (command_queue == nullptr) {
     command_queue = default_command_queue_;
   }
-  return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr);
+  if (clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr) != CL_SUCCESS) {
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector<size_t> &region,
@@ -720,17 +735,17 @@ void OpenCLRuntime::LoadCache() {
   MS_LOG(INFO) << "Init opencl cache success";
 }
 
-void OpenCLRuntime::StoreCache() {
+int OpenCLRuntime::StoreCache() {
   if (!enable_cache_) {
-    return;
+    return RET_OK;
   }
   if (!flush_cache_) {
-    return;
+    return RET_OK;
   }
   auto fbb = std::make_unique<flatbuffers::FlatBufferBuilder>();
   if (fbb == nullptr) {
     MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail";
-    return;
+    return RET_ERROR;
   }
   std::vector<flatbuffers::Offset<schema::ProgramBinary>> program_binarys;
   for (const auto &kv : program_map_) {
@@ -753,8 +768,12 @@ void OpenCLRuntime::StoreCache() {
   auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data);
   fbb->Finish(gpu_cache);
   uint8_t *buf = fbb->GetBufferPointer();
-  WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize());
+  if (WriteToBin(cache_path_, reinterpret_cast<void *>(buf), fbb->GetSize()) != RET_OK) {
+    MS_LOG(ERROR) << "WriteToBin failed.";
+    return RET_ERROR;
+  }
   MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize();
+  return RET_OK;
 }
 
 cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) {
diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
index 788be5ea97b..024b7b70456 100644
--- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
+++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h
@@ -203,7 +203,7 @@ class OpenCLRuntime {
   // for cache
  private:
   void LoadCache();
-  void StoreCache();
+  int StoreCache();
 #ifdef MS_OPENCL_BINARY_CACHE
   bool enable_cache_{true};
 #else
diff --git a/mindspore/lite/src/runtime/infer_manager.cc b/mindspore/lite/src/runtime/infer_manager.cc
index 3b10f0b0e8b..bb2720ee651 100644
--- a/mindspore/lite/src/runtime/infer_manager.cc
+++ b/mindspore/lite/src/runtime/infer_manager.cc
@@ -71,6 +71,12 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto
     MS_LOG(ERROR) << "No input!";
     return RET_ERROR;
   }
+#ifndef ENABLE_CONTROL_TENSORLIST
+  if (parameter->type_ == schema::PrimitiveType_Switch) {
+    MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+    return RET_ERROR;
+  }
+#endif
   std::vector<TensorC *> in_tensors;
   std::vector<TensorC *> out_tensors;
   if (parameter->type_ == schema::PrimitiveType_PartialFusion || parameter->type_ == schema::PrimitiveType_Switch ||
@@ -101,6 +107,7 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto
     if (out_tensors.at(i) == nullptr) {
       continue;
     }
+#ifdef ENABLE_CONTROL_TENSORLIST
     if (reinterpret_cast<TensorListC *>(out_tensors.at(i))->data_type_ == TypeIdC::kObjectTypeTensorType) {
       auto *tensor_list_c = reinterpret_cast<TensorListC *>(out_tensors.at(i));
       auto *tensor_list = reinterpret_cast<TensorList *>(outputs.at(i));
@@ -112,8 +119,11 @@ int KernelInferShape(const std::vector<lite::Tensor *> &inputs, const std::vecto
       tensor_list->MallocTensorListData(static_cast<TypeId>(tensor_list_c->data_type_), tensor_shape);
       TensorListC2TensorList(tensor_list_c, tensor_list);
     } else {
+#endif
       TensorC2Tensor(out_tensors.at(i), outputs.at(i));
+#ifdef ENABLE_CONTROL_TENSORLIST
     }
+#endif
     if (ret == NNACL_INFER_INVALID) {
       outputs.at(i)->set_shape({-1});
     }
diff --git a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
index 1d74594c9df..be4c29cf375 100644
--- a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
+++ b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt
@@ -4,8 +4,25 @@ file(GLOB KERNEL_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
-    ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc
     )
+if(MSLITE_STRING_KERNEL)
+    file(GLOB KERNEL_STRING_SRC
+        ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc
+        )
+    set(KERNEL_SRC
+        ${KERNEL_SRC}
+        ${KERNEL_STRING_SRC}
+        )
+endif()
+if(MSLITE_CONTROL_TENSORLIST)
+    file(GLOB KERNEL_CONTROL_TENSORLIST
+            ${CMAKE_CURRENT_SOURCE_DIR}/control/*.cc
+            )
+    set(KERNEL_SRC
+            ${KERNEL_SRC}
+            ${KERNEL_CONTROL_TENSORLIST}
+            )
+endif()
 list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
 
 if(SUPPORT_TRAIN)
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
index 93d4fa2b4dc..fef89f2c486 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc
@@ -19,6 +19,7 @@
 #include "src/tensorlist.h"
 
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NOT_SUPPORT;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
@@ -44,9 +45,14 @@ int CarryDataKernel::MoveData(const std::vector<lite::Tensor *>::iterator &dst_b
       MS_LOG(ERROR) << "Carry const data and graph inputs.";
     } else {
       if (src_tensor->data_type() == kObjectTypeTensorType && dst_tensor->data_type() == kObjectTypeTensorType) {
+#ifdef ENABLE_CONTROL_TENSORLIST
         MS_LOG(ERROR) << "Carry MoveTensorListData";
         ret = MoveTensorListData(reinterpret_cast<lite::TensorList *>(dst_tensor),
                                  reinterpret_cast<lite::TensorList *>(src_tensor));
+#else
+        MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+        return RET_NOT_SUPPORT;
+#endif
       } else {
         MS_LOG(ERROR) << "Carry MoveTensorData";
         ret = MoveTensorData(dst_tensor, src_tensor);
@@ -81,7 +87,7 @@ int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_
   memcpy(dst_tensor->data(), src_tensor->data(), src_tensor->Size());
   return RET_OK;
 }
-
+#ifdef ENABLE_CONTROL_TENSORLIST
 int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist) {
   // shape may change, because tensors.size() can be change in RunGraph
   if (dst_tensorlist->data_type() != src_tensorlist->data_type() ||
@@ -126,4 +132,5 @@ int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite::
   }
   return RET_OK;
 }
+#endif
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
index 51462939b35..1a5f47fa30e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h
@@ -35,7 +35,9 @@ class CarryDataKernel : public InnerKernel {
                const std::vector<lite::Tensor *>::iterator &src_begin,
                const std::vector<lite::Tensor *>::iterator &src_limit);
   int MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor);
+#ifdef ENABLE_CONTROL_TENSORLIST
   int MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist);
+#endif
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
index ac5c247c713..3448f500547 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc
@@ -18,7 +18,6 @@
 #include <cfloat>
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
-#include "include/errorcode.h"
 
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
@@ -47,7 +46,15 @@ void ConvolutionBaseCPUKernel::FreeAlignedData(void **ptr) {
 }
 
 ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() {
-  if (bias_data_ != nullptr) {
+  if (addr_map.find(reinterpret_cast<uintptr_t>(packed_weight_)) != addr_map.end()) {
+    FreeAlignedData(reinterpret_cast<void **>(&packed_weight_));
+  } else if (packed_weight_ != nullptr) {
+    free(packed_weight_);
+    packed_weight_ = nullptr;
+  }
+  if (addr_map.find(reinterpret_cast<uintptr_t>(bias_data_)) != addr_map.end()) {
+    FreeAlignedData(reinterpret_cast<void **>(&bias_data_));
+  } else if (bias_data_ != nullptr) {
     free(bias_data_);
     bias_data_ = nullptr;
   }
@@ -110,6 +117,45 @@ int ConvolutionBaseCPUKernel::Init() {
   return RET_OK;
 }
 
+int ConvolutionBaseCPUKernel::InitConvWeightBias() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  auto shape = weight_tensor->shape();
+  if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
+    MS_LOG(WARNING) << "The shape of weight tensor is not ready, the weight and bias would be inited in runtime.";
+    return lite::RET_OK;
+  }
+  if (MallocWeightBiasData() != RET_OK) {
+    MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
+    return lite::RET_ERROR;
+  }
+
+  if (in_tensors_.size() == kInputSize2) {
+    memcpy(bias_data_, origin_bias_, in_tensors_.at(kBiasIndex)->Size());
+  } else {
+    MS_ASSERT(in_tensors_.size() == kInputSize1);
+  }
+  if (origin_weight_ != nullptr) {
+    PackWeight();
+  } else {
+    is_repack_ = true;
+    MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime.";
+  }
+  return lite::RET_OK;
+}
+
+int ConvolutionBaseCPUKernel::RepackWeight() {
+  origin_weight_ = origin_weight_ != nullptr ? origin_weight_ : in_tensors_.at(kWeightIndex)->data_c();
+  if (packed_weight_ == nullptr && InitConvWeightBias() != RET_OK) {
+    MS_LOG(ERROR) << "Malloc data for bias and weight failed.";
+    return lite::RET_ERROR;
+  }
+  if (IsRepack() || (IsTrain() && IsTrainable())) {
+    is_repack_ = (IsTrain() && IsTrainable()) ? IsRepack() : false;
+    PackWeight();
+  }
+  return RET_OK;
+}
+
 int ConvolutionBaseCPUKernel::CheckResizeValid() {
   // ===============check in channel================= //
   auto filter_tensor = in_tensors_.at(kWeightIndex);
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
index c1908f1d39b..2af15f14667 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h
@@ -31,6 +31,7 @@
 #include "include/context.h"
 #include "src/runtime/kernel/arm/base/layout_transform.h"
 #include "src/weight_decoder.h"
+#include "include/errorcode.h"
 
 using mindspore::lite::InnerContext;
 
@@ -38,8 +39,13 @@ namespace mindspore::kernel {
 class ConvolutionBaseCPUKernel : public InnerKernel {
  public:
   ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
-                           const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(op_parameter_->thread_num_) {
+                           const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
+                           void *origin_bias)
+      : InnerKernel(parameter, inputs, outputs, ctx),
+        ctx_(ctx),
+        thread_count_(op_parameter_->thread_num_),
+        origin_weight_(origin_weight),
+        origin_bias_(origin_bias) {
     conv_param_ = reinterpret_cast<ConvParameter *>(op_parameter_);
   }
   ~ConvolutionBaseCPUKernel() override;
@@ -61,8 +67,14 @@ class ConvolutionBaseCPUKernel : public InnerKernel {
   void FreeAlignedData(void **ptr);
 
  protected:
+  int InitConvWeightBias();
+  int RepackWeight();
+
+  virtual int MallocWeightBiasData() { return RET_OK; }
+  virtual void PackWeight() {}
   bool IsRepack() { return is_repack_; }
   std::unordered_map<uintptr_t, void *> addr_map;
+  void *packed_weight_ = nullptr;
   void *bias_data_ = nullptr;
   const InnerContext *ctx_ = nullptr;
   ConvParameter *conv_param_ = nullptr;
@@ -70,6 +82,8 @@ class ConvolutionBaseCPUKernel : public InnerKernel {
   int tile_num_ = 0;
   int thread_count_ = 1;
   bool is_repack_ = false;
+  void *origin_weight_;  // do not free
+  void *origin_bias_;    // do not free
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc
index 153f50e5ab9..35b1f97596d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc
@@ -130,6 +130,7 @@ int GroupConvolutionBaseCPUKernel::PreProcess() {
       MS_LOG(ERROR) << "group conv out tensor malloc data failed.";
       return ret;
     }
+    output->ResetRefCount();
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h
index 3d0e065333b..3dc41306d13 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h
@@ -31,7 +31,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel {
   GroupConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                 GroupConvCreator *group_conv_creator, const int group_num)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr),
         group_conv_creator_(group_conv_creator),
         group_num_(group_num) {}  // opParameter(in channel, out channel) in this kernel has been split to groups, if
                                   // you want to get real params, multiply in channel / out channel with group num
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
index cb8cfdb648a..29c0f1066f3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc
@@ -83,7 +83,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) {
     (!out_tensors_.front()->quant_params().empty() && out_tensors_.front()->quant_params().front().inited)
       ? out_tensors_.front()->quant_params().front()
       : in_tensors_.front()->quant_params().front();
-  int ret = RET_OK;
+  int ret = RET_ERROR;
   if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) {
     ret = DoDequantizeInt8ToFp32(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale,
                                  quant_arg.zeroPoint, num_unit_thread);
@@ -195,6 +195,9 @@ int QuantDTypeCastCPUKernel::Run() {
     if (float32_ptr_ == nullptr || uint8_ptr_ == nullptr) {
       return RET_NULL_PTR;
     }
+  } else {
+    MS_LOG(ERROR) << "Not support";
+    return RET_ERROR;
   }
 
   auto ret = ParallelLaunch(this->ms_context_, QuantDTypeCastRun, this, thread_n_num_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
index ec903b96355..5cfa2f1eccd 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc
@@ -13,6 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 #include "src/runtime/kernel/arm/base/reshape_base.h"
 #include "schema/model_generated.h"
 #include "src/kernel_registry.h"
@@ -31,54 +32,27 @@ using mindspore::schema::PrimitiveType_Squeeze;
 using mindspore::schema::PrimitiveType_Unsqueeze;
 
 namespace mindspore::kernel {
-int ReshapeBaseCPUKernel::Init() { return ReSize(); }
+int ReshapeBaseCPUKernel::Run() {
+  auto in_tensor = in_tensors().front();
+  auto out_tensor = out_tensors().front();
 
-int ReshapeBaseCPUKernel::ReSize() {
-  int in_data_size = in_tensors_.front()->Size();
-  int thread_num = op_parameter_->thread_num_;
-  if (thread_num == 0) {
-    MS_LOG(ERROR) << "div zero";
-    return RET_ERROR;
-  }
-  cal_max_num_per_thread_ = UP_DIV(in_data_size, thread_num);
-  return RET_OK;
-}
-
-int ReshapeBaseCPUKernel::RunImpl(int task_id) {
-  size_t start_index = task_id * cal_max_num_per_thread_;
-  if (start_index >= in_tensors_.front()->Size()) {
+  /*
+   * in_tensor : CPU-allocator ;  out_tensor : GPU-allocator
+   * out_tensor data_c can not change
+   * */
+  if (in_tensor->allocator() == nullptr || in_tensor->allocator() != out_tensor->allocator() ||
+      op_parameter_->is_train_session_) {
+    memcpy(out_tensor->data_c(), in_tensor->data_c(), in_tensor->Size());
     return RET_OK;
   }
-  auto cur_in_ptr = input_ptr_ + start_index;
-  auto cur_out_ptr = output_ptr_ + start_index;
 
-  size_t data_size = in_tensors_.front()->Size() - start_index;
-  data_size = data_size > cal_max_num_per_thread_ ? cal_max_num_per_thread_ : data_size;
-  memcpy(cur_out_ptr, cur_in_ptr, data_size);
-  return RET_OK;
-}
+  out_tensor->FreeData();
+  out_tensor->ResetRefCount();
 
-int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
-  auto reshape = reinterpret_cast<ReshapeBaseCPUKernel *>(cdata);
-  auto ret = reshape->RunImpl(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << ret << "]";
-    return ret;
-  }
-  return RET_OK;
-}
+  in_tensor->allocator()->IncRefCount(in_tensor->data(), out_tensor->ref_count());
 
-int ReshapeBaseCPUKernel::Run() {
-  input_ptr_ = reinterpret_cast<uint8_t *>(in_tensors_.at(kInputIndex)->data_c());
-  output_ptr_ = reinterpret_cast<uint8_t *>(out_tensors_.at(kOutputIndex)->data_c());
-  if (input_ptr_ == nullptr || output_ptr_ == nullptr) {
-    return RET_NULL_PTR;
-  }
-  auto ret = ParallelLaunch(this->ms_context_, ReshapeRun, this, op_parameter_->thread_num_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]";
-    return ret;
-  }
+  out_tensor->set_data(in_tensor->data_c());
+  out_tensor->set_own_data(in_tensor->own_data());
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
index 774c8652493..4eb846501f0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
+++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h
@@ -19,6 +19,8 @@
 #include <vector>
 #include "src/inner_kernel.h"
 #include "include/context.h"
+#include "include/errorcode.h"
+#include "src/runtime/kernel/arm/base/carry_data.h"
 
 using mindspore::lite::InnerContext;
 namespace mindspore::kernel {
@@ -28,16 +30,9 @@ class ReshapeBaseCPUKernel : public InnerKernel {
                        const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
       : InnerKernel(parameter, inputs, outputs, ctx) {}
   ~ReshapeBaseCPUKernel() override = default;
-
-  int Init() override;
-  int ReSize() override;
+  int Init() override { return lite::RET_OK; };
+  int ReSize() override { return lite::RET_OK; };
   int Run() override;
-  int RunImpl(int task_id);
-
- private:
-  size_t cal_max_num_per_thread_ = 0;
-  uint8_t *input_ptr_ = nullptr;
-  uint8_t *output_ptr_ = nullptr;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
index 2b483d03ebe..c2772782962 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc
@@ -39,8 +39,8 @@ int SliceCPUKernel::ReSize() {
   auto begin_tensor = in_tensors_[1];
   auto size_tensor = in_tensors_[2];
 
-  MS_ASSERT(in_tensor->shape().size() == begin_tensor->ElementsNum());
-  MS_ASSERT(in_tensor->shape().size() == size_tensor->ElementsNum());
+  MS_ASSERT(in_tensor->shape().size() == static_cast<size_t>(begin_tensor->ElementsNum()));
+  MS_ASSERT(in_tensor->shape().size() == static_cast<size_t>(size_tensor->ElementsNum()));
   MS_ASSERT(in_tensor->shape().size() <= DIMENSION_8D);
 
   auto begin = reinterpret_cast<int32_t *>(begin_tensor->data_c());
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc
index a24dbf76dcf..49e9e9e4d52 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc
@@ -29,6 +29,8 @@ using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
 int SoftmaxBaseCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (softmax_param_ == nullptr) {
     MS_LOG(ERROR) << "SoftmaxParameter nullptr";
     return RET_NULL_PTR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
index 91aa761aab1..c9d6c6ae48c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc
@@ -78,7 +78,7 @@ int StackBaseCPUKernel::Init() {
 }
 
 int StackBaseCPUKernel::Execute(int task_id) {
-  auto output_data = reinterpret_cast<char *>(out_tensors_.at(0)->data_c());
+  auto output_data = reinterpret_cast<void *>(out_tensors_.at(0)->data_c());
   if (output_data == nullptr) {
     return RET_NULL_PTR;
   }
@@ -86,7 +86,7 @@ int StackBaseCPUKernel::Execute(int task_id) {
   auto start = task_id * step;
   auto end = MSMIN(start + step, outer_size_);
   auto input_num = in_tensors_.size();
-  auto output = output_data + input_num * start * copy_size_;
+  auto output = reinterpret_cast<char *>(output_data) + input_num * start * copy_size_;
   Stack(all_inputs_, reinterpret_cast<void *>(output), input_num, copy_size_, start, end);
   return RET_OK;
 }
@@ -106,7 +106,7 @@ int StackBaseCPUKernel::Run() {
     return RET_ERROR;
   }
   for (size_t j = 0; j < inputs_num; ++j) {
-    auto input_data = reinterpret_cast<char *>(in_tensors_.at(j)->data_c());
+    auto input_data = reinterpret_cast<void *>(in_tensors_.at(j)->data_c());
     if (input_data == nullptr) {
       return RET_NULL_PTR;
     }
diff --git a/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc b/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc
index b7a633b45bd..aba1516c09d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc
@@ -42,11 +42,7 @@ int TensorListReserveCPUKernel::Run() {
     std::vector<std::vector<int> > tmp_shape(num_elements, std::vector<int>());
     output->set_element_shape(std::vector<int>(ele_shape_ptr, ele_shape_ptr + input0->ElementsNum()));
     output->set_shape(std::vector<int>(1, num_elements));
-    auto ret = output->MallocTensorListData(kTypeUnknown, tmp_shape);
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Failed to MallocTensorListData";
-      return ret;
-    }
+    output->MallocTensorListData(kTypeUnknown, tmp_shape);
   }
   output->set_tensors_data_type(element_dtype_);
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
index 712f936fd4b..9460cd26043 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc
@@ -35,6 +35,8 @@ using mindspore::schema::PrimitiveType_Activation;
 
 namespace mindspore::kernel {
 int ActivationFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
       type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
       type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc
index 72a4f7fa082..d75177920e3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc
@@ -66,6 +66,8 @@ ArithmeticCompareOptFuncFp16 GetOptimizedArithmeticCompareFun(int primitive_type
 }
 
 int ArithmeticCompareFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -162,7 +164,7 @@ int ArithmeticCompareFP16CPUKernel::Run() {
 
   input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast<const lite::InnerContext *>(this->ms_context_));
   input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast<const lite::InnerContext *>(this->ms_context_));
-  output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->MutableData());
+  output_fp16_ = reinterpret_cast<uint8_t *>(output_tensor->data_c());
   if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     FreeTmpBuffer();
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
index a81bbff7638..7b417ff90a0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc
@@ -21,6 +21,7 @@
 using mindspore::kernel::KERNEL_ARCH;
 using mindspore::lite::KernelRegistrar;
 using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 
 using mindspore::schema::PrimitiveType_AddFusion;
@@ -183,8 +184,11 @@ int ArithmeticFP16CPUKernel::Run() {
     return RET_ERROR;
   }
   auto ret = ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "ArithmeticsRun failed, ret : " << ret;
+  }
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) {
-    Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->MutableData()),
+    Float16ToFloat32(static_cast<float16_t *>(output_ptr_), reinterpret_cast<float *>(output_tensor->data_c()),
                      output_tensor->ElementsNum());
   }
   FreeFp16Buffer();
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc
index bcba2c95056..1f75a664e0c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc
@@ -76,18 +76,28 @@ int ArithmeticSelfFp16CPUKernel::DoExecute(int task_id) {
 int ArithmeticSelfFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-
+  MS_ASSERT(input_tensor != nullptr);
+  MS_ASSERT(output_tensor != nullptr);
   if (input_tensor->data_type() == kNumberTypeFloat32) {
-    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(this->ms_context_));
+    input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast<const lite::InnerContext *>(ms_context_));
+    if (input_fp16_ptr_ == nullptr) {
+      return RET_ERROR;
+    }
   } else {
     input_fp16_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
+    MS_ASSERT(input_fp16_ptr_ != nullptr);
   }
   output_fp16_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
+  MS_ASSERT(output_fp16_ptr_ != nullptr);
 
-  auto ret = ParallelLaunch(this->ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
+  auto ret = ParallelLaunch(ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]";
   }
+  if (input_tensor->data_type() == kNumberTypeFloat32) {
+    ms_context_->allocator->Free(input_fp16_ptr_);
+    input_fp16_ptr_ = nullptr;
+  }
   return ret;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
index 35f526afe38..98d6fd5312c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc
@@ -38,9 +38,9 @@ int BatchnormFp16CPUKernel::InitConstTensor() {
       FreeMeanAndVariance();
       return RET_ERROR;
     }
-    Float32ToFloat16(reinterpret_cast<float *>(mean_fp32->MutableData()), reinterpret_cast<float16_t *>(mean_),
+    Float32ToFloat16(reinterpret_cast<float *>(mean_fp32->data_c()), reinterpret_cast<float16_t *>(mean_),
                      mean_fp32->ElementsNum());
-    Float32ToFloat16(reinterpret_cast<float *>(variance_fp32->MutableData()), reinterpret_cast<float16_t *>(variance_),
+    Float32ToFloat16(reinterpret_cast<float *>(variance_fp32->data_c()), reinterpret_cast<float16_t *>(variance_),
                      variance_fp32->ElementsNum());
   } else {
     auto ret = BatchnormCPUKernel::InitConstTensor();
@@ -68,7 +68,7 @@ int BatchnormFp16CPUKernel::Run() {
     MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]";
   }
   if (is_output_fp32_) {
-    Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->MutableData()), output_tensor->ElementsNum());
+    Float16ToFloat32(output_, reinterpret_cast<float *>(output_tensor->data_c()), output_tensor->ElementsNum());
   }
   FreeInputAndOutput();
   return ret;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
index a8da79ef223..58cb9aaa3f2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc
@@ -58,8 +58,10 @@ int BiasAddCPUFp16Kernel::Run() {
       is_repack_ = false;
     }
   }
-  auto in = reinterpret_cast<float16_t *>(in_tensors_.at(0)->MutableData());
-  auto out = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
+  auto in = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
+  auto out = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
+  MS_ASSERT(in != nullptr);
+  MS_ASSERT(out != nullptr);
   size_t data_size = in_tensors_.at(0)->ElementsNum();
   MS_ASSERT(ms_context_->allocator != nullptr);
   auto tile_in = reinterpret_cast<float16_t *>(ms_context_->allocator->Malloc(data_size * sizeof(float16_t)));
@@ -93,7 +95,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
         return RET_NULL_PTR;
       }
     }
-    auto bias = reinterpret_cast<float *>(bias_tensor_->MutableData());
+    auto bias = reinterpret_cast<float *>(bias_tensor_->data_c());
     if (bias == nullptr) {
       MS_LOG(ERROR) << "bias is nullptr!";
       return RET_NULL_PTR;
@@ -102,7 +104,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
       bias_data_[i] = static_cast<float16_t>(bias[i]);
     }
   } else {
-    bias_data_ = reinterpret_cast<float16_t *>(bias_tensor_->MutableData());
+    bias_data_ = reinterpret_cast<float16_t *>(bias_tensor_->data_c());
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "bias_data_ is nullptr";
       return RET_NULL_PTR;
@@ -112,6 +114,8 @@ int BiasAddCPUFp16Kernel::GetBiasData() {
 }
 
 int BiasAddCPUFp16Kernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   bias_tensor_ = in_tensors_.at(1);
   MS_ASSERT(bias_tensor_ != nullptr);
   if (!InferShapeDone()) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
index 0dc3170de08..a17f381f40e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc
@@ -37,6 +37,8 @@ int CastFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }  // namespace
 
 int CastFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -55,6 +57,9 @@ int CastFp16CPUKernel::ReSize() {
 
 int CastFp16CPUKernel::DoCast(int thread_id) {
   auto input = in_tensors_.at(0);
+  MS_ASSERT(input != nullptr);
+  auto input_data = input->data_c();
+  MS_ASSERT(input_data != nullptr);
   int data_num = MSMIN(stride_, data_num_ - thread_id * stride_);
   if (data_num <= 0) {
     return RET_OK;
@@ -63,26 +68,27 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   auto offset = thread_id * stride_;
   auto output = out_tensors_.at(0);
   auto output_data = output->data_c();
+  MS_ASSERT(output_data != nullptr);
   auto input_data_type = input->data_type();
   auto output_data_type = output->data_type();
 
   if (input_data_type == kNumberTypeFloat16) {
     switch (output_data_type) {
       case kNumberTypeInt64:
-        Float16ToInt64(reinterpret_cast<float16_t *>(input->data_c()) + offset,
+        Float16ToInt64(reinterpret_cast<float16_t *>(input_data) + offset,
                        reinterpret_cast<int64_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeInt32:
-        Float16ToInt32(reinterpret_cast<float16_t *>(input->data_c()) + offset,
+        Float16ToInt32(reinterpret_cast<float16_t *>(input_data) + offset,
                        reinterpret_cast<int32_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat32:
-        Float16ToFloat32(reinterpret_cast<float16_t *>(input->MutableData()) + offset,
+        Float16ToFloat32(reinterpret_cast<float16_t *>(input_data) + offset,
                          reinterpret_cast<float *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat16:
-        memcpy(reinterpret_cast<float16_t *>(output_data) + offset,
-               reinterpret_cast<float16_t *>(input->data_c()) + offset, data_num * sizeof(float16_t));
+        memcpy(reinterpret_cast<float16_t *>(output_data) + offset, reinterpret_cast<float16_t *>(input_data) + offset,
+               data_num * sizeof(float16_t));
         break;
       default:
         MS_LOG(ERROR) << "Unsupported output data type " << output_data_type;
@@ -91,19 +97,19 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   } else if (input_data_type == kNumberTypeFloat32) {
     switch (output_data_type) {
       case kNumberTypeInt64:
-        Float32ToInt64(reinterpret_cast<float *>(input->data_c()) + offset,
+        Float32ToInt64(reinterpret_cast<float *>(input_data) + offset,
                        reinterpret_cast<int64_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeInt32:
-        Float32ToInt32(reinterpret_cast<float *>(input->data_c()) + offset,
+        Float32ToInt32(reinterpret_cast<float *>(input_data) + offset,
                        reinterpret_cast<int32_t *>(output_data) + offset, data_num);
         break;
       case kNumberTypeFloat32:
-        memcpy(reinterpret_cast<float *>(output_data) + offset, reinterpret_cast<float *>(input->data_c()) + offset,
+        memcpy(reinterpret_cast<float *>(output_data) + offset, reinterpret_cast<float *>(input_data) + offset,
                data_num * sizeof(float));
         break;
       case kNumberTypeFloat16:
-        Float32ToFloat16(reinterpret_cast<float *>(input->MutableData()) + offset,
+        Float32ToFloat16(reinterpret_cast<float *>(input_data) + offset,
                          reinterpret_cast<float16_t *>(output_data) + offset, data_num);
         break;
       default:
@@ -113,7 +119,7 @@ int CastFp16CPUKernel::DoCast(int thread_id) {
   } else if (input_data_type == kNumberTypeInt32) {
     switch (output_data_type) {
       case kNumberTypeFloat32:
-        Int32ToFloat32(static_cast<int32_t *>(input->data_c()) + offset, static_cast<float *>(output_data) + offset,
+        Int32ToFloat32(static_cast<int32_t *>(input_data) + offset, static_cast<float *>(output_data) + offset,
                        data_num);
         break;
       default:
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
index 00d9bb92c7d..355ad85f5d6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc
@@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Concat;
 
 namespace mindspore::kernel {
 int ConcatFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -98,9 +100,11 @@ int ConcatFp16CPUKernel::Run() {
     const auto in_tensor = in_tensors_.at(i);
     if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) {
       auto in_tensor_data = reinterpret_cast<float *>(in_tensor->data_c());
+      MS_ASSERT(in_tensor_data != nullptr);
       Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum());
     } else {
       fp16_inputs_[i] = reinterpret_cast<float16_t *>(in_tensor->data_c());
+      MS_ASSERT(fp16_inputs_[i] != nullptr);
     }
 
     shapes.push_back(in_tensors_[i]->shape());
@@ -111,6 +115,7 @@ int ConcatFp16CPUKernel::Run() {
   auto output_addr = out_tensors_.at(0)->MutableData();
   if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) {
     fp16_output_ = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
+    MS_ASSERT(fp16_output_ != nullptr);
   }
   int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t);
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
index f3257b424a7..84d1018efa3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc
@@ -38,10 +38,6 @@ int Convolution1x1FP16CPUKernel::InitMatmulParam() {
 
 Convolution1x1FP16CPUKernel::~Convolution1x1FP16CPUKernel() {
   FreeTmpBuffer();
-  if (weight_ptr_ != nullptr) {
-    free(weight_ptr_);
-    weight_ptr_ = nullptr;
-  }
   if (matmul_param_ != nullptr) {
     delete matmul_param_;
     matmul_param_ = nullptr;
@@ -82,14 +78,23 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() {
   return RET_OK;
 }
 
-int Convolution1x1FP16CPUKernel::InitWeightBias() {
+int Convolution1x1FP16CPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = weight_tensor->Channel();
   auto output_channel = weight_tensor->Batch();
 
-  if (in_tensors_.size() == 3) {
-    size_t size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
-    size_t bias_size = output_channel * sizeof(float16_t);
+  size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = malloc(size);
+    if (packed_weight_ == nullptr) {
+      MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!";
+      return RET_ERROR;
+    }
+  }
+  memset(reinterpret_cast<char *>(packed_weight_), 0, size);
+
+  if (in_tensors_.size() == kInputSize2) {
+    size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
     if (bias_data_ == nullptr) {
       bias_data_ = malloc(size);
       if (bias_data_ == nullptr) {
@@ -97,32 +102,29 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() {
         return RET_ERROR;
       }
     }
-    void *bias_origin_tmp = IsTrainable() ? in_tensors_.at(kBiasIndex)->data_c() : origin_bias_;
-    memcpy(bias_data_, bias_origin_tmp, output_channel * sizeof(float16_t));
-    memset(reinterpret_cast<char *>(bias_data_) + bias_size, 0, size - bias_size);
+    memset(reinterpret_cast<char *>(bias_data_), 0, size);
   }
-
-  size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t);
-  size_t down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float16_t);
-  if (weight_ptr_ == nullptr) {
-    weight_ptr_ = reinterpret_cast<float16_t *>(malloc(size));
-    if (weight_ptr_ == nullptr) {
-      MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
-      return RET_ERROR;
-    }
-  }
-  void *weight_origin_tmp = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
-  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
-#ifdef ENABLE_ARM64
-  RowMajor2Col16MajorFp16Opt(static_cast<const float16_t *>(weight_origin_tmp), weight_ptr_, output_channel,
-                             input_channel);
-#else
-  ColMajor2Row8MajorFp16(weight_origin_tmp, weight_ptr_, input_channel, output_channel, true);
-#endif
   return RET_OK;
 }
 
+void Convolution1x1FP16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = weight_tensor->Channel();
+  auto output_channel = weight_tensor->Batch();
+  void *weight_origin = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(weight_origin != nullptr);
+#ifdef ENABLE_ARM64
+  RowMajor2Col16MajorFp16Opt(static_cast<const float16_t *>(weight_origin),
+                             reinterpret_cast<float16_t *>(packed_weight_), output_channel, input_channel);
+#else
+  ColMajor2Row8MajorFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), input_channel, output_channel,
+                         true);
+#endif
+}
+
 int Convolution1x1FP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_ARM64
   row_tile_ = C12NUM;
   col_tile_ = C16NUM;
@@ -135,7 +137,7 @@ int Convolution1x1FP16CPUKernel::Init() {
     MS_LOG(ERROR) << "Init matmul_param_ failed.";
     return RET_ERROR;
   }
-  int ret = InitWeightBias();
+  int ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return ret;
@@ -180,11 +182,13 @@ int Convolution1x1FP16CPUKernel::RunOc(int task_id) {
 
   auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float16_t *>(bias_data_) + thread_stride_ * task_id;
 #ifdef ENABLE_ARM64
-  MatMul12x16Fp16Opt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
+  MatMul12x16Fp16Opt(pack_input_,
+                     reinterpret_cast<float16_t *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
                      output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
                      matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
 #else
-  MatMul12x8A32Fp16(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
+  MatMul12x8A32Fp16(pack_input_,
+                    reinterpret_cast<float16_t *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
                     output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
                     matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
 #endif
@@ -204,13 +208,13 @@ int Convolution1x1FP16CPUKernel::RunHw(int task_id) {
 
   float16_t *thread_output_ptr = output_ptr_ + task_id * thread_stride_ * matmul_param_->col_;
 #ifdef ENABLE_ARM64
-  MatMul12x16Fp16Opt(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast<float16_t *>(bias_data_),
-                     matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_,
-                     OutType_Nhwc);
+  MatMul12x16Fp16Opt(thread_pack_input, reinterpret_cast<float16_t *>(packed_weight_), thread_output_ptr,
+                     reinterpret_cast<float16_t *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_,
+                     matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
 #else
-  MatMul12x8A32Fp16(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast<float16_t *>(bias_data_),
-                    matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_,
-                    OutType_Nhwc);
+  MatMul12x8A32Fp16(thread_pack_input, reinterpret_cast<float16_t *>(packed_weight_), thread_output_ptr,
+                    reinterpret_cast<float16_t *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_,
+                    matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
 #endif
   return RET_OK;
 }
@@ -250,14 +254,9 @@ int Convolution1x1FP16CPUKernel::Run() {
     MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
     return RET_MEMORY_FAILED;
   }
-
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    auto ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
 
   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
index 822572aba2a..f2420e2fdfa 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h
@@ -31,9 +31,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel {
   Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
                               void *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
   ~Convolution1x1FP16CPUKernel() override;
 
   int Init() override;
@@ -49,16 +47,14 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel {
   void FreeTmpBuffer();
   int InitConv1x1Param();
   int InitMatmulParam();
-  int InitWeightBias();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
 
  private:
   bool pre_trans_input_ = false;
   bool multi_thread_by_hw_ = false;
   int thread_count_ = 1;
   int thread_stride_ = 0;
-  void *origin_weight_;  // do not free
-  void *origin_bias_;    // do not free
-  float16_t *weight_ptr_ = nullptr;
   float16_t *input_ptr_ = nullptr;
   float16_t *pack_input_ = nullptr;
   float16_t *output_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
index e6e1dfed963..71c79f61139 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc
@@ -65,8 +65,11 @@ void *ConvolutionDelegateFP16CPUKernel::CopyData(lite::Tensor *tensor) {
 }
 
 int ConvolutionDelegateFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
-    origin_weight_ = CopyData(in_tensors_.at(kWeightIndex));
+    auto weight_tensor = in_tensors_.at(kWeightIndex);
+    origin_weight_ = weight_tensor->data_c() != nullptr ? CopyData(weight_tensor) : nullptr;
     need_free_ = need_free_ | WEIGHT_NEED_FREE;
     if (in_tensors_.size() == 3) {
       origin_bias_ = CopyData(in_tensors_.at(kBiasIndex));
@@ -75,7 +78,6 @@ int ConvolutionDelegateFP16CPUKernel::Init() {
     return RET_OK;
   }
   origin_weight_ = in_tensors_.at(kWeightIndex)->data_c();
-  MS_ASSERT(origin_weight_ != nullptr);
   if (in_tensors_.size() == 3) {
     origin_bias_ = in_tensors_.at(kBiasIndex)->data_c();
     MS_ASSERT(origin_bias_ != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
index 12018df715f..ed20b68a3d2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h
@@ -60,7 +60,7 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
     return fp16_conv_kernel_->SetTrainable(trainable);
   }
 
-  void set_in_tensor(lite::Tensor *in_tensor, int index) override {
+  void set_in_tensor(lite::Tensor *in_tensor, size_t index) override {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
     if (fp16_conv_kernel_ != nullptr) {
@@ -68,7 +68,7 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel {
     }
   }
 
-  void set_out_tensor(lite::Tensor *out_tensor, int index) override {
+  void set_out_tensor(lite::Tensor *out_tensor, size_t index) override {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
     if (fp16_conv_kernel_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
index 43f86b0f1d6..b5e54dbcb40 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc
@@ -26,51 +26,42 @@ using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-ConvolutionDepthwise3x3Fp16CPUKernel::~ConvolutionDepthwise3x3Fp16CPUKernel() {
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
+void ConvolutionDepthwise3x3Fp16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int channel = weight_tensor->Batch();
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackWeightConvDw3x3Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
+                          channel);
 }
 
-int ConvolutionDepthwise3x3Fp16CPUKernel::InitWeightBias() {
-  // init weight: k, h, w, c; k == group == output_channel, c == 1
-  auto weight_tensor = in_tensors_[kWeightIndex];
-  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->MutableData());
+int ConvolutionDepthwise3x3Fp16CPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
   int channel = weight_tensor->Batch();
   int c8 = UP_ROUND(channel, C8NUM);
   int pack_weight_size = c8 * C12NUM;
-
   if (packed_weight_ == nullptr) {
-    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
+    packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
     if (packed_weight_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
-  PackWeightConvDw3x3Fp16(origin_weight, packed_weight_, channel);
-
   if (bias_data_ == nullptr) {
-    bias_data_ = reinterpret_cast<float16_t *>(malloc(c8 * sizeof(float16_t)));
+    bias_data_ = malloc(c8 * sizeof(float16_t));
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
   memset(bias_data_, 0, c8 * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_[kBiasIndex];
-    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->MutableData());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float16_t));
-  }
-
   return RET_OK;
 }
 
 int ConvolutionDepthwise3x3Fp16CPUKernel::Init() {
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -92,8 +83,8 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) {
   int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_);
   int start_oh = step_oh * task_id;
   int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_);
-  ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
-                start_oh, end_oh);
+  ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, reinterpret_cast<float16_t *>(packed_weight_),
+                reinterpret_cast<float16_t *>(bias_data_), conv_param_, start_oh, end_oh);
   return RET_OK;
 }
 
@@ -108,14 +99,11 @@ int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
 }
 
 int ConvolutionDepthwise3x3Fp16CPUKernel::Run() {
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    auto ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
+
   int units = UP_DIV(conv_param_->output_w_, C2NUM);  // F(2, 3) contains 2 conv units
   int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM);
   int buffer_size = units * c8 * C12NUM * conv_param_->thread_num_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
index c6663837369..cc66bb528d0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h
@@ -28,19 +28,20 @@ class ConvolutionDepthwise3x3Fp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3Fp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~ConvolutionDepthwise3x3Fp16CPUKernel() override;
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+  ~ConvolutionDepthwise3x3Fp16CPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
-  float16_t *packed_weight_ = nullptr;
+  void PackWeight() override;
+  int MallocWeightBiasData() override;
   float16_t *input_ptr_ = nullptr;
   float16_t *output_ptr_ = nullptr;
   float16_t *buffer_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
index af240421dee..fae625bc7b9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc
@@ -23,50 +23,42 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() {
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
+void ConvolutionDepthwiseFp16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNCHWToNHWCFp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_), 1,
+                     weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), 0, 0);
 }
 
-int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
+int ConvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int channel = weight_tensor->Batch();
   int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
-  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
   if (packed_weight_ == nullptr) {
-    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
+    packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
     if (packed_weight_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
-  PackNCHWToNHWCFp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                     weight_tensor->Batch(), 0, 0);
-
   if (bias_data_ == nullptr) {
-    bias_data_ = reinterpret_cast<float16_t *>(malloc(channel * sizeof(float16_t)));
+    bias_data_ = malloc(channel * sizeof(float16_t));
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
   memset(bias_data_, 0, channel * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->Size());
-  }
   return RET_OK;
 }
 
 int ConvolutionDepthwiseFp16CPUKernel::Init() {
-  auto ret = InitWeightBias();
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitConvWeightBias failed.";
     return RET_ERROR;
   }
 
@@ -94,7 +86,8 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
     MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!";
     return RET_ERROR;
   }
-  ConvDwFp16(output_ptr, input_ptr, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_, task_id);
+  ConvDwFp16(output_ptr, input_ptr, reinterpret_cast<float16_t *>(packed_weight_),
+             reinterpret_cast<float16_t *>(bias_data_), conv_param_, task_id);
   return RET_OK;
 }
 
@@ -109,13 +102,9 @@ static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sc
 }
 
 int ConvolutionDepthwiseFp16CPUKernel::Run() {
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    auto ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
   auto ret = ParallelLaunch(this->ms_context_, ConvDwFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
index 4255ff18094..1b37edc0cd3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h
@@ -36,19 +36,20 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~ConvolutionDepthwiseFp16CPUKernel() override;
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+  ~ConvolutionDepthwiseFp16CPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int Eval() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
 
  private:
-  float16_t *packed_weight_ = nullptr;
+  void PackWeight() override;
+  int MallocWeightBiasData() override;
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
index dcdcc930b6b..294f8a8a404 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc
@@ -28,10 +28,6 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() {
     delete sliding_;
     sliding_ = nullptr;
   }
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
 }
 
 int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
@@ -51,58 +47,56 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() {
     if (packed_output_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       ms_context_->allocator->Free(packed_input_);
+      packed_input_ = nullptr;
       return RET_ERROR;
     }
   }
   return RET_OK;
 }
 
-int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
+void ConvolutionDepthwiseSWFp16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
+                           1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+}
+
+int ConvolutionDepthwiseSWFp16CPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
   int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
-  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
-
   if (packed_weight_ == nullptr) {
-    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
+    packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
     if (packed_weight_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
-  PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                           weight_tensor->Batch());
-
   if (bias_data_ == nullptr) {
-    bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t)));
+    bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t));
     if (bias_data_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
     }
   }
   memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->Size());
-  }
-
   conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
   return RET_OK;
-}  // namespace mindspore::kernel
+}
 
 int ConvolutionDepthwiseSWFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   sliding_ = new (std::nothrow) SlidingWindowParam;
   if (sliding_ == nullptr) {
     MS_LOG(ERROR) << "new sliding window param failed.";
     return RET_ERROR;
   }
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp16 InitConvWeightBias failed.";
     return RET_ERROR;
   }
 
@@ -122,8 +116,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() {
 }
 
 int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) {
-  ConvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
-               sliding_, task_id);
+  ConvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
+               reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
 }
 
@@ -151,6 +145,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
   MS_ASSERT(output_ptr != nullptr);
   if (input_ptr == nullptr || output_ptr == nullptr) {
     MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!";
+    FreePackedInputOutput();
     return RET_ERROR;
   }
 
@@ -161,14 +156,9 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() {
     packed_input_ = input_ptr;
     packed_output_ = output_ptr;
   }
-
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
   ret = ParallelLaunch(this->ms_context_, ConvDwSWFp16Run, this, conv_param_->thread_num_);
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
index 94a8071bd99..5219c2c8570 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h
@@ -37,7 +37,8 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~ConvolutionDepthwiseSWFp16CPUKernel() override;
 
   int Init() override;
@@ -46,13 +47,13 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int Eval() override;
 
   int InitPackedInputOutput();
-  int InitWeightBias();
   int Execute(int task_id);
 
  private:
+  void PackWeight() override;
+  int MallocWeightBiasData() override;
   void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
-  float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
   float16_t *packed_output_ = nullptr;
   bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
index e21fca572a1..25ebcebf147 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc
@@ -27,7 +27,18 @@ using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-int ConvolutionFP16CPUKernel::InitWeightBias() {
+void ConvolutionFP16CPUKernel::PackWeight() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  int in_channel = filter_tensor->Channel();
+  int out_channel = filter_tensor->Batch();
+  int kernel_plane = filter_tensor->Height() * filter_tensor->Width();
+  void *weight_origin = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
+  MS_ASSERT(weight_origin != nullptr);
+  RowMajor2Col8MajorFp16(weight_origin, reinterpret_cast<float16_t *>(packed_weight_), out_channel,
+                         in_channel * kernel_plane, false);
+}
+
+int ConvolutionFP16CPUKernel::MallocWeightBiasData() {
   auto filter_tensor = in_tensors_.at(kWeightIndex);
   int in_channel = filter_tensor->Channel();
   int out_channel = filter_tensor->Batch();
@@ -39,15 +50,13 @@ int ConvolutionFP16CPUKernel::InitWeightBias() {
 
   // init weight
   if (packed_weight_ == nullptr) {
-    packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
+    packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
     if (packed_weight_ == nullptr) {
       MS_LOG(ERROR) << "malloc packed_weight_ failed.";
       return RET_ERROR;
     }
   }
   memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t));
-  void *weight_origin_tmp = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
-  RowMajor2Col8MajorFp16(weight_origin_tmp, packed_weight_, out_channel, in_channel * kernel_plane, false);
 
   // init bias
   if (bias_data_ == nullptr) {
@@ -58,11 +67,6 @@ int ConvolutionFP16CPUKernel::InitWeightBias() {
     }
   }
   memset(bias_data_, 0, oc8 * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    void *bias_origin_tmp = IsTrainable() ? bias_tensor->data_c() : origin_bias_;
-    memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t));
-  }
   return RET_OK;
 }
 
@@ -85,13 +89,15 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() {
 }
 
 int ConvolutionFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_ARM64
   row_tile_ = C16NUM;
 #else
   row_tile_ = C12NUM;
 #endif
   col_tile_ = C8NUM;
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -129,8 +135,8 @@ int ConvolutionFP16CPUKernel::RunImpl(int task_id) {
     MS_LOG(ERROR) << "Convolution Fp16 get null tensor data!";
     return RET_ERROR;
   }
-  ConvFp16(input_ptr, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), col_major_input_,
-           output_ptr, task_id, conv_param_);
+  ConvFp16(input_ptr, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
+           reinterpret_cast<float16_t *>(bias_data_), col_major_input_, output_ptr, task_id, conv_param_);
   return RET_OK;
 }
 
@@ -151,14 +157,9 @@ int ConvolutionFP16CPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
   ret = ParallelLaunch(this->ms_context_, ConvolutionFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
index 011976a2314..ef08a5dfa2a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h
@@ -28,26 +28,20 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, void *origin_weight,
                            void *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
-  ~ConvolutionFP16CPUKernel() override {
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-      packed_weight_ = nullptr;
-    }
-  }
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
+  ~ConvolutionFP16CPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int Eval() override;
   int RunImpl(int task_id);
-  int InitWeightBias();
   int InitTmpBuffer();
   void AdjustNumberOfThread();
 
  private:
+  void PackWeight() override;
+  int MallocWeightBiasData() override;
   void FreeTmpBuffer() {
     if (packed_input_ != nullptr) {
       ctx_->allocator->Free(packed_input_);
@@ -58,10 +52,7 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel {
       col_major_input_ = nullptr;
     }
   }
-  void *origin_weight_;  // do not free
-  void *origin_bias_;    // do not free
   float16_t *packed_input_ = nullptr;
-  float16_t *packed_weight_ = nullptr;
   float16_t *col_major_input_ = nullptr;
   int col_tile_;
   int row_tile_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
index d213679f02c..33ad5e4da68 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc
@@ -27,11 +27,12 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_
     return RET_ERROR;
   }
 
-  return WinogradWeightTransformFp16(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_,
-                                     kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_, true);
+  return WinogradWeightTransformFp16(weight_data, reinterpret_cast<float16_t *>(packed_weight_), matrix_g, matrix_gt,
+                                     oc_block, input_unit_, kernel_unit_, conv_param_->input_channel_,
+                                     conv_param_->output_channel_, true);
 }
 
-int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
+int ConvolutionWinogradFP16CPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int in_channel = weight_tensor->Channel();
   int out_channel = weight_tensor->Batch();
@@ -39,19 +40,16 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
   conv_param_->output_channel_ = out_channel;
   int oc_block_num = UP_DIV(out_channel, col_tile_);
   // init weight
-  // set data
   auto trans_matrix_data_size = input_unit_ * input_unit_ * in_channel * oc_block_num * col_tile_ * sizeof(float16_t);
-  if (trans_weight_ == nullptr) {
-    trans_weight_ = reinterpret_cast<float16_t *>(malloc(trans_matrix_data_size));
-    if (trans_weight_ == nullptr) {
-      MS_LOG(ERROR) << "malloc trans_weight_ failed.";
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = malloc(trans_matrix_data_size);
+    if (packed_weight_ == nullptr) {
+      MS_LOG(ERROR) << "malloc packed_weight_ failed.";
       return RET_ERROR;
     }
   }
-  memset(trans_weight_, 0, trans_matrix_data_size);
+  memset(packed_weight_, 0, trans_matrix_data_size);
 
-  float matrix_g[64];
-  float matrix_gt[64];
   float matrix_a[64];
   float matrix_at[64];
   float matrix_b[64];
@@ -61,19 +59,12 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
     coef = 0.5f;
   }
   auto ret =
-    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_);
+    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
     return ret;
   }
-  void *weight_origin_tmp = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
-  ret = WinogradFilterTransformFp16(reinterpret_cast<float16_t *>(weight_origin_tmp), matrix_g, matrix_gt, col_tile_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "winograd filter transform failed.";
-    return ret;
-  }
 
-  // init bias
   if (bias_data_ == nullptr) {
     bias_data_ = malloc(oc_block_num * col_tile_ * sizeof(float16_t));
     if (bias_data_ == nullptr) {
@@ -82,14 +73,16 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() {
     }
   }
   memset(bias_data_, 0, oc_block_num * col_tile_ * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    void *bias_origin_tmp = IsTrainable() ? bias_tensor->data_c() : origin_bias_;
-    memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t));
-  }
   return RET_OK;
 }
 
+void ConvolutionWinogradFP16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *weight_origin = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(weight_origin != nullptr);
+  WinogradFilterTransformFp16(reinterpret_cast<float16_t *>(weight_origin), matrix_g_, matrix_gt_, col_tile_);
+}
+
 int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() {
   int channel_out = conv_param_->output_channel_;
   size_t tile_buffer_size =
@@ -143,6 +136,8 @@ int ConvolutionWinogradFP16CPUKernel::ConfigInputOutput() {
 }
 
 int ConvolutionWinogradFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   col_tile_ = C8NUM;
 #ifdef ENABLE_ARM64
   row_tile_ = C16NUM;
@@ -154,7 +149,7 @@ int ConvolutionWinogradFP16CPUKernel::Init() {
   conv_param_->input_unit_ = input_unit_;
   conv_param_->output_unit_ = output_unit_;
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -207,8 +202,9 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) {
     MS_LOG(ERROR) << "Convolution Winograd Fp16 get null tensor data!";
     return RET_ERROR;
   }
-  ConvWinogardFp16(input_ptr, trans_weight_, reinterpret_cast<const float16_t *>(bias_data_), output_ptr,
-                   tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_);
+  ConvWinogardFp16(input_ptr, reinterpret_cast<float16_t *>(packed_weight_),
+                   reinterpret_cast<const float16_t *>(bias_data_), output_ptr, tmp_buffer_address_list_, task_id,
+                   conv_param_, in_func_, out_func_);
   return RET_OK;
 }
 
@@ -229,13 +225,9 @@ int ConvolutionWinogradFP16CPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  if (IsTrainable() && (IsTrain() || IsRepack())) {
-    ret = InitWeightBias();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "ConvolutionWinogradFP16 repack weight failure";
-      return RET_ERROR;
-    }
-    is_repack_ = false;
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
   ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradFp16Impl, this, thread_count_);
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
index c41cea67f0d..e94191966b0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h
@@ -32,29 +32,22 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx, int out_unit,
                                    void *origin_weight, void *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        output_unit_(out_unit),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
-  ~ConvolutionWinogradFP16CPUKernel() override {
-    if (trans_weight_ != nullptr) {
-      free(trans_weight_);
-      trans_weight_ = nullptr;
-    }
-  }
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias), output_unit_(out_unit) {}
+  ~ConvolutionWinogradFP16CPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
   int Eval() override;
   int RunImpl(int task_id);
-  int InitWeightBias();
   int InitTmpBuffer();
   int ConfigInputOutput();
   int WinogradFilterTransformFp16(const float16_t *weight_data, float *matrix_g, float *matrix_gt, int oc_block);
   int AdjustNumberOfThread();
 
  private:
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreeTmpBuffer() {
     if (trans_input_ != nullptr) {
       ctx_->allocator->Free(trans_input_);
@@ -76,13 +69,12 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel {
   int kernel_unit_ = 0;
   int input_unit_ = 0;
   int output_unit_;
-  void *origin_weight_;  // do not free
-  void *origin_bias_;    // do not free
   float16_t *tmp_data_ = nullptr;
   float16_t *trans_input_ = nullptr;
   float16_t *gemm_out_ = nullptr;
-  float16_t *trans_weight_ = nullptr;
   float16_t *col_buffer_ = nullptr;
+  float matrix_g_[64];
+  float matrix_gt_[64];
   TmpBufferAddressFp16 tmp_buffer_address_list_[4];
   InputTransFp16Func in_func_ = nullptr;
   OutputTransFp16Func out_func_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
index 7cce484401a..8193a2e667b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc
@@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Crop;
 
 namespace mindspore::kernel {
 int CropFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -48,7 +50,8 @@ static int CropFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scal
 int CropFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-
+  MS_ASSERT(input_tensor != nullptr);
+  MS_ASSERT(output_tensor != nullptr);
   input_ptr_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   output_ptr_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
index 79459ad8b74..445003fdf6b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc
@@ -27,10 +27,6 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() {
     delete sliding_;
     sliding_ = nullptr;
   }
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
 }
 
 int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() {
@@ -69,48 +65,47 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
+int DeconvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
-  auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
   int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
 
-  packed_weight_ = reinterpret_cast<float16_t *>(malloc(pack_weight_size * sizeof(float16_t)));
+  packed_weight_ = malloc(pack_weight_size * sizeof(float16_t));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                           weight_tensor->Batch());
 
-  bias_data_ = reinterpret_cast<float16_t *>(malloc(C8NUM * OC8 * sizeof(float16_t)));
+  bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
   memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float16_t *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->Size());
-  }
-
   conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
   return RET_OK;
 }
 
+void DeconvolutionDepthwiseFp16CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
+                           1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+}
+
 int DeconvolutionDepthwiseFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   sliding_ = new (std::nothrow) SlidingWindowParam;
   if (sliding_ == nullptr) {
     MS_LOG(ERROR) << "new SlidingWindowParam fail!";
     return RET_ERROR;
   }
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -133,8 +128,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::ReSize() {
 }
 
 int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) {
-  DeconvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float16_t *>(bias_data_), conv_param_,
-                 sliding_, task_id);
+  DeconvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast<float16_t *>(packed_weight_),
+                 reinterpret_cast<float16_t *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
 }
 
@@ -159,6 +154,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() {
     FreePackedInputOutput();
     return RET_ERROR;
   }
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
 
   auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
   auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
index 6ccb8a8c02d..757a7bb7e94 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h
@@ -38,7 +38,8 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeconvolutionDepthwiseFp16CPUKernel() override;
 
   int Init() override;
@@ -46,14 +47,14 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int Run() override;
 
   int InitPackedInputOutput();
-  int InitWeightBias();
   int InitSlideParam();
   int Execute(int task_id);
 
  private:
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreePackedInputOutput();
   SlidingWindowParam *sliding_ = nullptr;
-  float16_t *packed_weight_ = nullptr;
   float16_t *packed_input_ = nullptr;
   float16_t *packed_output_ = nullptr;
   bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
index 87093a8605d..c80479b2756 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc
@@ -31,10 +31,6 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() {
     delete matmul_param_;
     matmul_param_ = nullptr;
   }
-  if (pack_weight_ != nullptr) {
-    free(pack_weight_);
-    pack_weight_ = nullptr;
-  }
   return;
 }
 
@@ -52,13 +48,31 @@ int DeConvolutionFp16CPUKernel::ReSize() {
   return RET_OK;
 }
 
-int DeConvolutionFp16CPUKernel::InitWeightBias() {
+void DeConvolutionFp16CPUKernel::PackWeight() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = weight_tensor->Batch();
   auto output_channel = weight_tensor->Channel();
   auto kernel_h = weight_tensor->Height();
   auto kernel_w = weight_tensor->Width();
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast<float16_t *>(origin_weight), reinterpret_cast<float16_t *>(packed_weight_),
+                           input_channel, kernel_w * kernel_h, output_channel);
+}
 
+int DeConvolutionFp16CPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = weight_tensor->Batch();
+  auto output_channel = weight_tensor->Channel();
+  auto kernel_h = weight_tensor->Height();
+  auto kernel_w = weight_tensor->Width();
+  size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t);
+  packed_weight_ = malloc(weight_pack_size);
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "deconv malloc packed_weight_ error!";
+    return RET_ERROR;
+  }
+  memset(packed_weight_, 0, weight_pack_size);
   auto bias_size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t);
   bias_data_ = malloc(bias_size);
   if (bias_data_ == nullptr) {
@@ -66,33 +80,6 @@ int DeConvolutionFp16CPUKernel::InitWeightBias() {
     return RET_ERROR;
   }
   memset(bias_data_, 0, UP_ROUND(output_channel, C8NUM) * sizeof(float16_t));
-  if (in_tensors_.size() == 3) {
-    if (in_tensors_.at(kBiasIndex)->data_type() != kNumberTypeFloat16) {
-      MS_LOG(ERROR) << "DeConv fp16 only support fp16 weight";
-      return RET_ERROR;
-    }
-    if (in_tensors_.at(kBiasIndex)->shape().size() == 1 &&
-        in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) {
-      memcpy(bias_data_, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float16_t));
-    } else {
-      MS_LOG(ERROR) << "unsupported bias shape for deconv!";
-      return RET_ERROR;
-    }
-  }
-
-  size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t);
-  pack_weight_ = reinterpret_cast<float16_t *>(malloc(weight_pack_size));
-  if (pack_weight_ == nullptr) {
-    MS_LOG(ERROR) << "deconv malloc pack_weight_ error!";
-    return RET_ERROR;
-  }
-  memset(pack_weight_, 0, weight_pack_size);
-  if (in_tensors_.at(1)->data_type() != kNumberTypeFloat16) {
-    MS_LOG(ERROR) << "deconv fp16 kernel require fp16 weight";
-    return RET_ERROR;
-  }
-  PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c()), pack_weight_,
-                           input_channel, kernel_w * kernel_h, output_channel);
   return RET_OK;
 }
 
@@ -172,7 +159,9 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) {
   }
 
   auto tmp_buf = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_16_;
-  MatMulFp16(pack_input_, pack_weight_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
+  MatMulFp16(pack_input_,
+             reinterpret_cast<float16_t *>(packed_weight_) +
+               task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
              tmp_buf, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_, oc * C8NUM * kernel_plane_, 0,
              OutType_C8);
 
@@ -183,14 +172,16 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) {
 }
 
 int DeConvolutionFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   matmul_param_ = new (std::nothrow) MatMulParameter();
   if (matmul_param_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
   }
-  int ret = InitWeightBias();
+  int ret = InitConvWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "deconv InitWeightBias error!";
+    MS_LOG(ERROR) << "deconv InitConvWeightBias error!";
     return ret;
   }
   if (!InferShapeDone()) {
@@ -200,6 +191,10 @@ int DeConvolutionFp16CPUKernel::Init() {
 }
 
 int DeConvolutionFp16CPUKernel::Run() {
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
   auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
   auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
   MS_ASSERT(input_ptr != nullptr);
@@ -225,6 +220,8 @@ int DeConvolutionFp16CPUKernel::Run() {
     error_code = ParallelLaunch(this->ms_context_, DeConvFp16Run, this, thread_count_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]";
+      FreeRunBuf();
+      return error_code;
     }
   }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
index da0330a295b..21f286b2998 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h
@@ -28,7 +28,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                              const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeConvolutionFp16CPUKernel() override;
   int Init() override;
   int Run() override;
@@ -41,7 +42,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int InitRunBuf();
   void FreeRunBuf();
   int InitParam();
-  int InitWeightBias();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
 
  private:
   MatMulParameter *matmul_param_;
@@ -51,7 +53,6 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel {
   int thread_count_;
   int thread_stride_;
   float16_t *pack_input_ = nullptr;
-  float16_t *pack_weight_ = nullptr;
   float16_t *pack_output_ = nullptr;
   float16_t *tmp_buffer_ = nullptr;
   float16_t *batch_input_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
index 13fc716af11..d4e1bb73ce0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc
@@ -237,7 +237,13 @@ int DeConvWgPostFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
 
 int DeConvWinogradFp16CPUKernel::InitComputeParam() {
   auto weight_tensor = in_tensors_.at(1);
-
+  auto shape = weight_tensor->shape();
+  if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
+    MS_LOG(WARNING) << "The shape of weight tensor is invalid.";
+    valid_weight_shape_ = false;
+    return RET_OK;
+  }
+  valid_weight_shape_ = true;
   conv_param_->input_channel_ = weight_tensor->Batch();
   conv_param_->output_channel_ = weight_tensor->Channel();
   conv_param_->kernel_w_ = weight_tensor->Width();
@@ -318,7 +324,11 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() {
   /* unit data : weight & winograd data */
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto origin_weight = reinterpret_cast<float16_t *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
+  if (origin_weight == nullptr) {
+    MS_LOG(WARNING) << "The weight data is nullptr, will init data parameter in runtime.";
+    is_repack_ = true;
+    return RET_OK;
+  }
   for (int i = 0; i < deconv_param_->compute_size_; i++) {
     DeConvComputeUnit *unit = &deconv_param_->compute_units_[i];
     auto ret = PackDeConvWgDataFp16(origin_weight, unit, conv_param_, deconv_param_);
@@ -349,6 +359,19 @@ int DeConvWinogradFp16CPUKernel::ReSize() {
     MS_LOG(ERROR) << "ConvolutionBaseCPUKernel init failed!";
     return ret;
   }
+  if (!valid_weight_shape_) {
+    if (InitComputeParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitComputeParam error!";
+      return RET_ERROR;
+    } else if (!valid_weight_shape_) {
+      return RET_OK;
+    }
+    if (InitDataParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+  }
+
   ret = InitParameter();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InitParameter failed!";
@@ -358,6 +381,8 @@ int DeConvWinogradFp16CPUKernel::ReSize() {
 }
 
 int DeConvWinogradFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   deconv_param_ = new (std::nothrow) DeConvParam();
   if (deconv_param_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
@@ -368,16 +393,14 @@ int DeConvWinogradFp16CPUKernel::Init() {
     wg.dest_buffer_ = nullptr;
     wg.middle_buffer_ = nullptr;
   }
-  int error_code = InitComputeParam();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code;
-    return error_code;
-  }
 
-  error_code = InitDataParam();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "InitWeightBias error! ret: " << error_code;
-    return error_code;
+  if (InitComputeParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
+  }
+  if (valid_weight_shape_ && InitDataParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
   }
 
   if (!InferShapeDone()) {
@@ -397,6 +420,21 @@ int DeConvWinogradFp16CPUKernel::Run() {
     return RET_ERROR;
   }
 
+  if (!valid_weight_shape_) {
+    if (InitComputeParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+    if (!valid_weight_shape_ || InitParameter() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+  }
+  if (IsRepack() && InitDataParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
+  }
+
   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
     nhwc_input_ = input_ptr + batch_index * deconv_param_->input_plane_ * conv_param_->input_channel_;
     nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
index b558c2312a8..c83ee09d84f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h
@@ -29,7 +29,8 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeConvWinogradFp16CPUKernel() override;
   int Init() override;
   int Run() override;
@@ -56,6 +57,7 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel {
   float16_t *tile_output_ = nullptr;
   int thread_num_hw_ = 0;
   int thread_stride_hw_ = 0;
+  bool valid_weight_shape_ = true;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc
index 47da33433ef..4cfa3edd456 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc
@@ -18,6 +18,7 @@
 #include "src/kernel_registry.h"
 
 using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_FullConnection;
 
@@ -41,6 +42,8 @@ int FullconnectionFP16CPUKernel::ReSize() {
 }
 
 int FullconnectionFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_ARM64
   row_tile_ = C16NUM;
 #else
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc
index 9e544cd6e30..6f4b7232782 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc
@@ -86,6 +86,11 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
       ms_context_->allocator->Free(output_fp16);
       return RET_ERROR;
     }
+    MS_ASSERT(input->data_c() != nullptr);
+    MS_ASSERT(scale->data_c() != nullptr);
+    MS_ASSERT(offset->data_c() != nullptr);
+    MS_ASSERT(mean->data_c() != nullptr);
+    MS_ASSERT(variance->data_c() != nullptr);
     Float32ToFloat16(reinterpret_cast<float *>(input->data_c()), reinterpret_cast<float16_t *>(input_fp16),
                      input->ElementsNum());
     Float32ToFloat16(reinterpret_cast<float *>(scale->data_c()), reinterpret_cast<float16_t *>(scale_fp16),
@@ -116,7 +121,8 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) {
     ms_context_->allocator->Free(output_fp16);
     return RET_OK;
   }
-
+  MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr);
+  MS_ASSERT(out_tensors_.at(0)->data_c() != nullptr);
   if (IsTrain() && IsTrainable() && in_tensors_.size() >= kMaxInIdx) {
     CalcMeanVar(static_cast<float16_t *>(in_tensors_.at(0)->data_c()),
                 static_cast<float16_t *>(in_tensors_.at(kInScaleIdx)->data_c()),
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc
index e9cbb9d2dd5..f88969604d3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc
@@ -40,13 +40,17 @@ GatherFp16CPUKernel::~GatherFp16CPUKernel() {
 }
 
 int GatherFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 3);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto input_tensor = in_tensors_.at(0);
+  MS_ASSERT(input_tensor != nullptr);
   if (input_tensor->data_type() == kNumberTypeFloat32 && input_tensor->data_c() != nullptr) {
     const_input_ = true;
     input_data_ =
       reinterpret_cast<float16_t *>(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t)));
     Float32ToFloat16(reinterpret_cast<float *>(input_tensor->data_c()), input_data_, input_tensor->ElementsNum());
   }
+  MS_ASSERT(in_tensors_.at(kSecondInput)->data_c() != nullptr);
   (reinterpret_cast<GatherParameter *>(op_parameter_))->axis_ =
     *(reinterpret_cast<int *>(in_tensors_.at(kSecondInput)->data_c()));
   if (!InferShapeDone()) {
@@ -57,35 +61,6 @@ int GatherFp16CPUKernel::Init() {
 
 int GatherFp16CPUKernel::ReSize() { return RET_OK; }
 
-int GatherFp16CPUKernel::PreProcess() {
-  if (!InferShapeDone()) {
-    auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_);
-    if (ret != 0) {
-      MS_LOG(ERROR) << "InferShape fail!";
-      return ret;
-    }
-    ret = ReSize();
-    if (ret != 0) {
-      MS_LOG(ERROR) << "ReSize fail!ret: " << ret;
-      return ret;
-    }
-    out_tensors_[0]->set_data_type(kNumberTypeFloat16);
-  }
-  for (auto *output : out_tensors_) {
-    MS_ASSERT(output != nullptr);
-    auto ret = output->MallocData();
-    if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast<int>(sizeof(int64_t))) {
-      MS_LOG(ERROR) << "The size of output tensor is too big";
-      return RET_ERROR;
-    }
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "gather out tensor malloc data failed.";
-      return ret;
-    }
-  }
-  return RET_OK;
-}
-
 int GatherFp16CPUKernel::DoGather(int task_id) {
   auto input_tensor = in_tensors_.at(0);
   auto indices_tensor = in_tensors_.at(1);
@@ -118,6 +93,8 @@ int GatherFp16CPUKernel::DoGather(int task_id) {
     return RET_ERROR;
   }
   int8_t *int8_out = reinterpret_cast<int8_t *>(out_tensor->data_c());
+  MS_ASSERT(int8_in != nullptr);
+  MS_ASSERT(int8_out != nullptr);
   int data_size = lite::DataTypeSize(kNumberTypeFloat16);
   int8_in += thread_stride * limit * inner_size * data_size;
   int8_out += thread_stride * indices_element_size * inner_size * data_size;
@@ -156,6 +133,7 @@ int GatherFp16CPUKernel::Run() {
   }
   if (!const_input_) {
     auto input_tensor = in_tensors_.at(0);
+    MS_ASSERT(input_tensor->data_c() != nullptr);
     if (input_tensor->data_type() == kNumberTypeFloat32) {
       input_data_ =
         reinterpret_cast<float16_t *>(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t)));
@@ -176,6 +154,7 @@ int GatherFp16CPUKernel::Run() {
 }
 
 int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor) {
+  MS_ASSERT(indices_tensor->data_c() != nullptr);
   if (!isIndicesInt32) {
     if (indices_num >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(int))) {
       MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num;
@@ -188,18 +167,20 @@ int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num,
     }
     if (indices_tensor->data_type() == kNumberTypeInt64) {
       for (int i = 0; i < indices_num; i++) {
-        indices_data_[i] = reinterpret_cast<int64_t *>(indices_tensor->MutableData())[i];
+        indices_data_[i] = reinterpret_cast<int64_t *>(indices_tensor->data_c())[i];
       }
     } else if (indices_tensor->data_type() == kNumberTypeFloat16) {
       for (int i = 0; i < indices_num; i++) {
-        indices_data_[i] = reinterpret_cast<float16_t *>(indices_tensor->MutableData())[i];
+        indices_data_[i] = reinterpret_cast<float16_t *>(indices_tensor->data_c())[i];
       }
     } else {
       MS_LOG(ERROR) << "The data type of indices tensor is wrong";
+      ms_context_->allocator->Free(indices_data_);
+      indices_data_ = nullptr;
       return RET_ERROR;
     }
   } else {
-    indices_data_ = reinterpret_cast<int32_t *>(indices_tensor->MutableData());
+    indices_data_ = reinterpret_cast<int32_t *>(indices_tensor->data_c());
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h
index a1bb9b22e2b..39167c747fd 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h
@@ -34,7 +34,6 @@ class GatherFp16CPUKernel : public InnerKernel {
 
   int Init() override;
   int ReSize() override;
-  int PreProcess() override;
   int Run() override;
   int DoGather(int task_id);
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc
index 80c3751f1b0..9a968988640 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc
@@ -83,6 +83,8 @@ int GroupConvolutionFP16CPUKernel::PostConcat(int group_id) {
 }
 
 int GroupConvolutionFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (group_conv_creator_ == nullptr) {
     return lite::RET_ERROR;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc
index 9aa8e26a7d0..7be43799813 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc
@@ -89,6 +89,7 @@ int GruFp16CPUKernel::InitInputWeightBias() {
   // result -- row: seq_len * batch; col: hidden_size
   auto weight_g = in_tensors_.at(1);
   MS_ASSERT(weight_g != nullptr);
+  MS_ASSERT(weight_g->data_c() != nullptr);
   weight_g_ptr_ = reinterpret_cast<float16_t *>(
     malloc(weight_batch_ * gru_param_->input_col_align_ * gru_param_->input_size_ * sizeof(float16_t)));
   if (weight_g_ptr_ == nullptr) {
@@ -109,6 +110,7 @@ int GruFp16CPUKernel::InitInputWeightBias() {
   // input bias
   auto bias = in_tensors_.at(3);
   MS_ASSERT(bias != nullptr);
+  MS_ASSERT(bias->data_c() != nullptr);
   input_bias_ = reinterpret_cast<float16_t *>(malloc(weight_batch_ * gru_param_->input_col_align_ * sizeof(float16_t)));
   if (input_bias_ == nullptr) {
     MS_LOG(ERROR) << "GruFp16CPUKernel malloc input_bias_ error.";
@@ -135,6 +137,7 @@ int GruFp16CPUKernel::InitStateWeightBias() {
   // result -- row: batch; col: hidden_size
   auto weight_r = in_tensors_.at(2);
   MS_ASSERT(weight_r != nullptr);
+  MS_ASSERT(weight_r->data_c() != nullptr);
   weight_r_ptr_ = reinterpret_cast<float16_t *>(
     malloc(weight_batch_ * gru_param_->state_col_align_ * gru_param_->hidden_size_ * sizeof(float16_t)));
   if (weight_r_ptr_ == nullptr) {
@@ -167,6 +170,7 @@ int GruFp16CPUKernel::InitStateWeightBias() {
   // state bias
   auto bias = in_tensors_.at(3);
   MS_ASSERT(bias != nullptr);
+  MS_ASSERT(bias->data_c() != nullptr);
   state_bias_ = reinterpret_cast<float16_t *>(malloc(weight_batch_ * gru_param_->state_col_align_ * sizeof(float16_t)));
   if (state_bias_ == nullptr) {
     MS_LOG(ERROR) << "GruFp16CPUKernel malloc state_bias_ error.";
@@ -189,6 +193,8 @@ int GruFp16CPUKernel::InitStateWeightBias() {
 }
 
 int GruFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 5);
+  CHECK_LESS_RETURN(out_tensors_.size(), 2);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -267,10 +273,14 @@ int GruFp16CPUKernel::Run() {
   auto output_ptr = reinterpret_cast<float16_t *>(output->data_c());
   MS_ASSERT(output_ptr);
   auto output_hidden_state = out_tensors_[1];
+  MS_ASSERT(output_hidden_state->data_c() != nullptr);
+  MS_ASSERT(hidden_state->data_c() != nullptr);
   memcpy(output_hidden_state->data_c(), hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t));
   int check_seq_len = gru_param_->seq_len_;
   if (in_tensors_.size() == 6) {
-    auto seq_len = reinterpret_cast<int *>(in_tensors_.at(5)->data_c());
+    MS_ASSERT(in_tensors_.at(5) != nullptr);
+    int *seq_len = reinterpret_cast<int *>(in_tensors_.at(5)->data_c());
+    MS_ASSERT(seq_len != nullptr);
     if (!std::equal(seq_len + 1, seq_len + gru_param_->batch_, seq_len)) {
       MS_LOG(ERROR) << "different batch seq_len is currently not supported";
       return RET_ERROR;
@@ -281,6 +291,7 @@ int GruFp16CPUKernel::Run() {
   auto ret = MallocRunBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "GruFp16CPUKernel MallocRunBuffer error.";
+    FreeRunBuffer();
     return RET_ERROR;
   }
   MS_ASSERT(weight_g_ptr_ != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc
index 9af3129b128..ad4bd8870cc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc
@@ -43,7 +43,11 @@ void InstanceNormFp16CPUKernel::FreeTmpBuffer() {
 }
 
 int InstanceNormFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 3);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto gamma = in_tensors_[1];
+  MS_ASSERT(gamma != nullptr);
+  MS_ASSERT(gamma->data_c() != nullptr);
   if (gamma->data_type() == kNumberTypeFloat32) {
     gamma_data_ = reinterpret_cast<float16_t *>(malloc(gamma->ElementsNum() * sizeof(float16_t)));
     if (gamma_data_ == nullptr) {
@@ -59,6 +63,8 @@ int InstanceNormFp16CPUKernel::Init() {
   }
 
   auto beta = in_tensors_[2];
+  MS_ASSERT(beta != nullptr);
+  MS_ASSERT(beta->data_c() != nullptr);
   if (beta->data_type() == kNumberTypeFloat32) {
     beta_data_ = reinterpret_cast<float16_t *>(malloc(beta->ElementsNum() * sizeof(float16_t)));
     if (beta_data_ == nullptr) {
@@ -108,6 +114,8 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
 int InstanceNormFp16CPUKernel::Run() {
   src_data_ = reinterpret_cast<float16_t *>(in_tensors_[0]->data_c());
   dst_data_ = reinterpret_cast<float16_t *>(out_tensors_[0]->data_c());
+  MS_ASSERT(src_data_ != nullptr);
+  MS_ASSERT(dst_data_ != nullptr);
   auto ret = ParallelLaunch(this->ms_context_, InstanceNormFp16Run, this, op_parameter_->thread_num_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc
index 786765f2914..7ccdb26f8b1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc
@@ -96,6 +96,7 @@ int LstmFp16CPUKernel::InitInputWeightBias() {
   // result -- row: seq_len * batch; col: hidden_size
   auto weight_i = in_tensors_.at(1);
   MS_ASSERT(weight_i != nullptr);
+  MS_ASSERT(weight_i->data_c() != nullptr);
   weight_i_ptr_ = reinterpret_cast<float16_t *>(
     malloc(weight_batch_ * lstm_param_->input_col_align_ * lstm_param_->input_size_ * sizeof(float16_t)));
   if (weight_i_ptr_ == nullptr) {
@@ -116,6 +117,7 @@ int LstmFp16CPUKernel::InitInputWeightBias() {
   // input bias
   auto bias = in_tensors_.at(3);
   MS_ASSERT(bias != nullptr);
+  MS_ASSERT(bias->data_c() != nullptr);
   input_bias_ =
     reinterpret_cast<float16_t *>(malloc(weight_batch_ * lstm_param_->input_col_align_ * sizeof(float16_t)));
   if (input_bias_ == nullptr) {
@@ -143,6 +145,7 @@ int LstmFp16CPUKernel::InitStateWeightBias() {
   // result -- row: batch; col: hidden_size
   auto weight_h = in_tensors_.at(2);
   MS_ASSERT(weight_h != nullptr);
+  MS_ASSERT(weight_h->data_c() != nullptr);
   weight_h_ptr_ = reinterpret_cast<float16_t *>(
     malloc(weight_batch_ * lstm_param_->state_col_align_ * lstm_param_->hidden_size_ * sizeof(float16_t)));
   if (weight_h_ptr_ == nullptr) {
@@ -175,6 +178,7 @@ int LstmFp16CPUKernel::InitStateWeightBias() {
   // state bias
   auto bias = in_tensors_.at(3);
   MS_ASSERT(bias != nullptr);
+  MS_ASSERT(bias->data_c() != nullptr);
   state_bias_ =
     reinterpret_cast<float16_t *>(malloc(weight_batch_ * lstm_param_->state_col_align_ * sizeof(float16_t)));
   if (state_bias_ == nullptr) {
@@ -198,6 +202,8 @@ int LstmFp16CPUKernel::InitStateWeightBias() {
 }
 
 int LstmFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 6);
+  CHECK_LESS_RETURN(out_tensors_.size(), 3);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -286,23 +292,28 @@ int LstmFp16CPUKernel::Run() {
   MS_ASSERT(input != nullptr);
   auto hidden_state = in_tensors_.at(4);
   MS_ASSERT(hidden_state != nullptr);
+  MS_ASSERT(hidden_state->data_c() != nullptr);
   auto cell_state = in_tensors_.at(5);
   MS_ASSERT(cell_state != nullptr);
+  MS_ASSERT(cell_state->data_c() != nullptr);
   auto output = out_tensors_.at(0);
   MS_ASSERT(output != nullptr);
 
   auto input_ptr = reinterpret_cast<float16_t *>(input->data_c());
-  MS_ASSERT(input_ptr);
+  MS_ASSERT(input_ptr != nullptr);
   auto output_ptr = reinterpret_cast<float16_t *>(output->data_c());
-  MS_ASSERT(output_ptr);
+  MS_ASSERT(output_ptr != nullptr);
   auto output_hidden_state = out_tensors_[1];
+  MS_ASSERT(output_hidden_state->data_c() != nullptr);
   memcpy(output_hidden_state->data_c(), hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t));
   auto output_cell_state = out_tensors_[2];
+  MS_ASSERT(output_cell_state->data_c());
   memcpy(output_cell_state->data_c(), cell_state->data_c(), cell_state->ElementsNum() * sizeof(float16_t));
 
   auto ret = MallocRunBuffer();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "LstmFp16CPUKernel MallocRunBuffer error.";
+    FreeRunBuffer();
     return RET_ERROR;
   }
   MS_ASSERT(weight_i_ptr_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc
index 03aa5338824..256c598b0be 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc
@@ -232,11 +232,15 @@ void MatmulBaseFP16CPUKernel::InitMatrixB(void *src_ptr, TypeId src_data_type) {
 }
 
 int MatmulBaseFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   ResizeParameter();
   if (params_->a_const_ == true) {
     if (RET_OK != InitBufferA()) {
       return RET_ERROR;
     }
+    MS_ASSERT(in_tensors_[0] != nullptr);
+    MS_ASSERT(in_tensors_[0]->data_c() != nullptr);
     InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data_c()));
   }
 
@@ -244,6 +248,8 @@ int MatmulBaseFP16CPUKernel::Init() {
     /* copy origin b data, pack in resize
      * pack after a infershape done */
     auto b_tensor = in_tensors_[1];
+    MS_ASSERT(b_tensor != nullptr);
+    MS_ASSERT(b_tensor->data_c() != nullptr);
     src_b_ = reinterpret_cast<float16_t *>(malloc(params_->batch * params_->col_ * params_->deep_ * sizeof(float16_t)));
     if (src_b_ == nullptr) {
       MS_LOG(ERROR) << "Matmul fp16 malloc src_b_ failed";
@@ -302,6 +308,7 @@ int MatmulBaseFP16CPUKernel::Run() {
     if (RET_OK != InitBufferA()) {
       return RET_ERROR;
     }
+    MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr);
     InitMatrixA(in_tensors_.at(0)->data_c());
   }
   if ((params_->b_const_ == false) || IsRepack()) {
@@ -309,6 +316,7 @@ int MatmulBaseFP16CPUKernel::Run() {
       FreeResizeBufA();
       return RET_ERROR;
     }
+    MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr);
     InitMatrixB(in_tensors_.at(1)->data_c(), in_tensors_.at(1)->data_type());
     InitBias();
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
index c3bb2461107..69583ccfd6e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc
@@ -19,6 +19,7 @@
 #include "src/kernel_registry.h"
 
 using mindspore::lite::KernelRegistrar;
+using mindspore::lite::RET_ERROR;
 using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_MatMul;
 
@@ -54,6 +55,8 @@ void MatmulFP16CPUKernel::InitBShape() {
 }
 
 int MatmulFP16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_ARM64
   row_tile_ = C4NUM;
 #else
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
index c06b46c0c7c..0a35595eebb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc
@@ -26,7 +26,8 @@ using mindspore::schema::PrimitiveType_PadFusion;
 
 namespace mindspore::kernel {
 namespace {
-constexpr size_t kPadMaxInputSize = 2;
+constexpr size_t kPadCommonInputSize = 2;
+constexpr size_t kPadMaxInputSize = 3;
 }  // namespace
 int PadFp16CPUKernel::RunImpl(int task_id) {
   PadFp16(input_, output_, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);
@@ -53,8 +54,14 @@ int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) {
         for (int b = 0; b < block.size_[1]; b++) {
           int out_b_index = out_a_index + b * block.out_stride_[1];
           for (int c = 0; c < block.size_[2]; ++c) {
-            int output_index = out_b_index + c * block.out_stride_[2];
-            MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]);
+            int out_c_index = out_b_index + c * block.out_stride_[2];
+            for (int d = 0; d < block.size_[3]; ++d) {
+              int out_d_index = out_c_index + d * block.out_stride_[3];
+              for (int e = 0; e < block.size_[4]; ++e) {
+                int output_index = out_d_index + e * block.out_stride_[4];
+                MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]);
+              }
+            }
           }
         }
       }
@@ -84,16 +91,20 @@ int PadFp16CPUKernel::Run() {
   auto output_tensor = out_tensors_.at(0);
   input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-
+  MS_ASSERT(input_ != nullptr);
+  MS_ASSERT(output_ != nullptr);
   int ret = 0;
   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
-    if (in_tensors_.size() == kPadMaxInputSize) {
+    if (in_tensors_.size() >= kPadCommonInputSize) {
       ret = CopyPaddingFromInput();
       if (ret != RET_OK) {
         MS_LOG(ERROR) << "PadFp16CPUKernel CopyPaddingFromInput failed";
         return RET_ERROR;
       }
     }
+    if (in_tensors_.size() == kPadMaxInputSize) {
+      pad_param_->constant_value_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c())[0];
+    }
     if (pad_param_->constant_value_ - 0.0f < 1e-5) {
       memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t));
     } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc
index 50c17f0baaf..0ffff245ca8 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc
@@ -88,7 +88,8 @@ int PoolingFp16CPUKernel::Run() {
 
   fp16_input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
   fp16_output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-
+  MS_ASSERT(fp16_input_ != nullptr);
+  MS_ASSERT(fp16_output_ != nullptr);
   int error_code = ParallelLaunch(this->ms_context_, PoolingFp16Impl, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc
index 691afade3c9..ae159e6b9b4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc
@@ -27,7 +27,8 @@ using mindspore::schema::PrimitiveType_PowFusion;
 
 namespace mindspore::kernel {
 int PowerFp16CPUKernel::Init() {
-  MS_ASSERT(in_tensors_.size() == 2);
+  CHECK_LESS_RETURN(in_tensors_.size(), 2);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   exp_tensor_ = in_tensors_[1];
   MS_ASSERT(exp_tensor_ != nullptr);
   if (exp_tensor_->IsConst()) {
@@ -50,7 +51,7 @@ int PowerFp16CPUKernel::GetExpData() {
       MS_LOG(ERROR) << "exp_data_ is nullptr";
       return RET_NULL_PTR;
     }
-    auto exp = reinterpret_cast<float *>(exp_tensor_->MutableData());
+    auto exp = reinterpret_cast<float *>(exp_tensor_->data_c());
     if (exp == nullptr) {
       MS_LOG(ERROR) << "exp is nullptr!";
       return RET_NULL_PTR;
@@ -59,7 +60,7 @@ int PowerFp16CPUKernel::GetExpData() {
       exp_data_[i] = (float16_t)(exp[i]);
     }
   } else {
-    exp_data_ = reinterpret_cast<float16_t *>(exp_tensor_->MutableData());
+    exp_data_ = reinterpret_cast<float16_t *>(exp_tensor_->data_c());
     if (exp_data_ == nullptr) {
       MS_LOG(ERROR) << "exp_data_ is nullptr";
       return RET_NULL_PTR;
@@ -95,10 +96,8 @@ int PowerFp16CPUKernel::Run() {
 }
 
 int PowerFp16CPUKernel::RunImpl(int task_id) {
-  auto x_addr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->MutableData());
-  MS_ASSERT(x_addr);
-  auto output_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->MutableData());
-  MS_ASSERT(output_addr);
+  auto x_addr = reinterpret_cast<float16_t *>(in_tensors_.at(0)->data_c());
+  auto output_addr = reinterpret_cast<float16_t *>(out_tensors_.at(0)->data_c());
   auto size = in_tensors_.at(0)->ElementsNum();
   int stride = UP_DIV(size, thread_count_);
   int len = MSMIN(stride, size - stride * task_id);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc
index a912c60e786..1df7d4486ac 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc
@@ -30,14 +30,8 @@ using mindspore::schema::PrimitiveType_QuantDTypeCast;
 
 namespace mindspore::kernel {
 int QuantDTypeCastFp16CPUKernel::Init() {
-  if (in_tensors_.size() != 1) {
-    MS_LOG(ERROR) << "inputs number should be 1, but " << in_tensors_.size() << " is given.";
-    return RET_PARAM_INVALID;
-  }
-  if (out_tensors_.size() != 1) {
-    MS_LOG(ERROR) << "outputs number should be 1, but " << out_tensors_.size() << " is given.";
-    return RET_PARAM_INVALID;
-  }
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto in_tensor = in_tensors_.front();
   auto out_tensor = out_tensors_.front();
   auto param = reinterpret_cast<QuantDTypeCastParameter *>(op_parameter_);
@@ -102,9 +96,9 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
   auto quant_arg = !out_tensors_.front()->quant_params().empty() ? out_tensors_.front()->quant_params().front()
                                                                  : in_tensors_.front()->quant_params().front();
   int ret;
-  MS_ASSERT(float16_ptr_);
+  MS_ASSERT(float16_ptr_ != nullptr);
   if (!is_uint8_) {
-    MS_ASSERT(int8_ptr_);
+    MS_ASSERT(int8_ptr_ != nullptr);
     if (int_to_float_) {
       ret = DoDequantizeInt8ToFp16(int8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale,
                                    quant_arg.zeroPoint, num_unit_thread);
@@ -114,7 +108,7 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) {
     }
   } else {
     // uint8
-    MS_ASSERT(uint8_ptr_);
+    MS_ASSERT(uint8_ptr_ != nullptr);
     if (int_to_float_) {
       ret = DoDequantizeUInt8ToFp16(uint8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale,
                                     quant_arg.zeroPoint, num_unit_thread);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
index 5af2c51d44e..9973a53efab 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc
@@ -63,9 +63,8 @@ int ReduceFp16CPUKernel::Init() {
 }
 
 int ReduceFp16CPUKernel::CallReduceUnit(int task_id) {
-  auto ret =
-    reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id, op_parameter_->thread_num_);
-  return ret;
+  return reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id,
+                  op_parameter_->thread_num_);
 }
 
 static int ReduceFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
@@ -86,7 +85,9 @@ int ReduceFp16CPUKernel::Run() {
   }
 
   auto in_tensor = in_tensors_.at(0);
-  fp16_src_data_ = reinterpret_cast<float16_t *>(in_tensor->MutableData());
+  MS_ASSERT(in_tensor != nullptr);
+  fp16_src_data_ = reinterpret_cast<float16_t *>(in_tensor->data_c());
+  MS_ASSERT(fp16_src_data_ != nullptr);
   for (size_t i = 0; i < data_buffers_.size(); ++i) {
     fp16_dst_data_ = data_buffers_.at(i);
     outer_size_ = outer_sizes_.at(i);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
index 139027072a8..be8d4eb0728 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc
@@ -48,6 +48,7 @@ int ScaleFp16CPUKernel::Init() {
     MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given.";
     return RET_ERROR;
   }
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 
   if (!InferShapeDone()) {
     return RET_OK;
@@ -101,9 +102,12 @@ int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 int ScaleFp16CPUKernel::Run() {
   auto input_tensor = in_tensors_.at(0);
   auto output_tensor = out_tensors_.at(0);
-  input_ = reinterpret_cast<float16_t *>(input_tensor->MutableData());
-  output_ = reinterpret_cast<float16_t *>(output_tensor->MutableData());
-
+  MS_ASSERT(input_tensor != nullptr);
+  MS_ASSERT(output_tensor != nullptr);
+  input_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
+  output_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
+  MS_ASSERT(input_ != nullptr);
+  MS_ASSERT(output_ != nullptr);
   auto ret = InitScaleOffset();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
index 640910814f8..abc10c22e02 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc
@@ -78,8 +78,8 @@ int SoftmaxFp16CPUKernel::DoSoftmaxLastAxis(int task_id) {
   int end = MSMIN(begin + unit, out_plane_size_);
   int channel = softmax_param_->input_shape_[softmax_param_->axis_];
   int offset = begin * channel;
-  auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kInputIndex)->MutableData());
-  auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(kOutputIndex)->MutableData());
+  auto input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kInputIndex)->data_c());
+  auto output_ptr = reinterpret_cast<float16_t *>(out_tensors_.at(kOutputIndex)->data_c());
   SoftmaxLastAxisFp16(input_ptr + offset, output_ptr + offset, end - begin, channel);
   return RET_OK;
 }
@@ -102,14 +102,14 @@ int SoftmaxFp16CPUKernel::Run() {
     return ret;
   } else {
     auto input_tensor = in_tensors_.at(0);
-    MS_ASSERT(input_tensor);
+    MS_ASSERT(input_tensor != nullptr);
     auto output_tensor = out_tensors_.at(0);
-    MS_ASSERT(output_tensor);
+    MS_ASSERT(output_tensor != nullptr);
     input_fp16_ = reinterpret_cast<float16_t *>(input_tensor->data_c());
-    MS_ASSERT(input_fp16_);
+    MS_ASSERT(input_fp16_ != nullptr);
     output_fp16_ = reinterpret_cast<float16_t *>(output_tensor->data_c());
-    MS_ASSERT(output_fp16_);
-    MS_ASSERT(sum_data_);
+    MS_ASSERT(output_fp16_ != nullptr);
+    MS_ASSERT(sum_data_ != nullptr);
     SoftmaxFp16(input_fp16_, output_fp16_, sum_data_, softmax_param_);
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
index 63505d35e6c..e310e07518c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc
@@ -73,6 +73,8 @@ void StackFp16CPUKernel::FreeBuffer() {
 }
 
 int StackFp16CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   data_type_size_ = sizeof(float16_t);
   if (!InferShapeDone()) {
     return RET_OK;
@@ -114,7 +116,9 @@ int StackFp16CPUKernel::Run() {
   // if output tensor is fp32, we need to transform
   if (malloc_out_) {
     auto out_tensor = out_tensors_.at(0);
-    Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->MutableData()), out_tensor->ElementsNum());
+    MS_ASSERT(out_tensor != nullptr);
+    MS_ASSERT(out_tensor->data_c() != nullptr);
+    Float16ToFloat32(out_buffer_, reinterpret_cast<float *>(out_tensor->data_c()), out_tensor->ElementsNum());
   }
   FreeBuffer();
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
index 87b956be941..d49759c3296 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc
@@ -34,6 +34,16 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_BatchNormGrad;
 
 namespace mindspore::kernel {
+namespace {
+constexpr int kNumInputDim_0 = 0;
+constexpr int kNumInputDim_1 = 1;
+constexpr int kNumInputDim_2 = 2;
+constexpr int kNumInputDim_3 = 3;
+constexpr int kNumInputDim_4 = 4;
+constexpr int kNumInputDim_5 = 4;
+constexpr int kNumOutputDim_2 = 2;
+constexpr int kNumJobs = 4;
+}  // namespace
 int BNGradCPUKernelFp16::ReSize() {
   auto *input_x = in_tensors_.at(1);
   int channels = input_x->shape().at(kNHWC_C);
@@ -52,16 +62,16 @@ int BNGradCPUKernelFp16::Init() {
 }
 
 int BNGradCPUKernelFp16::Execute(int task_id) {
-  auto *input_yt = in_tensors_.at(0);
-  auto *input_x = in_tensors_.at(1);
-  auto *input_scale = in_tensors_.at(2);
-  auto *input_mean = in_tensors_.at(3);
-  auto *input_var = in_tensors_.at(4);
+  auto *input_yt = in_tensors_.at(kNumInputDim_0);
+  auto *input_x = in_tensors_.at(kNumInputDim_1);
+  auto *input_scale = in_tensors_.at(kNumInputDim_2);
+  auto *input_mean = in_tensors_.at(kNumInputDim_3);
+  auto *input_var = in_tensors_.at(kNumInputDim_4);
 
   auto kernel_name = this->name();
   if (kernel_name.find("FusedBatchNormGradCPU") != std::string::npos) {
-    input_mean = in_tensors_.at(4);
-    input_var = in_tensors_.at(5);
+    input_mean = in_tensors_.at(kNumInputDim_4);
+    input_var = in_tensors_.at(kNumInputDim_5);
   }
   auto bn_param = reinterpret_cast<BNGradParameter *>(op_parameter_);
   int stage = stage_;
@@ -71,7 +81,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) {
 
   auto *output_dx = out_tensors_.at(0);
   auto *output_scale = out_tensors_.at(1);
-  auto *output_bias = out_tensors_.at(2);
+  auto *output_bias = out_tensors_.at(kNumOutputDim_2);
   int32_t batch = input_x->Batch();
   int32_t channels = input_x->Channel();
   int32_t spatial = input_x->Height() * input_x->Width();
@@ -91,7 +101,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) {
   count = (count < 0) ? 0 : count;
   switch (stage) {
     case 0: {
-      for (int job = task_id; job < 4; job += thread_num) {
+      for (int job = task_id; job < kNumJobs; job += thread_num) {
         switch (job) {
           case 0:
             var2InvarFp16(save_var, input_var->ElementsNum(), bn_param->epsilon_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
index d9dca4254d9..9c381dd6011 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc
@@ -41,7 +41,6 @@ int DropoutGradCPUKernelFp16::Init() {
     MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one";
     return RET_ERROR;
   }
-
   if (ratio >= 1.0f) {
     scale_ = 1.0f;
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
index dce310d9fb4..441b4b42d42 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc
@@ -30,6 +30,16 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_LayerNormGrad;
 
 namespace mindspore::kernel {
+namespace {
+constexpr int kNumInputDim_0 = 0;
+constexpr int kNumInputDim_1 = 1;
+constexpr int kNumInputDim_2 = 2;
+constexpr int kNumInputDim_3 = 3;
+constexpr int kNumInputDim_4 = 4;
+constexpr int kNumOutputDim_0 = 0;
+constexpr int kNumOutputDim_1 = 1;
+constexpr int kNumOutputDim_2 = 2;
+}  // namespace
 int LayerNormGradCPUKernelFp16::ReSize() { return RET_OK; }
 
 int LayerNormGradCPUKernelFp16::Init() {
@@ -63,14 +73,14 @@ int LayerNormGradCPUKernelFp16::Init() {
 }
 
 int LayerNormGradCPUKernelFp16::Execute(int task_id) {
-  auto input_x = in_tensors_.at(0);
-  auto input_dy = in_tensors_.at(1);
-  auto input_var = in_tensors_.at(2);
-  auto input_mean = in_tensors_.at(3);
-  auto input_gamma = in_tensors_.at(4);
-  auto output_dx = out_tensors_.at(0);
-  auto output_dg = out_tensors_.at(1);
-  auto output_db = out_tensors_.at(2);
+  auto input_x = in_tensors_.at(kNumInputDim_0);
+  auto input_dy = in_tensors_.at(kNumInputDim_1);
+  auto input_var = in_tensors_.at(kNumInputDim_2);
+  auto input_mean = in_tensors_.at(kNumInputDim_3);
+  auto input_gamma = in_tensors_.at(kNumInputDim_4);
+  auto output_dx = out_tensors_.at(kNumOutputDim_0);
+  auto output_dg = out_tensors_.at(kNumOutputDim_1);
+  auto output_db = out_tensors_.at(kNumOutputDim_2);
 
   float16_t *x = reinterpret_cast<float16_t *>(input_x->data_c());
   float16_t *dy = reinterpret_cast<float16_t *>(input_dy->data_c());
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
index a4d557d84ad..0f016987be8 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc
@@ -29,24 +29,23 @@ using mindspore::schema::PrimitiveType_AvgPoolGrad;
 using mindspore::schema::PrimitiveType_MaxPoolGrad;
 
 namespace mindspore::kernel {
+namespace {
+constexpr int kNumInputDim_2 = 2;
+constexpr int kNumShapeDim_2 = 2;
+}  // namespace
 int PoolingGradCPUKernelFp16::ReSize() {
   PoolingParameter *pool_param = reinterpret_cast<PoolingParameter *>(op_parameter_);
-
   auto in_shape = in_tensors_.at(0)->shape();
   auto out_shape = in_tensors_.at(1)->shape();
-
   if (pool_param->pool_mode_ == PoolMode_AvgPool) {
-    out_shape = in_tensors_.at(2)->shape();
+    out_shape = in_tensors_.at(kNumInputDim_2)->shape();
   }
-
   int input_h = in_shape.at(1);
-  int input_w = in_shape.at(2);
-
+  int input_w = in_shape.at(kNumShapeDim_2);
   if (pool_param->global_) {
     pool_param->window_w_ = input_w;
     pool_param->window_h_ = input_h;
   }
-
   pool_param->input_h_ = in_shape[kNHWC_H];
   pool_param->input_w_ = in_shape[kNHWC_W];
   pool_param->input_batch_ = in_shape[kNHWC_N];
@@ -55,7 +54,6 @@ int PoolingGradCPUKernelFp16::ReSize() {
   pool_param->output_w_ = out_shape[kNHWC_W];
   pool_param->output_batch_ = out_shape[kNHWC_N];
   pool_param->output_channel_ = out_shape[kNHWC_C];
-
   return RET_OK;
 }
 
@@ -73,11 +71,11 @@ int PoolingGradCPUKernelFp16::Execute(int task_id) {
     std::fill(output_ptr + task_id * stride * in_batch_size, output_ptr + ((task_id * stride) + count) * in_batch_size,
               0.f);
     if (pool_param->pool_mode_ == PoolMode_MaxPool) {
-      auto dy_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(2)->data_c());
+      auto dy_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kNumInputDim_2)->data_c());
       MaxPoolingFp16Grad(input_ptr + task_id * stride * in_batch_size, dy_ptr + task_id * stride * out_batch_size,
                          output_ptr + task_id * stride * in_batch_size, count, pool_param);
     } else {
-      input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(2)->data_c());
+      input_ptr = reinterpret_cast<float16_t *>(in_tensors_.at(kNumInputDim_2)->data_c());
       AvgPoolingFp16Grad(input_ptr + task_id * stride * out_batch_size, output_ptr + task_id * stride * in_batch_size,
                          count, pool_param);
     }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
index 74175c9e9b2..6cf30d6820e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc
@@ -46,7 +46,6 @@ int ResizeGradCPUKernelFp16::ReSize() {
   param->out_width_ = static_cast<size_t>(out_tensors_.at(0)->Width());
   param->height_scale_ = ScalingFp16(param->out_height_, param->in_height_, align_corners);
   param->width_scale_ = ScalingFp16(param->out_width_, param->in_width_, align_corners);
-
   return RET_OK;
 }
 
@@ -67,7 +66,6 @@ int ResizeGradCPUKernelFp16::Execute(int task_id) {
   }
   auto batch_size = in_tensors_.at(0)->Batch();
   auto channel = in_tensors_.at(0)->Channel();
-
   if (param->method == static_cast<int>(schema::ResizeMethod_NEAREST)) {
     ResizeNearestNeighborFp16Grad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param);
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
index 436af3d4bd1..c3e00309c34 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc
@@ -34,6 +34,8 @@ using mindspore::schema::PrimitiveType_Activation;
 
 namespace mindspore::kernel {
 int ActivationCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 &&
       type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID &&
       type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH &&
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
index bc601c4163f..8bfec5f4507 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc
@@ -31,6 +31,8 @@ using mindspore::schema::PrimitiveType_AdderFusion;
 
 namespace mindspore::kernel {
 int AdderCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = InitWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
@@ -71,13 +73,13 @@ int AdderCPUKernel::InitWeightBias() {
   int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane;
 
   auto origin_weight = reinterpret_cast<float *>(filter_tensor->MutableData());
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "malloc packed weight failed.";
     return RET_ERROR;
   }
   memset(packed_weight_, 0, pack_weight_size * sizeof(float));
-  RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane);
+  RowMajor2Col4Major(origin_weight, reinterpret_cast<float *>(packed_weight_), out_channel, in_channel * kernel_plane);
 
   bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_block * sizeof(float)));
   if (bias_data_ == nullptr) {
@@ -101,8 +103,8 @@ int AdderCPUKernel::RunImpl(int task_id) {
   auto ori_input_data = reinterpret_cast<float *>(input_tensor->MutableData());
   MS_ASSERT(ori_input_data != nullptr);
   auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->MutableData());
-  AdderFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), col_major_input_,
-            output_addr, task_id, conv_param_);
+  AdderFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
+            reinterpret_cast<float *>(bias_data_), col_major_input_, output_addr, task_id, conv_param_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
index 57ee60126d9..6966ef3ff69 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h
@@ -30,7 +30,7 @@ class AdderCPUKernel : public ConvolutionCPUKernel {
       : ConvolutionCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~AdderCPUKernel() override = default;
 
-  int InitWeightBias() override;
+  int InitWeightBias();
   int Init() override;
   int ReSize() override;
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
index a737ed2e08e..6f389144ebb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc
@@ -37,7 +37,11 @@ int AddNLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }
 }  // namespace
 
-int AddNCPUKernel::Init() { return RET_OK; }
+int AddNCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int AddNCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
index 104a754dcca..936927c8f05 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc
@@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_Eltwise;
 
 namespace mindspore::kernel {
 int ArithmeticCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto primitive_type = param_->op_parameter_.type_;
   if (primitive_type == schema::PrimitiveType_Eltwise) {
     switch (param_->eltwise_mode_) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
index 6a0138c4fce..d1ef6c994b7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc
@@ -60,6 +60,8 @@ ArithmeticSelfBoolFunc ArithmeticSelfCPUKernel::GetArithmeticSelfBoolFun(int pri
 }
 
 int ArithmeticSelfCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
index b03b63d9701..361d58eec3a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc
@@ -45,6 +45,8 @@ int BatchToSpaceCPUKernel::Processinput() {
 }
 
 int BatchToSpaceCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   MS_ASSERT(in_tensors_.at(0)->format() == mindspore::NHWC);
   if (!InferShapeDone()) {
     return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
index 8142d63c91c..c59bab81c6a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc
@@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_BatchNorm;
 
 namespace mindspore::kernel {
 int BatchnormCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
index dbf95716557..36579758f66 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc
@@ -47,7 +47,7 @@ int BiasCPUKernel::Run() {
   auto in = reinterpret_cast<float *>(in_tensors_.at(0)->MutableData());
   auto bias = reinterpret_cast<float *>(in_tensors_.at(1)->MutableData());
   auto out = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
-  size_t data_size = in_tensors_.at(0)->ElementsNum();
+  size_t data_size = static_cast<size_t>(in_tensors_.at(0)->ElementsNum());
   MS_ASSERT(ms_context_->allocator != nullptr);
   float *tile_in = reinterpret_cast<float *>(ms_context_->allocator->Malloc(data_size * sizeof(float)));
   float *tile_bias = reinterpret_cast<float *>(ms_context_->allocator->Malloc(data_size * sizeof(float)));
@@ -57,13 +57,15 @@ int BiasCPUKernel::Run() {
     ms_context_->allocator->Free(tile_bias);
     return RET_ERROR;
   }
-  auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_);
+  auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, static_cast<int>(data_size), bias_param_);
   ms_context_->allocator->Free(tile_in);
   ms_context_->allocator->Free(tile_bias);
   return ret;
 }
 
 int BiasCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
index cc845aff567..d747858aa00 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc
@@ -53,6 +53,8 @@ int BroadcastToCPUKernel::ReSize() {
 }
 
 int BroadcastToCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   shape_info_ = reinterpret_cast<BroadcastShapeInfo *>(malloc(sizeof(BroadcastShapeInfo)));
   if (shape_info_ == nullptr) {
     MS_LOG(ERROR) << "Malloc BroadcastShapeInfo failed!";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
index d7f5a75e63b..a7dc45c170e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc
@@ -36,6 +36,8 @@ int CastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }  // namespace
 
 int CastCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
index a90882da439..401ba4f74c9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc
@@ -26,6 +26,7 @@ using mindspore::schema::PrimitiveType_Concat;
 
 namespace mindspore::kernel {
 int ConcatCPUKernel::Init() {
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
index d39a7bf23b2..7f129a758ed 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc
@@ -23,10 +23,6 @@ using mindspore::lite::RET_OK;
 namespace mindspore::kernel {
 Convolution1x1CPUKernel::~Convolution1x1CPUKernel() {
   FreeTmpBuffer();
-  if (weight_ptr_ != nullptr) {
-    free(weight_ptr_);
-    weight_ptr_ = nullptr;
-  }
   if (matmul_param_ != nullptr) {
     delete matmul_param_;
     matmul_param_ = nullptr;
@@ -67,49 +63,6 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() {
   return;
 }
 
-int Convolution1x1CPUKernel::InitConv1x1BiasWeight() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  auto input_channel = filter_tensor->Channel();
-  if (input_channel < 0) {
-    MS_LOG(ERROR) << "get channel failed from filter_tensor";
-    return RET_ERROR;
-  }
-  auto output_channel = filter_tensor->Batch();
-  if (output_channel < 0) {
-    MS_LOG(ERROR) << "get batch failed from filter_tensor";
-    return RET_ERROR;
-  }
-
-  if (in_tensors_.size() == 3) {
-    int size = UP_ROUND(output_channel, col_tile_) * sizeof(float);
-    int weight_size = output_channel * sizeof(float);
-    bias_data_ = malloc(size);
-    if (bias_data_ == nullptr) {
-      MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
-      return RET_ERROR;
-    }
-    memcpy(bias_data_, origin_bias_, weight_size);
-    memset(reinterpret_cast<char *>(bias_data_) + weight_size, 0, size - weight_size);
-  }
-
-  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
-  int down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float);
-  weight_ptr_ = reinterpret_cast<float *>(malloc(size));
-  if (weight_ptr_ == nullptr) {
-    MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!";
-    return RET_ERROR;
-  }
-  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
-#ifdef ENABLE_AVX
-  RowMajor2Col16Major(origin_weight_, weight_ptr_, output_channel, input_channel);
-#elif defined(ENABLE_ARM32)
-  RowMajor2Col4Major(origin_weight_, weight_ptr_, output_channel, input_channel);
-#else
-  RowMajor2Col8Major(origin_weight_, weight_ptr_, output_channel, input_channel);
-#endif
-  return RET_OK;
-}
-
 int Convolution1x1CPUKernel::InitConv1x1Param() {
   if ((matmul_param_->row_ > (row_tile_ * op_parameter_->thread_num_)) && (matmul_param_->row_ > matmul_param_->col_)) {
     multi_thread_by_hw_ = true;
@@ -144,6 +97,8 @@ int Convolution1x1CPUKernel::InitConv1x1Param() {
 }
 
 int Convolution1x1CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_AVX
   row_tile_ = C6NUM;
   col_tile_ = C16NUM;
@@ -162,7 +117,7 @@ int Convolution1x1CPUKernel::Init() {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
   }
-  int error_code = InitConv1x1BiasWeight();
+  int error_code = InitConvWeightBias();
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "Convolution1x1 init weight and bias failed.";
     return error_code;
@@ -187,7 +142,7 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) {
     return RET_OK;
   }
   auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id;
-  MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_,
+  MatMulOpt(pack_input_, reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_,
             output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_,
             matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc);
   return RET_OK;
@@ -218,9 +173,9 @@ int Convolution1x1CPUKernel::DoConv1x1Hw(int task_id) {
   for (int i = 0; i < cur_hw_; i += row_tile_) {
     int cur_rows = (cur_hw_ - i >= row_tile_) ? row_tile_ : (cur_hw_ - i);
     PackMatmulInput(cur_intput, thread_pack_input, cur_rows, matmul_param_->deep_);
-    MatMulOpt(thread_pack_input, weight_ptr_, cur_output, reinterpret_cast<float *>(bias_data_),
-              matmul_param_->act_type_, matmul_param_->deep_, cur_rows, matmul_param_->col_, matmul_param_->col_,
-              OutType_Nhwc);
+    MatMulOpt(thread_pack_input, reinterpret_cast<float *>(packed_weight_), cur_output,
+              reinterpret_cast<float *>(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_rows,
+              matmul_param_->col_, matmul_param_->col_, OutType_Nhwc);
     cur_intput += row_tile_ * matmul_param_->deep_;
     cur_output += row_tile_ * matmul_param_->col_;
   }
@@ -250,8 +205,9 @@ int Convolution1x1CPUKernel::Run() {
     MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!";
     return RET_MEMORY_FAILED;
   }
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
 
   for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) {
@@ -292,24 +248,49 @@ void Convolution1x1CPUKernel::PackWeight() {
     return;
   }
   auto output_channel = filter_tensor->Batch();
-  if (input_channel < 0) {
+  if (output_channel < 0) {
     MS_LOG(ERROR) << "get channel failed from filter_tensor.";
     return;
   }
 
-  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
-  int down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float);
-  memset(reinterpret_cast<char *>(weight_ptr_) + down_size, 0, size - down_size);
-  MS_ASSERT(filter_tensor->data_c() != nullptr);
+  void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
 #ifdef ENABLE_AVX
-  RowMajor2Col16Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
+  RowMajor2Col16Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                      output_channel, input_channel);
 #elif defined(ENABLE_ARM32)
-  RowMajor2Col4Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
+  RowMajor2Col4Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                     output_channel, input_channel);
 #else
-  RowMajor2Col8Major(reinterpret_cast<float *>(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel);
+  RowMajor2Col8Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                     output_channel, input_channel);
 #endif
 }
 
+int Convolution1x1CPUKernel::MallocWeightBiasData() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = filter_tensor->Channel();
+  auto output_channel = filter_tensor->Batch();
+  int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
+  packed_weight_ = malloc(size);
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!";
+    return RET_ERROR;
+  }
+  memset(reinterpret_cast<char *>(packed_weight_), 0, size);
+
+  if (in_tensors_.size() == 3) {
+    size = UP_ROUND(output_channel, col_tile_) * sizeof(float);
+    bias_data_ = malloc(size);
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!";
+      return RET_ERROR;
+    }
+    memset(reinterpret_cast<char *>(bias_data_), 0, size);
+  }
+  return RET_OK;
+}
+
 int Convolution1x1CPUKernel::Eval() {
   auto ret = InnerKernel::Eval();
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
index 22b054afe4a..19d3d040ec7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h
@@ -35,9 +35,7 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
   Convolution1x1CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                           const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                           float *origin_weight, float *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
   ~Convolution1x1CPUKernel();
   int Init() override;
   int Run() override;
@@ -50,11 +48,11 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
 
  private:
   int InitConv1x1Param();
-  int InitConv1x1BiasWeight();
   void InitConv1x1MatmulParam();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreeTmpBuffer();
   void PackMatmulInput(const float *src_ptr, float *dst_ptr, int row, int col) const;
-  void PackWeight();
 
  private:
   MatMulParameter *matmul_param_ = nullptr;
@@ -62,9 +60,6 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel {
   bool multi_thread_by_hw_ = false;
   int thread_count_ = 0;
   int thread_stride_ = 0;
-  float *origin_weight_;  // do not free
-  float *origin_bias_;    // do not free
-  float *weight_ptr_ = nullptr;
   float *pack_input_ = nullptr;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
index 72140e99963..27411fbb226 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc
@@ -73,6 +73,9 @@ int ConvolutionDelegateCPUKernel::GetWeightAndBias() {
 }
 
 int ConvolutionDelegateCPUKernel::GetWeightData() {
+  if (in_tensors_.at(kWeightIndex)->data_c() == nullptr) {
+    return RET_OK;
+  }
   if (InferShapeDone()) {
     origin_weight_ = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c());
     MS_ASSERT(origin_weight_ != nullptr);
@@ -107,6 +110,8 @@ int ConvolutionDelegateCPUKernel::GetBiasData() {
 }
 
 int ConvolutionDelegateCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = GetWeightAndBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Get weight and bias failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
index d41f0896423..b97e2ba0ec4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h
@@ -40,7 +40,7 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
   int ReSize() override;
   int Run() override { return conv_kernel_->Run(); }
 
-  void set_in_tensor(lite::Tensor *in_tensor, int index) override {
+  void set_in_tensor(lite::Tensor *in_tensor, size_t index) override {
     MS_ASSERT(index < in_tensors_.size());
     this->in_tensors_[index] = in_tensor;
     if (conv_kernel_ != nullptr) {
@@ -48,7 +48,7 @@ class ConvolutionDelegateCPUKernel : public InnerKernel {
     }
   }
 
-  void set_out_tensor(lite::Tensor *out_tensor, int index) override {
+  void set_out_tensor(lite::Tensor *out_tensor, size_t index) override {
     MS_ASSERT(index < out_tensors_.size());
     this->out_tensors_[index] = out_tensor;
     if (conv_kernel_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
index 6973da5b212..d7e090ba343 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc
@@ -24,51 +24,12 @@ using mindspore::lite::RET_MEMORY_FAILED;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-ConvolutionDepthwise3x3CPUKernel::~ConvolutionDepthwise3x3CPUKernel() {
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
-}
-
-int ConvolutionDepthwise3x3CPUKernel::InitWeightBias() {
-  // init weight: k, h, w, c; k == group == output_channel, c == 1
-  auto weight_tensor = in_tensors_[kWeightIndex];
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  int channel = weight_tensor->Batch();
-  int c4 = UP_ROUND(channel, C4NUM);
-  int pack_weight_size = c4 * C12NUM;
-
-  if (packed_weight_ == nullptr) {
-    packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-    if (packed_weight_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
-      return RET_ERROR;
-    }
-  }
-  PackWeightConvDw3x3Fp32(origin_weight, packed_weight_, channel);
-
-  if (bias_data_ == nullptr) {
-    bias_data_ = reinterpret_cast<float *>(malloc(c4 * sizeof(float)));
-    if (bias_data_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc buffer failed.";
-      return RET_ERROR;
-    }
-  }
-  memset(bias_data_, 0, c4 * sizeof(float));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_[kBiasIndex];
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
-  }
-
-  return RET_OK;
-}
-
 int ConvolutionDepthwise3x3CPUKernel::Init() {
-  auto ret = InitWeightBias();
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -98,8 +59,8 @@ int ConvolutionDepthwise3x3CPUKernel::Execute(int task_id) {
   int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_);
   int start_oh = step_oh * task_id;
   int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_);
-  ConvDw3x3(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
-            start_oh, end_oh);
+  ConvDw3x3(output_ptr_, buffer, input_ptr_, reinterpret_cast<float *>(packed_weight_),
+            reinterpret_cast<float *>(bias_data_), conv_param_, start_oh, end_oh);
   return RET_OK;
 }
 
@@ -122,13 +83,10 @@ int ConvolutionDepthwise3x3CPUKernel::Run() {
     MS_LOG(ERROR) << "ConvDw3x3Run failed to allocate buffer";
     return RET_MEMORY_FAILED;
   }
-
-  if (IsTrain() && IsTrainable()) {
-    if (InitWeightBias() != RET_OK) {
-      ctx_->allocator->Free(buffer_);
-      MS_LOG(ERROR) << "Convolution depthwise 3x3 run InitWeightBias failed.";
-      return RET_ERROR;
-    }
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    ctx_->allocator->Free(buffer_);
+    return RET_ERROR;
   }
 
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -153,12 +111,44 @@ int ConvolutionDepthwise3x3CPUKernel::Eval() {
     return ret;
   }
   if (IsTrainable()) {
-    if (InitWeightBias() != RET_OK) {
+    if (InitConvWeightBias() != RET_OK) {
       MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 Eval:InitWeightBias failed.";
       return RET_ERROR;
     }
   }
   return RET_OK;
 }
+
+void ConvolutionDepthwise3x3CPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int channel = weight_tensor->Batch();
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackWeightConvDw3x3Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), channel);
+}
+
+int ConvolutionDepthwise3x3CPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int channel = weight_tensor->Batch();
+  int c4 = UP_ROUND(channel, C4NUM);
+  if (packed_weight_ == nullptr) {
+    int pack_weight_size = c4 * C12NUM;
+    packed_weight_ = malloc(pack_weight_size * sizeof(float));
+    if (packed_weight_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+  }
+
+  if (bias_data_ == nullptr) {
+    bias_data_ = malloc(c4 * sizeof(float));
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc buffer failed.";
+      return RET_ERROR;
+    }
+  }
+  memset(bias_data_, 0, c4 * sizeof(float));
+  return RET_OK;
+}
 }  // namespace mindspore::kernel
 #endif
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
index 57baad587d4..bbed4403552 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h
@@ -28,19 +28,20 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                    const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~ConvolutionDepthwise3x3CPUKernel() override;
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+  ~ConvolutionDepthwise3x3CPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
-  float *packed_weight_ = nullptr;
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
   float *buffer_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
index 5e4ff8f7270..5f3d171a311 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc
@@ -22,51 +22,12 @@ using mindspore::lite::RET_INFER_INVALID;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() {
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
-}
-
-int ConvolutionDepthwiseCPUKernel::InitWeightBias() {
-  // init weight: k, h, w, c; k == group == output_channel, c == 1
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
-  int channel = weight_tensor->Batch();
-  int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width();
-  if (pack_weight_size >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(float))) {
-    MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size;
-    return RET_ERROR;
-  }
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), channel);
-
-  bias_data_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-
-  memset(bias_data_, 0, channel * sizeof(float));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_[kBiasIndex];
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
-  }
-
-  return RET_OK;
-}
-
 int ConvolutionDepthwiseCPUKernel::Init() {
-  auto ret = InitWeightBias();
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -90,8 +51,8 @@ int ConvolutionDepthwiseCPUKernel::ReSize() {
 }
 
 int ConvolutionDepthwiseCPUKernel::Execute(int task_id) {
-  auto ret =
-    ConvDw(output_ptr_, input_ptr_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_, task_id);
+  auto ret = ConvDw(output_ptr_, input_ptr_, reinterpret_cast<float *>(packed_weight_),
+                    reinterpret_cast<float *>(bias_data_), conv_param_, task_id);
   return ret;
 }
 
@@ -106,8 +67,9 @@ int ConvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }
 
 int ConvolutionDepthwiseCPUKernel::Run() {
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
 
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -127,11 +89,33 @@ int ConvolutionDepthwiseCPUKernel::Run() {
 
 void ConvolutionDepthwiseCPUKernel::PackWeight() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
   MS_ASSERT(origin_weight != nullptr);
+  PackWeightKHWToHWKFp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                         weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+}
 
-  PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
-                         weight_tensor->Batch());
+int ConvolutionDepthwiseCPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int channel = weight_tensor->Batch();
+  int pack_weight_size = weight_tensor->Batch() * weight_tensor->Height() * weight_tensor->Width();
+  if (pack_weight_size >= std::numeric_limits<int>::max() / static_cast<int>(sizeof(float))) {
+    MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size;
+    return RET_ERROR;
+  }
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+
+  bias_data_ = malloc(channel * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, channel * sizeof(float));
+  return RET_OK;
 }
 
 int ConvolutionDepthwiseCPUKernel::Eval() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
index 652d87eb798..622fe326136 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h
@@ -28,20 +28,20 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                 const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
-  ~ConvolutionDepthwiseCPUKernel() override;
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
+  ~ConvolutionDepthwiseCPUKernel() override {}
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
-  void PackWeight();
-  float *packed_weight_ = nullptr;
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
index bbbfb934bec..66ef6c781cb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc
@@ -23,10 +23,6 @@ using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
 ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel() {
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
   if (zero_ptr_ != nullptr) {
     free(zero_ptr_);
     zero_ptr_ = nullptr;
@@ -37,60 +33,12 @@ ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel()
   }
 }
 
-int ConvolutionDepthwiseIndirectCPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
-  auto weight_tensor = in_tensors_[kWeightIndex];
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
-#ifdef ENABLE_AVX
-  int div_flag = C8NUM;
-#else
-  int div_flag = C4NUM;
-#endif
-  int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag);
-  int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
-
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-#ifdef ENABLE_AVX
-  PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
-                                    weight_tensor->Batch());
-#else
-  PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
-                                    weight_tensor->Batch());
-#endif
-
-  bias_data_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_[kBiasIndex];
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
-  } else {
-    memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
-  }
-
-  // malloc zero ptr
-  zero_ptr_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
-  if (zero_ptr_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float));
-  return RET_OK;
-}
-
 int ConvolutionDepthwiseIndirectCPUKernel::Init() {
-  auto ret = InitWeightBias();
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -137,8 +85,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::ReSize() {
 }
 
 int ConvolutionDepthwiseIndirectCPUKernel::Execute(int task_id) {
-  ConvDwIndirection(output_ptr_, indirect_buffer_, packed_weight_, reinterpret_cast<float *>(bias_data_), zero_ptr_,
-                    conv_param_, task_id);
+  ConvDwIndirection(output_ptr_, indirect_buffer_, reinterpret_cast<float *>(packed_weight_),
+                    reinterpret_cast<float *>(bias_data_), zero_ptr_, conv_param_, task_id);
   return RET_OK;
 }
 
@@ -193,11 +141,10 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
   } else {
     packed_input_ = input_ptr;
   }
-
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
-
   auto output_tensor = out_tensors_.at(kOutputIndex);
   output_ptr_ = reinterpret_cast<float *>(output_tensor->data_c());
   MS_ASSERT(output_ptr_ != nullptr);
@@ -215,18 +162,49 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() {
 }
 
 void ConvolutionDepthwiseIndirectCPUKernel::PackWeight() {
-  auto weight_tensor = in_tensors_[kWeightIndex];
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
   MS_ASSERT(origin_weight != nullptr);
 #ifdef ENABLE_AVX
-  PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
-                                    weight_tensor->Batch());
+  PackDepthwiseIndirectWeightC8Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                                    weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch());
 #else
-  PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(),
-                                    weight_tensor->Batch());
+  PackDepthwiseIndirectWeightC4Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                                    weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch());
 #endif
 }
 
+int ConvolutionDepthwiseIndirectCPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_[kWeightIndex];
+#ifdef ENABLE_AVX
+  int div_flag = C8NUM;
+#else
+  int div_flag = C4NUM;
+#endif
+  int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag);
+  int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  bias_data_ = malloc(batch_flag * div_flag * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, batch_flag * div_flag * sizeof(float));
+
+  // malloc zero ptr
+  zero_ptr_ = reinterpret_cast<float *>(malloc(batch_flag * div_flag * sizeof(float)));
+  if (zero_ptr_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float));
+  return RET_OK;
+}
+
 int ConvolutionDepthwiseIndirectCPUKernel::Eval() {
   auto ret = InnerKernel::Eval();
   if (ret != RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
index 80820456d10..1f404d5c5c4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h
@@ -27,26 +27,26 @@ class ConvolutionDepthwiseIndirectCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseIndirectCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                         const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~ConvolutionDepthwiseIndirectCPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
   int MallocIndirectBuffer();
   int MallocPackedInput();
-  void PackWeight();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   int step_w = 0;
   int step_h = 0;
   float **indirect_buffer_ = nullptr;
   float *zero_ptr_ = nullptr;
-  float *packed_weight_ = nullptr;
   float *output_ptr_ = nullptr;
   float *packed_input_ = nullptr;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
index 8c4486f2068..b8f0475f921 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc
@@ -27,48 +27,6 @@ ConvolutionDepthwiseSWCPUKernel::~ConvolutionDepthwiseSWCPUKernel() {
     delete sliding_;
     sliding_ = nullptr;
   }
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
-}
-
-int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
-  int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM);
-  int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
-
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                       weight_tensor->Batch());
-
-  int malloc_size = MSMAX(conv_param_->output_channel_, C4NUM * OC4);
-  if (malloc_size <= 0) {
-    MS_LOG(ERROR) << "malloc size is wrong";
-    return RET_ERROR;
-  }
-  bias_data_ = reinterpret_cast<float *>(malloc(malloc_size * sizeof(float)));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-
-  memset(bias_data_, 0, malloc_size * sizeof(float));
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
-  }
-
-  conv_param_->thread_num_ = MSMIN(thread_count_, OC4);
-  return RET_OK;
 }
 
 int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
@@ -94,15 +52,17 @@ int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() {
 }
 
 int ConvolutionDepthwiseSWCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   sliding_ = new (std::nothrow) SlidingWindowParam;
   if (sliding_ == nullptr) {
     MS_LOG(ERROR) << "new sliding window param failed.";
     return RET_ERROR;
   }
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -127,8 +87,8 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() {
 }
 
 int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) {
-  ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
-               sliding_, task_id);
+  ConvDwSWFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
+               reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
 }
 
@@ -149,9 +109,9 @@ int ConvolutionDepthwiseSWCPUKernel::Run() {
     FreePackedInputOutput();
     return RET_ERROR;
   }
-
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
 
   auto input_tensor = in_tensors_.at(kInputIndex);
@@ -195,10 +155,34 @@ void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() {
 
 void ConvolutionDepthwiseSWCPUKernel::PackWeight() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
   MS_ASSERT(origin_weight != nullptr);
-  PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                       weight_tensor->Batch());
+  PackNCHWToNC4HW4Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), 1,
+                       weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+}
+
+int ConvolutionDepthwiseSWCPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM);
+  int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  int malloc_size = MSMAX(conv_param_->output_channel_, C4NUM * OC4);
+  if (malloc_size <= 0) {
+    MS_LOG(ERROR) << "malloc size is wrong";
+    return RET_ERROR;
+  }
+  bias_data_ = malloc(malloc_size * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, malloc_size * sizeof(float));
+  conv_param_->thread_num_ = MSMIN(thread_count_, OC4);
+  return RET_OK;
 }
 
 int ConvolutionDepthwiseSWCPUKernel::Eval() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
index 690096fc113..f5294723bef 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h
@@ -27,23 +27,23 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseSWCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~ConvolutionDepthwiseSWCPUKernel() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
   int InitPackedInputOutput();
   void FreePackedInputOutput();
-  void PackWeight();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   SlidingWindowParam *sliding_ = nullptr;
-  float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
   float *packed_output_ = nullptr;
   bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
index 5f79d7cbcf1..2ffecf8d98c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc
@@ -28,43 +28,6 @@ ConvolutionDepthwiseSWCPUKernelX86::~ConvolutionDepthwiseSWCPUKernelX86() {
     delete sliding_;
     sliding_ = nullptr;
   }
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
-  if (packed_bias_ != nullptr) {
-    free(packed_bias_);
-    packed_bias_ = nullptr;
-  }
-}
-
-int ConvolutionDepthwiseSWCPUKernelX86::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  origin_weight_ = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight_ != nullptr);
-  int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
-  int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width();
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc packed_weight_ is failed!";
-    return RET_NULL_PTR;
-  }
-  PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin,
-                       weight_tensor->Channel(), packed_weight_, origin_weight_);
-  if (in_tensors_.size() == kInputSize2) {
-    auto bias_size = oc_algin * oc_tile_;
-    auto bias_tensor = in_tensors_.at(kBiasIndex);
-    auto ori_bias = reinterpret_cast<float *>(bias_tensor->data_c());
-    packed_bias_ = reinterpret_cast<float *>(malloc(bias_size * sizeof(float)));
-    if (packed_bias_ == nullptr) {
-      MS_LOG(ERROR) << "Malloc bias_data buffer failed.";
-      return RET_NULL_PTR;
-    }
-    memset(packed_bias_, 0, bias_size * sizeof(float));
-    memcpy(packed_bias_, ori_bias, bias_tensor->ElementsNum() * sizeof(float));
-  }
-  return RET_OK;
 }
 
 int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() {
@@ -94,6 +57,8 @@ int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() {
 }
 
 int ConvolutionDepthwiseSWCPUKernelX86::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_AVX
   oc_tile_ = C8NUM;
 #endif
@@ -103,9 +68,9 @@ int ConvolutionDepthwiseSWCPUKernelX86::Init() {
     return RET_ERROR;
   }
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed.";
+    MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed.";
     return RET_ERROR;
   }
   if (!InferShapeDone()) {
@@ -121,8 +86,8 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() {
 }
 
 int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) {
-  DepthwiseSWAvxFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(packed_bias_),
-                     conv_param_, sliding_, task_id);
+  DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
+                     reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
 }
 
@@ -143,11 +108,10 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() {
     FreePackedInputOutput();
     return RET_ERROR;
   }
-
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
-
   auto input_tensor = in_tensors_.at(kInputIndex);
   auto input_ptr = reinterpret_cast<float *>(input_tensor->data_c());
   MS_ASSERT(input_ptr != nullptr);
@@ -194,8 +158,33 @@ void ConvolutionDepthwiseSWCPUKernelX86::FreePackedInputOutput() {
 void ConvolutionDepthwiseSWCPUKernelX86::PackWeight() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
   PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin,
-                       weight_tensor->Channel(), packed_weight_, origin_weight_);
+                       weight_tensor->Channel(), reinterpret_cast<float *>(packed_weight_),
+                       reinterpret_cast<float *>(origin_weight));
+}
+
+int ConvolutionDepthwiseSWCPUKernelX86::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_);
+  int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width();
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc packed_weight_ is failed!";
+    return RET_NULL_PTR;
+  }
+
+  if (in_tensors_.size() == kInputSize2) {
+    auto bias_size = oc_algin * oc_tile_;
+    bias_data_ = malloc(bias_size * sizeof(float));
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc bias_data buffer failed.";
+      return RET_NULL_PTR;
+    }
+    memset(bias_data_, 0, bias_size * sizeof(float));
+  }
+  return RET_OK;
 }
 
 int ConvolutionDepthwiseSWCPUKernelX86::Eval() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
index fe060df82a7..c4bc1ffed67 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h
@@ -27,28 +27,26 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseSWCPUKernelX86(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                      const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~ConvolutionDepthwiseSWCPUKernelX86() override;
 
   int Init() override;
   int ReSize() override;
   int Run() override;
 
-  int InitWeightBias();
   int Execute(int task_id);
   int Eval() override;
 
  private:
   void FreePackedInputOutput();
   int InitPackedInputOutput();
-  void PackWeight();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   int oc_tile_ = C8NUM;  // in x86 avx
   SlidingWindowParam *sliding_ = nullptr;
-  float *packed_weight_ = nullptr;
-  float *packed_bias_ = nullptr;
   float *packed_input_ = nullptr;
   float *packed_output_ = nullptr;
-  float *origin_weight_ = nullptr;
   bool input_need_align_ = false;
   bool output_need_align_ = false;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
index 07ad676555c..54cca8410f4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc
@@ -34,58 +34,6 @@ namespace mindspore::kernel {
 #else
 #define OC_BLOCK C8NUM
 #endif
-
-int ConvolutionCPUKernel::InitWeightBias() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  int32_t in_channel = filter_tensor->Channel();
-  if (in_channel < 0) {
-    MS_LOG(ERROR) << "get channel from filter_tensor failed.";
-    return RET_ERROR;
-  }
-  int32_t out_channel = filter_tensor->Batch();
-  if (out_channel < 0) {
-    MS_LOG(ERROR) << "get batch from filter_tensor failed.";
-    return RET_ERROR;
-  }
-  conv_param_->input_channel_ = in_channel;
-  conv_param_->output_channel_ = out_channel;
-  int32_t kernel_plane = filter_tensor->Height() * filter_tensor->Width();
-  if (kernel_plane < 0) {
-    MS_LOG(ERROR) << "get height and width from filter_tensor failed.";
-    return RET_ERROR;
-  }
-  size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK);
-  size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
-
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "malloc packed weight failed.";
-    return RET_ERROR;
-  }
-  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
-#ifdef ENABLE_AVX
-  RowMajor2Col16Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane);
-#elif defined(ENABLE_ARM32)
-  RowMajor2Col4Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane);
-#else
-  RowMajor2Col8Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane);
-#endif
-
-  bias_data_ = reinterpret_cast<float *>(malloc(oc_block_num * sizeof(float)));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "malloc bias failed.";
-    return RET_ERROR;
-  }
-  memset(bias_data_, 0, oc_block_num * sizeof(float));
-
-  if (in_tensors_.size() == kInputSize2) {
-    memcpy(bias_data_, origin_bias_, out_channel * sizeof(float));
-  } else {
-    MS_ASSERT(in_tensors_.size() == kInputSize1);
-  }
-  return RET_OK;
-}
-
 int ConvolutionCPUKernel::InitTmpBuffer() {
   MS_ASSERT(ctx_->allocator != nullptr);
 
@@ -112,7 +60,9 @@ int ConvolutionCPUKernel::InitTmpBuffer() {
 }
 
 int ConvolutionCPUKernel::Init() {
-  auto ret = InitWeightBias();
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -137,8 +87,8 @@ int ConvolutionCPUKernel::ReSize() {
 int ConvolutionCPUKernel::RunImpl(int task_id) {
   auto ori_input_data = reinterpret_cast<float *>(in_tensors_.at(kInputIndex)->data_c());
   auto output_addr = reinterpret_cast<float *>(out_tensors_.at(kOutputIndex)->data_c());
-  ConvFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), col_major_input_,
-           output_addr, task_id, conv_param_);
+  ConvFp32(ori_input_data, packed_input_, reinterpret_cast<float *>(packed_weight_),
+           reinterpret_cast<float *>(bias_data_), col_major_input_, output_addr, task_id, conv_param_);
   return RET_OK;
 }
 
@@ -159,10 +109,11 @@ int ConvolutionCPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  if (IsTrain() && IsTrainable()) {
-    PackWeight();
-  }
 
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
   ret = ParallelLaunch(this->ms_context_, ConvolutionImpl, this, thread_count_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "conv error error_code[" << ret << "]";
@@ -188,20 +139,45 @@ void ConvolutionCPUKernel::PackWeight() {
     MS_LOG(ERROR) << "get height and width from filter_tensor failed.";
     return;
   }
-  size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK);
-  size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
-
-  auto origin_weight = reinterpret_cast<float *>(filter_tensor->data_c());
-  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
+  void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
 #ifdef ENABLE_AVX
-  RowMajor2Col16Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane);
+  RowMajor2Col16Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel,
+                      in_channel * kernel_plane);
 #elif defined(ENABLE_ARM32)
-  RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane);
+  RowMajor2Col4Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel,
+                     in_channel * kernel_plane);
 #else
-  RowMajor2Col8Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane);
+  RowMajor2Col8Major(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), out_channel,
+                     in_channel * kernel_plane);
 #endif
 }
 
+int ConvolutionCPUKernel::MallocWeightBiasData() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  size_t in_channel = filter_tensor->Channel();
+  size_t out_channel = filter_tensor->Batch();
+  conv_param_->input_channel_ = in_channel;
+  conv_param_->output_channel_ = out_channel;
+  size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK);
+  size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width();
+  size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "malloc packed weight failed.";
+    return RET_ERROR;
+  }
+  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
+
+  bias_data_ = malloc(oc_block_num * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "malloc bias failed.";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, oc_block_num * sizeof(float));
+  return RET_OK;
+}
+
 int ConvolutionCPUKernel::Eval() {
   InnerKernel::Eval();
   if (IsTrainable()) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
index bf1afb2a7a7..64d070f5ef4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h
@@ -28,18 +28,10 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                        const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx, float *origin_weight,
                        float *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
-  ~ConvolutionCPUKernel() override {
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-      packed_weight_ = nullptr;
-    }
-  }
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
+  ~ConvolutionCPUKernel() override {}
 
   int Init() override;
-  virtual int InitWeightBias();
   int InitTmpBuffer();
   int ReSize() override;
   int Run() override;
@@ -48,7 +40,8 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   int Eval() override;
 
  protected:
-  void PackWeight();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreeTmpBuffer() {
     if (packed_input_ != nullptr) {
       ctx_->allocator->Free(packed_input_);
@@ -61,9 +54,6 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   }
 
  protected:
-  float *origin_weight_;  // do not free
-  float *origin_bias_;    // do not free
-  float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
   float *col_major_input_ = nullptr;
 };
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
index b8ce82b5c1e..8e2ab33b3a6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc
@@ -28,37 +28,6 @@ using mindspore::lite::RET_NULL_PTR;
 using mindspore::lite::RET_OK;
 
 namespace mindspore::kernel {
-int ConvolutionSWCPUKernel::InitWeightBias() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  auto input_channel = filter_tensor->Channel();
-  auto output_channel = filter_tensor->Batch();
-  int kernel_h = filter_tensor->Height();
-  int kernel_w = filter_tensor->Width();
-  conv_param_->input_channel_ = input_channel;
-  conv_param_->output_channel_ = output_channel;
-  int kernel_plane = kernel_h * kernel_w;
-  int oc_block_num = UP_DIV(output_channel, oc_tile_);
-  int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane;
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "malloc packed weight failed.";
-    return RET_NULL_PTR;
-  }
-  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
-  PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, packed_weight_,
-                       ori_weight_data_);
-  if (in_tensors_.size() == kInputSize2) {
-    packed_bias_ = reinterpret_cast<float *>(malloc(oc_block_num * oc_tile_ * sizeof(float)));
-    if (packed_bias_ == nullptr) {
-      MS_LOG(ERROR) << "malloc bias failed.";
-      return RET_NULL_PTR;
-    }
-    memset(packed_bias_, 0, oc_block_num * oc_tile_ * sizeof(float));
-    memcpy(packed_bias_, ori_bias_data_, output_channel * sizeof(float));
-  }
-  return RET_OK;
-}
-
 int ConvolutionSWCPUKernel::Init() {
   oc_tile_ = C8NUM;
   oc_res_ = conv_param_->output_channel_ % oc_tile_;
@@ -67,7 +36,7 @@ int ConvolutionSWCPUKernel::Init() {
     in_tile_ = C8NUM;
     ic_res_ = conv_param_->input_channel_ % in_tile_;
   }
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -108,11 +77,11 @@ int ConvolutionSWCPUKernel::ReSize() {
 
 int ConvolutionSWCPUKernel::RunImpl(int task_id) {
   if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) {
-    Conv1x1SWFp32(input_data_, packed_weight_, reinterpret_cast<float *>(packed_bias_), output_data_, task_id,
-                  conv_param_, slidingWindow_param_);
+    Conv1x1SWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
+                  output_data_, task_id, conv_param_, slidingWindow_param_);
   } else {
-    ConvSWFp32(input_data_, packed_weight_, reinterpret_cast<float *>(packed_bias_), output_data_, task_id, conv_param_,
-               slidingWindow_param_);
+    ConvSWFp32(input_data_, reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(bias_data_),
+               output_data_, task_id, conv_param_, slidingWindow_param_);
   }
   return RET_OK;
 }
@@ -178,6 +147,12 @@ int ConvolutionSWCPUKernel::Run() {
     FreeTmpBuffer();
     return ret;
   }
+
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
+
   int error_code = ParallelLaunch(this->ms_context_, ConvolutionSWImpl, this, thread_count_);
   if (error_code != RET_OK) {
     MS_LOG(ERROR) << "conv error error_code[" << error_code << "]";
@@ -192,5 +167,47 @@ int ConvolutionSWCPUKernel::Run() {
   FreeTmpBuffer();
   return RET_OK;
 }
+
+void ConvolutionSWCPUKernel::PackWeight() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = filter_tensor->Channel();
+  auto output_channel = filter_tensor->Batch();
+  int kernel_h = filter_tensor->Height();
+  int kernel_w = filter_tensor->Width();
+  int oc_block_num = UP_DIV(output_channel, oc_tile_);
+  void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel,
+                       reinterpret_cast<float *>(packed_weight_), reinterpret_cast<float *>(origin_weight));
+}
+
+int ConvolutionSWCPUKernel::MallocWeightBiasData() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = filter_tensor->Channel();
+  auto output_channel = filter_tensor->Batch();
+  int kernel_h = filter_tensor->Height();
+  int kernel_w = filter_tensor->Width();
+  conv_param_->input_channel_ = input_channel;
+  conv_param_->output_channel_ = output_channel;
+  int kernel_plane = kernel_h * kernel_w;
+  int oc_block_num = UP_DIV(output_channel, oc_tile_);
+  int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane;
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "malloc packed weight failed.";
+    return RET_NULL_PTR;
+  }
+  memset(packed_weight_, 0, pack_weight_size * sizeof(float));
+
+  if (in_tensors_.size() == kInputSize2) {
+    bias_data_ = malloc(oc_block_num * oc_tile_ * sizeof(float));
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "malloc bias failed.";
+      return RET_NULL_PTR;
+    }
+    memset(bias_data_, 0, oc_block_num * oc_tile_ * sizeof(float));
+  }
+  return RET_OK;
+}
 }  // namespace mindspore::kernel
 #endif  // ENABLE_AVX
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
index a72878e81a2..5112f0dd9e9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h
@@ -27,19 +27,9 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionSWCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                          const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                          float *origin_weight, float *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        ori_weight_data_(origin_weight),
-        ori_bias_data_(origin_bias) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {}
 
   ~ConvolutionSWCPUKernel() override {
-    if (packed_weight_ != nullptr) {
-      free(packed_weight_);
-      packed_weight_ = nullptr;
-    }
-    if (packed_bias_ != nullptr) {
-      free(packed_bias_);
-      packed_bias_ = nullptr;
-    }
     if (slidingWindow_param_ != nullptr) {
       delete slidingWindow_param_;
       slidingWindow_param_ = nullptr;
@@ -50,10 +40,11 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
   int ReSize() override;
   int Run() override;
   int RunImpl(int task_id);
-  int InitWeightBias();
   int InitTmpBuffer();
 
  private:
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreeTmpBuffer() {
     if (output_data_ != nullptr && oc_res_ != 0) {
       ctx_->allocator->Free(output_data_);
@@ -68,10 +59,6 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel {
   int in_tile_ = 0;      // input channel algin
   int oc_res_ = 0;
   int ic_res_ = 0;
-  float *ori_weight_data_ = nullptr;
-  float *ori_bias_data_ = nullptr;
-  float *packed_weight_ = nullptr;
-  float *packed_bias_ = nullptr;
   float *output_data_ = nullptr;
   float *input_data_ = nullptr;
   SlidingWindowParam *slidingWindow_param_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
index 3c1bc7da29e..08fb239ff58 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc
@@ -31,77 +31,9 @@ int ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_da
     return RET_ERROR;
   }
 
-  return WinogradWeightTransform(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_, kernel_unit_,
-                                 conv_param_->input_channel_, conv_param_->output_channel_, true);
-}
-
-int ConvolutionWinogradCPUKernel::InitWeightBias() {
-  auto filter_tensor = in_tensors_.at(kWeightIndex);
-  int in_channel = filter_tensor->Channel();
-  if (in_channel < 0) {
-    MS_LOG(ERROR) << "get channel from filter tensor failed.";
-    return RET_ERROR;
-  }
-  int out_channel = filter_tensor->Batch();
-  if (out_channel < 0) {
-    MS_LOG(ERROR) << "get batch from filter tensor failed.";
-    return RET_ERROR;
-  }
-  conv_param_->input_channel_ = in_channel;
-  conv_param_->output_channel_ = out_channel;
-
-  // set data
-  auto trans_matrix_data_size =
-    input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
-  if (trans_weight_ == nullptr) {
-    trans_weight_ = reinterpret_cast<float *>(malloc(trans_matrix_data_size));
-    if (trans_weight_ == nullptr) {
-      MS_LOG(ERROR) << "malloc matrix_buffer failed.";
-      return RET_MEMORY_FAILED;
-    }
-  }
-  memset(trans_weight_, 0, trans_matrix_data_size);
-
-  float matrix_g[64];
-  float matrix_gt[64];
-  float matrix_a[64];
-  float matrix_at[64];
-  float matrix_b[64];
-  float matrix_bt[64];
-  float coef = 1.0f;
-  if (input_unit_ == 8) {
-    coef = 0.5f;
-  }
-  auto ret =
-    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
-    return ret;
-  }
-  ret = WinogradFilterTransform(origin_weight_, matrix_g, matrix_gt, oc_block_);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "winograd filter transform failed.";
-    return ret;
-  }
-
-  // init bias
-  size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float);
-  if (bias_data_ == nullptr) {
-    bias_data_ = reinterpret_cast<float *>(malloc(new_bias_size));
-    if (bias_data_ == nullptr) {
-      MS_LOG(ERROR) << "malloc bias_data_ failed.";
-      return RET_MEMORY_FAILED;
-    }
-  }
-  if (in_tensors_.size() == kInputSize2) {
-    size_t origin_size = out_channel * sizeof(float);
-    memcpy(bias_data_, origin_bias_, origin_size);
-    memset(reinterpret_cast<float *>(bias_data_) + out_channel, 0, new_bias_size - origin_size);
-  } else {
-    MS_ASSERT(in_tensors_.size() == kInputSize1);
-    memset(bias_data_, 0, new_bias_size);
-  }
-  return RET_OK;
+  return WinogradWeightTransform(weight_data, reinterpret_cast<float *>(packed_weight_), matrix_g, matrix_gt, oc_block,
+                                 input_unit_, kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_,
+                                 true);
 }
 
 int ConvolutionWinogradCPUKernel::InitTmpBuffer() {
@@ -158,6 +90,8 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() {
 }
 
 int ConvolutionWinogradCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   tile_num_ = C12NUM;
 #ifdef ENABLE_AVX
   oc_block_ = C16NUM;
@@ -169,7 +103,7 @@ int ConvolutionWinogradCPUKernel::Init() {
   conv_param_->input_unit_ = input_unit_;
   conv_param_->output_unit_ = output_unit_;
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Init weight bias failed.";
     return RET_ERROR;
@@ -202,8 +136,9 @@ int ConvolutionWinogradCPUKernel::RunImpl(int task_id) {
   MS_ASSERT(ori_input_data != nullptr);
   auto output_data = reinterpret_cast<float *>(out_tensors_.front()->data_c());
   MS_ASSERT(output_data != nullptr);
-  ConvWinogardFp32(ori_input_data, trans_weight_, reinterpret_cast<const float *>(bias_data_), output_data,
-                   tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_);
+  ConvWinogardFp32(ori_input_data, reinterpret_cast<float *>(packed_weight_),
+                   reinterpret_cast<const float *>(bias_data_), output_data, tmp_buffer_address_list_, task_id,
+                   conv_param_, in_func_, out_func_);
   return RET_OK;
 }
 
@@ -224,12 +159,9 @@ int ConvolutionWinogradCPUKernel::Run() {
     FreeTmpBuffer();
     return RET_ERROR;
   }
-  if (IsTrain() && IsTrainable()) {
-    ret = InitWeightBias();
-    if (ret != RET_OK) {
-      MS_LOG(ERROR) << "Init weight bias failed.";
-      return RET_ERROR;
-    }
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
   }
 
   ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradImpl, this, thread_count_);
@@ -241,6 +173,68 @@ int ConvolutionWinogradCPUKernel::Run() {
   return ret;
 }
 
+int ConvolutionWinogradCPUKernel::MallocWeightBiasData() {
+  auto filter_tensor = in_tensors_.at(kWeightIndex);
+  int in_channel = filter_tensor->Channel();
+  if (in_channel < 0) {
+    MS_LOG(ERROR) << "get channel from filter tensor failed.";
+    return RET_ERROR;
+  }
+  int out_channel = filter_tensor->Batch();
+  if (out_channel < 0) {
+    MS_LOG(ERROR) << "get batch from filter tensor failed.";
+    return RET_ERROR;
+  }
+  conv_param_->input_channel_ = in_channel;
+  conv_param_->output_channel_ = out_channel;
+
+  // set data
+  auto trans_matrix_data_size =
+    input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float);
+  if (packed_weight_ == nullptr) {
+    packed_weight_ = malloc(trans_matrix_data_size);
+    if (packed_weight_ == nullptr) {
+      MS_LOG(ERROR) << "malloc matrix_buffer failed.";
+      return RET_MEMORY_FAILED;
+    }
+  }
+  memset(packed_weight_, 0, trans_matrix_data_size);
+
+  float matrix_a[64];
+  float matrix_at[64];
+  float matrix_b[64];
+  float matrix_bt[64];
+  float coef = 1.0f;
+  if (input_unit_ == 8) {
+    coef = 0.5f;
+  }
+  auto ret =
+    CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "get matrix g from CookToomFilter failed.";
+    return ret;
+  }
+
+  // init bias
+  size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float);
+  if (bias_data_ == nullptr) {
+    bias_data_ = malloc(new_bias_size);
+    if (bias_data_ == nullptr) {
+      MS_LOG(ERROR) << "malloc bias_data_ failed.";
+      return RET_MEMORY_FAILED;
+    }
+  }
+  memset(bias_data_, 0, new_bias_size);
+  return RET_OK;
+}
+
+void ConvolutionWinogradCPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  WinogradFilterTransform(reinterpret_cast<float *>(origin_weight), matrix_g_, matrix_gt_, oc_block_);
+}
+
 int ConvolutionWinogradCPUKernel::Eval() {
   auto ret = InnerKernel::Eval();
   if (ret != RET_OK) {
@@ -248,7 +242,7 @@ int ConvolutionWinogradCPUKernel::Eval() {
     return ret;
   }
   if (IsTrainable()) {
-    ret = InitWeightBias();
+    ret = InitConvWeightBias();
     if (ret != RET_OK) {
       MS_LOG(ERROR) << "Init weight bias failed.";
       return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
index 7d5f792a731..9fd402a7bc2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h
@@ -30,27 +30,21 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
   ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx,
                                int output_unit, float *origin_weight, float *origin_bias)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx),
-        output_unit_(output_unit),
-        origin_weight_(origin_weight),
-        origin_bias_(origin_bias) {}
-  ~ConvolutionWinogradCPUKernel() override {
-    if (trans_weight_ != nullptr) {
-      free(trans_weight_);
-      trans_weight_ = nullptr;
-    }
-  };
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias),
+        output_unit_(output_unit) {}
+  ~ConvolutionWinogradCPUKernel() override {}
   int Init() override;
   int ReSize() override;
   int Run() override;
   int Eval() override;
   int RunImpl(int task_id);
-  int InitWeightBias();
   int InitTmpBuffer();
   int ConfigInputOutput();
   int WinogradFilterTransform(const float *weight_data, float *matrix_g, const float *matrix_gt, int oc_block);
 
  private:
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
   void FreeTmpBuffer() {
     if (trans_input_ != nullptr) {
       ctx_->allocator->Free(trans_input_);
@@ -74,13 +68,12 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
   int output_unit_{0};
   int oc_block_{0};
   int tile_num_{0};
-  float *origin_weight_;  // do not free
-  float *origin_bias_;    // do not free
   float *tmp_data_ = nullptr;
   float *trans_input_ = nullptr;
   float *gemm_out_ = nullptr;
   float *col_buffer_ = nullptr;
-  float *trans_weight_ = nullptr;
+  float matrix_g_[64];
+  float matrix_gt_[64];
   TmpBufferAddress tmp_buffer_address_list_[4] = {nullptr};
   InputTransFunc in_func_ = nullptr;
   OutputTransFunc out_func_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
index cd3456ae562..405056a190d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc
@@ -33,6 +33,8 @@ constexpr size_t kBoxIndex = 1;
 constexpr size_t kBoxIdIndex = 2;
 }  // namespace
 int CropAndResizeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
index cda9a8c5525..e81abdd92d1 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc
@@ -36,6 +36,8 @@ int CropLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }  // namespace
 
 int CropCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
index 3d2f184159c..b798512d1ab 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc
@@ -26,10 +26,6 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() {
     delete sliding_;
     sliding_ = nullptr;
   }
-  if (packed_weight_ != nullptr) {
-    free(packed_weight_);
-    packed_weight_ = nullptr;
-  }
 }
 
 int DeconvolutionDepthwiseCPUKernel::InitSlideParam() {
@@ -45,37 +41,6 @@ int DeconvolutionDepthwiseCPUKernel::InitSlideParam() {
   return RET_OK;
 }
 
-int DeconvolutionDepthwiseCPUKernel::InitWeightBias() {
-  // init weight: o, h, w, i; o == group, i == 1
-  auto weight_tensor = in_tensors_.at(kWeightIndex);
-  auto origin_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(origin_weight != nullptr);
-  int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM);
-  int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
-
-  packed_weight_ = reinterpret_cast<float *>(malloc(pack_weight_size * sizeof(float)));
-  if (packed_weight_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(),
-                       weight_tensor->Batch());
-
-  bias_data_ = reinterpret_cast<float *>(malloc(C4NUM * OC4 * sizeof(float)));
-  if (bias_data_ == nullptr) {
-    MS_LOG(ERROR) << "Malloc buffer failed.";
-    return RET_ERROR;
-  }
-  memset(bias_data_, 0, C4NUM * OC4 * sizeof(float));
-  if (in_tensors_.size() == kInputSize2) {
-    auto ori_bias = reinterpret_cast<float *>(in_tensors_.at(kBiasIndex)->data_c());
-    memcpy(bias_data_, ori_bias, in_tensors_.at(kBiasIndex)->ElementsNum() * sizeof(float));
-  }
-
-  conv_param_->thread_num_ = MSMIN(thread_count_, OC4);
-  return RET_OK;
-}
-
 int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
   if (conv_param_->input_channel_ % C4NUM != 0) {
     need_align_ = true;
@@ -100,15 +65,17 @@ int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() {
 }
 
 int DeconvolutionDepthwiseCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   sliding_ = new (std::nothrow) SlidingWindowParam;
   if (sliding_ == nullptr) {
     MS_LOG(ERROR) << "new sliding window param failed.";
     return RET_ERROR;
   }
 
-  auto ret = InitWeightBias();
+  auto ret = InitConvWeightBias();
   if (ret != 0) {
-    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.ret: " << ret;
+    MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitConvWeightBias failed.ret: " << ret;
     return ret;
   }
   if (!InferShapeDone()) {
@@ -132,8 +99,8 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() {
 }
 
 int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) {
-  DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast<float *>(bias_data_), conv_param_,
-                 sliding_, task_id);
+  DeconvDwSWFp32(packed_output_, packed_input_, reinterpret_cast<float *>(packed_weight_),
+                 reinterpret_cast<float *>(bias_data_), conv_param_, sliding_, task_id);
   return RET_OK;
 }
 
@@ -148,6 +115,10 @@ int DeconvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 }
 
 int DeconvolutionDepthwiseCPUKernel::Run() {
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
   if (conv_param_->input_channel_ != conv_param_->output_channel_) {
     MS_LOG(ERROR) << "Only support input channel equals output channel.";
     return RET_ERROR;
@@ -190,6 +161,34 @@ int DeconvolutionDepthwiseCPUKernel::Run() {
   return ret;
 }
 
+int DeconvolutionDepthwiseCPUKernel::MallocWeightBiasData() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM);
+  int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
+  packed_weight_ = malloc(pack_weight_size * sizeof(float));
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+
+  bias_data_ = malloc(C4NUM * OC4 * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc buffer failed.";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, C4NUM * OC4 * sizeof(float));
+  conv_param_->thread_num_ = MSMIN(thread_count_, OC4);
+  return RET_OK;
+}
+
+void DeconvolutionDepthwiseCPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+  PackNCHWToNC4HW4Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_), 1,
+                       weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch());
+}
+
 void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() {
   if (need_align_) {
     ms_context_->allocator->Free(packed_input_);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
index 0f0bf8f2423..33b99251bfe 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h
@@ -27,22 +27,23 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeconvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                   const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeconvolutionDepthwiseCPUKernel() override;
 
   int Init() override;
   int InitSlideParam();
   int ReSize() override;
   int Run() override;
-
-  int InitWeightBias();
   int Execute(int task_id);
 
  private:
   int InitPackedInputOutput();
   void FreePackedInputOutput();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
+
   SlidingWindowParam *sliding_ = nullptr;
-  float *packed_weight_ = nullptr;
   float *packed_input_ = nullptr;
   float *packed_output_ = nullptr;
   bool need_align_ = false;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
index 4095de69bdb..9b118687cfb 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc
@@ -31,8 +31,6 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() {
     delete matmul_param_;
     matmul_param_ = nullptr;
   }
-  FreeAlignedData(reinterpret_cast<void **>(&weight_ptr_));
-  FreeAlignedData(reinterpret_cast<void **>(&bias_ptr));
 }
 
 int DeConvolutionCPUKernel::ReSize() {
@@ -50,48 +48,47 @@ int DeConvolutionCPUKernel::ReSize() {
   return RET_OK;
 }
 
-int DeConvolutionCPUKernel::InitWeightBias() {
+int DeConvolutionCPUKernel::MallocWeightBiasData() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto input_channel = weight_tensor->Batch();
   auto output_channel = weight_tensor->Channel();
   auto kernel_h_ = weight_tensor->Height();
   auto kernel_w_ = weight_tensor->Width();
   int output_aligned_size = UP_ROUND(output_channel, C8NUM);
-  bias_ptr = reinterpret_cast<float *>(MallocAlignedData(C32NUM, output_aligned_size * sizeof(float)));
-  if (bias_ptr == nullptr) {
-    MS_LOG(ERROR) << "deconv malloc bias_ptr error!";
-    return RET_ERROR;
-  }
-  memset(bias_ptr, 0, output_aligned_size * sizeof(float));
-  if (in_tensors_.size() == DIMENSION_3D) {
-    if (in_tensors_.at(kBiasIndex)->shape().size() == DIMENSION_1D &&
-        in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) {
-      MS_ASSERT(in_tensors_.at(kBiasIndex)->data_c() != nullptr);
-      memcpy(bias_ptr, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float));
-    } else {
-      MS_LOG(ERROR) << "unsupported bias shape for deconv!";
-      return RET_ERROR;
-    }
-  }
-
   size_t weight_pack_size = input_channel * kernel_w_ * kernel_h_ * output_aligned_size * sizeof(float);
-  weight_ptr_ = reinterpret_cast<float *>(MallocAlignedData(C32NUM, weight_pack_size));
-  if (weight_ptr_ == nullptr) {
-    MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!";
+  packed_weight_ = MallocAlignedData(C32NUM, weight_pack_size);
+  if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "deconv malloc packed_weight_ error!";
     return RET_ERROR;
   }
-  memset(weight_ptr_, 0, weight_pack_size);
-  MS_ASSERT(in_tensors_.at(kWeightIndex)->data_c() != nullptr);
-#ifdef ENABLE_AVX
-  PackNHWCToCXHWNXFp32(reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel,
-                       kernel_w_ * kernel_h_, output_channel);
-#else
-  PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel,
-                       kernel_w_ * kernel_h_, output_channel);
-#endif
+  memset(packed_weight_, 0, weight_pack_size);
+
+  bias_data_ = MallocAlignedData(C32NUM, output_aligned_size * sizeof(float));
+  if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "deconv malloc bias_data_ error!";
+    return RET_ERROR;
+  }
+  memset(bias_data_, 0, output_aligned_size * sizeof(float));
   return RET_OK;
 }
 
+void DeConvolutionCPUKernel::PackWeight() {
+  auto weight_tensor = in_tensors_.at(kWeightIndex);
+  auto input_channel = weight_tensor->Batch();
+  auto output_channel = weight_tensor->Channel();
+  auto kernel_h = weight_tensor->Height();
+  auto kernel_w = weight_tensor->Width();
+  void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_;
+  MS_ASSERT(origin_weight != nullptr);
+#ifdef ENABLE_AVX
+  PackNHWCToCXHWNXFp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                       input_channel, kernel_w * kernel_h, output_channel);
+#else
+  PackNHWCToC8HWN8Fp32(reinterpret_cast<float *>(origin_weight), reinterpret_cast<float *>(packed_weight_),
+                       input_channel, kernel_w * kernel_h, output_channel);
+#endif
+}
+
 int DeConvolutionCPUKernel::InitParam() {
   input_plane_ = conv_param_->input_h_ * conv_param_->input_w_;
   kernel_plane_ = conv_param_->kernel_w_ * conv_param_->kernel_h_;
@@ -133,26 +130,32 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) {
   }
   auto tmp_buffer = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_align_;
 #ifdef ENABLE_AVX
-  DeconvMatmulAvx(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
-                  tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_,
-                  kernel_plane_);
+  DeconvMatmulAvx(
+    pack_input_,
+    reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
+    tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, kernel_plane_);
 #elif ENABLE_SSE
-  DeconvMatmulFloatSse(pack_input_,
-                       weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
-                       tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_);
+  DeconvMatmulFloatSse(
+    pack_input_,
+    reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
+    tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_);
 #else
-  MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
-            tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_,
-            oc * C8NUM * kernel_plane_, matmul_param_->col_, OutType_C8);
+  MatMulOpt(
+    pack_input_,
+    reinterpret_cast<float *>(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_,
+    tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_,
+    matmul_param_->col_, OutType_C8);
 #endif
 
   DeConvPostFp32C8(tmp_buffer, pack_output_ + task_id * thread_stride_ * C8NUM * output_plane_,
-                   reinterpret_cast<float *>(bias_ptr) + thread_stride_ * task_id * C8NUM,
+                   reinterpret_cast<float *>(bias_data_) + thread_stride_ * task_id * C8NUM,
                    output_ptr_ + task_id * thread_stride_ * C8NUM, oc_res, conv_param_);
   return RET_OK;
 }
 
 int DeConvolutionCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #if defined(ENABLE_ARM32) || defined(ENABLE_AVX) || defined(ENABLE_SSE)
   row_tile_ = C4NUM;
 #else
@@ -163,10 +166,15 @@ int DeConvolutionCPUKernel::Init() {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
   }
-  int error_code = InitWeightBias();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code;
-    return error_code;
+  if (in_tensors_.at(kWeightIndex)->data_c() != nullptr) {
+    int error_code = InitConvWeightBias();
+    if (error_code != RET_OK) {
+      MS_LOG(ERROR) << "deconv InitConvWeightBias error!ret: " << error_code;
+      return error_code;
+    }
+  } else {
+    is_repack_ = true;
+    MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime.";
   }
   if (!InferShapeDone()) {
     return RET_OK;
@@ -214,6 +222,10 @@ int DeConvolutionCPUKernel::InitRunBuf() {
 }
 
 int DeConvolutionCPUKernel::Run() {
+  if (RepackWeight() != RET_OK) {
+    MS_LOG(ERROR) << "Repack weight failed.";
+    return RET_ERROR;
+  }
   float *src_in = reinterpret_cast<float *>(in_tensors_[0]->data_c());
   float *src_out = reinterpret_cast<float *>(out_tensors_[0]->data_c());
   MS_ASSERT(src_in != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
index 83f10cd2b81..5a1b028ed0d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h
@@ -32,7 +32,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeConvolutionCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                          const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeConvolutionCPUKernel() override;
   int Init() override;
   int Run() override;
@@ -45,7 +46,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   int InitRunBuf();
   void FreeRunBuf();
   int InitParam();
-  int InitWeightBias();
+  int MallocWeightBiasData() override;
+  void PackWeight() override;
 
  private:
   MatMulParameter *matmul_param_ = nullptr;
@@ -55,13 +57,11 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel {
   int thread_count_ = 1;
   int thread_stride_ = 0;
   int row_tile_ = 0;
-  float *weight_ptr_ = nullptr;
   float *pack_input_ = nullptr;
   float *pack_output_ = nullptr;
   float *tmp_buffer_ = nullptr;
   float *input_ptr_ = nullptr;
   float *output_ptr_ = nullptr;
-  float *bias_ptr = nullptr;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
index 8c20867be1d..2677fe41707 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <algorithm>
 #include "src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h"
 
 using mindspore::lite::RET_ERROR;
@@ -192,7 +193,13 @@ int DeConvWgPostFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_sca
 
 int DeConvolutionWinogradCPUKernel::InitComputeParam() {
   auto weight_tensor = in_tensors_[1];
-
+  auto shape = weight_tensor->shape();
+  if (std::find(shape.begin(), shape.end(), -1) != shape.end()) {
+    MS_LOG(WARNING) << "The shape of weight tensor is invalid.";
+    valid_weight_shape_ = false;
+    return RET_OK;
+  }
+  valid_weight_shape_ = true;
   conv_param_->input_channel_ = weight_tensor->Batch();
   conv_param_->output_channel_ = weight_tensor->Channel();
   conv_param_->kernel_w_ = weight_tensor->Width();
@@ -277,7 +284,11 @@ int DeConvolutionWinogradCPUKernel::InitComputeParam() {
 int DeConvolutionWinogradCPUKernel::InitDataParam() {
   auto weight_tensor = in_tensors_.at(kWeightIndex);
   auto nhwc_weight = reinterpret_cast<float *>(weight_tensor->data_c());
-  MS_ASSERT(nhwc_weight != nullptr);
+  if (nhwc_weight == nullptr) {
+    MS_LOG(WARNING) << "The weight data is nullptr, will init data parameter in runtime.";
+    is_repack_ = true;
+    return RET_OK;
+  }
 
   /* unit data : weight & winograd data */
   for (int i = 0; i < deconv_param_->compute_size_; i++) {
@@ -307,11 +318,30 @@ int DeConvolutionWinogradCPUKernel::InitDataParam() {
 int DeConvolutionWinogradCPUKernel::ReSize() {
   FreeResizeBuf();
   ConvolutionBaseCPUKernel::Init();
-  InitParameter();
+  if (!valid_weight_shape_) {
+    if (InitComputeParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitComputeParam error!";
+      return RET_ERROR;
+    } else if (!valid_weight_shape_) {
+      return RET_OK;
+    }
+    if (InitDataParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+  }
+
+  int error_code = InitParameter();
+  if (error_code != RET_OK) {
+    MS_LOG(ERROR) << "InitParameter error! ret: " << error_code;
+    return error_code;
+  }
   return RET_OK;
 }
 
 int DeConvolutionWinogradCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   deconv_param_ = new (std::nothrow) DeConvParam();
   if (deconv_param_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
@@ -320,16 +350,14 @@ int DeConvolutionWinogradCPUKernel::Init() {
   for (auto &wg : deconv_param_->a_buffer_) {
     wg.buf_init_ = false;
   }
-  int error_code = InitComputeParam();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code;
-    return error_code;
-  }
 
-  error_code = InitDataParam();
-  if (error_code != RET_OK) {
-    MS_LOG(ERROR) << "InitWeightBias error! ret: " << error_code;
-    return error_code;
+  if (InitComputeParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
+  }
+  if (valid_weight_shape_ && InitDataParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
   }
 
   if (!InferShapeDone()) {
@@ -421,6 +449,20 @@ int DeConvolutionWinogradCPUKernel::Run() {
     return ret;
   }
 
+  if (!valid_weight_shape_) {
+    if (InitComputeParam() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+    if (!valid_weight_shape_ || InitParameter() != RET_OK) {
+      MS_LOG(ERROR) << "InitDataParam error!";
+      return RET_ERROR;
+    }
+  }
+  if (IsRepack() && InitDataParam() != RET_OK) {
+    MS_LOG(ERROR) << "InitDataParam error!";
+    return RET_ERROR;
+  }
   float *src_in = reinterpret_cast<float *>(in_tensors_[0]->data_c());
   float *src_out = reinterpret_cast<float *>(out_tensors_[0]->data_c());
   MS_ASSERT(src_in != nullptr);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h
index b174972d93c..48d4b3a3908 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h
@@ -32,7 +32,8 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                  const std::vector<lite::Tensor *> &outputs, const lite::InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(),
+                                 inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {}
   ~DeConvolutionWinogradCPUKernel() override;
   int Init() override;
   int Run() override;
@@ -61,6 +62,7 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel {
   std::mutex lock_;
   int thread_num_hw_ = 0;
   int thread_stride_hw_ = 0;
+  bool valid_weight_shape_ = true;
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc
index 086a1a12356..521f610cd02 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc
@@ -26,6 +26,8 @@ using mindspore::schema::PrimitiveType_DepthToSpace;
 
 namespace mindspore::kernel {
 int DepthToSpaceCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   param_->data_type_size_ = sizeof(float);
   if (!InferShapeDone()) {
     return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc
index 817b4e6f582..3aef46d91ff 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc
@@ -27,6 +27,7 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess;
 
 namespace mindspore::kernel {
 int DetectionPostProcessCPUKernel::GetInputData() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
   if ((in_tensors_.at(0)->data_type() != kNumberTypeFloat32 && in_tensors_.at(0)->data_type() != kNumberTypeFloat) ||
       (in_tensors_.at(1)->data_type() != kNumberTypeFloat32 && in_tensors_.at(1)->data_type() != kNumberTypeFloat)) {
     MS_LOG(ERROR) << "Input data type error";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
index 1be7b7dbcca..53f55492a63 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc
@@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_Elu;
 
 namespace mindspore::kernel {
 int EluCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
index 7b13ed938cd..d43edcd5f10 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc
@@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_EmbeddingLookupFusion;
 
 namespace mindspore::kernel {
 int EmbeddingLookupCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc
index 046cc426b43..50a1be42f69 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc
@@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_ExpFusion;
 
 namespace mindspore::kernel {
 int ExpCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_);
   param_->in_scale_ = param_->scale_ * log_base;
   if (param_->shift_ == 0) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc
index fc3f37d205a..db9f66ba49b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc
@@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_Fill;
 
 namespace mindspore::kernel {
 int FillCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
index 63d3f004c16..3729bff5e8a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc
@@ -25,6 +25,9 @@ using mindspore::schema::PrimitiveType_FullConnection;
 
 namespace mindspore::kernel {
 int FullconnectionCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+
   MatmulFp32BaseCPUKernel::InitParameter();
 
   if (params_->a_const_) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
index f145b284161..50b682b68fc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc
@@ -23,7 +23,12 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_FusedBatchNorm;
 
 namespace mindspore::kernel {
+namespace {
+constexpr int kNumInputSize = 5;
+}  // namespace
 int FusedBatchnormCPUKernel::ReSize() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   FreeMeanAndVariance();
   FreeScaleAndOffset();
   FillParam();
@@ -66,7 +71,7 @@ int FusedBatchnormCPUKernel::InitConstTensor() {
 
 int FusedBatchnormCPUKernel::Run() {
   auto param = reinterpret_cast<BatchNormParameter *>(op_parameter_);
-  if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) {
+  if (IsTrain() && IsTrainable() && in_tensors_.size() >= kNumInputSize) {
     float *in = static_cast<float *>(in_tensors_[0]->MutableData());
     float *scale = static_cast<float *>(in_tensors_[1]->MutableData());
     float *offset = static_cast<float *>(in_tensors_[2]->MutableData());
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
index fd454f1b56d..0e42b17f501 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc
@@ -37,6 +37,8 @@ GatherNdCPUKernel::~GatherNdCPUKernel() {
 }
 
 int GatherNdCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
index e313cd74986..3cedf74e3b3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc
@@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_Gather;
 
 namespace mindspore::kernel {
 int GatherCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   axis_ = *(reinterpret_cast<int *>(in_tensors_.at(2)->data_c()));
   if (!InferShapeDone()) {
     return RET_OK;
@@ -63,7 +65,7 @@ int GatherCPUKernel::DoGather(int task_id) {
   int8_t *int8_in = reinterpret_cast<int8_t *>(input_tensor->data_c());
   int8_t *int8_out = reinterpret_cast<int8_t *>(out_tensor->data_c());
 
-  int data_size = lite::DataTypeSize(input_tensor->data_type());
+  int data_size = static_cast<int>(lite::DataTypeSize(input_tensor->data_type()));
   int8_in += thread_stride * limit * inner_size * data_size;
   int8_out += thread_stride * indices_element_size * inner_size * data_size;
 
@@ -119,7 +121,7 @@ int GatherCPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lit
       }
     } else {
       for (int i = 0; i < indices_num; i++) {
-        indices_data_[i] = reinterpret_cast<float *>(indices_tensor->MutableData())[i];
+        indices_data_[i] = static_cast<int>(reinterpret_cast<float *>(indices_tensor->MutableData())[i]);
       }
     }
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc
index c56a7088f9e..a8e246a0917 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc
@@ -30,6 +30,7 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_GLU;
 
 namespace mindspore::kernel {
+const int kGluBranchNum = 2;
 int GluCPUKernel::MallocTmpBuffer() {
   FreeTmpBuffer();
   auto in_tensor = in_tensors_.front();
@@ -115,7 +116,7 @@ int GluCPUKernel::Split(int task_id) {
 int GluCPUKernel::Sigmoid(int task_id) {
   auto input_addr = reinterpret_cast<float *>(split_ptr_.at(1));
   auto output_addr = reinterpret_cast<float *>(sigmoid_ptr_);
-  auto length = in_tensors_.at(0)->ElementsNum() / 2;
+  auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum;
 
   int stride = UP_DIV(length, op_parameter_->thread_num_);
   int count = MSMIN(stride, length - stride * task_id);
@@ -129,7 +130,7 @@ int GluCPUKernel::Mul(int task_id) {
   auto input_addr0 = reinterpret_cast<float *>(split_ptr_.at(0));
   auto input_addr1 = reinterpret_cast<float *>(sigmoid_ptr_);
   auto output_addr = reinterpret_cast<float *>(out_tensors_.at(0)->data_c());
-  auto length = in_tensors_.at(0)->ElementsNum() / 2;
+  auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum;
 
   int stride = UP_DIV(length, op_parameter_->thread_num_);
   int count = MSMIN(stride, length - stride * task_id);
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc
index af5a737fa6c..b2e8cb45248 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc
@@ -64,6 +64,8 @@ int GroupConvolutionFp32CPUKernel::PostConcat(int group_id) {
 }
 
 int GroupConvolutionFp32CPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (group_conv_creator_ == nullptr) {
     return lite::RET_ERROR;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc
index 90b522ffaee..14e32ba9113 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc
@@ -162,6 +162,8 @@ int GruCPUKernel::InitStateWeightBias() {
 }
 
 int GruCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
index 9fca05f9231..f18162d392b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc
@@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_InstanceNorm;
 
 namespace mindspore::kernel {
 int InstanceNormCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc
index 79aa1154c13..9899da70145 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc
@@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_InvertPermutation;
 
 namespace mindspore::kernel {
 int InvertPermutationCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc
index eb0228e7eaa..b48390605b7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc
@@ -31,6 +31,8 @@ namespace {
 const int kMaxThreadNum = 8;
 }
 int L2NormCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
index 76d743b9511..d12609e0936 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc
@@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_LayerNormFusion;
 
 namespace mindspore::kernel {
 int LayerNormCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc
index dd40b54c12c..58167000119 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc
@@ -27,7 +27,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_LRN;
 
 namespace mindspore::kernel {
-int LocalResponseNormCPUKernel::Init() { return RET_OK; }
+int LocalResponseNormCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int LocalResponseNormCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc
index cb0e4ec6b1c..6988376f45e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc
@@ -212,6 +212,8 @@ int LstmCPUKernel::InitParam() {
 }
 
 int LstmCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_6D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc
index cc5e90791f4..82a0a977165 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc
@@ -48,6 +48,8 @@ void MatmulCPUKernel::InitShapeB() {
 }
 
 int MatmulCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   MatmulFp32BaseCPUKernel::InitParameter();
 
   if (params_->a_const_ == true) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc
index 0b872af2c7d..f1e6da7ac25 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc
@@ -78,7 +78,8 @@ int MatmulFp32BaseCPUKernel::InitBufferA() {
   if (op_parameter_->is_train_session_) {
     a_pack_ptr_ = reinterpret_cast<float *>(workspace());
   } else {
-    a_pack_ptr_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_a_pack_size_ * sizeof(float)));
+    a_pack_ptr_ =
+      reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_a_pack_size_ * static_cast<int>(sizeof(float))));
   }
   if (a_pack_ptr_ == nullptr) {
     MS_LOG(ERROR) << "malloc a_pack_ptr_ failed";
@@ -94,7 +95,8 @@ int MatmulFp32BaseCPUKernel::InitBufferB() {
   if (op_parameter_->is_train_session_) {
     b_pack_ptr_ = reinterpret_cast<float *>(workspace()) + matrix_a_pack_size_;
   } else {
-    b_pack_ptr_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_b_pack_size_ * sizeof(float)));
+    b_pack_ptr_ =
+      reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_b_pack_size_ * static_cast<int>(sizeof(float))));
   }
   if (b_pack_ptr_ == nullptr) {
     MS_LOG(ERROR) << "malloc b_pack_ptr_ failed";
@@ -128,7 +130,7 @@ int MatmulFp32BaseCPUKernel::InitBiasData() {
     auto bias_tensor = in_tensors_[2];
     int max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), col_tile_);
     // malloc addr need to aligned to 32 bytes
-    bias_ptr_ = reinterpret_cast<float *>(malloc(max_bias_data * sizeof(float)));
+    bias_ptr_ = reinterpret_cast<float *>(malloc(max_bias_data * static_cast<int>(sizeof(float))));
     if (bias_ptr_ == nullptr) {
       MS_LOG(ERROR) << "malloc bias_ptr_ failed";
       return RET_ERROR;
@@ -142,8 +144,8 @@ int MatmulFp32BaseCPUKernel::InitBiasData() {
         bias_ptr_[i] = broadcast_data;
       }
     } else {
-      memset(bias_ptr_, 0, max_bias_data * sizeof(float));
-      memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * sizeof(float));
+      memset(bias_ptr_, 0, max_bias_data * static_cast<int>(sizeof(float)));
+      memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * static_cast<int>(sizeof(float)));
     }
   }
   return RET_OK;
@@ -151,7 +153,7 @@ int MatmulFp32BaseCPUKernel::InitBiasData() {
 
 int MatmulFp32BaseCPUKernel::InitMatrixA(const float *src_ptr) {
   if (vec_matmul_) {
-    memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * sizeof(float));
+    memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * static_cast<int>(sizeof(float)));
     return RET_OK;
   }
 
@@ -176,9 +178,9 @@ int MatmulFp32BaseCPUKernel::InitMatrixB(const float *src_ptr) {
 #ifdef ENABLE_AVX
         RowMajor2Col32Major(src_data, dst, params_->deep_, params_->col_);
 #elif defined(ENABLE_ARM64)
-        memcpy(dst, src_data, params_->col_ * params_->deep_ * sizeof(float));
+        memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast<int>(sizeof(float)));
 #else
-        memcpy(dst, src_data, params_->col_ * params_->deep_ * sizeof(float));
+        memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast<int>(sizeof(float)));
 #endif
       } else {
 #ifdef ENABLE_AVX
@@ -270,6 +272,8 @@ int MatmulFp32BaseCPUKernel::FloatRun(int task_id) const {
 }
 
 int MatmulFp32BaseCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
 #ifdef ENABLE_AVX
   matrix_a_pack_fun_ = params_->a_transpose_ ? RowMajor2Row6Major : RowMajor2Col6Major;
   matrix_b_pack_fun_ = params_->b_transpose_ ? RowMajor2Col16Major : RowMajor2Row16Major;
@@ -317,12 +321,14 @@ int MatmulFp32BaseCPUKernel::Init() {
     // only copy weight data
     // resize or run to pack
     auto b_tensor = in_tensors_.at(1);
-    src_b_ = reinterpret_cast<float *>(malloc(params_->batch * params_->deep_ * params_->col_ * sizeof(float)));
+    src_b_ = reinterpret_cast<float *>(
+      malloc(params_->batch * params_->deep_ * params_->col_ * static_cast<int>(sizeof(float))));
     if (src_b_ == nullptr) {
       MS_LOG(ERROR) << "matmul fp16 src_b_ is failed!";
       return RET_ERROR;
     }
-    memcpy(src_b_, b_tensor->data_c(), params_->batch * params_->deep_ * params_->col_ * sizeof(float));
+    memcpy(src_b_, b_tensor->data_c(),
+           params_->batch * params_->deep_ * params_->col_ * static_cast<int>(sizeof(float)));
   }
   return RET_OK;
 }
@@ -344,7 +350,7 @@ int MatmulFp32BaseCPUKernel::ReSize() {
     return RET_ERROR;
   }
   if (op_parameter_->is_train_session_) {
-    set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * sizeof(float));
+    set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * static_cast<int>(sizeof(float)));
   }
 
   if (params_->b_const_ && src_b_ != nullptr) {
@@ -380,8 +386,8 @@ int MatmulFp32BaseCPUKernel::InitTmpOutBuffer() {
     int out_channel = params_->col_;
     int oc_block_num = UP_DIV(out_channel, col_tile_);
     MS_ASSERT(ms_context_->allocator != nullptr);
-    output_data_ = reinterpret_cast<float *>(
-      ms_context_->allocator->Malloc(params_->batch * params_->row_ * oc_block_num * col_tile_ * sizeof(float)));
+    output_data_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(
+      params_->batch * params_->row_ * oc_block_num * col_tile_ * static_cast<int>(sizeof(float))));
     if (output_data_ == nullptr) {
       MS_LOG(ERROR) << "malloc tmp output data failed.";
       return RET_NULL_PTR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc
index a2bffd2cffa..32c7795d5bc 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc
@@ -47,6 +47,8 @@ constexpr int kBoxPointNum = 4;
 }  // namespace
 
 int NonMaxSuppressionCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   // boxes, scores, max_output_boxes, iou_threshold, score_threshold
   if (in_tensors_.size() < kMinInputsSize || in_tensors_.size() > kMaxInputsSize || out_tensors_.size() != kOutputNum) {
     MS_LOG(ERROR) << "NonMaxSuppression input size should be in [" << kMinInputsSize << ", " << kMaxInputsSize << "]"
@@ -245,7 +247,16 @@ int NonMaxSuppressionCPUKernel::Run() {
     return RET_ERROR;
   }
 
-  return Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data);
+  auto ret = Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "Run_Selecte failed";
+    return RET_ERROR;
+  }
+
+  for (auto *output : this->out_tensors()) {
+    output->ResetRefCount();
+  }
+  return ret;
 }
 
 REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_NonMaxSuppression, LiteKernelCreator<NonMaxSuppressionCPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc
index 60f267d1efc..5139aba76f2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc
@@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_NonZero;
 
 namespace mindspore::kernel {
 int NonZeroCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
index 8d01d2fe911..f3b6c1d0295 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc
@@ -28,9 +28,12 @@ using mindspore::schema::PrimitiveType_PadFusion;
 namespace mindspore::kernel {
 namespace {
 constexpr size_t kMirrorPadInputSize = 2;
-constexpr size_t kPadMaxInputSize = 2;
+constexpr size_t kPadCommonInputSize = 2;
+constexpr size_t kPadMaxInputSize = 3;
 }  // namespace
 int PadCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -40,30 +43,30 @@ int PadCPUKernel::Init() {
 int PadCPUKernel::ReSize() {
   auto input = in_tensors_.at(0);
   auto rank = input->shape().size();
-  if (rank > COMM_SHAPE_SIZE) {
-    MS_LOG(ERROR) << "Pad input rank should <= " << COMM_SHAPE_SIZE << ", got " << rank;
+  if (rank > DEFAULT_PAD_NDIMS) {
+    MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank;
     return RET_ERROR;
   }
   auto output = out_tensors_.at(0);
   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
-    auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input->shape().data(), rank);
+    auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input->shape().data(), rank);
     if (ret != RET_OK) {
       return ret;
     }
-    ret = ExtendShape(out_, COMM_SHAPE_SIZE, output->shape().data(), rank);
+    ret = ExtendShape(out_, DEFAULT_PAD_NDIMS, output->shape().data(), rank);
     if (ret != RET_OK) {
       return ret;
     }
-    if (pad_param_->padding_length < MAX_SHAPE_SIZE) {
-      int ori_paddings[MAX_SHAPE_SIZE];
+    if (pad_param_->padding_length < MAX_PAD_SIZE) {
+      int ori_paddings[MAX_PAD_SIZE];
       for (auto i = 0; i < pad_param_->padding_length; ++i) {
         ori_paddings[i] = pad_param_->paddings_[i];
       }
-      ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, ori_paddings, pad_param_->padding_length);
+      ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, ori_paddings, pad_param_->padding_length);
       if (ret != RET_OK) {
         return ret;
       }
-      pad_param_->padding_length = MAX_SHAPE_SIZE;
+      pad_param_->padding_length = MAX_PAD_SIZE;
     }
   }
   return RET_OK;
@@ -71,19 +74,17 @@ int PadCPUKernel::ReSize() {
 
 void PadCPUKernel::InitMirrorPadBlock() {
   mirror_pad_block_.clear();
-  std::vector<int> left_pads(COMM_SHAPE_SIZE);
-  for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) {
+  std::vector<int> left_pads(DEFAULT_PAD_NDIMS);
+  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
     left_pads[i] = pad_param_->paddings_[2 * i];
   }
-
   std::vector<int> input_separate_dims;
   std::vector<int> output_separate_dims;
   std::vector<int> separate_offset;
-
   /* init separate dims */
   int cur_input = 1;
   int cur_output = 1;
-  for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) {
+  for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
     if (cur_input > 1) {
       input_separate_dims.emplace_back(cur_input);
       output_separate_dims.emplace_back(cur_output);
@@ -100,22 +101,18 @@ void PadCPUKernel::InitMirrorPadBlock() {
     output_separate_dims.emplace_back(cur_output);
     separate_offset.emplace_back(0);
   }
-
   /* init separate stride */
   std::vector<int> output_separate_stride;
   output_separate_stride.resize(output_separate_dims.size());
   GetStride(output_separate_stride.data(), output_separate_dims.data(), output_separate_dims.size());
-
   /* init separate stride */
   std::vector<int> remain_stride;
   remain_stride.resize(0);
   int remain_size = GetStride(remain_stride.data(), output_separate_dims.data(), remain_stride.size());
-
   std::vector<int> right_pads(separate_offset.size());
   for (size_t i = 0; i < right_pads.size(); ++i) {
     right_pads[i] = output_separate_dims[i] - input_separate_dims[i] - separate_offset[i];
   }
-
   /* init pad region */
   std::vector<int> pad_region;
   for (size_t i = remain_stride.size(); i < output_separate_stride.size(); ++i) {
@@ -129,30 +126,27 @@ void PadCPUKernel::InitMirrorPadBlock() {
     }
     pad_region.emplace_back(r);
   }
-
   std::vector<int> pad_region_stride(pad_region.size());
   int region_size = GetStride(pad_region_stride.data(), pad_region.data(), pad_region.size());
-  int remain_dim_offset = remain_stride.size();
-
+  int remain_dim_offset = static_cast<int>(remain_stride.size());
   std::vector<int> pad_cord(pad_region.size());
-
   for (int pos = 0; pos < remain_size; ++pos) {
     const int dst_basic_offset = 0;
-
     for (int index = 1; index < region_size; ++index) {
       int dst_offset = dst_basic_offset;
-
       int value = index;
       for (size_t i = 0; i < pad_region.size() && pad_region_stride[i] != 0; ++i) {
         pad_cord[i] = value / pad_region_stride[i];
         value = value % pad_region_stride[i];
       }
-
       MirrorPadBlock block;
-      const int size_offset = COMM_SHAPE_SIZE - static_cast<int>(pad_region.size());
+      const int size_offset = DEFAULT_PAD_NDIMS - static_cast<int>(pad_region.size());
       for (size_t i = 0; i < pad_region.size(); ++i) {
         int di = size_offset + i;
         int si = remain_dim_offset + i;
+        if (di > DEFAULT_PAD_NDIMS) {
+          continue;
+        }
         switch (pad_cord[i]) {
           case 0:
             dst_offset += separate_offset[si] * output_separate_stride[si];
@@ -182,7 +176,6 @@ void PadCPUKernel::InitMirrorPadBlock() {
       mirror_pad_block_.push_back(std::move(block));
     }
   }
-  return;
 }
 
 int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) const {
@@ -257,7 +250,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) {
     Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_);
 
     /* calculate region part */
-    for (size_t i = task_id; i < mirror_pad_block_.size(); i += op_parameter_->thread_num_) {
+    for (size_t i = task_id; i < mirror_pad_block_.size(); i += static_cast<size_t>(op_parameter_->thread_num_)) {
       auto block = mirror_pad_block_[i];
 
       for (int a = 0; a < block.size_[0]; a++) {
@@ -265,8 +258,14 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) {
         for (int b = 0; b < block.size_[1]; b++) {
           int out_b_index = out_a_index + b * block.out_stride_[1];
           for (int c = 0; c < block.size_[2]; ++c) {
-            int output_index = out_b_index + c * block.out_stride_[2];
-            MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]);
+            int out_c_index = out_b_index + c * block.out_stride_[2];
+            for (int d = 0; d < block.size_[3]; ++d) {
+              int out_d_index = out_c_index + d * block.out_stride_[3];
+              for (int e = 0; e < block.size_[4]; ++e) {
+                int output_index = out_d_index + e * block.out_stride_[4];
+                MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]);
+              }
+            }
           }
         }
       }
@@ -282,7 +281,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) {
   return RET_OK;
 }
 
-int PadCPUKernel::CheckPaddings(int *paddings, int length, int *input_shape, int mode) {
+int PadCPUKernel::CheckPaddings(const int *paddings, int length, const int *input_shape, int mode) {
   if (paddings == nullptr || input_shape == nullptr) {
     return RET_NULL_PTR;
   }
@@ -310,8 +309,8 @@ int PadCPUKernel::CheckPaddings(int *paddings, int length, int *input_shape, int
 }
 
 int PadCPUKernel::CopyPaddingFromInput() {
-  if (in_tensors_.size() != kMirrorPadInputSize) {
-    MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need 2 inputs, got " << in_tensors_.size();
+  if (in_tensors_.size() < kMirrorPadInputSize) {
+    MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need at least 2 inputs, got " << in_tensors_.size();
     return RET_ERROR;
   }
   auto padding_tensor = in_tensors_.at(1);
@@ -327,28 +326,28 @@ int PadCPUKernel::CopyPaddingFromInput() {
     return RET_ERROR;
   }
 
-  auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank);
+  auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
   if (ret != RET_OK) {
     return ret;
   }
-  ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, paddings, padding_tensor->ElementsNum());
+  ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, paddings, padding_tensor->ElementsNum());
   if (ret != RET_OK) {
     return ret;
   }
-  pad_param_->padding_length = MAX_SHAPE_SIZE;
+  pad_param_->padding_length = MAX_PAD_SIZE;
   return RET_OK;
 }
 
 void PadCPUKernel::CalculateStrides() {
-  pad_param_->in_strides[COMM_SHAPE_SIZE - 1] = 1;
-  for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) {
+  pad_param_->in_strides[DEFAULT_PAD_NDIMS - 1] = 1;
+  for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
     pad_param_->in_strides[i] = in_[i + 1] * pad_param_->in_strides[i + 1];
   }
-  for (auto i = 0; i < COMM_SHAPE_SIZE; ++i) {
+  for (auto i = 0; i < DEFAULT_PAD_NDIMS; ++i) {
     out_[i] = in_[i] + pad_param_->paddings_[i * 2] + pad_param_->paddings_[i * 2 + 1];
   }
-  pad_param_->out_strides[COMM_SHAPE_SIZE - 1] = 1;
-  for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) {
+  pad_param_->out_strides[DEFAULT_PAD_NDIMS - 1] = 1;
+  for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) {
     pad_param_->out_strides[i] = out_[i + 1] * pad_param_->out_strides[i + 1];
   }
 }
@@ -358,7 +357,7 @@ int PadCPUKernel::HandleMirrorPad() {
   if (in_tensors_.size() == 1) {
     auto input_shape = in_tensors_.at(0)->shape();
     int rank = static_cast<int>(input_shape.size());
-    ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank);
+    ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank);
     if (ret != RET_OK) {
       return ret;
     }
@@ -368,7 +367,7 @@ int PadCPUKernel::HandleMirrorPad() {
       return ret;
     }
   }
-  ret = CheckPaddings(pad_param_->paddings_, COMM_SHAPE_SIZE, in_, pad_param_->pad_mode_);
+  ret = CheckPaddings(pad_param_->paddings_, DEFAULT_PAD_NDIMS, in_, pad_param_->pad_mode_);
   if (ret != RET_OK) {
     return ret;
   }
@@ -391,18 +390,21 @@ int PadCPUKernel::Run() {
   }
   int error_code = 0;
   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
-    if (in_tensors_.size() == kPadMaxInputSize) {
+    if (in_tensors_.size() >= kPadCommonInputSize) {
       error_code = CopyPaddingFromInput();
       if (error_code != RET_OK) {
         MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]";
         return RET_ERROR;
       }
     }
+    if (in_tensors_.size() == kPadMaxInputSize) {
+      pad_param_->constant_value_ = reinterpret_cast<float *>(in_tensors_.at(2)->data_c())[0];
+    }
     auto output = out_tensors_.at(0);
     int output_size = output->ElementsNum();
     auto output_data = reinterpret_cast<float *>(output->data_c());
     if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) {
-      memset(output_data, 0, output_size * sizeof(float));
+      memset(output_data, 0, static_cast<size_t>(output_size) * sizeof(float));
     } else {
       for (auto i = 0; i < output_size; ++i) {
         output_data[i] = pad_param_->constant_value_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h
index 97ff8ae7802..aaa5e59cb80 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h
@@ -45,7 +45,7 @@ class PadCPUKernel : public InnerKernel {
   virtual int RunMirrorPadImpl(int task_id);
 
  private:
-  int CheckPaddings(int *paddings, int length, int *input_shape, int mode);
+  int CheckPaddings(const int *paddings, int length, const int *input_shape, int mode);
   void CalculateStrides();
   int ExtendShape(int *shape, int length, const int *ori_shape, int rank) const;
   int ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length) const;
@@ -55,8 +55,8 @@ class PadCPUKernel : public InnerKernel {
   int HandleMirrorPad();
   int CopyPaddingFromInput();
   PadParameter *pad_param_ = nullptr;
-  int in_[4] = {0};
-  int out_[4] = {0};
+  int in_[DEFAULT_PAD_NDIMS] = {0};
+  int out_[DEFAULT_PAD_NDIMS] = {0};
   std::vector<MirrorPadBlock> mirror_pad_block_;
 };
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc
index c9d6819b3ae..4e84593cf88 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc
@@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion;
 
 namespace mindspore::kernel {
 int PoolingCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = PoolingBaseCPUKernel::Init();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "PoolingBase Init failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc
index b02ea7881db..f6666a734bf 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc
@@ -25,7 +25,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_PowFusion;
 
 namespace mindspore::kernel {
-int PowerCPUKernel::Init() { return RET_OK; }
+int PowerCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int PowerCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc
index f9dccae7138..c7e9da9cbf6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc
@@ -37,6 +37,8 @@ static int PReluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale)
 }
 
 int PReluCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (in_tensors_[1]->ElementsNum() == 1) {
     prelu_param_->channelShared = true;
   } else {
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc
index 7e3da02c0ef..45e1bcf1b24 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc
@@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_Range;
 
 namespace mindspore::kernel {
 int RangeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc
index 561bde27f72..aef350b3247 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc
@@ -27,7 +27,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Rank;
 
 namespace mindspore::kernel {
-int RankCPUKernel::Init() { return RET_OK; }
+int RankCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int RankCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
index c8824e3e57f..fabd47c76ce 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc
@@ -40,6 +40,8 @@ using mindspore::schema::ReduceMode_ReduceSumSquare;
 
 namespace mindspore::kernel {
 int ReduceCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = ReduceBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc
index cb7dce95b27..087e08ed776 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc
@@ -687,6 +687,8 @@ void RelativePositionAttentionCPUKernel::FreeAllPackData() {
 }
 
 int RelativePositionAttentionCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_11D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = CheckWeights();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "CheckWeights failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc
index fbcd53ba4ad..f2a79f05881 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc
@@ -37,6 +37,8 @@ constexpr int kResizeSizeDouble = 2;
 }  // namespace
 
 int ResizeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = ResizeBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
@@ -115,29 +117,30 @@ int ResizeCPUKernel::MallocTmpBuffer() {
 
   // malloc memory for x, y coordinates
   {
-    coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len));
+    coordinate_.x_lefts_ = reinterpret_cast<int *>(malloc(static_cast<int>(sizeof(int)) * x_len));
     CHECK_MALLOC_RES(coordinate_.x_lefts_, RET_NULL_PTR)
-    coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len));
+    coordinate_.y_tops_ = reinterpret_cast<int *>(malloc(static_cast<int>(sizeof(int)) * y_len));
     CHECK_MALLOC_RES(coordinate_.y_tops_, RET_NULL_PTR)
     if (method_ == static_cast<int>(schema::ResizeMethod_LINEAR)) {
-      coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(sizeof(int) * x_len));
+      coordinate_.x_rights_ = reinterpret_cast<int *>(malloc(static_cast<int>(sizeof(int)) * x_len));
       CHECK_MALLOC_RES(coordinate_.x_rights_, RET_NULL_PTR)
-      coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(sizeof(int) * y_len));
+      coordinate_.y_bottoms_ = reinterpret_cast<int *>(malloc(static_cast<int>(sizeof(int)) * y_len));
       CHECK_MALLOC_RES(coordinate_.y_bottoms_, RET_NULL_PTR)
     }
   }
 
   // malloc memory for weights of x, y axes
   {
-    x_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * x_weight_len));
+    x_weights_ = reinterpret_cast<float *>(malloc(static_cast<int>(sizeof(float)) * x_weight_len));
     CHECK_MALLOC_RES(x_weights_, RET_NULL_PTR)
-    y_weights_ = reinterpret_cast<float *>(malloc(sizeof(float) * y_weight_len));
+    y_weights_ = reinterpret_cast<float *>(malloc(static_cast<int>(sizeof(float)) * y_weight_len));
     CHECK_MALLOC_RES(y_weights_, RET_NULL_PTR)
   }
 
   {
-    line_buffer_ = reinterpret_cast<float *>(
-      malloc(sizeof(float) * x_len * in_tensors_.at(0)->Channel() * kResizeSizeDouble * op_parameter_->thread_num_));
+    line_buffer_ =
+      reinterpret_cast<float *>(malloc(static_cast<int>(sizeof(float)) * x_len * in_tensors_.at(0)->Channel() *
+                                       kResizeSizeDouble * op_parameter_->thread_num_));
     CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR)
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc
index fe42dac2a97..4c0d98570d7 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc
@@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_ReverseV2;
 namespace mindspore::kernel {
 int ReverseCPUKernel::Stride(int index) {
   int stride = 1;
-  for (size_t i = index + 1; i < in_tensors_.at(0)->shape().size(); ++i) {
+  for (size_t i = static_cast<int>(index) + 1; i < in_tensors_.at(0)->shape().size(); ++i) {
     stride *= in_tensors_.at(0)->shape().at(i);
   }
   return stride;
@@ -63,12 +63,12 @@ int ReverseCPUKernel::ReSize() {
     free(tmp_);
     tmp_ = nullptr;
   }
-  tmp_ = reinterpret_cast<int *>(malloc(data_size_ * sizeof(int)));
+  tmp_ = reinterpret_cast<int *>(malloc(data_size_ * static_cast<int>(sizeof(int))));
   if (tmp_ == nullptr) {
     MS_LOG(ERROR) << "Reverse Malloc tmp_ error!";
     return RET_ERROR;
   }
-  (void)memset(tmp_, 0, data_size_ * sizeof(int));
+  (void)memset(tmp_, 0, data_size_ * static_cast<int>(sizeof(int)));
 
   for (int i = 0; i < param->num_axis_; i++) {
     int axis = param->axis_[i];
@@ -98,6 +98,8 @@ int ReverseCPUKernel::ReSize() {
 }
 
 int ReverseCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -142,7 +144,7 @@ int ReverseCPUKernel::Run() {
 
 void ReverseCPUKernel::UpdateAxisInfo() {
   auto reverse_param = reinterpret_cast<ReverseParameter *>(op_parameter_);
-  int in_shape_len = in_tensors_.front()->shape().size();
+  int in_shape_len = static_cast<int>(in_tensors_.front()->shape().size());
   for (int i = 0; i < reverse_param->num_axis_; ++i) {
     if (reverse_param->axis_[i] < 0) {
       reverse_param->axis_[i] += in_shape_len;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc
index 765c9d362c7..fd211394654 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc
@@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_ReverseSequence;
 
 namespace mindspore::kernel {
 int ReverseSequenceCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc
index 4125e856042..4744e78952c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc
@@ -29,6 +29,8 @@ using mindspore::schema::PrimitiveType_ROIPooling;
 
 namespace mindspore::kernel {
 int ROIPoolingCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -42,7 +44,7 @@ int ROIPoolingCPUKernel::ReSize() {
   }
   auto in_shape = in_tensors_.front()->shape();
   auto out_shape = out_tensors_.front()->shape();
-  int ndims = in_shape.size();
+  int ndims = static_cast<int>(in_shape.size());
   if (ndims < C4NUM) {
     MS_LOG(ERROR) << "ROIPooling in_shape.size() error ,shape dim greater than or equal to 4!";
     return RET_ERROR;
@@ -67,7 +69,7 @@ int ROIPoolingCPUKernel::ReSize() {
     param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1];
   }
   param_->thread_num_ = MSMIN(param_->op_parameter_.thread_num_, out_shape.at(0));
-  max_c_ = reinterpret_cast<float *>(malloc(param_->input_c_ * sizeof(float)));
+  max_c_ = reinterpret_cast<float *>(malloc(param_->input_c_ * static_cast<int>(sizeof(float))));
   if (max_c_ == nullptr) {
     MS_LOG(ERROR) << "malloc max_c failed.";
     return RET_MEMORY_FAILED;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc
index 9448ac4fa2c..0542f9b2489 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc
@@ -117,10 +117,8 @@ int ScaleCPUKernel::CalculateParameter() {
 }
 
 int ScaleCPUKernel::Init() {
-  if (in_tensors_.size() < 2 || in_tensors_.size() > 3) {
-    MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given.";
-    return RET_ERROR;
-  }
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = InitScaleOffset();
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed.";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc
index 20304f80dd1..f9afe0a5427 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc
@@ -34,6 +34,8 @@ constexpr int kScatterIndicesIndex = 1;
 constexpr int kScatterUpdateIndex = 2;
 }  // namespace
 int ScatterNDCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc
index 38f8be7ed8c..9392ca914e9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc
@@ -26,7 +26,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Shape;
 
 namespace mindspore::kernel {
-int ShapeCPUKernel::Init() { return RET_OK; }
+int ShapeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int ShapeCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc
index cd056de6684..25e3d22ecea 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc
@@ -25,7 +25,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Size;
 
 namespace mindspore::kernel {
-int SizeCPUKernel::Init() { return RET_OK; }
+int SizeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int SizeCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
index 2518347add1..9654b6db0ed 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc
@@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_Softmax;
 
 namespace mindspore::kernel {
 int SoftmaxCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto ret = SoftmaxBaseCPUKernel::Init();
   if (ret != RET_OK) {
     return ret;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
index b76872a21a0..4a1b974a589 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc
@@ -53,6 +53,8 @@ void SpaceToBatchCPUKernel::ProcessInput() {
 }
 
 int SpaceToBatchCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
index b3f1cfddc68..d895051791b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc
@@ -32,6 +32,8 @@ using mindspore::schema::PrimitiveType_SpaceToDepth;
 
 namespace mindspore::kernel {
 int SpaceToDepthCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   SpaceToDepthParameter *param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_);
   if (param->block_size_ <= 0) {
     MS_LOG(ERROR) << "Input block_size should > 0!";
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc
index 819ae42388a..2f4db92acc9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc
@@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_SparseToDense;
 
 namespace mindspore::kernel {
 int SparseToDenseCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   auto input2 = in_tensors_.at(2);
   MS_ASSERT(input2);
   auto input3 = in_tensors_.at(3);
@@ -49,7 +51,7 @@ int SparseToDenseCPUKernel::ReSize() {
   auto output0 = out_tensors_.at(0);
   std::vector<int> out_shape_tensor = output0->shape();
   auto output_shape_tmp = reinterpret_cast<int *>(out_shape_tensor.data());
-  int output_dim = output0->shape().size();
+  int output_dim = static_cast<int>(output0->shape().size());
   for (int i = 0; i < DIMENSION_4D - output_dim; i++) {
     output_shape[i] = 1;
   }
@@ -98,12 +100,13 @@ int SparseToDenseCPUKernel::GenerateIndices() {
     MS_LOG(ERROR) << "Input dim is invalid, dim: " << index_num;
     return RET_ERROR;
   }
-  sparse_indices_vect = reinterpret_cast<int **>(ctx_->allocator->Malloc(sizeof(int *) * index_num));
+  sparse_indices_vect =
+    reinterpret_cast<int **>(ctx_->allocator->Malloc(sizeof(int *) * static_cast<size_t>(index_num)));
   if (sparse_indices_vect == nullptr) {
     MS_LOG(ERROR) << "Null pointer reference: sparse_indices_vect.";
     return RET_ERROR;
   }
-  index_dim = input0->shape().size();
+  index_dim = static_cast<int>(input0->shape().size());
   int *sparse_indices = reinterpret_cast<int *>(input0->MutableData());
   switch (index_dim) {
     case 0:
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc
index 9ecfb9d9743..40402526c03 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc
@@ -27,7 +27,11 @@ using mindspore::lite::RET_OK;
 using mindspore::lite::RET_PARAM_INVALID;
 using mindspore::schema::PrimitiveType_Splice;
 namespace mindspore::kernel {
-int SpliceCPUKernel::Init() { return RET_OK; }
+int SpliceCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int SpliceCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc
index 71d4047a310..ad0940f3c1b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc
@@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_TopKFusion;
 
 namespace mindspore::kernel {
 int TopKCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   topk_param_->topk_node_list_ = nullptr;
   if (!InferShapeDone()) {
     return RET_OK;
@@ -59,7 +61,8 @@ int TopKCPUKernel::Run() {
     MS_LOG(ERROR) << "The k value is out of the data size range.";
     return RET_ERROR;
   }
-  topk_param_->topk_node_list_ = ms_context_->allocator->Malloc(sizeof(TopkNode) * topk_param_->last_dim_size_);
+  topk_param_->topk_node_list_ =
+    ms_context_->allocator->Malloc(static_cast<int>(sizeof(TopkNode)) * topk_param_->last_dim_size_);
   if (topk_param_->topk_node_list_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
index 5aad5a12b8f..9562d2a4250 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc
@@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_Transpose;
 
 namespace mindspore::kernel {
 int TransposeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), C2NUM);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -113,7 +115,7 @@ int TransposeCPUKernel::TransposeDimGreaterThan6(int task_id) {
   return RET_OK;
 }
 
-void TransposeCPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor) {
+void TransposeCPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor) {
   if (in_tensor->shape().size() != 4) {
     return;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h
index 6b3d1ee83c5..64df8a7f17f 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h
@@ -48,7 +48,7 @@ class TransposeCPUKernel : public InnerKernel {
   virtual int TransposeDim2to6();
   virtual int TransposeDimGreaterThan6(int task_id);
 
-  void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor);
+  void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor);
   void *in_data_ = nullptr;
   void *out_data_ = nullptr;
   int *out_shape_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc
index 1e5b39eff27..02d92a71c12 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc
@@ -43,6 +43,13 @@ constexpr size_t kThirdDataIndex = 2;
 constexpr size_t kFourthDataIndex = 3;
 constexpr size_t kBitWidth = 32;
 constexpr size_t kPerSegNum = 4;
+constexpr size_t kFirstDataStride = 1;
+constexpr size_t kSecondDataStride = 2;
+constexpr size_t kThirdDataStride = 3;
+constexpr size_t kFirstRandNum = 0;
+constexpr size_t kSecondRandNum = 1;
+constexpr size_t kThirdRandNum = 2;
+constexpr size_t kFourthRandNum = 3;
 }  // namespace
 
 class PhiloxRandom {
@@ -184,10 +191,10 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) {
     for (size_t i = 1; i < length / kPerSegNum; i++) {
       philoxRandom.Skip(0);
       randNum = philoxRandom.operator()();
-      data[kPerSegNum * i] = uint32ToFloat(randNum[0]);
-      data[kPerSegNum * i + 1] = uint32ToFloat(randNum[1]);
-      data[kPerSegNum * i + 2] = uint32ToFloat(randNum[2]);
-      data[kPerSegNum * i + 3] = uint32ToFloat(randNum[3]);
+      data[kPerSegNum * i] = uint32ToFloat(randNum[kFirstRandNum]);
+      data[kPerSegNum * i + kFirstDataStride] = uint32ToFloat(randNum[kSecondRandNum]);
+      data[kPerSegNum * i + kSecondDataStride] = uint32ToFloat(randNum[kThirdRandNum]);
+      data[kPerSegNum * i + kThirdDataStride] = uint32ToFloat(randNum[kFourthRandNum]);
     }
     philoxRandom.Skip(0);
     randNum = philoxRandom.operator()();
@@ -197,7 +204,11 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) {
   }
 }
 
-int UniformRealCPUKernel::Init() { return RET_OK; }
+int UniformRealCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int UniformRealCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc
index f449770b373..024c1c459e3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc
@@ -23,7 +23,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_Unique;
 
 namespace mindspore::kernel {
-int UniqueCPUKernel::Init() { return RET_OK; }
+int UniqueCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int UniqueCPUKernel::ReSize() { return RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc
index af8ed3aabb1..ffc7438dd00 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc
@@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Unstack;
 
 namespace mindspore::kernel {
 int UnstackCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   if (!InferShapeDone()) {
     return RET_OK;
   }
@@ -40,7 +42,7 @@ int UnstackCPUKernel::ReSize() {
   para->axis_dim_ = 1;
   para->after_dims_ = 1;
   if (para->axis_ < 0) {
-    para->axis_ += shape_size;
+    para->axis_ += static_cast<int>(shape_size);
   }
 
   for (size_t i = 0; i < shape_size; i++) {
@@ -73,7 +75,7 @@ int UnstackCPUKernel::Run() {
   }
   MS_ASSERT(output_addr_array_);
   auto para = reinterpret_cast<UnstackParameter *>(op_parameter_);
-  para->num_ = out_num;
+  para->num_ = static_cast<int>(out_num);
   Unstack(input, output_addr_array_, para, sizeof(float));
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc
index f1f6e19cd47..d3da01c8e34 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc
@@ -32,6 +32,8 @@ namespace mindspore::kernel {
 constexpr uint32_t kSingleNum = 1;
 constexpr uint32_t kTripleNum = 3;
 int WhereCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
   where_param_->op_parameter_.thread_num_ = thread_count_;
   return RET_OK;
 }
@@ -69,11 +71,12 @@ int WhereCPUKernel::RunWithSingleInput() {
   MS_ASSERT(input);
   condition_ = reinterpret_cast<bool *>(input->data_c());
   where_param_->condition_num_ = input->ElementsNum();
-  where_param_->rank_ = input->shape().size();
+  where_param_->rank_ = static_cast<int>(input->shape().size());
   int strides[8];
   ComputeStrides(in_tensors_.at(0)->shape().data(), strides, where_param_->rank_);
 
-  auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ * sizeof(int32_t));
+  auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ *
+                                             static_cast<int>(sizeof(int32_t)));
   if (data == nullptr) {
     MS_LOG(ERROR) << "macllov data is error!";
     return RET_ERROR;
@@ -104,7 +107,7 @@ int WhereCPUKernel::RunWithSingleInput() {
     MS_LOG(ERROR) << "malloc out tensor failed.";
     return RET_ERROR;
   }
-  memcpy(out_data, result, true_num * where_param_->rank_ * sizeof(int32_t));
+  memcpy(out_data, result, true_num * where_param_->rank_ * static_cast<int>(sizeof(int32_t)));
   ms_context_->allocator->Free(data);
   return RET_OK;
 }
@@ -159,6 +162,9 @@ int WhereCPUKernel::Run() {
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Where op run failed.";
   }
+  for (auto *output : this->out_tensors()) {
+    output->ResetRefCount();
+  }
   return ret;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc
index 37e7c8ec376..a21d06b7781 100644
--- a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc
@@ -27,7 +27,11 @@ using mindspore::lite::RET_OK;
 using mindspore::schema::PrimitiveType_ZerosLike;
 
 namespace mindspore::kernel {
-int ZerosLikeCPUKernel::Init() { return RET_OK; }
+int ZerosLikeCPUKernel::Init() {
+  CHECK_LESS_RETURN(in_tensors_.size(), 1);
+  CHECK_LESS_RETURN(out_tensors_.size(), 1);
+  return RET_OK;
+}
 
 int ZerosLikeCPUKernel::Run() {
   auto output_data = reinterpret_cast<float *>(out_tensors_.at(0)->MutableData());
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
index 7e42b5553d0..79f30b310ca 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h
@@ -42,7 +42,7 @@ class ArithmeticInt8CPUKernel : public InnerKernel {
   int8_t *tile_data0_{nullptr};
   int8_t *tile_data1_{nullptr};
   ArithmeticRunInt8 arithmetic_run_{nullptr};
-  ArithmeticQuantArg quant_args_;
+  ArithmeticQuantArg quant_args_ = {};
 };
 }  // namespace mindspore::kernel
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
index cf4698196c1..c869ba490c9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc
@@ -48,12 +48,12 @@ int BatchnormInt8CPUKernel::InitConstTensor() {
 
   auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
   auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
-  alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
+  alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
   if (alpha_addr_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
+  beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
   if (beta_addr_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
@@ -92,12 +92,12 @@ int BatchnormInt8CPUKernel::InitFusedConstTensor() {
   auto mean_ptr = reinterpret_cast<int8_t *>(mean->MutableData());
   auto var_ptr = reinterpret_cast<int8_t *>(variance->MutableData());
 
-  alpha_addr_ = reinterpret_cast<float *>(malloc(mean->ElementsNum() * sizeof(float)));
+  alpha_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(mean->ElementsNum()) * sizeof(float)));
   if (alpha_addr_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  beta_addr_ = reinterpret_cast<float *>(malloc(variance->ElementsNum() * sizeof(float)));
+  beta_addr_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(variance->ElementsNum()) * sizeof(float)));
   if (beta_addr_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
index 017a674168e..e185ff3f9e5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc
@@ -59,11 +59,12 @@ int ConcatInt8CPUKernel::Init() {
 }
 
 int ConcatInt8CPUKernel::ReSize() {
-  concat_param_->axis_ =
-    concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_;
+  concat_param_->axis_ = concat_param_->axis_ >= 0
+                           ? concat_param_->axis_
+                           : static_cast<int>(in_tensors_.front()->shape().size()) + concat_param_->axis_;
 
   auto input_num = in_tensors_.size();
-  concat_param_->input_num_ = input_num;
+  concat_param_->input_num_ = static_cast<int>(input_num);
   concat_param_->input_shapes_ = reinterpret_cast<int **>(malloc(sizeof(int *) * input_num));
   if (concat_param_->input_shapes_ == nullptr) {
     MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed.";
@@ -97,7 +98,7 @@ int ConcatInt8CPUKernel::ReSize() {
   memcpy(reinterpret_cast<void *>(concat_param_->output_shapes_), output_tensor->shape().data(),
          sizeof(int) * output_dim);
 
-  for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) {
+  for (size_t i = static_cast<size_t>(concat_param_->axis_ + 1); i < output_dim; i++) {
     after_axis_size *= concat_param_->output_shapes_[i];
   }
   concat_param_->after_axis_size = after_axis_size;
@@ -122,21 +123,17 @@ int ConcatInt8CPUKernel::Run() {
 
 int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto concat = reinterpret_cast<ConcatInt8CPUKernel *>(cdata);
-  auto ret = concat->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "ConcatInt8Run task_id " << task_id << " failed.";
-    return ret;
-  }
+  concat->DoExecute(task_id);
   return lite::RET_OK;
 }
 
-int ConcatInt8CPUKernel::DoExecute(int task_id) {
+void ConcatInt8CPUKernel::DoExecute(int task_id) {
   int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_);
   if (real_dst_count <= 0) {
-    return lite::RET_OK;
+    return;
   }
   Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id);
-  return lite::RET_OK;
+  return;
 }
 
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator<ConcatInt8CPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h
index aa9f32d2791..8ea19039d86 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h
@@ -57,7 +57,7 @@ class ConcatInt8CPUKernel : public InnerKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int DoExecute(int task_id);
+  void DoExecute(int task_id);
 
  private:
   int64_t before_axis_size = 0;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
index 7938fbdfb34..ab580f81c10 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h
@@ -33,7 +33,7 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   Convolution1x1Int8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~Convolution1x1Int8CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
index 65d46c85614..a1776ef639a 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc
@@ -25,7 +25,7 @@ namespace mindspore::kernel {
 namespace {
 constexpr size_t kUnitBufferMultipler = 4 * 4;
 }  // namespace
-int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) {
+int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param) {
   auto input_channel = conv_param->input_channel_;
   auto output_channel = conv_param->output_channel_;
   auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_;
@@ -116,7 +116,7 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() {
   memset(bias_data_, 0, new_bias_size);
   if (in_tensors_.size() == kInputSize2) {
     auto ori_bias_addr = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->MutableData());
-    memcpy(bias_data_, ori_bias_addr, output_channel * sizeof(int32_t));
+    memcpy(bias_data_, ori_bias_addr, static_cast<size_t>(output_channel) * sizeof(int32_t));
   } else {
     MS_ASSERT(in_tensors_.size() == kInputSize1);
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h
index 6b3c087de86..60d6307739b 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h
@@ -27,7 +27,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   Convolution3x3Int8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                               const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~Convolution3x3Int8CPUKernel() override;
 
   int Init() override;
@@ -46,7 +46,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
   int32_t *tmp_dst_buffer_ = nullptr;
   int8_t *tmp_out_ = nullptr;
 };
-int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param);
+int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param);
 }  // namespace mindspore::kernel
 
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc
index 54df66909ea..182b0f859d5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc
@@ -60,13 +60,13 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
   PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(),
                      weight_tensor->Batch());
 
-  packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
+  packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     free(tmp_weight);
     return RET_ERROR;
   }
-  bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
+  bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
   if (filter_per_channel) {
     for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
       for (int c = 0; c < channel; c++) {
@@ -87,16 +87,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() {
   }
   free(tmp_weight);
 
-  bias_data_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  memset(bias_data_, 0, channel * sizeof(int32_t));
+  memset(bias_data_, 0, static_cast<size_t>(channel) * sizeof(int32_t));
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t));
+    memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
   }
   return RET_OK;
 }
@@ -153,7 +153,8 @@ int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale)
 
 int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() {
   int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_;
-  buffer_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(buffer_size * sizeof(int8_t)));
+  buffer_ =
+    reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(buffer_size) * sizeof(int8_t)));
   if (buffer_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h
index 58a41e97ec1..93a50ccc0be 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h
@@ -27,7 +27,7 @@ class ConvolutionDepthwise3x3Int8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwise3x3Int8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                        const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~ConvolutionDepthwise3x3Int8CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
index e689107940c..803445f12a5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc
@@ -55,7 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() {
     return RET_ERROR;
   }
 
-  bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL;
+  bool filter_per_channel = static_cast<bool>(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL);
   if (filter_per_channel) {
     for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) {
       for (int c = 0; c < channel; c++) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h
index 5f27cef2b85..ccb22bf2109 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h
@@ -27,7 +27,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                     const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~ConvolutionDepthwiseInt8CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
index 4f5166a7f5b..208a2684bc6 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc
@@ -42,7 +42,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   auto origin_weight = reinterpret_cast<int8_t *>(weight_tensor->MutableData());
   int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM);
   int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width();
-  packed_weight_ = reinterpret_cast<int16_t *>(malloc(pack_weight_size * sizeof(int16_t)));
+  packed_weight_ = reinterpret_cast<int16_t *>(malloc(static_cast<size_t>(pack_weight_size) * sizeof(int16_t)));
   if (packed_weight_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
@@ -50,16 +50,16 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() {
   PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(),
                           weight_tensor->Batch(), &(conv_param_->conv_quant_arg_));
 
-  bias_data_ = reinterpret_cast<int32_t *>(malloc(C8NUM * OC8 * sizeof(int32_t)));
+  bias_data_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t)));
   if (bias_data_ == nullptr) {
     MS_LOG(ERROR) << "Malloc buffer failed.";
     return RET_ERROR;
   }
-  memset(bias_data_, 0, C8NUM * OC8 * sizeof(int32_t));
+  memset(bias_data_, 0, static_cast<size_t>(C8NUM * OC8) * sizeof(int32_t));
   if (in_tensors_.size() == kInputSize2) {
     auto bias_tensor = in_tensors_.at(kBiasIndex);
     auto ori_bias = reinterpret_cast<int32_t *>(bias_tensor->MutableData());
-    memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t));
+    memcpy(bias_data_, ori_bias, static_cast<size_t>(bias_tensor->ElementsNum()) * sizeof(int32_t));
   }
 
   conv_param_->thread_num_ = MSMIN(thread_count_, OC8);
@@ -72,7 +72,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
 
     int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM *
                           UP_DIV(conv_param_->input_channel_, C8NUM);
-    packed_input_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_input_size * sizeof(int8_t)));
+    packed_input_ =
+      reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(static_cast<size_t>(pack_input_size) * sizeof(int8_t)));
     if (packed_input_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
@@ -80,7 +81,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() {
 
     int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM *
                            UP_DIV(conv_param_->output_channel_, C8NUM);
-    packed_output_ = reinterpret_cast<int8_t *>(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t)));
+    packed_output_ = reinterpret_cast<int8_t *>(
+      ms_context_->allocator->Malloc(static_cast<size_t>(pack_output_size) * sizeof(int8_t)));
     if (packed_output_ == nullptr) {
       MS_LOG(ERROR) << "Malloc buffer failed.";
       return RET_ERROR;
@@ -150,10 +152,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
 
   auto input_tensor = in_tensors_.at(kInputIndex);
   auto channel = conv_param_->input_channel_;
-  input_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
+  input_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
   MSLITE_CHECK_PTR(input_scale_);
 
-  input_zp_ = reinterpret_cast<int8_t *>(malloc(channel * sizeof(int8_t)));
+  input_zp_ = reinterpret_cast<int8_t *>(malloc(static_cast<size_t>(channel) * sizeof(int8_t)));
   MSLITE_CHECK_PTR(input_zp_);
 
   if (input_tensor->quant_params().size() == kPerTensor) {
@@ -171,10 +173,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
   }
 
   auto output_tensor = out_tensors_.at(kOutputIndex);
-  output_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
+  output_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
   MSLITE_CHECK_PTR(output_scale_);
 
-  output_zp_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  output_zp_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(output_zp_);
 
   if (output_tensor->quant_params().size() == kPerTensor) {
@@ -191,25 +193,26 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() {
     }
   }
 
-  conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(channel * sizeof(double)));
+  conv_quant_arg_->real_multiplier_ = reinterpret_cast<double *>(malloc(static_cast<size_t>(channel) * sizeof(double)));
   MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_);
 
-  conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  conv_quant_arg_->left_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_);
 
-  conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  conv_quant_arg_->right_shift_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_);
 
-  conv_quant_arg_->quant_multiplier_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  conv_quant_arg_->quant_multiplier_ =
+    reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_);
 
-  conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  conv_quant_arg_->out_act_min_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_);
 
-  conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(channel * sizeof(int32_t)));
+  conv_quant_arg_->out_act_max_ = reinterpret_cast<int32_t *>(malloc(static_cast<size_t>(channel) * sizeof(int32_t)));
   MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_);
 
-  weight_scale_ = reinterpret_cast<float *>(malloc(channel * sizeof(float)));
+  weight_scale_ = reinterpret_cast<float *>(malloc(static_cast<size_t>(channel) * sizeof(float)));
   MSLITE_CHECK_PTR(weight_scale_);
 
   auto weight_tensor = in_tensors_.at(kWeightIndex);
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
index acbc0835ef7..b11576f43d4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h
@@ -28,7 +28,7 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionDepthwiseSWInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~ConvolutionDepthwiseSWInt8CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
index b64a6f8e0e1..1f0c35e4d2e 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc
@@ -98,12 +98,12 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
   memset(bias_data_, 0, bias_size);
   if (in_tensors_.size() == kInputSize2) {
     auto ori_bias = reinterpret_cast<int32_t *>(in_tensors_.at(kBiasIndex)->data_c());
-    memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t));
+    memcpy(bias_data_, ori_bias, static_cast<size_t>(output_channel) * sizeof(int32_t));
   } else {
     MS_ASSERT(in_tensors_.size() == kInputSize1);
   }
   auto *bias_data = reinterpret_cast<int32_t *>(bias_data_);
-  bool filter_peroc = conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL;
+  bool filter_peroc = static_cast<bool>(conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL);
   if (filter_peroc) {
     filter_zp_ptr_ = reinterpret_cast<int32_t *>(malloc(output_channel * sizeof(int32_t)));
     if (filter_zp_ptr_ == nullptr) {
@@ -126,9 +126,9 @@ int ConvolutionInt8CPUKernel::InitWeightBias() {
 
   size_t input_sum_size;
   if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) {
-    input_sum_size = up_round_oc * tile_num_ * thread_count_ * sizeof(int32_t);
+    input_sum_size = static_cast<size_t>(up_round_oc * tile_num_ * thread_count_) * sizeof(int32_t);
   } else {
-    input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t);
+    input_sum_size = static_cast<size_t>(tile_num_ * thread_count_) * sizeof(int32_t);
   }
   input_sum_ = reinterpret_cast<int32_t *>(malloc(input_sum_size));
   if (input_sum_ == nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h
index 8afc0c2ed14..bdff948a3e9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h
@@ -28,7 +28,7 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   ConvolutionInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                            const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~ConvolutionInt8CPUKernel() override {
     FreeQuantParam();
     if (packed_weight_ != nullptr) {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
index e1a628ce206..62d110e5cda 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc
@@ -57,21 +57,16 @@ int CropInt8CPUKernel::Run() {
 
 int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto crop = reinterpret_cast<CropInt8CPUKernel *>(cdata);
-  auto ret = crop->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "CropInt8Run task id " << task_id << " run failed.";
-    return ret;
-  }
+  crop->DoExecute(task_id);
   return RET_OK;
 }
 
-int CropInt8CPUKernel::DoExecute(int task_id) {
+void CropInt8CPUKernel::DoExecute(int task_id) {
   auto input_tensor = in_tensors_.at(kInputIndex);
   auto out_tensor = out_tensors_.at(kOutputIndex);
   int8_t *input_data = reinterpret_cast<int8_t *>(input_tensor->data_c());
   int8_t *output_data = reinterpret_cast<int8_t *>(out_tensor->data_c());
   Int8Crop(input_data, output_data, task_id, crop_para_);
-  return RET_OK;
 }
 
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator<CropInt8CPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h
index 788d5207e0b..99f1d7a4078 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h
@@ -36,7 +36,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int DoExecute(int task_id);
+  void DoExecute(int task_id);
 };
 
 int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale);
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h
index aef09fbab57..f85c3343a73 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h
@@ -27,7 +27,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeconvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~DeconvolutionDepthwiseInt8CPUKernel() override;
 
   int Init() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h
index 97489e36679..b80dd7c67f2 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h
@@ -33,7 +33,7 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel {
  public:
   DeConvInt8CPUKernel(OpParameter *parameter, const std::vector<lite::Tensor *> &inputs,
                       const std::vector<lite::Tensor *> &outputs, const InnerContext *ctx)
-      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {}
+      : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {}
   ~DeConvInt8CPUKernel() override;
 
   int ReSize() override;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
index 2efab7a88a2..d2d8e3b2c37 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc
@@ -57,7 +57,7 @@ int GatherNdInt8CPUKernel::ReSize() {
 
   auto indices_tensor = in_tensors_.at(1);
   auto indices_shape = indices_tensor->shape();
-  int indices_rank = indices_shape.size();
+  int indices_rank = static_cast<size_t>(indices_shape.size());
   count_ = 1;
   for (int i = 0; i < indices_rank - 1; ++i) {
     count_ *= indices_shape[i];
@@ -66,12 +66,12 @@ int GatherNdInt8CPUKernel::ReSize() {
     MS_LOG(ERROR) << "count_ is invalid, count_: " << count_;
     return RET_ERROR;
   }
-  in_offset_ = reinterpret_cast<int *>(malloc(count_ * sizeof(int)));
+  in_offset_ = reinterpret_cast<int *>(malloc(static_cast<size_t>(count_) * sizeof(int)));
   if (in_offset_ == nullptr) {
     MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!";
     return RET_ERROR;
   }
-  (void)memset(in_offset_, 0, count_ * sizeof(int));
+  (void)memset(in_offset_, 0, static_cast<size_t>(count_) * sizeof(int));
   thread_sz_count_ = MSMIN(thread_count_, count_);
   if (thread_sz_count_ == 0) {
     MS_LOG(ERROR) << "div zero";
@@ -85,9 +85,9 @@ int GatherNdInt8CPUKernel::InitOffset() {
   auto ind_quant_args = in_tensors_.at(1)->quant_params();
   auto indices_tensor = in_tensors_.at(1);
   auto indices_shape = indices_tensor->shape();
-  int indices_rank = indices_shape.size();
+  int indices_rank = static_cast<size_t>(indices_shape.size());
   auto in_shape = in_tensors_.front()->shape();
-  int in_rank = in_shape.size();
+  int in_rank = static_cast<size_t>(in_shape.size());
   if (indices_rank < 1) {
     MS_LOG(ERROR) << "inex out of bounds";
     return RET_ERROR;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h
index eba1229ca0c..43d38e00043 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h
@@ -44,7 +44,7 @@ class GatherNdInt8CPUKernel : public InnerKernel {
   int *in_offset_ = nullptr;
   int8_t *in_ptr_ = nullptr;
   int8_t *out_ptr_ = nullptr;
-  GatherQuantArg param_;
+  GatherQuantArg param_ = {};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc
index ab7a19f7eef..dc624a12ef4 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc
@@ -29,7 +29,7 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) {
   int8_t *src_ptr = reinterpret_cast<int8_t *>(ori_in_data_) + group_id * sub_in_channel;
   int8_t *dst_ptr = sub_in_data;
   for (int i = 0; i < in_plane; ++i) {
-    memcpy(dst_ptr, src_ptr, sub_in_channel * sizeof(int8_t));
+    memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_in_channel) * sizeof(int8_t));
     src_ptr += ori_in_channel;
     dst_ptr += sub_in_channel;
   }
@@ -45,7 +45,7 @@ int GroupConvolutionInt8CPUKernel::PostConcat(int group_id) {
   int8_t *src_ptr = sub_out_data;
   int8_t *dst_ptr = reinterpret_cast<int8_t *>(ori_out_data_) + group_id * sub_out_channel;
   for (int i = 0; i < out_plane; ++i) {
-    memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(int8_t));
+    memcpy(dst_ptr, src_ptr, static_cast<size_t>(sub_out_channel) * sizeof(int8_t));
     src_ptr += sub_out_channel;
     dst_ptr += ori_out_channel;
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h
index 9eaf4883a1f..6d7c057f262 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h
@@ -37,7 +37,7 @@ class HswishInt8CPUKernel : public InnerKernel {
 
  private:
   int thread_count_;
-  HswishQuantArg quant_arg_;
+  HswishQuantArg quant_arg_ = {};
   void MultiplierInt32ToInt16(int32_t input, int16_t *output) const;
 };
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h
index e112d6fa4af..137ebe2d6b0 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h
@@ -39,7 +39,7 @@ class LeakyReluInt8CPUKernel : public InnerKernel {
   int DoExecute(int task_id);
 
  private:
-  LeakyReluQuantArg quant_prelu_parm_;
+  LeakyReluQuantArg quant_prelu_parm_ = {};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
index 46f1b2ddcff..fe8cd176587 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc
@@ -187,29 +187,21 @@ int MulInt8CPUKernel::Run() {
 
 int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
-  auto ret = mul->FastDoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "FastHWBroadcastMulInt8Run task_id " << task_id << " failed.";
-    return ret;
-  }
+  mul->FastDoExecute(task_id);
   return lite::RET_OK;
 }
 
 int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto mul = reinterpret_cast<MulInt8CPUKernel *>(cdata);
-  auto ret = mul->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "MulInt8Run task_id " << task_id << " failed.";
-    return ret;
-  }
+  mul->DoExecute(task_id);
   return lite::RET_OK;
 }
 
-int MulInt8CPUKernel::FastDoExecute(int task_id) {
+void MulInt8CPUKernel::FastDoExecute(int task_id) {
   int depth = out_tensors_.front()->Channel();
   int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
   if (real_dst_count <= 0) {
-    return lite::RET_OK;
+    return;
   }
   int8_t *cur_input0_data = input0_data_;
   int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth;
@@ -219,20 +211,19 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) {
     cur_input1_data = input0_data_ + task_id * count_unit_ * depth;
   }
   FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_);
-  return RET_OK;
 }
 
-int MulInt8CPUKernel::DoExecute(int task_id) {
+void MulInt8CPUKernel::DoExecute(int task_id) {
   int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
   if (real_dst_count <= 0) {
-    return lite::RET_OK;
+    return;
   }
   int8_t *cur_input0_data = input0_data_ + task_id * count_unit_;
   int8_t *cur_input1_data = input1_data_ + task_id * count_unit_;
   int8_t *cur_output_data = output_data_ + task_id * count_unit_;
 
   Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_);
-  return lite::RET_OK;
+  return;
 }
 
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator<MulInt8CPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
index 4a82b301950..1d483f93d4c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h
@@ -39,8 +39,8 @@ class MulInt8CPUKernel : public InnerKernel {
   void CheckSameShapeSize(std::vector<int> in_tensor0_shape, std::vector<int> in_tensor1_shape);
   void CheckIfFastImpl();
   int Run() override;
-  int DoExecute(int task_id);
-  int FastDoExecute(int task_id);
+  void DoExecute(int task_id);
+  void FastDoExecute(int task_id);
 
  private:
   const lite::InnerContext *ctx_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc
index 7e24d9d7361..31552f5cc76 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc
@@ -30,16 +30,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
 }
 
 void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                  int32_t maxi, size_t per_channel) {
+                                  size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                  const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                  int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) {
   return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi,
                             output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
 }
 void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                   int32_t maxi, size_t per_channel, int32_t *filter_zp) {
+                                   size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                   const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                   int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
+                                   const int32_t *filter_zp) {
   return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift,
                          right_shift, stride, per_channel, filter_zp);
 }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h
index bf60e312410..302268d003c 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h
@@ -25,11 +25,11 @@ extern "C" {
 void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16,
                            const int *input_sum, const int *bias);
 void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums,
-                        const int *bias, int act_min, int act_max, int out_zp, int *multiplier, int *left_shift,
-                        int *right_shift, int row, int col, int stride, size_t peroc);
+                        const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
+                        const int *left_shift, const int *right_shift, int row, int col, int stride, size_t peroc);
 void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4,
-                     const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier,
-                     int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp);
+                     const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier,
+                     const int *left_shift, const int *right_shift, size_t stride, size_t peroc, const int *filter_zp);
 #ifdef ENABLE_ARM64
 void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
                                        size_t ksize, size_t ic4, size_t output_channel, size_t offset,
@@ -40,13 +40,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i
                                    const int *input_sum, const int *bias);
 
 void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                  size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                  int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                  int32_t maxi, size_t per_channel);
+                                  size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                  const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                  int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel);
 void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4,
-                                   size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift,
-                                   int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini,
-                                   int32_t maxi, size_t per_channel, int32_t *filter_zp);
+                                   size_t stride, const int32_t *input_sum, const int32_t *bias,
+                                   const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier,
+                                   int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel,
+                                   const int32_t *filter_zp);
 #endif
 
 #ifdef __cplusplus
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc
index 0b3c8ea1f87..d45afdee830 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc
@@ -85,7 +85,7 @@ int PadInt8CPUKernel::SetQuantParam() {
 int PadInt8CPUKernel::InitPadParam() {
   auto in_dims = in_tensors_.at(0)->shape();
   auto out_dims = out_tensors_.at(0)->shape();
-  int ndims = in_dims.size();
+  int ndims = static_cast<size_t>(in_dims.size());
 
   int in[] = {1, 1, 1, 1};
   int out[] = {1, 1, 1, 1};
@@ -267,7 +267,8 @@ int PadInt8CPUKernel::Run() {
 
   int error_code;
   if (pad_param_->pad_mode_ == static_cast<int>(schema::PaddingMode_CONSTANT)) {
-    memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t));
+    memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0],
+           static_cast<size_t>(out_tensors_[0]->ElementsNum()) * sizeof(int8_t));
     error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_);
     if (error_code != RET_OK) {
       MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]";
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h
index 6f4c0718542..a66943c81fd 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h
@@ -93,7 +93,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel {
   bool valid_shape_ = false;
   bool pattern_impl_ = false;
   Four_DIMENSION_REDUCE_TEMPLATE pattern_;
-  QuantMulArg reduce_mean_quant_param_;  // used in reduce mean 4D situation
+  QuantMulArg reduce_mean_quant_param_ = {};  // used in reduce mean 4D situation
   Reducer reducer_ = nullptr;
   LastReducer last_reducer_ = nullptr;
   std::vector<QuantMulArg *> mean_multipliers_;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h
index fad5a09c0f6..ffc79ac2f24 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h
@@ -37,7 +37,7 @@ class ReluXInt8CPUKernel : public InnerKernel {
   int Run() override;
   int DoActivation(int task_id);
 
-  ReluXQuantArg quant_arg_;
+  ReluXQuantArg quant_arg_ = {};
 
  private:
   int type_{0};
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc
index aa0362f5528..45fc3a784d9 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc
@@ -63,18 +63,14 @@ int ReshapeInt8CPUKernel::Run() {
 
 int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto reshape = reinterpret_cast<ReshapeInt8CPUKernel *>(cdata);
-  auto ret = reshape->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "Reshapeint8Run task_id " << task_id << " failed.";
-    return ret;
-  }
+  reshape->DoExecute(task_id);
   return lite::RET_OK;
 }
 
-int ReshapeInt8CPUKernel::DoExecute(int task_id) {
+void ReshapeInt8CPUKernel::DoExecute(int task_id) {
   int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_);
   if (real_dst_count <= 0) {
-    return lite::RET_OK;
+    return;
   }
   MS_ASSERT(input_data_);
   MS_ASSERT(output_data_);
@@ -82,7 +78,7 @@ int ReshapeInt8CPUKernel::DoExecute(int task_id) {
   int8_t *cur_output_data = output_data_ + task_id * count_unit_;
 
   Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_);
-  return lite::RET_OK;
+  return;
 }
 
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator<ReshapeInt8CPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h
index b0f5276c425..fa5b18c4f73 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h
@@ -37,7 +37,7 @@ class ReshapeInt8CPUKernel : public InnerKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int DoExecute(int task_id);
+  void DoExecute(int task_id);
 
  private:
   int64_t elements_num_ = 0;
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
index de1092a72ba..d77fb20b694 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc
@@ -37,20 +37,32 @@ constexpr unsigned int OFFSET_BASE = 10;
 }  // namespace
 void ResizeInt8CPUKernel::FreeResizeBiLinear() {
   free(resize_quant_arg_.x_axis_index_);
+  resize_quant_arg_.x_axis_index_ = nullptr;
   free(resize_quant_arg_.x_axis_lower_);
+  resize_quant_arg_.x_axis_lower_ = nullptr;
   free(resize_quant_arg_.x_axis_upper_);
+  resize_quant_arg_.x_axis_upper_ = nullptr;
   free(resize_quant_arg_.y_axis_index_);
+  resize_quant_arg_.y_axis_index_ = nullptr;
   free(resize_quant_arg_.y_axis_lower_);
+  resize_quant_arg_.y_axis_lower_ = nullptr;
   free(resize_quant_arg_.y_axis_upper_);
+  resize_quant_arg_.y_axis_upper_ = nullptr;
 }
 
 void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() {
   free(resize_float_quant_arg_.x_axis_index_);
+  resize_float_quant_arg_.x_axis_index_ = nullptr;
   free(resize_float_quant_arg_.x_axis_lower_);
+  resize_float_quant_arg_.x_axis_lower_ = nullptr;
   free(resize_float_quant_arg_.x_axis_upper_);
+  resize_float_quant_arg_.x_axis_upper_ = nullptr;
   free(resize_float_quant_arg_.y_axis_index_);
+  resize_float_quant_arg_.y_axis_index_ = nullptr;
   free(resize_float_quant_arg_.y_axis_lower_);
+  resize_float_quant_arg_.y_axis_lower_ = nullptr;
   free(resize_float_quant_arg_.y_axis_upper_);
+  resize_float_quant_arg_.y_axis_upper_ = nullptr;
 }
 
 ResizeInt8CPUKernel::~ResizeInt8CPUKernel() {
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h
index 874267bc9cb..6d5881c57bd 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h
@@ -52,8 +52,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel {
   QuantArg *quant_in_{nullptr};
   QuantArg *quant_out_{nullptr};
   QuantMulArg *multiplier_{nullptr};
-  ResizeQuantArg resize_quant_arg_;
-  ResizeFloatScaleQuantArg resize_float_quant_arg_;
+  ResizeQuantArg resize_quant_arg_ = {};
+  ResizeFloatScaleQuantArg resize_float_quant_arg_ = {};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
index ee42ef26f3c..3ae9295ee40 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc
@@ -42,7 +42,7 @@ int SplitInt8CPUKernel::Init() {
   auto in_quant_args = in_tensor->quant_params();
   param->quant_arg_.in_args_.scale_ = in_quant_args.front().scale;
   param->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint;
-  MS_ASSERT(param->num_split_ == this->out_tensors_.size());
+  MS_ASSERT(static_cast<size_t>(param->num_split_) == this->out_tensors_.size());
   for (int i = 0; i < param->num_split_; i++) {
     auto *out_tensor = out_tensors_.at(i);
     auto out_quant_args = out_tensor->quant_params();
@@ -91,7 +91,7 @@ int SplitInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
 int SplitInt8CPUKernel::Run() {
   auto in_tensor = in_tensors_.at(kInputIndex);
   input_ptr_ = reinterpret_cast<int8_t *>(in_tensor->MutableData());
-  MS_ASSERT(param->num_split_ == this->out_tensors_.size());
+  MS_ASSERT(static_cast<size_t>(param->num_split_) == this->out_tensors_.size());
   for (int i = 0; i < param->num_split_; i++) {
     output_ptr_[i] = reinterpret_cast<int8_t *>(out_tensors_.at(i)->data_c());
   }
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
index 884cd364a13..ed60486fc6d 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc
@@ -64,7 +64,7 @@ int SqueezeInt8CPUKernel::Init() {
   auto quant_params = output_tensor->quant_params();
   MS_ASSERT(quant_params.size() == 1);
   quant_squeeze_param_->out_quant_args_ = reinterpret_cast<QuantArg *>(malloc(sizeof(QuantArg)));
-  if (quant_squeeze_param_->in_quant_args_ == nullptr) {
+  if (quant_squeeze_param_->out_quant_args_ == nullptr) {
     MS_LOG(ERROR) << "malloc QuantArg failed";
     if (quant_squeeze_param_ != nullptr) {
       if (quant_squeeze_param_->in_quant_args_ != nullptr) {
@@ -97,15 +97,11 @@ int SqueezeInt8CPUKernel::Run() {
 
 int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) {
   auto Squeeze = reinterpret_cast<SqueezeInt8CPUKernel *>(cdata);
-  auto ret = Squeeze->DoExecute(task_id);
-  if (ret != RET_OK) {
-    MS_LOG(ERROR) << "SqueezeInt8Run task_id " << task_id << " failed.";
-    return ret;
-  }
+  Squeeze->DoExecute(task_id);
   return RET_OK;
 }
 
-int SqueezeInt8CPUKernel::DoExecute(int task_id) {
+void SqueezeInt8CPUKernel::DoExecute(int task_id) {
   auto input_tensor = in_tensors_.at(kInputIndex);
   MS_ASSERT(input_tensor);
   auto out_tensor = out_tensors_.at(kOutputIndex);
@@ -117,7 +113,6 @@ int SqueezeInt8CPUKernel::DoExecute(int task_id) {
 
   int num = input_tensor->ElementsNum();
   SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_);
-  return RET_OK;
 }
 
 REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator<SqueezeInt8CPUKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h
index 066f9987c2e..65b3d6b7fb5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h
@@ -36,7 +36,7 @@ class SqueezeInt8CPUKernel : public InnerKernel {
   int Init() override;
   int ReSize() override;
   int Run() override;
-  int DoExecute(int tId);
+  void DoExecute(int tId);
 
  private:
   SqueezeQuantArg *quant_squeeze_param_{nullptr};
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h
index 15df0e25cef..5507bc93255 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h
@@ -46,7 +46,7 @@ class TanhInt8CPUKernel : public InnerKernel {
   int element_size_{0};
   int thread_count_{0};
   int thread_stride_{0};
-  TanhQuantParameter tanh_quant_;
+  TanhQuantParameter tanh_quant_ = {};
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
index 3442093c104..1f981e90fc5 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc
@@ -79,7 +79,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) {
   return RET_OK;
 }
 
-void TransposeInt8CPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor,
+void TransposeInt8CPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
                                                   const TransposeParameter *param) {
   auto out_shape = out_tensor->shape();
   if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 &&
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
index c8aed254a6e..dbee9ab45c3 100644
--- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
+++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h
@@ -44,7 +44,8 @@ class TransposeInt8CPUKernel : public InnerKernel {
   int DoTranspose(int task_id);
 
  private:
-  void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, const TransposeParameter *param);
+  void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor,
+                            const TransposeParameter *param);
   TransposeParameter *transpose_param_;
   TransposeFunc NHNCTransposeFunc_ = nullptr;
   int8_t *in_ptr_ = nullptr;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl
index 130e296409f..bbc8a9852f6 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl
+++ b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl
@@ -1,6 +1,7 @@
 #ifdef cl_khr_fp16
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 #endif
+#define LOCAL_CACHE_THREAD 16
 #define divide_no_check(a, b) (a / b)
 __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
 __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
@@ -34,41 +35,11 @@ __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i
     }
   }
   FLT4 result = TO_FLT4(divide_no_check(r, window_size));
-  WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), result);
-}
-
-__kernel void AvgPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output,
-                                          const int4 input_shape, const int4 output_shape, const int2 stride,
-                                          const int2 kernel_size, const int2 padding) {
-  // axis to dst tensor coordinate
-  int X = get_global_id(2);  // N*H
-  int Y = get_global_id(1);  // W
-  int Z = get_global_id(0);  // C4
-  int N = X / output_shape.y;
-  X = X % output_shape.y;
-  // boundary check
-  if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
-    return;
-  }
-
-  FLT4 r = (FLT4)(0.0f);
-  FLT window_size = 0.0f;
-  int xs = X * stride.x - padding.x;
-  int ys = Y * stride.y - padding.y;
-
-  for (int ky = 0; ky < kernel_size.y; ++ky) {
-    int y_c = ys + ky;
-    bool outside_y = y_c < 0 || y_c >= input_shape.z;
-    for (int kx = 0; kx < kernel_size.x; ++kx) {
-      int x_c = xs + kx;
-      bool outside = outside_y || x_c < 0 || x_c >= input_shape.y;
-      r +=
-        !outside ? READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c)) : (FLT4)(0.0f);
-      window_size += !outside ? 1.0f : 0.0f;
-    }
-  }
-  FLT4 result = TO_FLT4(divide_no_check(r, window_size));
+#ifdef RELU
   WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(result, (FLT4)(0.f)));
+#else
+  WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), result);
+#endif
 }
 
 __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape,
@@ -98,35 +69,41 @@ __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i
       maximum = max(src, maximum);
     }
   }
+#ifdef RELU
+  WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f)));
+#else
   WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), maximum);
+#endif
 }
 
-__kernel void MaxPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output,
-                                          const int4 input_shape, const int4 output_shape, const int2 stride,
-                                          const int2 kernel_size, const int2 padding) {
-  // axis to dst tensor coordinate
-  int X = get_global_id(2);  // N*H
-  int Y = get_global_id(1);  // W
-  int Z = get_global_id(0);  // C4
-  int N = X / output_shape.y;
-  X = X % output_shape.y;
-  // boundary check
-  if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) {
-    return;
-  }
-
-  FLT4 maximum = (FLT4)(-10000.0f);
-  int xs = X * stride.x - padding.x;
-  int ys = Y * stride.y - padding.y;
-  for (int ky = 0; ky < kernel_size.y; ++ky) {
-    int y_c = ys + ky;
-    if (y_c < 0 || y_c >= input_shape.z) continue;
-    for (int kx = 0; kx < kernel_size.x; ++kx) {
-      int x_c = xs + kx;
-      if (x_c < 0 || x_c >= input_shape.y) continue;
-      FLT4 src = READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c));
-      maximum = max(src, maximum);
+__kernel void AvgPooling2d_global_NHWC4_IMG(__read_only image2d_t src_data, __write_only image2d_t dst_data,
+                                            int4 size) {
+  int X = get_global_id(0);  // C4
+  int localy = get_local_id(1);
+  int localz = get_local_id(2);
+  if (X >= size.z) return;
+  __local float4 temp[LOCAL_CACHE_THREAD][LOCAL_CACHE_THREAD];
+  temp[localy][localz] = (float4)0.f;
+  for (int h = localy; h < size.x; h += LOCAL_CACHE_THREAD) {
+    for (int w = localz; w < size.y; w += LOCAL_CACHE_THREAD) {
+      temp[localy][localz] += convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + X, h)));
     }
   }
-  WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f)));
+  barrier(CLK_LOCAL_MEM_FENCE);
+  if (localz == 0) {
+    for (int i = 1; i < LOCAL_CACHE_THREAD; i++) {
+      temp[localy][0] += temp[localy][i];
+    }
+  }
+  barrier(CLK_LOCAL_MEM_FENCE);
+  float4 result = temp[0][0];
+  for (int i = 1; i < LOCAL_CACHE_THREAD; i++) {
+    result += temp[i][0];
+  }
+  result /= size.x * size.y;
+#ifdef RELU
+  WRITE_IMAGE(dst_data, (int2)(X, 0), max(TO_FLT4(result), (FLT4)(0.f)));
+#else
+  WRITE_IMAGE(dst_data, (int2)(X, 0), TO_FLT4(result));
+#endif
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
index 0ac112b88d3..f7dab80ed41 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc
@@ -65,37 +65,53 @@ int ActivationOpenCLKernel::CheckSpecs() {
 int ActivationOpenCLKernel::Prepare() {
   outShape = GpuTensorInfo(out_tensors_[0]);
   std::string source = activation_source;
-  std::string program_name = "Activation";
+  const std::string program_name = "Activation";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
   }
-  std::string kernel_name = GetActTypeString(type_);
+  const std::string kernel_name = GetActTypeString(type_);
   auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " init Done!";
   return RET_OK;
 }
 
-void ActivationOpenCLKernel::SetConstArgs() {
+int ActivationOpenCLKernel::SetConstArgs() {
   int arg_idx = 2;
   cl_int2 image_size = {static_cast<int>(outShape.width), static_cast<int>(outShape.height)};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   if (type_ == ActivationType_LEAKY_RELU) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   if (type_ == ActivationType_SIGMOID) {
     int c4 = outShape.Slice;
     int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4;
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 
 void ActivationOpenCLKernel::SetGlobalLocal() {
@@ -107,8 +123,14 @@ void ActivationOpenCLKernel::SetGlobalLocal() {
 int ActivationOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail.";
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h
index 0c47e8955a3..7031a9a8f9e 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h
@@ -35,7 +35,7 @@ class ActivationOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
index 48e0cfe5054..8d7118776a5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc
@@ -16,6 +16,7 @@
 #include <cstring>
 #include <string>
 #include <functional>
+#include <algorithm>
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/opencl/utils.h"
 #include "src/runtime/kernel/opencl/kernel/argminmax.h"
@@ -58,19 +59,41 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void ArgMinMaxOpenCLKernel::SetConstArgs() {
+int ArgMinMaxOpenCLKernel::SetConstArgs() {
   auto param = reinterpret_cast<ArgMinMaxParameter *>(op_parameter_);
   cl_int4 in_shape{static_cast<int>(im_in_.N), static_cast<int>(im_in_.H), static_cast<int>(im_in_.W),
                    static_cast<int>(im_in_.C)};
   cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_};
   int arg_cnt = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void ArgMinMaxOpenCLKernel::SetGlobalLocal() {
@@ -134,14 +157,22 @@ int ArgMinMaxOpenCLKernel::InitWeights() {
   auto allocator = ocl_runtime_->GetAllocator();
   int dtype_size = ocl_runtime_->GetFp16Enable() ? sizeof(int16_t) : sizeof(float);
   buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF);
+  if (buff_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF);
+  if (ids_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 int ArgMinMaxOpenCLKernel::Prepare() {
-  std::string kernel_name = "argminmax";
+  const std::string kernel_name = "argminmax";
   std::string source = argminmax_source;
-  std::string program_name = "argminmax";
+  const std::string program_name = "argminmax";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -162,16 +193,28 @@ int ArgMinMaxOpenCLKernel::Prepare() {
 
   InitWeights();
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
 int ArgMinMaxOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h
index ec3b70ce256..220949e3e2c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h
@@ -32,7 +32,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int InitWeights() override;
   int Tune() override { return lite::RET_OK; }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
index 44ff1a45694..b5afadce8a3 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc
@@ -98,6 +98,10 @@ int ArithmeticOpenCLKernel::InitWeights() {
       size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT;
       ImageSize img_size{in_shape.width, in_shape.height, dtype};
       auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
+      if (weight_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
       weight_ptrs_.push_back(weight_ptr_);
     } else {
       weight_ptrs_.push_back(nullptr);
@@ -106,7 +110,7 @@ int ArithmeticOpenCLKernel::InitWeights() {
   return RET_OK;
 }
 
-void ArithmeticOpenCLKernel::SetConstArgs() {
+int ArithmeticOpenCLKernel::SetConstArgs() {
   int arg_idx = 3;
   if (!element_flag_) {
     cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W),
@@ -121,16 +125,38 @@ void ArithmeticOpenCLKernel::SetConstArgs() {
     } else if (in0_shape_.C != 1 && in1_shape_.C == 1) {
       broadcastC_flag = 2;  // BroadCast C4 in input1
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
     cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int ArithmeticOpenCLKernel::Prepare() {
@@ -179,7 +205,7 @@ int ArithmeticOpenCLKernel::Prepare() {
     activation_max_ = 6.f;
   }
 
-  std::string program_name = "Arithmetic";
+  const std::string program_name = "Arithmetic";
   std::string source = arithmetic_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -196,7 +222,10 @@ int ArithmeticOpenCLKernel::Prepare() {
   if (type() != PrimitiveType_BiasAdd) {
     InitWeights();
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name_ << " Init Done!";
   return RET_OK;
 }
@@ -206,10 +235,22 @@ int ArithmeticOpenCLKernel::Run() {
   auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0];
   auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1];
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
index ff7bfa922b1..e19386cf3b4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h
@@ -35,7 +35,7 @@ class ArithmeticOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
index 4a30f4c33c6..dbc619ab884 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc
@@ -86,7 +86,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() {
     kernel_name += std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4";
   }
   MS_LOG(DEBUG) << "execute kernel name : " << kernel_name;
-  std::string program_name = "ArithmeticSelf";
+  const std::string program_name = "ArithmeticSelf";
   if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -98,15 +98,27 @@ int ArithmeticSelfOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 int ArithmeticSelfOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
index 2419ee40783..4cd9e2ba16a 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h
@@ -47,7 +47,13 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); }
+  int SetConstArgs() override {
+    if (ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    return RET_OK;
+  }
   void SetGlobalLocal() override;
 
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc
index c0dbd556b05..105b5abb051 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc
@@ -55,7 +55,7 @@ int BatchToSpaceNDOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void BatchToSpaceNDOpenCLKernel::SetConstArgs() {
+int BatchToSpaceNDOpenCLKernel::SetConstArgs() {
   auto param = reinterpret_cast<BatchToSpaceParameter *>(this->op_parameter_);
   size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
   size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
@@ -66,10 +66,23 @@ void BatchToSpaceNDOpenCLKernel::SetConstArgs() {
   cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]};
 
   int arg_cnt = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() {
@@ -82,9 +95,9 @@ void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() {
 }
 
 int BatchToSpaceNDOpenCLKernel::Prepare() {
-  std::string kernel_name = "batch_to_space_nd_NHWC4";
+  const std::string kernel_name = "batch_to_space_nd_NHWC4";
   std::string source = batch_to_space_nd_source;
-  std::string program_name = "batch_to_space_nd";
+  const std::string program_name = "batch_to_space_nd";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -96,16 +109,28 @@ int BatchToSpaceNDOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
 int BatchToSpaceNDOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
 
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h
index aeeced68781..df756af6778 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h
@@ -32,7 +32,7 @@ class BatchToSpaceNDOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
index b135ed41c3d..56577306bbe 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc
@@ -59,15 +59,25 @@ void BatchNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
   local->push_back(z);
 }
 
-void BatchNormOpenCLKernel::SetConstArgs() {
+int BatchNormOpenCLKernel::SetConstArgs() {
   int arg_cn = 6;
   auto param = reinterpret_cast<BatchNormParameter *>(this->op_parameter_);
   auto input0_shape = in_tensors_.at(0)->shape();
   cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2),
                           UP_DIV(input0_shape.at(3), C4NUM)};
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3));
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void BatchNormOpenCLKernel::SetGlobalLocal() {
@@ -83,6 +93,41 @@ void BatchNormOpenCLKernel::SetGlobalLocal() {
   OpenCLKernel::AlignGlobalLocal(global_size_, local_size_);
 }
 
+int BatchNormOpenCLKernel::UnmapBuffer() {
+  auto allocator = ocl_runtime_->GetAllocator();
+  if (allocator->UnmapBuffer(scale_) != RET_OK) {
+    return RET_ERROR;
+  }
+  if (allocator->UnmapBuffer(offset_) != RET_OK) {
+    return RET_ERROR;
+  }
+  if (allocator->UnmapBuffer(mean_) != RET_OK) {
+    return RET_ERROR;
+  }
+  if (allocator->UnmapBuffer(variance_) != RET_OK) {
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int BatchNormOpenCLKernel::MapBuffer() {
+  auto allocator = ocl_runtime_->GetAllocator();
+  if (allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    return RET_ERROR;
+  }
+
+  return RET_OK;
+}
+
 int BatchNormOpenCLKernel::Initweight() {
   auto allocator = ocl_runtime_->GetAllocator();
   GpuTensorInfo img_info(in_tensors_.at(1));
@@ -90,15 +135,30 @@ int BatchNormOpenCLKernel::Initweight() {
   size_t weight_size = img_info.OriginSize;
   // allocated memory for weight and init value
   scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
+  if (scale_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
+  if (offset_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
+  if (mean_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
+  if (variance_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
 
-  allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true);
-  allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true);
-  allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true);
-  allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true);
-
+  if (MapBuffer() != RET_OK) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(scale_, 1, weight_size);
   memset(offset_, 0x00, weight_size);
   memset(mean_, 0x00, weight_size);
@@ -153,18 +213,18 @@ int BatchNormOpenCLKernel::Initweight() {
       memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size);
     }
   }
-  allocator->UnmapBuffer(scale_);
-  allocator->UnmapBuffer(offset_);
-  allocator->UnmapBuffer(mean_);
-  allocator->UnmapBuffer(variance_);
+  if (UnmapBuffer() != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
 int BatchNormOpenCLKernel::Prepare() {
   use_fp16_enable_ = ocl_runtime_->GetFp16Enable();
-  std::string kernel_name = "Batch_normalization_NHWC4";
+  const std::string kernel_name = "Batch_normalization_NHWC4";
   std::string source = batchnorm_source;
-  std::string program_name = "Batch_normalization";
+  const std::string program_name = "Batch_normalization";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -181,7 +241,10 @@ int BatchNormOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Initweight failed ";
     return RET_ERROR;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
 
   return RET_OK;
@@ -190,13 +253,34 @@ int BatchNormOpenCLKernel::Prepare() {
 int BatchNormOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   int arg_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());            // input tensor
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF);     // scale
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF);    // offset
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF);      // mean
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF);  // variance
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());           // out tensor
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // input tensor
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // scale
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // offset
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // mean
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // variance
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // out tensor
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
index 80b217febba..7f7b90710d5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h
@@ -32,11 +32,13 @@ class BatchNormOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
   int Initweight();
+  int UnmapBuffer();
+  int MapBuffer();
 
  private:
   bool use_fp16_enable_{false};
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc
index b022b270417..08e24d4fd68 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc
@@ -52,9 +52,13 @@ int CastOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void CastOpenCLKernel::SetConstArgs() {
+int CastOpenCLKernel::SetConstArgs() {
   cl_int2 shape = {static_cast<int>(shape_.width), static_cast<int>(shape_.height)};
-  ocl_runtime_->SetKernelArg(kernel_, 2, shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, 2, shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void CastOpenCLKernel::SetGlobalLocal() {
@@ -68,8 +72,8 @@ int CastOpenCLKernel::Prepare() {
     {kNumberTypeFloat32, "fp32"},
     {kNumberTypeFloat16, "fp16"},
   };
-  std::string program_name = "Cast";
-  std::string kernel_name =
+  const std::string program_name = "Cast";
+  const std::string kernel_name =
     "Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()];
   if (!ocl_runtime_->LoadSource(program_name, cast_source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -80,16 +84,28 @@ int CastOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   return RET_OK;
 }
 
 int CastOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
index 3db1f15a008..68fc43cd6c9 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h
@@ -31,7 +31,7 @@ class CastOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
index 6beebbfbe29..05a986da862 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc
@@ -38,7 +38,10 @@ int ConcatOpenCLKernel::RunAxis0() {
   auto *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
   for (int i = 0; i < in_tensors_.size(); i++) {
     auto src_data = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i);
-    allocator_->GetImageSize(src_data, &img_size);
+    if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
+      MS_LOG(ERROR) << "GetImageSize failed.";
+      return RET_ERROR;
+    }
     auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
     auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
     auto *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@@ -107,7 +110,7 @@ int ConcatOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void ConcatOpenCLKernel::SetConstArgs() {
+int ConcatOpenCLKernel::SetConstArgs() {
   GpuTensorInfo img_info(out_tensors_[0]);
   size_t dtype = ocl_runtime_->GetFp16Enable() ? sizeof(cl_half) : sizeof(cl_float);
   stride_w = img_info.RowPitch() / dtype;
@@ -124,9 +127,15 @@ void ConcatOpenCLKernel::SetConstArgs() {
         temp.s[j] = in_tensor->shape()[j];
       }
       Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1);
-      ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
   } else {
     for (auto &in_tensor : in_tensors_) {
       cl_int4 temp = {};
@@ -135,11 +144,18 @@ void ConcatOpenCLKernel::SetConstArgs() {
       }
       Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1);
       in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
-      ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
     }
   }
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void ConcatOpenCLKernel::SetGlobalLocal() {
@@ -190,6 +206,10 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() {
       }
       ImageSize img_size{in_shape.width, in_shape.height, dtype};
       auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
+      if (weight_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
       weight_ptrs_.push_back(weight_ptr_);
     } else {
       weight_ptrs_.push_back(nullptr);
@@ -222,7 +242,7 @@ int ConcatOpenCLKernel::Prepare() {
   kernel_name += "_NHWC4";
   MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
   std::string source = concat_source;
-  std::string program_name = "Concat";
+  const std::string program_name = "Concat";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -234,7 +254,10 @@ int ConcatOpenCLKernel::Prepare() {
     return ret;
   }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   return RET_OK;
 }
@@ -247,14 +270,27 @@ int ConcatOpenCLKernel::Run() {
   int arg_cn = 0;
   for (int i = 0; i < in_tensors_.size(); ++i) {
     auto input_ptr = weight_ptrs_.at(i) == nullptr ? in_tensors_[i]->data_c() : weight_ptrs_.at(i);
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   if (axis_ == 3 && !Align_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
+        CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h
index 9b3ffae6bb4..363888eaf2c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h
@@ -31,7 +31,7 @@ class ConcatOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
index 26f77796123..bfed62a5129 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc
@@ -108,7 +108,10 @@ int Conv2DOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -142,7 +145,7 @@ void Conv2DOpenCLKernel::InitAttrs() {
 
 int Conv2DOpenCLKernel::BuildKernel() {
   SetBlockSize();
-  std::string program_name = "conv2d";
+  const std::string program_name = "conv2d";
   std::stringstream kernel_name;
   kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C;
   if (filter_type_ == MemType::IMG) {
@@ -245,9 +248,11 @@ void Conv2DOpenCLKernel::SetMaliFp16BlockSize(int task_size_per_cu, bool w_kerne
 }
 
 int Conv2DOpenCLKernel::InitWeights() {
-  InitFilter();
+  if (InitFilter() != RET_OK) {
+    return RET_ERROR;
+  }
   if (has_bias_) {
-    InitBias();
+    return InitBias();
   }
   return RET_OK;
 }
@@ -300,7 +305,7 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil
   }
 }
 
-void Conv2DOpenCLKernel::InitFilter() {
+int Conv2DOpenCLKernel::InitFilter() {
   auto allocator = ocl_runtime_->GetAllocator();
 
   // allocate opencl memory: buffer or image2d
@@ -312,9 +317,17 @@ void Conv2DOpenCLKernel::InitFilter() {
     size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
     size = width * height * CO_TILE * sizeof_FLT_;
     packed_filter_ = allocator->Malloc({width, height, dtype});
+    if (packed_filter_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
   } else {
     size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_;
     packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF);
+    if (packed_filter_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
   }
 
   // rearrange filter
@@ -333,15 +346,22 @@ void Conv2DOpenCLKernel::InitFilter() {
   if (filter_type_ == MemType::IMG) {
     ocl_runtime_->WriteImage(packed_filter_, tmp.data());
   } else {
-    allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true);
+    if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+      MS_LOG(ERROR) << "Map Buffer failed.";
+      return RET_ERROR;
+    }
     memcpy(packed_filter_, tmp.data(), size);
-    allocator->UnmapBuffer(packed_filter_);
+    if (allocator->UnmapBuffer(packed_filter_) != RET_OK) {
+      MS_LOG(ERROR) << "UnmapBuffer failed.";
+      return RET_ERROR;
+    }
   }
 
   FreeStoredData(stored_filter_);
+  return RET_OK;
 }
 
-void Conv2DOpenCLKernel::InitBias() {
+int Conv2DOpenCLKernel::InitBias() {
   auto allocator = ocl_runtime_->GetAllocator();
 
   // align bias from C to C4
@@ -349,8 +369,15 @@ void Conv2DOpenCLKernel::InitBias() {
   void *src_data = stored_bias_ == nullptr ? bias_tensor->data_c() : stored_bias_;
   size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_;
   packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF);
+  if (packed_bias_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
 
-  allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true);
+  if (allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(packed_bias_, 0x00, packed_bias_size);
   if (bias_tensor->data_type() == kNumberTypeFloat16) {
     if (use_fp16_) {
@@ -375,11 +402,15 @@ void Conv2DOpenCLKernel::InitBias() {
       memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_);
     }
   }
-  allocator->UnmapBuffer(packed_bias_);
+  if (allocator->UnmapBuffer(packed_bias_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_bias_);
+  return RET_OK;
 }
 
-void Conv2DOpenCLKernel::SetConstArgs() {
+int Conv2DOpenCLKernel::SetConstArgs() {
   cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_};
   cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_};
   cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_};
@@ -387,15 +418,43 @@ void Conv2DOpenCLKernel::SetConstArgs() {
   cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_};
 
   int arg_cn = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void Conv2DOpenCLKernel::SetGlobalLocal() {
@@ -429,9 +488,18 @@ void Conv2DOpenCLKernel::SetGlobalLocal() {
 
 int Conv2DOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h
index f12ec7124f7..751b960774a 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h
@@ -53,7 +53,7 @@ class Conv2DOpenCLKernel : public OpenCLKernel {
   int CheckSpecs() override;
   int Prepare() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
 
@@ -78,8 +78,8 @@ class Conv2DOpenCLKernel : public OpenCLKernel {
  protected:
   void InitAttrs();
   virtual int BuildKernel();
-  virtual void InitFilter();
-  void InitBias();
+  virtual int InitFilter();
+  int InitBias();
   bool use_fp16_{false};
   size_t sizeof_FLT_{4};
   ConvParameter *param_{nullptr};
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
index c3a5d528ecb..16bd63384c5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc
@@ -55,10 +55,10 @@ int Conv2dTransposeOpenCLKernel::CheckSpecs() {
 }
 
 int Conv2dTransposeOpenCLKernel::Prepare() {
-  std::string kernel_name = "conv2d_transpose";
+  const std::string kernel_name = "conv2d_transpose";
   enable_fp16_ = ocl_runtime_->GetFp16Enable();
   std::string source = GetActDefines() + conv2d_transpose_source;
-  std::string program_name = "conv2d_transpose";
+  const std::string program_name = "conv2d_transpose";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -74,7 +74,10 @@ int Conv2dTransposeOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
@@ -94,7 +97,7 @@ void Conv2dTransposeOpenCLKernel::SetGlobalLocal() {
   AlignGlobalLocal(global_size_, local_size_);
 }
 
-void Conv2dTransposeOpenCLKernel::SetConstArgs() {
+int Conv2dTransposeOpenCLKernel::SetConstArgs() {
   int arg_cnt = 2;
   auto *param = reinterpret_cast<ConvParameter *>(op_parameter_);
   int ci = in_tensors_[0]->shape()[3];
@@ -115,14 +118,39 @@ void Conv2dTransposeOpenCLKernel::SetConstArgs() {
   cl_int2 padding = {pad_h, pad_w};
   cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n};
   cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n};
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast<cl_int>(param->act_type_));
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast<cl_int>(param->act_type_)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int Conv2dTransposeOpenCLKernel::InitWeights() {
@@ -147,7 +175,15 @@ int Conv2dTransposeOpenCLKernel::InitFilter() {
   // IHWO to OHWI4(I)4(O)(converter format is IHWO)
   // init padWeight_(buffer mem)
   padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size);
   auto origin_weight = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data_c() : stored_weight_;
   auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type();
@@ -188,7 +224,10 @@ int Conv2dTransposeOpenCLKernel::InitFilter() {
       }
     }
   }
-  allocator->UnmapBuffer(padWeight_);
+  if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_weight_);
   return RET_OK;
 }
@@ -208,7 +247,15 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
   }
   ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
   bias_ = allocator->Malloc(img_size);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(bias_, 0x00, div_co * C4NUM * data_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@@ -225,7 +272,10 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
       memcpy(bias_, src_data, co * data_size);
     }
   }
-  allocator->UnmapBuffer(bias_);
+  if (allocator->UnmapBuffer(bias_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_bias_);
   return RET_OK;
 }
@@ -233,9 +283,18 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
 int Conv2dTransposeOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_cnt = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
index 70caeb50ced..b709dee59b0 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h
@@ -34,7 +34,7 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel {
   int InitWeights() override;
   int InitFilter();
   int InitBias();
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int StoreConstData() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
index 7e9f7f7b572..73733bafd20 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc
@@ -73,7 +73,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() {
   } else {
     block_size_.C = block_size_.H = block_size_.W = 1;
   }
-  std::string program_name = "DepthwiseConv2d";
+  const std::string program_name = "DepthwiseConv2d";
   std::string source = depthwise_conv2d_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -94,7 +94,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast<int>(out_mem_type_);
   return RET_OK;
 }
@@ -153,10 +156,12 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
     size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
     ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
     packed_weight_ = allocator->Malloc(img_size, temp_filter.data());
+
   } else {
     packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data());
   }
   if (packed_weight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
     return RET_ERROR;
   }
   FreeStoredData(stored_weight_);
@@ -199,13 +204,15 @@ int DepthwiseConv2dOpenCLKernel::InitBias() {
   }
   bias_data_ = allocator->Malloc(bias_size, temp_bias.data());
   if (bias_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
     return RET_ERROR;
   }
+
   FreeStoredData(stored_bias_);
   return RET_OK;
 }
 
-void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
+int DepthwiseConv2dOpenCLKernel::SetConstArgs() {
   auto parameter = reinterpret_cast<ConvParameter *>(op_parameter_);
   auto in_info = GpuTensorInfo(in_tensors_[0]);
   auto out_info = GpuTensorInfo(out_tensors_[0]);
@@ -222,16 +229,47 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() {
   cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N};
 
   int arg_cnt = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() {
@@ -286,9 +324,18 @@ int DepthwiseConv2dOpenCLKernel::StoreConstData() {
 
 int DepthwiseConv2dOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h
index 8fdbed9d1bd..91626bb9606 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h
@@ -41,7 +41,7 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel {
   int CheckSpecs() override;
   int InitWeights() override;
   int InitBias();
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int StoreConstData() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc
index a42d0f9b9d1..dac1c248bcf 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc
@@ -35,7 +35,10 @@ int FillOpenCLKernel::RunFill() {
   cl_int4 fill_value = {};
   fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_;
   auto src_data = out_tensors_[0]->data_c();
-  allocator_->GetImageSize(src_data, &img_size);
+  if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
+    MS_LOG(ERROR) << "GetImageSize failed.";
+    return RET_ERROR;
+  }
   auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
   auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
   cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@@ -59,7 +62,7 @@ int FillOpenCLKernel::RunShape() {
   return RET_OK;
 }
 
-void FillOpenCLKernel::SetConstArgs() {}
+int FillOpenCLKernel::SetConstArgs() { return RET_OK; }
 
 void FillOpenCLKernel::SetGlobalLocal() {}
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
index e60da1d447a..0828414c7b6 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h
@@ -31,7 +31,7 @@ class FillOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc
index 00971e0b5fa..f86b979bf9c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc
@@ -98,7 +98,7 @@ int FullConnectionOpenCLKernel::Prepare() {
     kernel_name = "FullConnectionWeightVar";
   }
   std::string source = fullconnection_source;
-  std::string program_name = "FullConnection";
+  const std::string program_name = "FullConnection";
   if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -113,7 +113,10 @@ int FullConnectionOpenCLKernel::Prepare() {
   if (ret != RET_OK) {
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
@@ -137,7 +140,15 @@ int FullConnectionOpenCLKernel::InitFilter() {
   size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size,
                                  lite::opencl::MemType::BUF);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
   auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
   memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size);
@@ -183,7 +194,10 @@ int FullConnectionOpenCLKernel::InitFilter() {
       }
     }
   }
-  allocator->UnmapBuffer(padWeight_);
+  if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_weight_);
   return RET_OK;
 }
@@ -202,7 +216,15 @@ int FullConnectionOpenCLKernel::InitBias() {
   }
   ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
   bias_ = allocator->Malloc(img_size);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(bias_, 0x00, co4 * C4NUM * dtype_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@@ -218,7 +240,10 @@ int FullConnectionOpenCLKernel::InitBias() {
       memcpy(bias_, src_data, CO_ * dtype_size);
     }
   }
-  allocator->UnmapBuffer(bias_);
+  if (allocator->UnmapBuffer(bias_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_bias_);
   return RET_OK;
 }
@@ -231,22 +256,44 @@ void FullConnectionOpenCLKernel::SetGlobalLocal() {
   AlignGlobalLocal(global_size_, local_size_);
 }
 
-void FullConnectionOpenCLKernel::SetConstArgs() {
+int FullConnectionOpenCLKernel::SetConstArgs() {
   if (!weight_var_) {
-    ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   int arg_count = 3;
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   auto intensor_shape = GpuTensorInfo(in_tensors_[0]);
   int CI4 = CI_remainder_ * intensor_shape.Slice;
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM));
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   auto in_shape_info = GpuTensorInfo(in_tensors_[0]);
   cl_int2 in_img_shape = {static_cast<int>(in_shape_info.height), static_cast<int>(in_shape_info.width)};
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   auto *param = reinterpret_cast<MatMulParameter *>(op_parameter_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast<cl_int>(param->act_type_));
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast<cl_int>(param->act_type_)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int FullConnectionOpenCLKernel::StoreConstData() {
@@ -270,12 +317,24 @@ int FullConnectionOpenCLKernel::StoreConstData() {
 int FullConnectionOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_count = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
-  if (weight_var_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (weight_var_) {
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h
index be830de30ee..09bc05d2f74 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h
@@ -31,7 +31,7 @@ class FullConnectionOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
   int StoreConstData() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc
index f96d4583eb1..faaa7e81a00 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc
@@ -164,8 +164,8 @@ bool IsEltwiseAndOperatorSupported(LiteKernel *node) {
 
 int FusionEltwiseOpenCLKernel::Prepare() {
   std::string source = Codegen();
-  std::string program_name = "FusionEltwise\n" + source;
-  std::string kernel_name = "FusionEltwise";
+  const std::string program_name = "FusionEltwise\n" + source;
+  const std::string kernel_name = "FusionEltwise";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -183,7 +183,10 @@ int FusionEltwiseOpenCLKernel::Prepare() {
   }
   InitWeights();
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -217,7 +220,14 @@ int FusionEltwiseOpenCLKernel::InitWeights() {
         size_t num = tensor_info.ElementsNum;
         size_t size = tensor_info.Image2DSize;
         void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF);
-        allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true);
+        if (buffer == nullptr) {
+          MS_LOG(ERROR) << "Malloc failed.";
+          return RET_ERROR;
+        }
+        if (allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true) == nullptr) {
+          MS_LOG(ERROR) << "Map Buffer failed.";
+          return RET_ERROR;
+        }
         memset(buffer, 0x00, size);
         if (tensor->data_type() == kNumberTypeFloat16) {
           if (use_fp16) {
@@ -232,7 +242,10 @@ int FusionEltwiseOpenCLKernel::InitWeights() {
             CopyNumber<float32_t, float32_t>(buffer, tensor->data_c(), num);
           }
         }
-        allocator->UnmapBuffer(buffer);
+        if (allocator->UnmapBuffer(buffer) != RET_OK) {
+          MS_LOG(ERROR) << "UnmapBuffer failed.";
+          return RET_ERROR;
+        }
         buffer_weights_.push_back(buffer);
       }
     }
@@ -247,7 +260,7 @@ void FusionEltwiseOpenCLKernel::SetGlobalLocal() {
   AlignGlobalLocal(global_size_, local_size_);
 }
 
-void FusionEltwiseOpenCLKernel::SetConstArgs() {
+int FusionEltwiseOpenCLKernel::SetConstArgs() {
   auto output = GpuTensorInfo(out_tensors_.front());
   cl_int4 output_shape = {static_cast<cl_int>(output.N), static_cast<cl_int>(output.H), static_cast<cl_int>(output.W),
                           static_cast<cl_int>(output.C)};
@@ -260,18 +273,32 @@ void FusionEltwiseOpenCLKernel::SetConstArgs() {
       if (IsScalar(in_tensor->shape())) {
         if (ocl_runtime_->GetFp16Enable()) {
           auto value = static_cast<float16_t>(scalar_weights_[scalar_idx++]);
-          ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast<cl_half *>(&value)));
+          if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast<cl_half *>(&value))) != CL_SUCCESS) {
+            MS_LOG(ERROR) << "SetKernelArg failed.";
+            return RET_ERROR;
+          }
         } else {
-          ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]);
+          if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]) != CL_SUCCESS) {
+            MS_LOG(ERROR) << "SetKernelArg failed.";
+            return RET_ERROR;
+          }
         }
       } else {
-        ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF);
+        if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF) !=
+            CL_SUCCESS) {
+          MS_LOG(ERROR) << "SetKernelArg failed.";
+          return RET_ERROR;
+        }
       }
     }
     arg_idx++;  // for act input
   }
   arg_idx++;  // for output
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int FusionEltwiseOpenCLKernel::Run() {
@@ -279,12 +306,21 @@ int FusionEltwiseOpenCLKernel::Run() {
   int arg_idx = 0;
   for (auto *in_tensor : in_tensors_) {
     if (!in_tensor->IsConst()) {
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c());
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
     }
     arg_idx++;
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h
index 800c1aa4c0a..b585273cfad 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h
@@ -162,7 +162,7 @@ class FusionEltwiseOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int InitWeights() override;
   void SetGlobalLocal() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   int Run() override;
 
   void ClearParameter() { op_parameter_ = nullptr; }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
index 251c0df94c1..68dbaf98b4b 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc
@@ -81,7 +81,7 @@ int GatherOpenCLKernel::CheckSpecs() {
   }
 }
 
-void GatherOpenCLKernel::SetConstArgs() {
+int GatherOpenCLKernel::SetConstArgs() {
   auto input = GpuTensorInfo(in_tensors_.front());
   auto output = GpuTensorInfo(out_tensors_.front());
   int indices_num = in_tensors_.at(1)->ElementsNum();
@@ -90,10 +90,23 @@ void GatherOpenCLKernel::SetConstArgs() {
   cl_int4 dst_size = {static_cast<cl_int>(output.W), static_cast<cl_int>(output.H), static_cast<cl_int>(output.Slice),
                       static_cast<cl_int>(output.N)};
   int arg_cnt = 3;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void GatherOpenCLKernel::SetGlobalLocal() {
@@ -104,11 +117,11 @@ void GatherOpenCLKernel::SetGlobalLocal() {
 }
 
 int GatherOpenCLKernel::Prepare() {
-  std::string kernel_name = "gather";
+  const std::string kernel_name = "gather";
   if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) {
     axis_ = 3;
   }
-  std::string program_name = "gather";
+  const std::string program_name = "gather";
   if (!ocl_runtime_->LoadSource(program_name, gather_source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -127,7 +140,10 @@ int GatherOpenCLKernel::Prepare() {
     }
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
@@ -135,11 +151,21 @@ int GatherOpenCLKernel::Prepare() {
 int GatherOpenCLKernel::ConvertTensorToweight() {
   auto allocator = ocl_runtime_->GetAllocator();
   auto indices_tensor = in_tensors_.at(1);
-  allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true);
+  if (allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true) == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   auto indices_num = indices_tensor->ElementsNum();
   indices_data_ =
     reinterpret_cast<int32_t *>(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF));
-  allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true);
+  if (indices_data_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   if (indices_data_ == nullptr) {
     MS_LOG(ERROR) << "Memory allocation failed";
     return RET_ERROR;
@@ -155,8 +181,14 @@ int GatherOpenCLKernel::ConvertTensorToweight() {
                   << " But Your type is :" << data_type;
     return RET_ERROR;
   }
-  allocator->UnmapBuffer(indices_data_);
-  allocator->UnmapBuffer(indices_tensor->data_c());
+  if (allocator->UnmapBuffer(indices_data_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
+  if (allocator->UnmapBuffer(indices_tensor->data_c()) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -197,7 +229,10 @@ int GatherOpenCLKernel::PreProcess() {
   if (!InferShapeDone()) {
     auto indices_tensor = in_tensors_[1];
     if (!indices_tensor->IsConst()) {
-      ocl_runtime_->SyncCommandQueue();
+      if (!ocl_runtime_->SyncCommandQueue()) {
+        MS_LOG(ERROR) << "SyncCommandQueue failed.";
+        return RET_ERROR;
+      }
       indices_tensor->MutableData();
     }
   }
@@ -209,10 +244,22 @@ int GatherOpenCLKernel::Run() {
   if (intensor1_is_tensor) {
     ConvertTensorToweight();
   }
-  ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
index 5ec2047f2d0..78f3e2d531b 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h
@@ -34,7 +34,7 @@ class GatherOpenCLKernel : public OpenCLKernel {
   int PreProcess() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
   int ConvertTensorToweight();
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc
index b803bae593e..74504b8e983 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc
@@ -98,6 +98,10 @@ int ArithmeticInt8OpenCLKernel::InitWeights() {
       size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT;
       ImageSize img_size{in_shape.width, in_shape.height, dtype};
       auto weight_ptr_ = allocator->Malloc(img_size, weight.data());
+      if (weight_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
       weight_ptrs_.push_back(weight_ptr_);
     } else {
       weight_ptrs_.push_back(nullptr);
@@ -106,7 +110,7 @@ int ArithmeticInt8OpenCLKernel::InitWeights() {
   return RET_OK;
 }
 
-void ArithmeticInt8OpenCLKernel::SetConstArgs() {
+int ArithmeticInt8OpenCLKernel::SetConstArgs() {
   int arg_idx = 3;
   if (!element_flag_) {
     cl_int4 in0_shape = {static_cast<int>(in0_shape_.N), static_cast<int>(in0_shape_.H), static_cast<int>(in0_shape_.W),
@@ -121,16 +125,37 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() {
     } else if (in0_shape_.C != 1 && in1_shape_.C == 1) {
       broadcastC_flag = 2;  // BroadCast C4 in input1
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
     cl_int2 output_shape{static_cast<int>(global_range_[0]), static_cast<int>(global_range_[1])};
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_);
 
   // set quantization parameter.
   auto input0_quant_param = in_tensors_[0]->quant_params().front();
@@ -141,8 +166,15 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() {
   cl_char4 zero_point = {static_cast<int8_t>(input0_quant_param.zeroPoint),
                          static_cast<int8_t>(input1_quant_param.zeroPoint),
                          static_cast<int8_t>(output_quant_param.zeroPoint), 0};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);       // scale
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point);  // zero_point
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // scale
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // zero_point
+  return RET_OK;
 }
 
 int ArithmeticInt8OpenCLKernel::Prepare() {
@@ -191,7 +223,7 @@ int ArithmeticInt8OpenCLKernel::Prepare() {
     activation_max_ = 6.f;
   }
 
-  std::string program_name = "Arithmetic";
+  const std::string program_name = "Arithmetic";
   std::string source = arithmetic_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -207,7 +239,10 @@ int ArithmeticInt8OpenCLKernel::Prepare() {
   if (type() != PrimitiveType_BiasAdd) {
     InitWeights();
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name_ << " Init Done!";
   return RET_OK;
 }
@@ -218,10 +253,22 @@ int ArithmeticInt8OpenCLKernel::Run() {
   auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1];
   int arg_idx = 0;
 
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
index 667ea8f4763..3f8feb78749 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h
@@ -33,7 +33,7 @@ class ArithmeticInt8OpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
index 08f552c8d34..ea3599de657 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc
@@ -67,15 +67,31 @@ void LayerNormGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t
   local->push_back(z);
 }
 
-void LayerNormOpenCLKernel::SetConstArgs() {
+int LayerNormOpenCLKernel::SetConstArgs() {
   int arg_cn = 6;
   GpuTensorInfo img_info(in_tensors_.at(0));
   in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_);
-  ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_);
-  ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void AlignMeanVarGlobalLocal(const std::vector<int> &global, const std::vector<int> &local, cl::NDRange *global_range,
@@ -106,9 +122,23 @@ int LayerNormOpenCLKernel::Initweight() {
   size_t weight_size = img_info.Image2DSize;
   // allocated memory for weight and init value
   gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
+  if (gamma_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-  allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true);
-  allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true);
+  if (beta_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
+  if (allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(gamma_, 0x01, weight_size);
   memset(beta_, 0x00, weight_size);
 
@@ -143,8 +173,14 @@ int LayerNormOpenCLKernel::Initweight() {
       memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size);
     }
   }
-  allocator->UnmapBuffer(gamma_);
-  allocator->UnmapBuffer(beta_);
+  if (allocator->UnmapBuffer(gamma_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
+  if (allocator->UnmapBuffer(beta_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -164,11 +200,19 @@ int LayerNormOpenCLKernel::Prepare() {
   size_t size_dtype = use_fp16_enable_ ? sizeof(float16_t) : sizeof(float);
   mean_size *= size_dtype;
   mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
+  if (mean_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF);
-  std::string kernel_name = "LayerNormalization_NHWC4";
+  if (var_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
+  const std::string kernel_name = "LayerNormalization_NHWC4";
   std::string kernel_name_mean_var = "ComputeMeanVar";
   std::string source = layer_norm_source;
-  std::string program_name = "LayerNormalization";
+  const std::string program_name = "LayerNormalization";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -182,7 +226,10 @@ int LayerNormOpenCLKernel::Prepare() {
   kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4";
   ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext);
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
 
   return RET_OK;
@@ -191,21 +238,48 @@ int LayerNormOpenCLKernel::Prepare() {
 int LayerNormOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   int arg1_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c());        // input tensor
-  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF);  // mean_
-  ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF);   // var_  return RET_OK;
+  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // input tensor
+  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_);
 
   int arg_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());         // input tensor
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());        // out tensor
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF);   // mean_
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF);    // var_
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF);  // gamma_
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF);   // beta_
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // input tensor
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // out tensor
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // mean_
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // var_
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // gamma_
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }  // beta_
   ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
-}
+}  // namespace mindspore::kernel
 
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)
 REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator<LayerNormOpenCLKernel>)
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
index 67f40e01ad0..ca432abca14 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h
@@ -31,7 +31,7 @@ class LayerNormOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
index 3815743c0c4..dc5b5b6cd51 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc
@@ -84,7 +84,7 @@ int MatMulOpenCLKernel::Prepare() {
   std::map<int, std::string> dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}};
   kernel_name += dims2str[dims];
   std::string source = matmul_source;
-  std::string program_name = "MatMul";
+  const std::string program_name = "MatMul";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -95,13 +95,16 @@ int MatMulOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
-void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
+int MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int co) {
   auto allocator = ocl_runtime_->GetAllocator();
   int a = weight_shape_4d[0];
   int b = weight_shape_4d[1];
@@ -109,7 +112,15 @@ void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int
   int co4 = UP_DIV(co, C4NUM);
   size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
   auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
   memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size);
@@ -157,6 +168,7 @@ void MatMulOpenCLKernel::PadWeight(std::vector<int> weight_shape_4d, int ci, int
       }
     }
   }
+  return RET_OK;
 }
 
 int MatMulOpenCLKernel::InitWeights() {
@@ -185,7 +197,10 @@ int MatMulOpenCLKernel::InitWeights() {
 
   PadWeight(weight_shape_4d, ci, CO_);
 
-  allocator->UnmapBuffer(padWeight_);
+  if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_weight_);
   return InitBias();
 }
@@ -204,7 +219,15 @@ int MatMulOpenCLKernel::InitBias() {
   }
   lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype};
   bias_ = allocator->Malloc(img_size);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true);
+  if (bias_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   memset(bias_, 0x00, co4 * C4NUM * dtype_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_;
@@ -220,7 +243,10 @@ int MatMulOpenCLKernel::InitBias() {
       memcpy(bias_, src_data, CO_ * dtype_size);
     }
   }
-  allocator->UnmapBuffer(bias_);
+  if (allocator->UnmapBuffer(bias_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   FreeStoredData(stored_bias_);
   return RET_OK;
 }
@@ -235,29 +261,54 @@ void MatMulOpenCLKernel::SetGlobalLocal() {
   AlignGlobalLocal(global_size_, local_size_);
 }
 
-void MatMulOpenCLKernel::SetConstArgs() {
+int MatMulOpenCLKernel::SetConstArgs() {
   int arg_count = 2;
   cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
   cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
   if (act_weight_) {
     arg_count++;
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int MatMulOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_count = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c());
-  if (act_weight_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (act_weight_) {
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
index 54aee868ba4..02c62986c18 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h
@@ -32,7 +32,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int CheckSpecs() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override { return lite::RET_OK; }
   int InitBias();
@@ -54,7 +54,7 @@ class MatMulOpenCLKernel : public OpenCLKernel {
   std::vector<int> outShape{std::vector<int>(MAX_DIMS, 1)};
 
  private:
-  void PadWeight(std::vector<int> weight_shape_4d, int ci, int co);
+  int PadWeight(std::vector<int> weight_shape_4d, int ci, int co);
 };
 }  // namespace mindspore::kernel
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
index f6f231c1605..fe128cf5c49 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc
@@ -48,7 +48,7 @@ int OneHotOpenCLKernel::Prepare() {
     kernel_name += "Axis" + std::to_string(axis_);
   }
   std::string source = one_hot_source;
-  std::string program_name = "OneHot";
+  const std::string program_name = "OneHot";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -65,7 +65,10 @@ int OneHotOpenCLKernel::Prepare() {
     return ret;
   }
   InitWeights();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
@@ -87,18 +90,40 @@ int OneHotOpenCLKernel::InitWeights() {
   return RET_OK;
 }
 
-void OneHotOpenCLKernel::SetConstArgs() {
+int OneHotOpenCLKernel::SetConstArgs() {
   cl_int2 cl_in_image2d_shape = {static_cast<cl_int>(in_shape_.width), static_cast<cl_int>(in_shape_.height)};
   cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H),
                           static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)};
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C));
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_));
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<int>(out_shape_.C)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast<int>(param_->support_neg_index_)) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 void OneHotOpenCLKernel::SetGlobalLocal() {
   local_size_ = {};
@@ -108,9 +133,18 @@ void OneHotOpenCLKernel::SetGlobalLocal() {
 
 int OneHotOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
index 7efcc4e556f..add5beaf7bd 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h
@@ -33,7 +33,7 @@ class OneHotOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int InitWeights() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
index fee30266b16..3cd6fdd054f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc
@@ -81,11 +81,14 @@ int PadOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
-void PadOpenCLKernel::SetConstArgs() {
+int PadOpenCLKernel::SetConstArgs() {
   auto input = GpuTensorInfo(in_tensors_.front());
   auto output = GpuTensorInfo(out_tensors_.front());
   cl_int4 input_shape = {static_cast<cl_int>(input.N), static_cast<cl_int>(input.H), static_cast<cl_int>(input.W),
@@ -105,20 +108,45 @@ void PadOpenCLKernel::SetConstArgs() {
   Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0);
 
   int arg_cn = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   local_size_ = {8, 4, 1};
   global_size_ = {output.N * output.H, output.W, output.Slice};
   AlignGlobalLocal(global_size_, local_size_);
+  return RET_OK;
 }
 
 int PadOpenCLKernel::Run() {
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
index 4464241d1d6..3752982727d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h
@@ -35,7 +35,7 @@ class PadOpenCLKernel : public OpenCLKernel {
   int CheckSpecs() override;
 
   int Prepare() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
 
   int Run() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
index 668863226b8..9f1fd5c8763 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc
@@ -53,18 +53,25 @@ int PoolingOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-int PoolingOpenCLKernel::Prepare() {
+int PoolingOpenCLKernel::BuildKernel() {
   std::string kernel_name;
   if (parameter_->pool_mode_ == PoolMode_MaxPool) {
     kernel_name = "MaxPooling2d";
   } else if (parameter_->pool_mode_ == PoolMode_AvgPool) {
     kernel_name = "AvgPooling2d";
   }
+
+  if (parameter_->global_ &&
+      (parameter_->window_h_ >= LOCAL_CACHE_THREAD || parameter_->window_w_ >= LOCAL_CACHE_THREAD)) {
+    kernel_name += "_global";
+    is_use_local_ = true;
+  }
+  auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   switch (parameter_->act_type_) {
     case ActType_No:
       break;
     case ActType_Relu:
-      kernel_name += "_ReLU";
+      build_options_ext.emplace_back("-DRELU");
       break;
     default:
       MS_LOG(ERROR) << "Unsupported activation type " << parameter_->act_type_;
@@ -73,34 +80,49 @@ int PoolingOpenCLKernel::Prepare() {
   kernel_name += "_NHWC4";
   kernel_name += "_IMG";
   std::string source = pooling2d_source;
-  std::string program_name = "Pooling2d";
+  const std::string program_name = "Pooling2d";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
   }
-  auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
-  SetGlobalLocal();
-  MS_LOG(DEBUG) << kernel_name << " Init Done!";
+  return RET_OK;
+}
 
+int PoolingOpenCLKernel::Prepare() {
+  input_tensor_ = GpuTensorInfo(in_tensors_[0]);
+  if (BuildKernel() != RET_OK) {
+    MS_LOG(ERROR) << "BuildKernel failed.";
+    return RET_ERROR;
+  }
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
+  SetGlobalLocal();
   return RET_OK;
 }
 
 void PoolingOpenCLKernel::SetGlobalLocal() {
-  const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0];
-  const size_t global_y = out_tensors_[0]->shape()[2];
-  const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
-  global_size_ = {global_z, global_y, global_x};
-  local_size_ = {};
-  AlignGlobalLocal(global_size_, local_size_);
+  if (is_use_local_) {
+    local_size_ = {1, LOCAL_CACHE_THREAD, LOCAL_CACHE_THREAD};
+    global_size_ = {static_cast<size_t>(input_tensor_.Slice), 1, 1};
+    AlignGlobalLocal(global_size_, local_size_);
+  } else {
+    const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0];
+    const size_t global_y = out_tensors_[0]->shape()[2];
+    const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
+    global_size_ = {global_z, global_y, global_x};
+    local_size_ = {};
+    AlignGlobalLocal(global_size_, local_size_);
+  }
 }
 
-void PoolingOpenCLKernel::SetConstArgs() {
+int PoolingOpenCLKernel::SetGlobalConstArgs() {
   int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM);
   cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices};
   cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2],
@@ -109,19 +131,73 @@ void PoolingOpenCLKernel::SetConstArgs() {
   cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_};
   cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_};
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int PoolingOpenCLKernel::SetLocalConstArgs() {
+  int h = input_tensor_.H;
+  int w = input_tensor_.W;
+  int c = input_tensor_.C;
+  int c4 = UP_DIV(c, C4NUM);
+  cl_int4 size = {h, w, c4, c};
+  int arg_idx = 2;
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int PoolingOpenCLKernel::SetConstArgs() {
+  if (is_use_local_) {
+    return SetLocalConstArgs();
+  } else {
+    return SetGlobalConstArgs();
+  }
+}
+
+int PoolingOpenCLKernel::Tune() {
+  if (is_use_local_) {
+    return RET_OK;
+  }
+  return OpenCLKernel::Tune();
 }
 
 int PoolingOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
index e47b34b1bf0..1bc0cb86440 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h
@@ -32,11 +32,20 @@ class PoolingOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
+  int Tune() override;
+
+ private:
+  int BuildKernel();
+  int SetGlobalConstArgs();
+  int SetLocalConstArgs();
 
  private:
   PoolingParameter *parameter_;
+  bool is_use_local_ = false;
+  static const size_t LOCAL_CACHE_THREAD{16};
+  GpuTensorInfo input_tensor_;
 };
 
 }  // namespace mindspore::kernel
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
index 817c6aaeeaf..b9d8890fb5c 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc
@@ -63,15 +63,21 @@ void PowerGetWorkGroup(const std::vector<size_t> &global, std::vector<size_t> *l
   local->push_back(z);
 }
 
-void PowerOpenCLKernel::SetConstArgs() {
+int PowerOpenCLKernel::SetConstArgs() {
   float unalign_w = static_cast<float>(out_shape_.s[3]);
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
   int arg_cn = 2;
   if (!broadcast_) {
     arg_cn++;
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   if (use_fp16_enable_) {
     auto x = static_cast<float16_t>(power_);
@@ -80,11 +86,18 @@ void PowerOpenCLKernel::SetConstArgs() {
     auto w = static_cast<float16_t>(unalign_w);
     cl_half4 parameter = {*(reinterpret_cast<uint16_t *>(&x)), *(reinterpret_cast<uint16_t *>(&y)),
                           *(reinterpret_cast<uint16_t *>(&z)), *(reinterpret_cast<uint16_t *>(&w))};
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
     cl_float4 parameter = {power_, shift_, scale_, unalign_w};
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 
 void PowerOpenCLKernel::SetGlobalLocal() {
@@ -111,7 +124,7 @@ int PowerOpenCLKernel::Prepare() {
   auto param = reinterpret_cast<PowerParameter *>(this->op_parameter_);
   std::string kernel_name = "power";
   std::string source = power_source;
-  std::string program_name = "power";
+  const std::string program_name = "power";
   if (broadcast_) {
     power_ = param->power_;
     kernel_name += "_broadcast";
@@ -130,7 +143,10 @@ int PowerOpenCLKernel::Prepare() {
   }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -138,13 +154,28 @@ int PowerOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   int arg_cn = 0;
   if (broadcast_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c());
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
index 71934bd7b92..ea36486b0a5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h
@@ -30,7 +30,7 @@ class PowerOpenCLKernel : public OpenCLKernel {
 
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
index 9e7f08a1510..2784f06b708 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc
@@ -46,7 +46,14 @@ int PReluOpenCLKernel::InitWeights() {
     auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
     size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT;
     weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-    allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true);
+    if (weight_vector_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
+    if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+      MS_LOG(ERROR) << "Map Buffer failed.";
+      return RET_ERROR;
+    }
     memset(weight_vector_, 0x00, weight_size);
     if (weight_tensor->data_type() == kNumberTypeFloat16) {
       if (enable_fp16_) {
@@ -69,7 +76,10 @@ int PReluOpenCLKernel::InitWeights() {
         memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT);
       }
     }
-    allocator->UnmapBuffer(weight_vector_);
+    if (allocator->UnmapBuffer(weight_vector_) != RET_OK) {
+      MS_LOG(ERROR) << "UnmapBuffer failed.";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
@@ -95,11 +105,18 @@ int PReluOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void PReluOpenCLKernel::SetConstArgs() {
+int PReluOpenCLKernel::SetConstArgs() {
   int arg_idx = 3;
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void PReluOpenCLKernel::SetGlobalLocal() {
@@ -126,8 +143,8 @@ int PReluOpenCLKernel::Prepare() {
   weight_is_scalar = param->channelShared;
   enable_fp16_ = ocl_runtime_->GetFp16Enable();
   std::string source = prelu_source;
-  std::string program_name = "PRelu";
-  std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector");
+  const std::string program_name = "PRelu";
+  const std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector");
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -141,7 +158,10 @@ int PReluOpenCLKernel::Prepare() {
   InitWeights();
   MS_LOG(DEBUG) << program_name << " init Done!";
   MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!";
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   return RET_OK;
 }
@@ -149,12 +169,24 @@ int PReluOpenCLKernel::Prepare() {
 int PReluOpenCLKernel::Run() {
   MS_LOG(DEBUG) << op_parameter_->name_ << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   if (weight_is_scalar) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   if (ret != mindspore::lite::RET_OK) {
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
index 739149eee49..b6e6d3de247 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h
@@ -31,7 +31,7 @@ class PReluOpenCLKernel : public OpenCLKernel {
 
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
   int InitWeights() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
index 237820dc37f..4186f6911c7 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc
@@ -17,6 +17,7 @@
 #include <set>
 #include <string>
 #include <map>
+#include <algorithm>
 #include "include/errorcode.h"
 #include "src/kernel_registry.h"
 #include "src/runtime/kernel/opencl/kernel/reduce.h"
@@ -179,7 +180,7 @@ int ReduceOpenCLKernel::Prepare() {
   }
   kernel_name += GetReduceTypeStr(reduce_param->mode_);
   std::string source = reduce_source;
-  std::string program_name = "Reduce";
+  const std::string program_name = "Reduce";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -190,22 +191,32 @@ int ReduceOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
-void ReduceOpenCLKernel::SetConstArgs() {
+int ReduceOpenCLKernel::SetConstArgs() {
   int h = inShape.H;
   int w = inShape.W;
   int c = inShape.C;
   int c4 = UP_DIV(c, C4NUM);
   cl_int4 size = {h, w, c4, c};
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size);
-  if (wc_reduce_ || c_reduce_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
   }
+  if (wc_reduce_ || c_reduce_) {
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
 }
 void ReduceOpenCLKernel::SetGlobalLocal() {
   int h = inShape.H;
@@ -235,9 +246,18 @@ int ReduceOpenCLKernel::Tune() {
 int ReduceOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
index 2d359a19ee7..ae70347aaa0 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h
@@ -32,7 +32,7 @@ class ReduceOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
index 79116366827..b343ecc5ed2 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc
@@ -53,15 +53,22 @@ int ReshapeOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void ReshapeOpenCLKernel::SetConstArgs() {
+int ReshapeOpenCLKernel::SetConstArgs() {
   auto in = GpuTensorInfo(in_tensors_.front());
   auto out = GpuTensorInfo(out_tensors_.front());
   cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)};
   cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)};
 
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void ReshapeOpenCLKernel::SetGlobalLocal() {
@@ -72,9 +79,9 @@ void ReshapeOpenCLKernel::SetGlobalLocal() {
 }
 
 int ReshapeOpenCLKernel::Prepare() {
-  std::string kernel_name = "reshape_NHWC4";
+  const std::string kernel_name = "reshape_NHWC4";
   std::string source = reshape_source;
-  std::string program_name = "reshape";
+  const std::string program_name = "reshape";
   auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_);
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -87,16 +94,28 @@ int ReshapeOpenCLKernel::Prepare() {
   }
 
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
 int ReshapeOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -104,7 +123,10 @@ int ReshapeOpenCLKernel::PreProcess() {
   if (type() == PrimitiveType_Reshape && !InferShapeDone()) {
     auto shape_tensor = in_tensors_[1];
     if (!shape_tensor->IsConst()) {
-      ocl_runtime_->SyncCommandQueue();
+      if (!ocl_runtime_->SyncCommandQueue()) {
+        MS_LOG(ERROR) << "SyncCommandQueue failed.";
+        return RET_ERROR;
+      }
       shape_tensor->MutableData();
     }
   }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
index 149e50ab96c..7b9025b5866 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h
@@ -30,7 +30,7 @@ class ReshapeOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int PreProcess() override;
 };
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
index 8d4156db470..cf91a167f4f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc
@@ -64,7 +64,7 @@ int ResizeOpenCLKernel::Prepare() {
   }
   kernel_name += "_NHWC4";
   std::string source = resize_source;
-  std::string program_name = "Resize";
+  const std::string program_name = "Resize";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -75,7 +75,10 @@ int ResizeOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
@@ -87,7 +90,7 @@ float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size)
            : static_cast<float>(input_size) / static_cast<float>(output_size);
 }
 
-void ResizeOpenCLKernel::SetConstArgs() {
+int ResizeOpenCLKernel::SetConstArgs() {
   auto in_shape = in_tensors_[0]->shape();
   auto out_shape = out_tensors_[0]->shape();
   int n = out_shape[0];
@@ -101,9 +104,19 @@ void ResizeOpenCLKernel::SetConstArgs() {
   cl_int4 out_size = {n, h, w, c4};
   cl_float2 scale = {scale_h, scale_w};
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void ResizeOpenCLKernel::SetGlobalLocal() {
@@ -116,9 +129,18 @@ void ResizeOpenCLKernel::SetGlobalLocal() {
 int ResizeOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -126,7 +148,10 @@ int ResizeOpenCLKernel::PreProcess() {
   if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) {
     auto shape_tensor = in_tensors_[1];
     if (!shape_tensor->IsConst()) {
-      ocl_runtime_->SyncCommandQueue();
+      if (!ocl_runtime_->SyncCommandQueue()) {
+        MS_LOG(ERROR) << "SyncCommandQueue failed.";
+        return RET_ERROR;
+      }
       shape_tensor->MutableData();
     }
   }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h
index 38b5eee6d9e..ea73e0b10a7 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h
@@ -31,7 +31,7 @@ class ResizeOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int PreProcess() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
index f298fff5958..14c83e0a780 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc
@@ -98,14 +98,30 @@ int ScaleOpenCLKernel::InitWeights() {
     img_size.height = 1;
     img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM);
     scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c());
+    if (scale_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c());
+    if (offset_ptr_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     return RET_OK;
   }
 
   if (in_tensor->format() == scale_tensor->format()) {
     if (in_tensor->data_type() == scale_tensor->data_type()) {
       scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c());
+      if (scale_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
       offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c());
+      if (offset_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
     } else {
       MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to "
                     << in_tensor->data_type();
@@ -121,7 +137,15 @@ int ScaleOpenCLKernel::InitWeights() {
       PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info);
       PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info);
       scale_ptr_ = allocator->Malloc(img_size, scale.data());
+      if (scale_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
       offset_ptr_ = allocator->Malloc(img_size, offset.data());
+      if (offset_ptr_ == nullptr) {
+        MS_LOG(ERROR) << "Malloc failed.";
+        return RET_ERROR;
+      }
     } else {
       MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to "
                     << in_tensor->data_type();
@@ -175,7 +199,7 @@ int ScaleOpenCLKernel::Prepare() {
   } else {
     kernel_name += "_BUF";
   }
-  std::string program_name = "Scale";
+  const std::string program_name = "Scale";
   std::string source = GetActDefines() + scale_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -193,44 +217,86 @@ int ScaleOpenCLKernel::Prepare() {
   return RET_OK;
 }
 
-int ScaleOpenCLKernel::Run() {
-  MS_LOG(DEBUG) << this->name() << " Running!";
-  auto *param = reinterpret_cast<const ScaleParameter *>(op_parameter_);
+int ScaleOpenCLKernel::SetKernelArg(int *idx) {
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    return RET_ERROR;
+  }
   if (weight_vector_flag_) {
     void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_;
     void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_;
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) {
+      return RET_ERROR;
+    }
   } else {
     if (in_tensors_[1]->data_type() == kNumberTypeFloat32) {
       float scale = static_cast<float *>(in_tensors_[1]->data_c())[0];
       float offset = static_cast<float *>(in_tensors_[2]->data_c())[0];
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale);
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset);
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) {
+        return RET_ERROR;
+      }
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) {
+        return RET_ERROR;
+      }
     } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) {
       float16_t scale = static_cast<float16_t *>(in_tensors_[1]->data_c())[0];
       float16_t offset = static_cast<float16_t *>(in_tensors_[2]->data_c())[0];
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(scale));
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(offset));
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(scale)) != CL_SUCCESS) {
+        return RET_ERROR;
+      }
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast<float>(offset)) != CL_SUCCESS) {
+        return RET_ERROR;
+      }
     } else {
       MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type();
       return RET_ERROR;
     }
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    return RET_ERROR;
+  }
   cl_int2 output_shape{static_cast<int>(global_size_[0]), static_cast<int>(global_size_[1])};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) {
+    return RET_ERROR;
+  }
+  *idx = arg_idx;
+  return RET_OK;
+}
+
+int ScaleOpenCLKernel::Run() {
+  MS_LOG(DEBUG) << this->name() << " Running!";
+  auto *param = reinterpret_cast<const ScaleParameter *>(op_parameter_);
+  int arg_idx = 0;
+
+  if (SetKernelArg(&arg_idx) != RET_OK) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+
   if (weight_vector_flag_ && broadcast_flag_) {
     if (broadcast_H_flag_) {
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]);
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
     } else {
-      ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM));
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
     }
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h
index 755bdc1db28..f1abc693ff7 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h
@@ -34,7 +34,7 @@ class ScaleOpenCLKernel : public OpenCLKernel {
 
  private:
   void Image2dGetWorkGroupSize();
-
+  int SetKernelArg(int *idx);
   bool weight_vector_flag_{true};
   bool broadcast_flag_{false};
   bool broadcast_H_flag_{false};
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
index 2491f59036c..9f8fb994a90 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc
@@ -75,7 +75,7 @@ int SoftmaxOpenCLKernel::Prepare() {
     kernel_name += "Axis" + std::to_string(axis_);
   }
   kernel_name += "_NHWC4";
-  std::string program_name = "Softmax";
+  const std::string program_name = "Softmax";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -93,7 +93,10 @@ int SoftmaxOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return lite::RET_OK;
@@ -131,24 +134,40 @@ int SoftmaxOpenCLKernel::Tune() {
   return OpenCLKernel::Tune();
 }
 
-void SoftmaxOpenCLKernel::SetConstArgs() {
+int SoftmaxOpenCLKernel::SetConstArgs() {
   int arg_idx = 2;
   int channel = out_shape_.C;
   int c4 = out_shape_.Slice;
   auto mask_ = GetMaskForLastChannel(channel);
   cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   cl_int4 input_shape = {static_cast<int>(out_shape_.N), static_cast<int>(out_shape_.H), static_cast<int>(out_shape_.W),
                          c4};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 int SoftmaxOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return lite::RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h
index da0b75b29e0..504e1e8715f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h
@@ -30,7 +30,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Tune() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc
index 6b6da404602..09f6cc70871 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc
@@ -61,7 +61,7 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void SpaceToBatchNDOpenCLKernel::SetConstArgs() {
+int SpaceToBatchNDOpenCLKernel::SetConstArgs() {
   auto param = reinterpret_cast<SpaceToBatchParameter *>(this->op_parameter_);
   size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM);
   size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM);
@@ -71,10 +71,23 @@ void SpaceToBatchNDOpenCLKernel::SetConstArgs() {
   cl_int4 paddings = {param->paddings_[0], param->paddings_[1], param->paddings_[2], param->paddings_[3]};
 
   int arg_cnt = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() {
@@ -87,9 +100,9 @@ void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() {
 }
 
 int SpaceToBatchNDOpenCLKernel::Prepare() {
-  std::string kernel_name = "space_to_batch_nd_NHWC4";
+  const std::string kernel_name = "space_to_batch_nd_NHWC4";
   std::string source = space_to_batch_nd_source;
-  std::string program_name = "space_to_batch_nd";
+  const std::string program_name = "space_to_batch_nd";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -101,7 +114,10 @@ int SpaceToBatchNDOpenCLKernel::Prepare() {
     return ret;
   }
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
@@ -109,9 +125,18 @@ int SpaceToBatchNDOpenCLKernel::Prepare() {
 int SpaceToBatchNDOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
 
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
 
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h
index 30df823c059..e545c68b2a4 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h
@@ -32,7 +32,7 @@ class SpaceToBatchNDOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc
index 0303ea31bdb..0e69cd3ef23 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc
@@ -51,7 +51,7 @@ int SpaceToDepthOpenCLKernel::Prepare() {
     kernel_name += "Align";
   }
   std::string source = space_to_depth_source;
-  std::string program_name = "SpaceToDepth";
+  const std::string program_name = "SpaceToDepth";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -63,28 +63,47 @@ int SpaceToDepthOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
-void SpaceToDepthOpenCLKernel::SetConstArgs() {
+int SpaceToDepthOpenCLKernel::SetConstArgs() {
   cl_int4 cl_in_shape = {static_cast<cl_int>(in_shape_.N), static_cast<cl_int>(in_shape_.H),
                          static_cast<cl_int>(in_shape_.W), static_cast<cl_int>(in_shape_.Slice)};
   cl_int4 cl_out_shape = {static_cast<cl_int>(out_shape_.N), static_cast<cl_int>(out_shape_.H),
                           static_cast<cl_int>(out_shape_.W), static_cast<cl_int>(out_shape_.Slice)};
   auto param = reinterpret_cast<SpaceToDepthParameter *>(op_parameter_);
   int arg_idx = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   if (type() == PrimitiveType_DepthToSpace) {
     int co_size = out_shape_.C;
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
     int ci_size = in_shape_.C;
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
   local_size_ = {};
@@ -95,9 +114,18 @@ void SpaceToDepthOpenCLKernel::SetGlobalLocal() {
 int SpaceToDepthOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h
index 3576e26d616..75ee5d1d1b6 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h
@@ -32,7 +32,7 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc
index f3f6c8c084f..dc532bbbb92 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc
@@ -37,7 +37,10 @@ int SparseToDenseOpenCLKernel::InitOutputToDefault() {
   cl_float4 fill_value = {};
   fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_;
   auto src_data = out_tensors_[0]->data_c();
-  allocator_->GetImageSize(src_data, &img_size);
+  if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
+    MS_LOG(ERROR) << "GetImageSize failed.";
+    return RET_ERROR;
+  }
   auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
   auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
   cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@@ -62,7 +65,14 @@ int SparseToDenseOpenCLKernel::InitWeights() {
     auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float);
     size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT;
     weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF);
-    allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true);
+    if (weight_vector_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
+    if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+      MS_LOG(ERROR) << "Map Buffer failed.";
+      return RET_ERROR;
+    }
     memset(weight_vector_, 0x00, weight_size);
     if (weight_tensor->data_type() == kNumberTypeFloat16) {
       if (enable_fp16_) {
@@ -85,7 +95,10 @@ int SparseToDenseOpenCLKernel::InitWeights() {
         memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT);
       }
     }
-    allocator->UnmapBuffer(weight_vector_);
+    if (allocator->UnmapBuffer(weight_vector_) != RET_OK) {
+      MS_LOG(ERROR) << "UnmapBuffer failed.";
+      return RET_ERROR;
+    }
   }
   return RET_OK;
 }
@@ -115,7 +128,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void SparseToDenseOpenCLKernel::SetConstArgs() {
+int SparseToDenseOpenCLKernel::SetConstArgs() {
   auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
   GpuTensorInfo img_info(out_tensors_[0]);
   size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
@@ -124,11 +137,27 @@ void SparseToDenseOpenCLKernel::SetConstArgs() {
   auto out_shape_temp = out_tensors_[0]->shape();
   cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)};
   int arg_cn = 3;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void SparseToDenseOpenCLKernel::SetGlobalLocal() {
@@ -144,9 +173,9 @@ int SparseToDenseOpenCLKernel::Prepare() {
   input_dim_ = in_tensors_[0]->shape().size();
   inshapeindex1_dim = in_tensors_[0]->shape()[1];
   weight_scalar_ = in_tensors_[2]->IsScalar();
-  std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector");
+  const std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector");
   std::string source = sparse_to_dense_source;
-  std::string program_name = "SparseToDense";
+  const std::string program_name = "SparseToDense";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -174,7 +203,10 @@ int SparseToDenseOpenCLKernel::Prepare() {
   InitWeights();
   InferShapeTo4D();
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
@@ -212,14 +244,30 @@ int SparseToDenseOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
   InitOutputToDefault();
   int arg_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
-  if (!weight_scalar_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF);
-  } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
+      CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (!weight_scalar_) {
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  } else {
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h
index 0ffc6359f98..f98dc6f0265 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h
@@ -31,7 +31,7 @@ class SparseToDenseOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
   int Run() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int CheckSpecs() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc
index 862d4f2dba1..206bbffbf33 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc
@@ -41,7 +41,10 @@ int SplitOpenCLKernel::RunAxis0() {
   for (int i = 0; i < out_tensors_.size(); i++) {
     auto dst_data = out_tensors_[i]->data_c();
     ImageSize img_size;
-    allocator_->GetImageSize(dst_data, &img_size);
+    if (allocator_->GetImageSize(dst_data, &img_size) != RET_OK) {
+      MS_LOG(ERROR) << "GetImageSize failed.";
+      return RET_ERROR;
+    }
     auto dst_area = cl::array<cl::size_type, 3U>{0, 0, 0};
     auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
     cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
@@ -93,23 +96,32 @@ int SplitOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape) {
+int SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape) {
   auto allocator = ocl_runtime_->GetAllocator();
   int shape_dim = in_shape.at(param->split_dim_);
   if (num_split_ == 1) {
     size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]);
     split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF));
+    if (split_sizes_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     for (int i = 0; i < num_split - 1; ++i) {
       split_sizes_[i] = (i + 1) * param->split_sizes_[0];
     }
   } else {
     int sum = 0;
     split_sizes_ = reinterpret_cast<int *>(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF));
+    if (split_sizes_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     for (int i = 0; i < num_split_ - 1; ++i) {
       sum += param->split_sizes_[i];
       split_sizes_[i] = sum;
     }
   }
+  return RET_OK;
 }
 
 int SplitOpenCLKernel::Prepare() {
@@ -129,7 +141,10 @@ int SplitOpenCLKernel::Prepare() {
       }
     }
   }
-  AlignSplitSizes(param, in_shape);
+  if (AlignSplitSizes(param, in_shape) != RET_OK) {
+    MS_LOG(ERROR) << "AlignSplitSizes failed.";
+    return RET_ERROR;
+  }
   std::string kernel_name = "split_out";
   kernel_name += std::to_string(num_split_);
   kernel_name += "_axis" + std::to_string(split_dim_);
@@ -138,7 +153,7 @@ int SplitOpenCLKernel::Prepare() {
   }
   MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
   std::string source = split_source;
-  std::string program_name = "split";
+  const std::string program_name = "split";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -151,12 +166,15 @@ int SplitOpenCLKernel::Prepare() {
     return ret;
   }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   return RET_OK;
 }
 
-void SplitOpenCLKernel::SetConstArgs() {
+int SplitOpenCLKernel::SetConstArgs() {
   int arg_cn = out_tensors_.size() + 2;
   cl_int4 shape = {};
   for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) {
@@ -166,7 +184,10 @@ void SplitOpenCLKernel::SetConstArgs() {
   if (Align_) {
     in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
 
   for (int i = 0; i < out_tensors_.size(); ++i) {
     cl_int4 temp = {};
@@ -177,13 +198,21 @@ void SplitOpenCLKernel::SetConstArgs() {
     if (Align_) {
       out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
-  GpuTensorInfo img_info(in_tensors_.at(0));
-  size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
-  stride_w = img_info.RowPitch() / dtype;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
-  return;
+  if (!Align_) {
+    GpuTensorInfo img_info(in_tensors_.at(0));
+    size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float);
+    stride_w = img_info.RowPitch() / dtype;
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
 }
 
 void SplitOpenCLKernel::SetGlobalLocal() {
@@ -205,15 +234,31 @@ int SplitOpenCLKernel::Run() {
   }
   int arg_cn = 0;
   if (Align_) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c());
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF) !=
+        CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   for (int i = 0; i < out_tensors_.size(); ++i) {
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c());
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h
index c8be6a244da..b7e25a93996 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h
@@ -31,12 +31,12 @@ class SplitOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
 
  private:
-  void AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape);
+  int AlignSplitSizes(SplitParameter *param, const std::vector<int> &in_shape);
   int RunAxis0();
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc
index 819c2ab8b7c..5b08fbb3245 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc
@@ -36,7 +36,10 @@ int StackOpenCLKernel::RunAxis0() {
   cl::Image2D *out_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(dst_data));
   for (int i = 0; i < in_tensors_.size(); i++) {
     auto src_data = in_tensors_[i]->data_c();
-    allocator_->GetImageSize(src_data, &img_size);
+    if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) {
+      MS_LOG(ERROR) << "GetImageSize failed.";
+      return RET_ERROR;
+    }
     auto src_origin = cl::array<cl::size_type, 3U>{0, 0, 0};
     auto region = cl::array<cl::size_type, 3U>{img_size.width, img_size.height, 1};
     cl::Image2D *input_image = reinterpret_cast<cl::Image2D *>(allocator_->GetImage(src_data));
@@ -95,7 +98,7 @@ int StackOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void StackOpenCLKernel::SetConstArgs() {
+int StackOpenCLKernel::SetConstArgs() {
   int arg_cn = in_tensors_.size() + 1;
   cl_int4 inshape_tmp = {}, outshape_tmp = {};
   for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) {
@@ -108,8 +111,14 @@ void StackOpenCLKernel::SetConstArgs() {
   Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1);
   in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM);
   out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   if (buffer_button_) {
     GpuTensorInfo img_info_out(out_tensors_[0]);
     GpuTensorInfo img_info_in(in_tensors_[0]);
@@ -117,8 +126,12 @@ void StackOpenCLKernel::SetConstArgs() {
     stride_w_out = img_info_out.RowPitch() / dtype;
     stride_w_in = img_info_in.RowPitch() / dtype;
     cl_int2 stride_w = {stride_w_out, stride_w_in};
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 
 void StackOpenCLKernel::SetGlobalLocal() {
@@ -162,7 +175,7 @@ int StackOpenCLKernel::Prepare() {
 
   MS_LOG(DEBUG) << "kernel_name=: " << kernel_name;
   std::string source = stack_source;
-  std::string program_name = "stack";
+  const std::string program_name = "stack";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -174,7 +187,10 @@ int StackOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
 
   return RET_OK;
@@ -188,16 +204,33 @@ int StackOpenCLKernel::Run() {
   int arg_cn = 0;
   if (buffer_button_) {
     for (int i = 0; i < in_tensors_.size(); ++i) {
-      ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF);
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF) !=
+          CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) !=
+        CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF);
   } else {
     for (int i = 0; i < in_tensors_.size(); ++i) {
-      ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c());
+      if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()) != CL_SUCCESS) {
+        MS_LOG(ERROR) << "SetKernelArg failed.";
+        return RET_ERROR;
+      }
+    }
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
     }
-    ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c());
   }
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator<StackOpenCLKernel>);
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h
index a41bc0ff7ee..1585fae341d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h
@@ -29,7 +29,7 @@ class StackOpenCLKernel : public OpenCLKernel {
   ~StackOpenCLKernel() override{};
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc
index 59df111e2a8..bd21ab17886 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc
@@ -27,9 +27,9 @@ using mindspore::lite::opencl::ImageSize;
 
 namespace mindspore::kernel {
 int StrassenOpenCLKernel::Prepare() {
-  std::string kernel_name = "MatMul_Strassen_NHWC4_2d";
+  const std::string kernel_name = "MatMul_Strassen_NHWC4_2d";
   std::string source = strassen_source;
-  std::string program_name = "MatMul";
+  const std::string program_name = "MatMul";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -50,13 +50,16 @@ int StrassenOpenCLKernel::Prepare() {
   if (ret != RET_OK) {
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
-void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) {
+int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) {
   auto allocator = ocl_runtime_->GetAllocator();
   size_t img_dtype = enable_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
   ImageSize img_size{static_cast<size_t>(UP_DIV(NumA, C4NUM)), static_cast<size_t>(NumA), img_dtype};
@@ -64,15 +67,52 @@ void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) {
   size_t memB = NumB * NumB * dtype_size;
   for (int depth = 0; depth < MAXDEPTH; depth++) {
     B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF);
+    if (B_temp[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     A_temp[depth] = allocator->Malloc(img_size);
+    if (A_temp[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M1[depth] = allocator->Malloc(img_size);
+    if (M1[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M2[depth] = allocator->Malloc(img_size);
+    if (M2[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M3[depth] = allocator->Malloc(img_size);
+    if (M3[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M4[depth] = allocator->Malloc(img_size);
+    if (M4[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M5[depth] = allocator->Malloc(img_size);
+    if (M5[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M6[depth] = allocator->Malloc(img_size);
+    if (M6[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
     M7[depth] = allocator->Malloc(img_size);
+    if (M7[depth] == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 
 int StrassenOpenCLKernel::InitWeights() {
@@ -82,14 +122,25 @@ int StrassenOpenCLKernel::InitWeights() {
   int NumB = in_tensors_[1]->shape()[0];
   size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float);
   padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
   padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true);
+  if (padWeight_ == nullptr) {
+    MS_LOG(ERROR) << "Map Buffer failed.";
+    return RET_ERROR;
+  }
   auto padWeightFp32 = reinterpret_cast<float *>(padWeight_);
   auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
   memset(padWeight_, 0x00, NumA * NumB * dtype_size);
   auto originWeightFp32 = reinterpret_cast<float *>(in_tensors_.at(kWeightIndex)->data_c());
   auto originWeightFp16 = reinterpret_cast<float16_t *>(in_tensors_.at(kWeightIndex)->data_c());
   bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16;
-  AllocatorMemoryForStrassen(NumA / 2, NumB / 2);
+  if (AllocatorMemoryForStrassen(NumA / 2, NumB / 2) != RET_OK) {
+    MS_LOG(ERROR) << "AllocatorMemoryForStrassen failed.";
+    return RET_ERROR;
+  }
   size_t size = NumA * NumB * dtype_size;
   if (isModelFp16) {
     if (enable_fp16_) {
@@ -108,7 +159,10 @@ int StrassenOpenCLKernel::InitWeights() {
       memcpy(padWeightFp32, originWeightFp32, size);
     }
   }
-  allocator->UnmapBuffer(padWeight_);
+  if (allocator->UnmapBuffer(padWeight_) != RET_OK) {
+    MS_LOG(ERROR) << "UnmapBuffer failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -120,7 +174,7 @@ void AlignStrassenGlobalLocal(const std::vector<size_t> &global, const std::vect
 }
 
 // 0 : global_size_, 1: global_size_add_sub
-void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) {
+int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) {
   size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM);
   local_size_add_sub = {16, 1, 16};
   if (type_flag == 0) {
@@ -130,6 +184,7 @@ void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type
     global_size_add_sub = {strassen_size_C4, 1, strassen_size};
     AlignStrassenGlobalLocal(global_size_add_sub, local_size_add_sub, &global_add_sub_, &local_add_sub_);
   }
+  return RET_OK;
 }
 
 void StrassenOpenCLKernel::SetGlobalLocal() {
@@ -142,111 +197,188 @@ void StrassenOpenCLKernel::SetGlobalLocal() {
   StrassenSetGlobalLocal(strassen_size, 2);  // set global_size_weights
 }
 
-void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size,
-                                                bool is_matmul_kernel) {
+int StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size,
+                                               bool is_matmul_kernel) {
   cl_int4 shape;
   if (is_matmul_kernel) {
     shape = {1, 1, strassen_size, strassen_size};
   } else {
     shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)};
   }
-  ocl_runtime_->SetKernelArg(*kernel, index, shape);
+  if (ocl_runtime_->SetKernelArg(*kernel, index, shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::SetConstArgs() {
-  int arg_count = 2;
-  cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]};
-  cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]};
-  cl_int4 shape_offset = {0, 0, 0, 0};
+int StrassenOpenCLKernel::SetConstArgs() {
   int strassen_size = inShape[3] / 2;
-  out_shape.s[2] = in_shape.s[2] = in_shape.s[2] / 2;
-  out_shape.s[3] = in_shape.s[3] = in_shape.s[3] / 2;
   StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false);
   StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape);
-  ocl_runtime_->SetKernelArg(kernel_, arg_count++, shape_offset);
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size,
-                                              cl_int2 offset, lite::opencl::MemType mem_type) {
+int StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size,
+                                             cl_int2 offset, lite::opencl::MemType mem_type) {
   if (input == nullptr || output == nullptr) {
     MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr";
-    return;
+    return RET_ERROR;
   }
   if (mem_type == lite::opencl::MemType::IMG) {
-    ocl_runtime_->SetKernelArg(*kernel, 0, input);
-    ocl_runtime_->SetKernelArg(*kernel, 1, output);
+    if (ocl_runtime_->SetKernelArg(*kernel, 0, input) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(*kernel, 1, output) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF);
-    ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   StrassenSetConstArgs(kernel, 2, size, false);
-  ocl_runtime_->SetKernelArg(*kernel, 3, offset);
-  ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset,
-                                          int flag, lite::opencl::MemType mem_type) {
+int StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset,
+                                         int flag, lite::opencl::MemType mem_type) {
   if (input == nullptr || output == nullptr) {
     MS_LOG(ERROR) << "StrassenAddSub input or output can not nullptr";
-    return;
+    return RET_ERROR;
   }
   if (mem_type == lite::opencl::MemType::IMG) {
-    ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG);
-    ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG);
+    if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   } else {
-    ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF);
-    ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF);
+    if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
+    if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
   StrassenSetConstArgs(kernel, 2, size, false);
-  ocl_runtime_->SetKernelArg(*kernel, 3, offset);
-  ocl_runtime_->SetKernelArg(*kernel, 4, flag);
-  ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 4, flag) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3,
-                                              void *input4, void *input5, void *input6, void *input7, void *output,
-                                              const int size) {
+int StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4,
+                                             void *input5, void *input6, void *input7, void *output, const int size) {
   if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr ||
       input6 == nullptr || input7 == nullptr || output == nullptr) {
     MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr";
-    return;
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 0, input1) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 1, input2) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 2, input3) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 3, input4) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 4, input5) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 5, input6) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 6, input7) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(*kernel, 7, output) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->SetKernelArg(*kernel, 0, input1);
-  ocl_runtime_->SetKernelArg(*kernel, 1, input2);
-  ocl_runtime_->SetKernelArg(*kernel, 2, input3);
-  ocl_runtime_->SetKernelArg(*kernel, 3, input4);
-  ocl_runtime_->SetKernelArg(*kernel, 4, input5);
-  ocl_runtime_->SetKernelArg(*kernel, 5, input6);
-  ocl_runtime_->SetKernelArg(*kernel, 6, input7);
-  ocl_runtime_->SetKernelArg(*kernel, 7, output);
   StrassenSetConstArgs(kernel, 8, size, false);
-  ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_);
+  if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) {
+int StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) {
   if (input == nullptr || weight == nullptr || output == nullptr) {
     MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr";
-    return;
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, input) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, output) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
   }
-  ocl_runtime_->SetKernelArg(kernel_, 0, input);
-  ocl_runtime_->SetKernelArg(kernel_, 1, output);
-  ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF);
   StrassenSetConstArgs(&kernel_, 3, size, true);
   StrassenSetConstArgs(&kernel_, 4, size, true);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth,
-                                      const int threshold) {
+int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth,
+                                     const int threshold) {
   const int size_2 = size / 2;
   int C4 = UP_DIV(size_2, C4NUM);
   if (size <= threshold) {
     //   run matmul;
     StrassenSetGlobalLocal(size, 0);
     StrassenRunMmatmul(data, weight, result, size);
-    return;
+    return RET_OK;
   }
   // flag = 0 : add   otherwise flag = 1 : sub
   //   M1 = A11 * ( B12- B22)
@@ -307,6 +439,7 @@ void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, co
   StrassenSetGlobalLocal(size_2, 1);
   StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1],
                      M6[depth + 1], M7[depth + 1], result, size_2);
+  return RET_OK;
 }
 
 int StrassenOpenCLKernel::Run() {
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h
index 808cddd6d18..48596a3ebd2 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h
@@ -33,22 +33,22 @@ class StrassenOpenCLKernel : public MatMulOpenCLKernel {
   int Run() override;
   int Prepare() override;
   int InitWeights() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
   // strassen
  private:
-  void AllocatorMemoryForStrassen(int NumA, int NumB);
-  void DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold);
-  void StrassenSetGlobalLocal(size_t strassen_size, int type_flag);
-  void StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel);
-  void StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset,
-                          lite::opencl::MemType mem_type);
-  void StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag,
-                      lite::opencl::MemType mem_type);
-  void StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5,
-                          void *input6, void *input7, void *output, const int size);
-  void StrassenRunMmatmul(void *input, void *weight, void *output, const int size);
+  int AllocatorMemoryForStrassen(int NumA, int NumB);
+  int DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold);
+  int StrassenSetGlobalLocal(size_t strassen_size, int type_flag);
+  int StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel);
+  int StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset,
+                         lite::opencl::MemType mem_type);
+  int StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag,
+                     lite::opencl::MemType mem_type);
+  int StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5,
+                         void *input6, void *input7, void *output, const int size);
+  int StrassenRunMmatmul(void *input, void *weight, void *output, const int size);
   cl::Kernel kernel_IMG_add_sub_2;
   cl::Kernel MatMul_StrassenBUFFilled;
   cl::Kernel MatMul_StrassenIMGFilled;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc
index b1d7fa9b762..9d00ac7a4dd 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc
@@ -85,7 +85,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() {
 }
 
 int StridedSliceOpenCLKernel::Prepare() {
-  std::string program_name = "strided_slice";
+  const std::string program_name = "strided_slice";
   if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -96,7 +96,10 @@ int StridedSliceOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   return RET_OK;
 }
@@ -187,14 +190,33 @@ int StridedSliceOpenCLKernel::InitConstArgs() {
   return RET_OK;
 }
 
-void StridedSliceOpenCLKernel::SetConstArgs() {
+int StridedSliceOpenCLKernel::SetConstArgs() {
   int arg_cn = 2;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void StridedSliceOpenCLKernel::SetGlobalLocal() {
@@ -214,9 +236,18 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() {
 
 int StridedSliceOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running! ";
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
index 87e2638dc49..3ce6b991ee5 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h
@@ -31,7 +31,7 @@ class StridedSliceOpenCLKernel : public OpenCLKernel {
   int CheckSpecs() override;
 
   int Prepare() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
   int Run() override;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
index 5380f461462..0d6ff88d36d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc
@@ -42,11 +42,18 @@ int ToFormatOpenCLKernel::CheckSpecs() {
   return RET_OK;
 }
 
-void ToFormatOpenCLKernel::SetConstArgs() {
+int ToFormatOpenCLKernel::SetConstArgs() {
   cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_};
   cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1};
-  ocl_runtime_->SetKernelArg(kernel_, 2, gsize);
-  ocl_runtime_->SetKernelArg(kernel_, 3, shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, 2, gsize) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 3, shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void ToFormatOpenCLKernel::SetGlobalLocal() {
@@ -70,7 +77,7 @@ int ToFormatOpenCLKernel::Prepare() {
   kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()];
   this->set_name(kernel_name);
 
-  std::string program_name = "to_format";
+  const std::string program_name = "to_format";
   std::string source = to_format_source;
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
@@ -89,7 +96,10 @@ int ToFormatOpenCLKernel::Prepare() {
   C_ = output.C;
 
   SetGlobalLocal();
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
@@ -98,9 +108,18 @@ int ToFormatOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   auto src_mem_type = (out_mem_type_ == MemType::IMG) ? lite::opencl::MemType::BUF : lite::opencl::MemType::IMG;
   auto dst_mem_type = out_mem_type_;
-  ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type);
-  ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type);
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h
index d600519e3c4..0e1989d157f 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h
@@ -35,7 +35,7 @@ class ToFormatOpenCLKernel : public OpenCLKernel {
   int Prepare() override;
 
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int InferShape() override;
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
index 6841867de66..9c7cbea7c29 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc
@@ -101,7 +101,7 @@ int TransposeOpenCLKernel::Prepare() {
   kernel_name += "_NHWC4";
 
   std::string source = transpose_source;
-  std::string program_name = "transpose";
+  const std::string program_name = "transpose";
   if (!ocl_runtime_->LoadSource(program_name, source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -113,32 +113,45 @@ int TransposeOpenCLKernel::Prepare() {
     MS_LOG(ERROR) << "Build kernel failed.";
     return ret;
   }
-  SetConstArgs();
+  if (SetConstArgs() != RET_OK) {
+    MS_LOG(ERROR) << "SeConstArgs failed.";
+    return RET_ERROR;
+  }
   SetGlobalLocal();
   MS_LOG(DEBUG) << kernel_name << " Init Done!";
   return RET_OK;
 }
 
-void TransposeOpenCLKernel::SetConstArgs() {
+int TransposeOpenCLKernel::SetConstArgs() {
   size_t n = tensor_size_.N;
   size_t h = tensor_size_.H;
   size_t w = tensor_size_.W;
   size_t c = tensor_size_.C;
   int arg_idx = 2;
   cl_int4 shape = {static_cast<int>(n), static_cast<int>(h), static_cast<int>(w), static_cast<int>(c)};
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
   if (type_ == TransposeType::GENERAL) {
     int de_perm[4];  // output to input perm
     for (int i = 0; i < 4; i++) {
       de_perm[perm_4d_[i]] = i;
     }
     cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]};
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
     GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]);
     cl_int4 in_shape_int4 = {static_cast<cl_int>(in_shape.N), static_cast<cl_int>(in_shape.H),
                              static_cast<cl_int>(in_shape.W), static_cast<cl_int>(in_shape.C)};
-    ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4);
+    if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4) != CL_SUCCESS) {
+      MS_LOG(ERROR) << "SetKernelArg failed.";
+      return RET_ERROR;
+    }
   }
+  return RET_OK;
 }
 
 void TransposeOpenCLKernel::SetGlobalLocal() {
@@ -161,9 +174,18 @@ void TransposeOpenCLKernel::SetGlobalLocal() {
 int TransposeOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " Running!";
   int arg_idx = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c());
-  ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c());
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
index 54edb3fd011..5daaf10cd35 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h
@@ -33,7 +33,7 @@ class TransposeOpenCLKernel : public OpenCLKernel {
   int Run() override;
   int Prepare() override;
   int CheckSpecs() override;
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
 
  private:
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc
index 7b52015c617..8e51bcaaaed 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc
@@ -78,7 +78,7 @@ std::vector<float> GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si
 }  // namespace
 
 int WinogradOpenCLKernel::BuildKernel() {
-  std::string program_name = "winograd";
+  const std::string program_name = "winograd";
   if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) {
     MS_LOG(ERROR) << "Load source failed.";
     return RET_ERROR;
@@ -103,7 +103,7 @@ int WinogradOpenCLKernel::BuildKernel() {
   return RET_OK;
 }
 
-void WinogradOpenCLKernel::InitFilter() {
+int WinogradOpenCLKernel::InitFilter() {
   auto allocator = ocl_runtime_->GetAllocator();
 
   // allocate opencl memory: buffer or image2d
@@ -115,9 +115,17 @@ void WinogradOpenCLKernel::InitFilter() {
     size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
     size = width * height * CO_TILE * sizeof_FLT_;
     packed_filter_ = allocator->Malloc({width, height, dtype});
+    if (packed_filter_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
   } else {
     size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_;
     packed_filter_ = allocator->Malloc(size, MemType::BUF);
+    if (packed_filter_ == nullptr) {
+      MS_LOG(ERROR) << "Malloc failed.";
+      return RET_ERROR;
+    }
   }
 
   // rearrange filter
@@ -128,6 +136,10 @@ void WinogradOpenCLKernel::InitFilter() {
   void *src_data = winograd_filter.data();
 #else
   auto winograd_filter = std::make_unique<float[]>(CO_ * 6 * 6 * CI_);
+  if (winograd_filter == nullptr) {
+    MS_LOG(ERROR) << "new winograd_filter failed.";
+    return RET_ERROR;
+  }
   WinogradWeightTransform(reinterpret_cast<const float *>(src_filter_data),
                           reinterpret_cast<float *>(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false);
 
@@ -147,53 +159,121 @@ void WinogradOpenCLKernel::InitFilter() {
   if (filter_type_ == MemType::IMG) {
     ocl_runtime_->WriteImage(packed_filter_, tmp.data());
   } else {
-    allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true);
+    if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) {
+      MS_LOG(ERROR) << "Map Buffer failed.";
+      return RET_ERROR;
+    }
     memcpy(packed_filter_, tmp.data(), size);
-    allocator->UnmapBuffer(packed_filter_);
+    if (allocator->UnmapBuffer(packed_filter_) != RET_OK) {
+      MS_LOG(ERROR) << "UnmapBuffer failed.";
+      return RET_ERROR;
+    }
   }
   FreeStoredData(stored_filter_);
+  return RET_OK;
 }
 
-void WinogradOpenCLKernel::AllocateMemory() {
+int WinogradOpenCLKernel::AllocateMemory() {
   auto allocator = ocl_runtime_->GetAllocator();
   size_t img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT;
 
   size_t width = TILE_HW_;
   size_t height = CI_SLICES_ * 36;
   winograd_mem0_ = allocator->Malloc({width, height, img_dtype});
+  if (winograd_mem0_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
 
   width = TILE_HW_;
   height = CO_SLICES_ * 36;
   winograd_mem1_ = allocator->Malloc({width, height, img_dtype});
+  if (winograd_mem1_ == nullptr) {
+    MS_LOG(ERROR) << "Malloc failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
-void WinogradOpenCLKernel::SetConstArgs() {
+int WinogradOpenCLKernel::SetConstArgs() {
   AllocateMemory();
 
   int arg_cn = 1;
   cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_};  // maybe pad=0, so use OH/OW
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_);
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape);
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_);
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_);
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_);
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
 
   arg_cn = 0;
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_);
-  ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_);
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
 
   arg_cn = 2;
   cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_};
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_);
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF);
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape);
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_);
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_);
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_);
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
 
 void WinogradOpenCLKernel::SetGlobalLocal() {
@@ -205,15 +285,30 @@ void WinogradOpenCLKernel::SetGlobalLocal() {
 int WinogradOpenCLKernel::Run() {
   MS_LOG(DEBUG) << this->name() << " winograd Running!";
   MS_LOG(DEBUG) << "winograd kernel0 Running!";
-  ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_);
+  if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
 
   MS_LOG(DEBUG) << "winograd kernel1 Running!";
-  ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_);
+  if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
 
   MS_LOG(DEBUG) << "winograd kernel2 Running!";
-  ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c());
-  ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_);
+  if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) {
+    MS_LOG(ERROR) << "SetKernelArg failed.";
+    return RET_ERROR;
+  }
+  if (ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_) != RET_OK) {
+    MS_LOG(ERROR) << "RunKernel failed.";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h
index 7ed7050a2d0..9f3da53f780 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h
@@ -32,7 +32,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
 
   ~WinogradOpenCLKernel() override = default;
 
-  void SetConstArgs() override;
+  int SetConstArgs() override;
   void SetGlobalLocal() override;
   int Run() override;
 
@@ -42,8 +42,8 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel {
 
  private:
   int BuildKernel() override;
-  void InitFilter() override;
-  void AllocateMemory();
+  int InitFilter() override;
+  int AllocateMemory();
 
   cl::Kernel kernel_4x4to36_;
   cl::Kernel kernel_36to4x4_;
diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
index 78e6a6842da..bdab2eb6599 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc
@@ -24,7 +24,7 @@ using mindspore::lite::RET_OK;
 using mindspore::lite::opencl::ImageSize;
 
 namespace mindspore::kernel {
-int OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) {
+void OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local) {
   std::vector<size_t> internal_global_ws = global;
   for (size_t i = 0; i < local.size(); ++i) {
     internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i));
@@ -50,16 +50,12 @@ int OpenCLKernel::AlignGlobalLocal(const std::vector<size_t> &global, const std:
     if (!local.empty()) {
       local_range_ = cl::NDRange(local.at(0), local.at(1));
     }
-  } else if (global.size() == 3) {
+  } else if (global.size() >= 3) {
     global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2));
     if (!local.empty()) {
       local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2));
     }
-  } else {
-    MS_LOG(ERROR) << "Not supported NDRange!";
-    return RET_ERROR;
   }
-  return RET_OK;
 }
 
 int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) {
@@ -112,11 +108,17 @@ void OpenCLKernel::PrintOutput(int print_num, const std::string &out_file) {
   auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper();
   auto runtime = runtime_wrapper.GetInstance();
   auto allocator = runtime->GetAllocator();
-  runtime->SyncCommandQueue();
+  if (!runtime->SyncCommandQueue()) {
+    MS_LOG(ERROR) << "SyncCommandQueue failed.";
+  }
   if (mem_type == lite::opencl::MemType::BUF) {
-    allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true);
+    if (allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true) == nullptr) {
+      MS_LOG(ERROR) << "Map Buffer failed.";
+    }
     memcpy(data.data(), tensor->data_c(), img_info.OriginSize);
-    allocator->UnmapBuffer(tensor->data_c());
+    if (allocator->UnmapBuffer(tensor->data_c()) != RET_OK) {
+      MS_LOG(ERROR) << "UnmapBuffer failed.";
+    }
   } else {
     runtime->ReadImage(tensor->data_c(), data.data());
   }
@@ -181,6 +183,7 @@ int OpenCLKernel::PreProcess() {
       }
     }
     output->set_allocator(allocator);
+    output->ResetRefCount();
   }
   return RET_OK;
 }
diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
index 24f10a7aa16..4e17512a38d 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h
@@ -185,7 +185,7 @@ class OpenCLKernel : public InnerKernel {
     ocl_runtime_ = ocl_runtime_wrap_.GetInstance();
   }
   ~OpenCLKernel() override = default;
-  int AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local);
+  void AlignGlobalLocal(const std::vector<size_t> &global, const std::vector<size_t> &local);
 
   int Prepare() override { return RET_OK; }
   int PreProcess() override;
@@ -194,7 +194,7 @@ class OpenCLKernel : public InnerKernel {
 
   virtual int CheckSpecs();
   virtual int InitWeights() { return RET_OK; }
-  virtual void SetConstArgs() {}
+  virtual int SetConstArgs() { return RET_OK; }
   virtual void SetGlobalLocal() {}
   virtual int GetGlobalSize(size_t idx, std::vector<size_t> *global_size) { return RET_ERROR; }
   virtual int GetLocalSize(size_t idx, const std::vector<size_t> &global_size, std::vector<size_t> *local_size) {
diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc
index 957d89a77db..e1c52e51949 100644
--- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc
+++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc
@@ -420,6 +420,7 @@ int OpenCLSubGraph::Execute() {
     return ret;
   }
   if (!ocl_runtime_->SyncCommandQueue()) {
+    MS_LOG(ERROR) << "SyncCommandQueue failed.";
     return RET_ERROR;
   }
   return RET_OK;
@@ -449,6 +450,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &
     return ret;
   }
   if (!ocl_runtime_->SyncCommandQueue()) {
+    MS_LOG(ERROR) << "SyncCommandQueue failed.";
     return RET_ERROR;
   }
   return RET_OK;
diff --git a/mindspore/lite/src/runtime/runtime_pass.cc b/mindspore/lite/src/runtime/runtime_pass.cc
index 0954c178d9f..8bb988e3338 100644
--- a/mindspore/lite/src/runtime/runtime_pass.cc
+++ b/mindspore/lite/src/runtime/runtime_pass.cc
@@ -20,60 +20,30 @@
 namespace mindspore::lite {
 void Nc4hw4PassReplace(std::vector<kernel::LiteKernel *> *kernels, std::vector<Tensor *> *tensors, size_t index) {
   kernel::LiteKernel *conv_kernel = kernels->at(index);
-  kernel::LiteKernel *transpose_kernel = conv_kernel->out_kernels().front();
-  kernel::LiteKernel *c4_kernel = transpose_kernel->out_kernels().front();
-  kernel::LiteKernel *transpose2_kernel = c4_kernel->out_kernels().front();
-  std::vector<kernel::LiteKernel *> end_kernels = transpose2_kernel->out_kernels();
+  kernel::LiteKernel *traspose_kernel = conv_kernel->out_kernels().front();
+  kernel::LiteKernel *c4_kernel = traspose_kernel->out_kernels().front();
 
   /* tensor */
-  {
-    /* transpose_kernel */
-    Tensor *transpose_param_tensor = transpose_kernel->in_tensors().at(1);
-    VectorSetNull(tensors, transpose_param_tensor);
-    delete transpose_param_tensor;
-    transpose_param_tensor = nullptr;
+  Tensor *transpose_param_tensor = traspose_kernel->in_tensors().at(1);
+  VectorErase(tensors, transpose_param_tensor);
+  delete transpose_param_tensor;
+  transpose_param_tensor = nullptr;
 
-    Tensor *conv_out_tensor = conv_kernel->out_tensors().front();
-    conv_out_tensor->set_format(NC4HW4);
-    Tensor *c4_input_tensor = c4_kernel->in_tensors().front();
-    c4_kernel->set_in_tensor(conv_out_tensor, 0);
-    VectorSetNull(tensors, c4_input_tensor);
-    delete c4_input_tensor;
-    c4_input_tensor = nullptr;
-  }
-  {
-    /* transpose2_kernel */
-    Tensor *transpose_param_tensor = transpose2_kernel->in_tensors().at(1);
-    VectorSetNull(tensors, transpose_param_tensor);
-    delete transpose_param_tensor;
-    transpose_param_tensor = nullptr;
-
-    Tensor *nwhc_tensor = c4_kernel->out_tensors().front();
-    nwhc_tensor->set_format(NHWC);
-    for (auto end : end_kernels) {
-      end->set_in_tensor(nwhc_tensor, 0);
-    }
-    Tensor *trans_out = transpose2_kernel->out_tensors().front();
-    VectorSetNull(tensors, trans_out);
-    delete trans_out;
-    trans_out = nullptr;
-  }
+  Tensor *conv_out_tensor = conv_kernel->out_tensors().front();
+  conv_out_tensor->set_format(NC4HW4);
+  Tensor *c4_input_tensor = c4_kernel->in_tensors().front();
+  c4_kernel->set_in_tensor(conv_out_tensor, 0);
+  VectorErase(tensors, c4_input_tensor);
+  delete c4_input_tensor;
+  c4_input_tensor = nullptr;
 
   /* kernel */
-  VectorErase(kernels, transpose_kernel);
-  delete transpose_kernel;
-  transpose_kernel = nullptr;
+  VectorErase(kernels, traspose_kernel);
+  delete traspose_kernel;
+  traspose_kernel = nullptr;
   conv_kernel->set_out_kernels({c4_kernel});
   c4_kernel->set_in_kernels({conv_kernel});
 
-  c4_kernel->set_out_kernels(transpose2_kernel->out_kernels());
-  for (auto end : end_kernels) {
-    end->set_in_kernels({c4_kernel});
-  }
-  VectorErase(kernels, transpose2_kernel);
-  delete transpose2_kernel;
-  transpose2_kernel = nullptr;
-
   return;
 }
 
@@ -90,38 +60,27 @@ bool Nc4hw4PassMatch(std::vector<kernel::LiteKernel *> *kernels, size_t index) {
     return false;
   }
 
-  kernel::LiteKernel *traspose_nhwc2nchw_kernel = start_kernel->out_kernels().front();
-  if (traspose_nhwc2nchw_kernel->type() != Nc4hw4FormatTransposeOp) {
+  kernel::LiteKernel *traspose_kernel = start_kernel->out_kernels().front();
+  if (start_kernel->type() != Nc4hw4FormatTransposeOp) {
     return false;
   }
-  if (traspose_nhwc2nchw_kernel->out_kernels().size() != 1) {
+  if (traspose_kernel->out_kernels().size() != 1) {
     return false;
   }
 
-  kernel::LiteKernel *end_kernel = traspose_nhwc2nchw_kernel->out_kernels().front();
+  kernel::LiteKernel *end_kernel = traspose_kernel->out_kernels().front();
   if (IsContain(Nc4hw4FormatInOpList, end_kernel->type()) == false) {
     return false;
   }
-  if (end_kernel->out_kernels().size() != 1) {
-    return false;
-  }
-
-  kernel::LiteKernel *transpose_nchw2nhwc_kernel = end_kernel->out_kernels().front();
-  if (transpose_nchw2nhwc_kernel->type() != Nc4hw4FormatTransposeOp) {
-    return false;
-  }
 
   /* double check ops topological sorted in kernel-list */
   auto start_iter = find(kernels->begin(), kernels->end(), start_kernel);
   auto start_index = std::distance(kernels->begin(), start_iter);
-  auto traspose_nhwc2nchw_iter = find(kernels->begin(), kernels->end(), traspose_nhwc2nchw_kernel);
-  auto traspose_nhwc2nchw_index = std::distance(kernels->begin(), traspose_nhwc2nchw_iter);
+  auto transpose_iter = find(kernels->begin(), kernels->end(), traspose_kernel);
+  auto transpose_index = std::distance(kernels->begin(), transpose_iter);
   auto end_iter = find(kernels->begin(), kernels->end(), end_kernel);
   auto end_index = std::distance(kernels->begin(), end_iter);
-  auto transpose_nchw2nhwc_iter = find(kernels->begin(), kernels->end(), transpose_nchw2nhwc_kernel);
-  auto transpose_nchw2nhwc_index = std::distance(kernels->begin(), transpose_nchw2nhwc_iter);
-  if (start_index > traspose_nhwc2nchw_index || traspose_nhwc2nchw_index > end_index ||
-      end_index > transpose_nchw2nhwc_index) {
+  if (start_index > transpose_index || transpose_index > end_index) {
     return false;
   }
 
@@ -129,31 +88,31 @@ bool Nc4hw4PassMatch(std::vector<kernel::LiteKernel *> *kernels, size_t index) {
 }
 
 bool Nc4hw4PassValid(const InnerContext *context, std::vector<kernel::LiteKernel *> *kernels) {
+  return false;
+
   if (context->IsGpuEnabled() || context->IsNpuEnabled()) {
     return false;
   }
 
   for (auto kernel : *kernels) {
-    if (kernel->op_parameter() != nullptr) {
-      if (kernel->op_parameter()->quant_type_ == schema::QuantType_AwareTraining ||
-          kernel->op_parameter()->quant_type_ == schema::QuantType_PostTraining) {
-        return false;
-      }
+    if (kernel->op_parameter()->quant_type_ == schema::QuantType_AwareTraining ||
+        kernel->op_parameter()->quant_type_ == schema::QuantType_PostTraining) {
+      return false;
     }
   }
-  return false;
+  return true;
 }
 
-void Nc4hw4PassAct(std::vector<kernel::LiteKernel *> *kernels, std::vector<Tensor *> *tensors) {
+void Nc4hw4Pass(std::vector<kernel::LiteKernel *> *kernels, std::vector<Tensor *> *tensors) {
   size_t kernel_size = kernels->size();
   size_t index = 0;
-  for (; index + 3 < kernel_size; index++) {
+  for (; index < kernel_size - 2; index++) {
     kernel::LiteKernel *kernel = kernels->at(index);
 
     if (kernel->subgraph_type() != kernel::kNotSubGraph) {
       kernel::SubGraphKernel *subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
       std::vector<kernel::LiteKernel *> &particial_nodes = subgraph->nodes();
-      Nc4hw4PassAct(&particial_nodes, tensors);
+      Nc4hw4Pass(&particial_nodes, tensors);
     }
 
     if (Nc4hw4PassMatch(kernels, index)) {
@@ -164,11 +123,4 @@ void Nc4hw4PassAct(std::vector<kernel::LiteKernel *> *kernels, std::vector<Tenso
   }
   return;
 }
-
-void Nc4hw4Pass(const InnerContext *context, std::vector<kernel::LiteKernel *> *kernels,
-                std::vector<Tensor *> *tensors) {
-  if (Nc4hw4PassValid(context, kernels)) {
-    Nc4hw4PassAct(kernels, tensors);
-  }
-}
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/runtime/runtime_pass.h b/mindspore/lite/src/runtime/runtime_pass.h
index a12d050461c..141c7d8e3c4 100644
--- a/mindspore/lite/src/runtime/runtime_pass.h
+++ b/mindspore/lite/src/runtime/runtime_pass.h
@@ -17,7 +17,6 @@
 #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
 
-#ifndef RUNTIME_PASS_CLIP
 #include <vector>
 #include "src/lite_kernel.h"
 #include "src/sub_graph_kernel.h"
@@ -27,15 +26,16 @@
 namespace mindspore::lite {
 
 /* Nc4hw4 PASS
- * before  : --(nhwc)-- CONV --(nhwc)-- TRANSPOSE --(nchw)-- IN --(nchw)-- TRANSPOSE --(nhwc)--
- * after   : --(nhwc)-- CONV --(nc4hw4)-- IN --(nhwc)--
+ * before  :  CONV --(nhwc)-- TRANSPOSE --(nhwc)-- OP
+ * after   :  CONV --(nc4hw4)-- OP
  * */
 static const schema::PrimitiveType Nc4hw4FormatTransposeOp = schema::PrimitiveType_Transpose;
 static const std::vector<schema::PrimitiveType> Nc4hw4FormatOutOpList = {schema::PrimitiveType_Conv2DFusion};
-static const std::vector<schema::PrimitiveType> Nc4hw4FormatInOpList = {schema::PrimitiveType_InstanceNorm};
-void Nc4hw4Pass(const InnerContext *context, std::vector<kernel::LiteKernel *> *kernels,
-                std::vector<Tensor *> *tensors);
+static const std::vector<schema::PrimitiveType> Nc4hw4FormatInOpList = {schema::PrimitiveType_InstanceNorm,
+                                                                        schema::PrimitiveType_PadFusion};
+bool Nc4hw4PassValid(const InnerContext *context, std::vector<kernel::LiteKernel *> *kernels);
+void Nc4hw4Pass(std::vector<kernel::LiteKernel *> *kernels, std::vector<Tensor *> *tensors);
 
 }  // namespace mindspore::lite
-#endif
+
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_
diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc
index e55b112dcc6..6b6793d2d0c 100644
--- a/mindspore/lite/src/scheduler.cc
+++ b/mindspore/lite/src/scheduler.cc
@@ -34,6 +34,7 @@
 #include "src/common/prim_util.h"
 #include "src/common/tensor_util.h"
 #include "src/runtime/infer_manager.h"
+#include "src/runtime/runtime_pass.h"
 #include "src/sub_graph_split.h"
 #include "src/weight_decoder.h"
 #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h"
@@ -61,15 +62,6 @@ kernel::SubGraphKernel *CreateCustomSubGraph(std::vector<kernel::LiteKernel *> &
 }
 }  // namespace
 
-void Scheduler::SetSubgraphForPartialNode() {
-  for (auto &pair : partial_kernel_subgraph_index_map_) {
-    auto &partial_kernel = pair.first;
-    auto &subgraph_index = pair.second;
-    static_cast<kernel::PartialFusionKernel *>(partial_kernel->kernel())
-      ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index));
-  }
-}
-
 int Scheduler::InitKernels(std::vector<kernel::LiteKernel *> dst_kernels) {
   if (is_train_session_) {
     return RET_OK;
@@ -117,9 +109,14 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
   }
 
   if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) {
+#ifdef ENABLE_AUTO_PARALLEL
     auto search_sub_graph =
       SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_);
     search_sub_graph.SubGraphSplit();
+#else
+    MS_LOG(ERROR) << unsupport_auto_parallel_log;
+    return RET_NOT_SUPPORT;
+#endif
   }
 
   int ret = ScheduleGraphToKernels(dst_kernels);
@@ -129,7 +126,9 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
     return ret;
   }
 
+#ifdef ENABLE_CONTROL_TENSORLIST
   SetSubgraphForPartialNode();
+#endif
   if (delegate_ != nullptr) {
     ret = ReplaceDelegateKernels(dst_kernels);
     if (ret != RET_OK) {
@@ -137,8 +136,13 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
       return ret;
     }
   }
-  FindAllInoutKernels(*dst_kernels);
 
+  if (Nc4hw4PassValid(context_, dst_kernels)) {
+    Nc4hw4Pass(dst_kernels, src_tensors_);
+  }
+
+  FindAllInoutKernels(*dst_kernels);
+#ifdef ENABLE_CONTROL_TENSORLIST
   if (IsControlFlowParttern(*dst_kernels)) {
     ret = ConstructControlFlowMainGraph(dst_kernels);
     if (ret != RET_OK) {
@@ -146,6 +150,7 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
       return ret;
     }
   } else {
+#endif
     auto src_kernel = *dst_kernels;
     dst_kernels->clear();
     std::map<const kernel::LiteKernel *, bool> is_kernel_finish;
@@ -154,7 +159,9 @@ int Scheduler::Schedule(std::vector<kernel::LiteKernel *> *dst_kernels) {
       MS_LOG(ERROR) << "ConstructSubGraphs failed.";
       return ret;
     }
+#ifdef ENABLE_CONTROL_TENSORLIST
   }
+#endif
 
   ret = InitKernels(*dst_kernels);
   if (ret != RET_OK) {
@@ -332,19 +339,6 @@ int Scheduler::RestoreSubGraphInput(const lite::Model::Node *partial_node) {
   return RET_OK;
 }
 
-void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) {
-  dst_tensor->set_data_type(src_tensor->data_type());
-  dst_tensor->set_format(src_tensor->format());
-  dst_tensor->set_element_shape(src_tensor->element_shape());
-  dst_tensor->set_shape(src_tensor->shape());
-  std::vector<Tensor *> cpy_tensors{};
-  for (auto &tensor : src_tensor->tensors()) {
-    auto new_tensor = Tensor::CopyTensor(*tensor, false);
-    cpy_tensors.push_back(new_tensor);
-  }
-  dst_tensor->set_tensors(cpy_tensors);
-}
-
 void CopyCommonTensor(Tensor *dst_tensor, Tensor *src_tensor) {
   dst_tensor->set_data_type(src_tensor->data_type());
   dst_tensor->set_shape(src_tensor->shape());
@@ -396,36 +390,6 @@ int Scheduler::InferPartialShape(const lite::Model::Node *node) {
   return ret;
 }
 
-int Scheduler::InferSwitchShape(const lite::Model::Node *switch_node) {
-  MS_ASSERT(src_model_ != nullptr);
-  MS_ASSERT(switch_node != nullptr);
-  if (!IsSwitchNode(switch_node->primitive_)) {
-    MS_LOG(ERROR) << "Node is not a switch";
-    return RET_PARAM_INVALID;
-  }
-  std::deque<lite::Model::Node *> partial_cnode_to_infer{};
-  auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch);
-  auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch);
-  for (auto &node : src_model_->all_nodes_) {
-    if ((IsContain(node->output_indices_, true_branch_output_index) ||
-         IsContain(node->output_indices_, false_branch_output_index)) &&
-        IsPartialNode(node->primitive_) && partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) {
-      partial_cnode_inferred_.insert(node);
-      partial_cnode_to_infer.push_back(node);
-    }
-  }
-
-  while (!partial_cnode_to_infer.empty()) {
-    auto &node = partial_cnode_to_infer.front();
-    partial_cnode_to_infer.pop_front();
-    int ret = InferPartialShape(node);
-    if (ret != RET_OK) {
-      MS_LOG(WARNING) << "partial infer not ok, ret: " << ret;
-    }
-  }
-  return RET_OK;
-}
-
 Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) {
   MS_ASSERT(src_model_ != nullptr);
   MS_ASSERT(node != nullptr);
@@ -441,21 +405,6 @@ Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) {
   return nullptr;
 }
 
-Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) {
-  MS_ASSERT(src_model_ != nullptr);
-  MS_ASSERT(node != nullptr);
-  for (auto &iter : src_model_->all_nodes_) {
-    if (iter->output_indices_ == node->input_indices_) {
-      if (IsSwitchNode(iter->primitive_)) {
-        return iter;
-      } else {
-        return nullptr;
-      }
-    }
-  }
-  return nullptr;
-}
-
 int Scheduler::InferCallShape(const lite::Model::Node *node) {
   MS_ASSERT(src_model_ != nullptr);
   MS_ASSERT(node != nullptr);
@@ -468,11 +417,12 @@ int Scheduler::InferCallShape(const lite::Model::Node *node) {
   if (partial_input) {
     return InferPartialShape(partial_input);
   }
-
+#ifdef ENABLE_CONTROL_TENSORLIST
   auto switch_input = NodeInputIsSwitch(node);
   if (switch_input) {
     return InferSwitchShape(switch_input);
   }
+#endif
 
   MS_LOG(ERROR) << "call input is not partial and also not switch.";
   return RET_ERROR;
@@ -1090,12 +1040,6 @@ kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src
   return kernel;
 }
 
-bool Scheduler::SubGraphHasScheduled(const int &index) {
-  return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end();
-}
-
-void Scheduler::SubGraphMarkScheduled(const int &index) { scheduled_subgraph_index_.insert(index); }
-
 bool Scheduler::IsControlFlowPattern(const lite::Model::Node &partial_node) {
   lite::Model::Node *partial_node_output = nullptr;
   for (auto output_index : partial_node.output_indices_) {
@@ -1147,6 +1091,7 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
 
     if (IsPartialNode(primitive)) {
       if (IsControlFlowPattern(*node)) {
+#ifdef ENABLE_CONTROL_TENSORLIST
         kernel = ScheduleNodeToKernel(node, prefer_data_type);
         auto partial_subgraph_index = GetPartialGraphIndex(primitive);
         if (SubGraphHasScheduled(partial_subgraph_index)) {
@@ -1157,6 +1102,10 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector<kern
           partial_kernel_subgraph_index_map_[kernel] = partial_subgraph_index;
           subgraphs_to_schedule_.push_back(partial_subgraph_index);
         }
+#else
+        MS_LOG(ERROR) << unsupport_control_tensorlist_log;
+        return RET_ERROR;
+#endif
       } else {
         kernel = SchedulePartialToKernel(node);
       }
@@ -1283,6 +1232,7 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_ten
     if (dtype == kObjectTypeString) {
       return kNumberTypeFloat32;
     }
+#ifdef ENABLE_CONTROL_TENSORLIST
     if (dtype == kObjectTypeTensorType) {
       auto tensor_list = reinterpret_cast<TensorList *>(tensor);
       auto tensor_list_dtype = tensor_list->tensors_data_type();
@@ -1292,6 +1242,7 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_ten
         return tensor_list_dtype;
       }
     }
+#endif
     if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 ||
         dtype == kNumberTypeInt32 || dtype == kNumberTypeBool) {
       return dtype;
@@ -1366,6 +1317,80 @@ kernel::SubGraphType Scheduler::PartialSubGraphType(const std::vector<kernel::Li
   return kernel::kCpuFP32SubGraph;
 }
 
+#ifdef ENABLE_CONTROL_TENSORLIST
+int Scheduler::InferSwitchShape(const lite::Model::Node *switch_node) {
+  MS_ASSERT(src_model_ != nullptr);
+  MS_ASSERT(switch_node != nullptr);
+  if (!IsSwitchNode(switch_node->primitive_)) {
+    MS_LOG(ERROR) << "Node is not a switch";
+    return RET_PARAM_INVALID;
+  }
+  std::deque<lite::Model::Node *> partial_cnode_to_infer{};
+  auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch);
+  auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch);
+  for (auto &node : src_model_->all_nodes_) {
+    if ((IsContain(node->output_indices_, true_branch_output_index) ||
+         IsContain(node->output_indices_, false_branch_output_index)) &&
+        IsPartialNode(node->primitive_) && partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) {
+      partial_cnode_inferred_.insert(node);
+      partial_cnode_to_infer.push_back(node);
+    }
+  }
+
+  while (!partial_cnode_to_infer.empty()) {
+    auto &node = partial_cnode_to_infer.front();
+    partial_cnode_to_infer.pop_front();
+    int ret = InferPartialShape(node);
+    if (ret != RET_OK) {
+      MS_LOG(WARNING) << "partial infer not ok, ret: " << ret;
+    }
+  }
+  return RET_OK;
+}
+
+Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) {
+  MS_ASSERT(src_model_ != nullptr);
+  MS_ASSERT(node != nullptr);
+  for (auto &iter : src_model_->all_nodes_) {
+    if (iter->output_indices_ == node->input_indices_) {
+      if (IsSwitchNode(iter->primitive_)) {
+        return iter;
+      } else {
+        return nullptr;
+      }
+    }
+  }
+  return nullptr;
+}
+
+bool Scheduler::SubGraphHasScheduled(const int &index) {
+  return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end();
+}
+
+void Scheduler::SubGraphMarkScheduled(const int &index) { scheduled_subgraph_index_.insert(index); }
+
+void Scheduler::SetSubgraphForPartialNode() {
+  for (auto &pair : partial_kernel_subgraph_index_map_) {
+    auto &partial_kernel = pair.first;
+    auto &subgraph_index = pair.second;
+    static_cast<kernel::PartialFusionKernel *>(partial_kernel->kernel())
+      ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index));
+  }
+}
+
+void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) {
+  dst_tensor->set_data_type(src_tensor->data_type());
+  dst_tensor->set_format(src_tensor->format());
+  dst_tensor->set_element_shape(src_tensor->element_shape());
+  dst_tensor->set_shape(src_tensor->shape());
+  std::vector<Tensor *> cpy_tensors{};
+  for (auto &tensor : src_tensor->tensors()) {
+    auto new_tensor = Tensor::CopyTensor(*tensor, false);
+    cpy_tensors.push_back(new_tensor);
+  }
+  dst_tensor->set_tensors(cpy_tensors);
+}
+
 bool Scheduler::IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels) {
   if (std::any_of(kernels.begin(), kernels.end(), [](kernel::LiteKernel *item) {
         if (item->op_parameter()) {
@@ -1398,4 +1423,5 @@ int Scheduler::ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *
   kernels->insert(kernels->begin(), subgraph_kernel);
   return RET_OK;
 }
+#endif
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h
index 3ef86742667..077e1d65836 100644
--- a/mindspore/lite/src/scheduler.h
+++ b/mindspore/lite/src/scheduler.h
@@ -56,8 +56,6 @@ class Scheduler {
   void FindNodeInoutTensors(const Model::Node &node, std::vector<Tensor *> *inputs, std::vector<Tensor *> *outputs);
   Model::Node *NodeInputIsPartial(const Model::Node *node);
   int InferPartialShape(const Model::Node *node);
-  Model::Node *NodeInputIsSwitch(const Model::Node *node);
-  int InferSwitchShape(const Model::Node *node);
   int InferCallShape(const Model::Node *node);
   int InferNodeShape(const Model::Node *node);
   int InferSubGraphShape(size_t subgraph_index);
@@ -95,19 +93,24 @@ class Scheduler {
   std::vector<kernel::LiteKernel *> ScheduleMainSubGraphToKernels();
   kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index);
   kernel::SubGraphType PartialSubGraphType(const std::vector<kernel::LiteKernel *> &kernels);
-  bool IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels);
-  int ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels);
 
   // other methods
   static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector<Tensor *> &in_tensors);
   static void SetKernelTensorDataType(kernel::LiteKernel *kernel);
   int CopyPartialShapeToSubGraph(const lite::Model::Node *partial_node);
   int RestoreSubGraphInput(const lite::Model::Node *partial_node);
+
+  bool IsControlFlowPattern(const lite::Model::Node &partial_node);
+  int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type);
+#ifdef ENABLE_CONTROL_TENSORLIST
+  int InferSwitchShape(const Model::Node *node);
+  Model::Node *NodeInputIsSwitch(const Model::Node *node);
   bool SubGraphHasScheduled(const int &index);
   void SubGraphMarkScheduled(const int &index);
   void SetSubgraphForPartialNode();
-  bool IsControlFlowPattern(const lite::Model::Node &partial_node);
-  int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type);
+  bool IsControlFlowParttern(const std::vector<kernel::LiteKernel *> &kernels);
+  int ConstructControlFlowMainGraph(std::vector<kernel::LiteKernel *> *kernels);
+#endif
 
  protected:
   const InnerContext *context_ = nullptr;
@@ -124,11 +127,13 @@ class Scheduler {
   std::unique_ptr<SchedulerCb> sched_cb_;
   std::map<kernel::Kernel *, const schema::Primitive *> primitives_;
   std::shared_ptr<Delegate> delegate_ = nullptr;
-  std::set<int> scheduled_subgraph_index_{};
   std::deque<int> subgraphs_to_schedule_{};
-  std::unordered_map<kernel::LiteKernel *, size_t> partial_kernel_subgraph_index_map_{};
   std::unordered_map<size_t, kernel::LiteKernel *> subgraph_index_subgraph_kernel_map_{};
+#ifdef ENABLE_CONTROL_TENSORLIST
+  std::set<int> scheduled_subgraph_index_{};
+  std::unordered_map<kernel::LiteKernel *, size_t> partial_kernel_subgraph_index_map_{};
   std::set<lite::Model::Node *> partial_cnode_inferred_{};
+#endif
 };
 }  // namespace mindspore::lite
 
diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc
index c75b955fea9..4e8b7637238 100644
--- a/mindspore/lite/src/sub_graph_kernel.cc
+++ b/mindspore/lite/src/sub_graph_kernel.cc
@@ -144,9 +144,9 @@ void SubGraphKernel::InitInputTensorInitRefCount() {
   }
 }
 
-void SubGraphKernel::InitOutTensorInitRefCount() {
+void SubGraphKernel::InitOutTensorInitRefCount(const std::vector<LiteKernel *> *mask_kernels) {
   for (auto *node : nodes_) {
-    node->InitOutTensorInitRefCount();
+    node->InitOutTensorInitRefCount(mask_kernels);
   }
 }
 
@@ -221,14 +221,6 @@ int CpuSubGraph::Prepare() {
 
 int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
   MS_ASSERT(this->Context()->allocator.get() != nullptr);
-#ifdef SUPPORT_GPU
-  // In heterogeneous scenarios of CPU and GPU, call MutableData to MapBuffer(synchronize data).
-  if (this->Context()->IsGpuEnabled()) {
-    for (auto tensor : this->in_tensors()) {
-      tensor->MutableData();
-    }
-  }
-#endif
 
   for (auto *kernel : nodes_) {
     MS_ASSERT(kernel != nullptr);
diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h
index 0200b2ebd8b..647c1a075ef 100644
--- a/mindspore/lite/src/sub_graph_kernel.h
+++ b/mindspore/lite/src/sub_graph_kernel.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_H
-#define MINDSPORE_LITE_SRC_SUB_GRAPH_H
+#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_
+#define MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_
 
 #include <atomic>
 #include <utility>
@@ -101,7 +101,7 @@ class SubGraphKernel : public LiteKernel {
   // called after Run
   int ReSize() override;
 
-  void InitOutTensorInitRefCount() override;
+  void InitOutTensorInitRefCount(const std::vector<LiteKernel *> *mask_kernels) override;
 
   void InitInputTensorInitRefCount();
 
@@ -109,7 +109,7 @@ class SubGraphKernel : public LiteKernel {
 
   std::string ToString() const override;
 
-  std::vector<LiteKernel *> nodes() { return this->nodes_; }
+  std::vector<LiteKernel *> &nodes() { return this->nodes_; }
 
   void DropNode(LiteKernel *node);
 
@@ -226,4 +226,4 @@ class CustomSubGraph : public SubGraphKernel {
   int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
 };
 }  // namespace mindspore::kernel
-#endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_H
+#endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_
diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc
index 93822eb96e3..8dc10b2e0af 100644
--- a/mindspore/lite/src/tensor.cc
+++ b/mindspore/lite/src/tensor.cc
@@ -316,7 +316,9 @@ void Tensor::FreeData() {
     this->data_ = nullptr;
   } else {
     allocator_->Free(this->data_);
-    this->data_ = nullptr;
+    if (!IS_STATIC_ALLOCATOR(allocator_) || (allocator_->RefCount(this->data_) != 0)) {
+      this->data_ = nullptr;
+    }
   }
 }
 
diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h
index 1933aeec957..86cdd64c305 100644
--- a/mindspore/lite/src/tensor.h
+++ b/mindspore/lite/src/tensor.h
@@ -34,17 +34,20 @@
 
 namespace mindspore {
 namespace lite {
+
+#define STATIC_ALLOCATION -271964
+#define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION))
 struct LiteQuantParam {
   double scale;
   int32_t zeroPoint;
   float var_corr{1};
   float mean_corr{0};
-  bool inited;
+  bool inited{false};
   std::vector<float> clusters{};
-  int bitNum;
-  int roundType;
-  int multiplier;
-  int dstDtype;
+  int bitNum{8};
+  int roundType{1};
+  int multiplier{1};
+  int dstDtype{32};
 };
 
 class Tensor : public mindspore::tensor::MSTensor {
@@ -133,7 +136,6 @@ class Tensor : public mindspore::tensor::MSTensor {
   void set_format(mindspore::Format format) override { this->format_ = format; }
 
   mindspore::Format format() const override { return this->format_; }
-
   virtual int ref_count() const { return ref_count_; }
 
   virtual int init_ref_count() const { return this->init_ref_count_; }
diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h
index d03ee57bd2d..e2474eb4d76 100644
--- a/mindspore/lite/src/tensorlist.h
+++ b/mindspore/lite/src/tensorlist.h
@@ -24,7 +24,7 @@
 #include "src/common/log_adapter.h"
 #include "schema/model_generated.h"
 #include "src/tensor.h"
-
+#ifdef ENABLE_CONTROL_TENSORLIST
 namespace mindspore::lite {
 /**
  * Tensorlist is a container of vector, in which each element is a tensor object.
@@ -177,5 +177,5 @@ class TensorList : public Tensor {
   int max_elements_num_ = -1;
 };
 }  // namespace mindspore::lite
-
+#endif
 #endif  // MINDSPORE_LITE_SRC_TENSORLIST_H_
diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc
index 8a5514be5a7..d6601eafcde 100644
--- a/mindspore/lite/src/train/train_session.cc
+++ b/mindspore/lite/src/train/train_session.cc
@@ -24,22 +24,25 @@
 #include <memory>
 #include <map>
 #include "include/errorcode.h"
-#include "src/common/utils.h"
-#include "src/tensor.h"
-#include "src/lite_model.h"
-#include "src/train/loss_kernel.h"
-#include "src/train/optimizer_kernel.h"
-#include "src/sub_graph_kernel.h"
-#include "src/train/train_populate_parameter.h"
-#include "src/train/train_populate_parameter_v0.h"
 #include "src/executor.h"
+#include "src/lite_model.h"
+#include "src/lite_kernel_util.h"
+#include "src/sub_graph_kernel.h"
+#include "src/tensor.h"
 #include "src/kernel_registry.h"
+#include "src/common/prim_util.h"
+#include "src/common/tensor_util.h"
+#include "src/common/utils.h"
 #include "src/runtime/kernel/arm/fp32_grad/convolution.h"
 #include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h"
-#include "src/common/tensor_util.h"
+#include "src/train/loss_kernel.h"
+#include "src/train/optimizer_kernel.h"
 #include "src/train/train_utils.h"
 #include "src/train/train_export.h"
-#include "src/common/prim_util.h"
+#include "src/train/opt_allocator.h"
+#include "src/train/static_allocator.h"
+#include "src/train/train_populate_parameter.h"
+#include "src/train/train_populate_parameter_v0.h"
 
 namespace mindspore {
 namespace lite {
@@ -67,6 +70,7 @@ int TrainSession::Init(const Context *context, const TrainCfg *train_cfg) {
     }
     cfg_ = *train_cfg;
   }
+  allocator_ = context->allocator;
   return lite::LiteSession::Init(context);
 }
 
@@ -158,6 +162,51 @@ int TrainSession::InitCallBack() {
   return RET_OK;
 }
 
+int TrainSession::AllocTensors(const std::vector<kernel::LiteKernel *> &kernels) {
+  if (!IS_STATIC_ALLOCATOR(allocator_)) return RET_OK;
+  OptAllocator allocator;
+  std::unordered_map<lite::Tensor *, int> ref_count;
+  std::unordered_map<lite::Tensor *, size_t> offset_map;
+  for (auto kernel : kernels) {
+    for (auto tensor : kernel->out_tensors()) {
+      size_t size = tensor->Size();
+      size_t offset = allocator.Malloc(size);
+      offset_map[tensor] = offset;
+      ref_count[tensor] = tensor->init_ref_count();
+    }
+    for (auto tensor : kernel->in_tensors()) {
+      if (tensor->category() == lite::Tensor::VAR) {
+        int count = ref_count[tensor] - 1;
+        ref_count[tensor] = count;
+        if (count == 0) {
+          allocator.Free(offset_map[tensor]);
+        }
+      }
+    }
+  }
+  // Set Tensor data
+  if (tensors_data_ == nullptr) {
+    auto size = allocator.total_size();
+    auto buf = malloc(size);
+    if (buf == nullptr) {
+      MS_LOG(ERROR) << "cannot allocate buffer size" << size;
+      return RET_ERROR;
+    }
+    StaticAllocator *alloc = reinterpret_cast<StaticAllocator *>(allocator_.get());
+    alloc->SetContex(buf, size);
+    tensors_data_ = buf;
+  }
+  for (auto kernel : train_kernels_) {
+    for (auto tensor : kernel->out_tensors()) {
+      auto it = offset_map.find(tensor);
+      if (it != offset_map.end()) {
+        tensor->set_data(reinterpret_cast<void *>(reinterpret_cast<char *>(tensors_data_) + it->second));
+      }
+    }
+  }
+  return RET_OK;
+}
+
 int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; }
 
 int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
@@ -193,10 +242,21 @@ int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
     MS_LOG(ERROR) << "failed to allocate space";
     return RET_ERROR;
   }
+  ret = AllocTensors(train_kernels_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "failed to allocate space";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
-TrainSession::~TrainSession() { FreeWorkSpace(); }
+TrainSession::~TrainSession() {
+  FreeWorkSpace();
+  if (tensors_data_ != nullptr) {
+    free(tensors_data_);
+    tensors_data_ = nullptr;
+  }
+}
 
 int TrainSession::ExecKernels(const KernelCallBack &before, const KernelCallBack &after,
                               const std::vector<kernel::LiteKernel *> &run_kernels) {
@@ -412,6 +472,19 @@ int TrainSession::Train() {
   output_node_map_ = train_output_node_map_;
   output_tensor_map_ = train_output_tensor_map_;
   output_tensor_names_ = train_output_tensor_names_;
+  kernel::LiteKernelUtil::InitTensorInitRefCount(train_kernels_);
+  for (auto &ms_tensors : eval_output_node_map_) {  // Allow to look at prediction also during training
+    for (auto &ms_tensor : ms_tensors.second) {
+      lite::Tensor *lite_tensor = static_cast<lite::Tensor *>(ms_tensor);
+      lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
+    }
+  }
+  // allocate tensors
+  auto ret = AllocTensors(train_kernels_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "failed to allocate tensor space";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -431,6 +504,18 @@ int TrainSession::Eval() {
   output_node_map_ = eval_output_node_map_;
   output_tensor_map_ = eval_output_tensor_map_;
   output_tensor_names_ = eval_output_tensor_names_;
+  kernel::LiteKernelUtil::InitTensorInitRefCount(inference_kernels_);
+  for (auto &ms_tensors : eval_output_node_map_) {
+    for (auto &ms_tensor : ms_tensors.second) {
+      lite::Tensor *lite_tensor = static_cast<lite::Tensor *>(ms_tensor);
+      lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
+    }
+  }
+  auto ret = AllocTensors(inference_kernels_);
+  if (ret != RET_OK) {
+    MS_LOG(ERROR) << "failed to allocate space";
+    return RET_ERROR;
+  }
   return RET_OK;
 }
 
@@ -766,7 +851,12 @@ session::LiteSession *session::TrainSession::CreateTrainSession(const std::strin
     MS_LOG(ERROR) << "create session failed";
     return nullptr;
   }
-
+  if (context->allocator == nullptr) {
+    const_cast<lite::Context *>(context)->allocator = std::shared_ptr<Allocator>(new (std::nothrow) StaticAllocator());
+    if (context->allocator == nullptr) {
+      MS_LOG(ERROR) << " cannot convert to static allocation";
+    }
+  }
   auto ret = session->Init(context, cfg);
   if (ret != mindspore::lite::RET_OK) {
     MS_LOG(ERROR) << "init session failed";
diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h
index a21ab9f07f3..257d29180cc 100644
--- a/mindspore/lite/src/train/train_session.h
+++ b/mindspore/lite/src/train/train_session.h
@@ -147,6 +147,7 @@ class TrainSession : virtual public lite::LiteSession {
   void FreeRestoreTensors();
   bool AllInputsNeedScale(kernel::LiteKernel *kernel);
   void FreeWorkSpace();
+  int AllocTensors(const std::vector<kernel::LiteKernel *> &kernels);
 
   std::map<Tensor *, Tensor *> restored_origin_tensors_;
   int virtual_batch_idx_ = 0;
@@ -155,6 +156,8 @@ class TrainSession : virtual public lite::LiteSession {
   void *workspace_ = nullptr;
   SchedCallBack sched_mix_precision_callback_;
   bool train_mode_ = false;
+  void *tensors_data_ = nullptr;
+  std::shared_ptr<Allocator> allocator_;
 };
 
 }  // namespace lite
diff --git a/mindspore/lite/src/weight_decoder.cc b/mindspore/lite/src/weight_decoder.cc
index aca7b1ca5a3..589d2284454 100644
--- a/mindspore/lite/src/weight_decoder.cc
+++ b/mindspore/lite/src/weight_decoder.cc
@@ -20,11 +20,13 @@
 #include "src/huffman_decode.h"
 
 namespace mindspore::lite {
+constexpr int kBit8 = 8;
+constexpr int kBit32 = 32;
 std::vector<bool> StringToBitVector(const std::string &str) {
-  std::vector<bool> vec(str.size() * 8);
+  std::vector<bool> vec(str.size() * kBit8);
   size_t index = 0;
   for (auto ch : str) {
-    for (size_t shift = 8; shift > 0; shift--) {
+    for (size_t shift = kBit8; shift > 0; shift--) {
       vec[index++] = (ch >> (shift - 1)) & 0x1;
     }
   }
@@ -47,7 +49,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor)
   if (unique_value_cnt == 0) {
     unique_value_cnt = 1 << bit_num;
   }
-  // parse unique_value_set;
+  // parse unique_value_set
   std::vector<int> unique_values;
   for (size_t i = 0; i < unique_value_cnt; i++) {
     int unique_value = 0;
@@ -81,7 +83,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor)
     return RET_NULL_PTR;
   }
   auto dst_data = dst_tensor->data_c();
-  if (bit_num <= 8) {
+  if (bit_num <= kBit8) {
     ret = UnIndexTensorData<int8_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size());
   } else {
     ret = UnIndexTensorData<int16_t>(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size());
@@ -102,15 +104,15 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
   size_t index = 0;
   // parse coor_best_bit
   size_t coor_best_bit = 0;
-  for (size_t i = 0; i < 8; i++) {
+  for (size_t i = 0; i < kBit8; i++) {
     bool bit = bit_vec[index++];
-    coor_best_bit |= bit << (8 - i - 1);
+    coor_best_bit |= bit << (kBit8 - i - 1);
   }
   // parse nz_cnt
   size_t nz_cnt = 0;
-  for (size_t i = 0; i < 32; i++) {
+  for (size_t i = 0; i < kBit32; i++) {
     bool bit = bit_vec[index++];
-    nz_cnt |= bit << (32 - i - 1);
+    nz_cnt |= bit << (kBit32 - i - 1);
   }
   // parse unique_value cnt
   size_t unique_value_cnt = 0;
@@ -167,7 +169,7 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
   }
   auto dst_data = dst_tensor->data_c();
 
-  if (bit_num <= 8) {
+  if (bit_num <= kBit8) {
     ret = UnSparseTensorData<int8_t>(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(),
                                      elem_cnt, coor_best_bit, dst_data, dst_tensor->Size());
   } else {
@@ -233,6 +235,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, bool channel_first,
   return RET_OK;
 }
 
+#ifdef ENABLE_HUFFMAN_DECODE
 int WeightDecoder::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) {
   MS_ASSERT(dst_tensor != nullptr);
   if (!dst_tensor->IsConst() || !src_tensor.enableHuffmanCode()) {
@@ -262,6 +265,7 @@ int WeightDecoder::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Ten
   }
   return RET_OK;
 }
+#endif
 
 int WeightDecoder::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) {
   MS_ASSERT(dst_tensor != nullptr);
diff --git a/mindspore/lite/src/weight_decoder.h b/mindspore/lite/src/weight_decoder.h
index e8fd3c96454..0d4097f62a9 100644
--- a/mindspore/lite/src/weight_decoder.h
+++ b/mindspore/lite/src/weight_decoder.h
@@ -128,7 +128,9 @@ class WeightDecoder {
 
   static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);
 
+#ifdef ENABLE_HUFFMAN_DECODE
   static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor);
+#endif
 
   static int DequantNode(OpParameter *op_parameter, const std::vector<Tensor *> &in_tensors, TypeId dst_data_type);
 
diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt
index 1f78a5cb6f9..ef6612e9e97 100644
--- a/mindspore/lite/test/CMakeLists.txt
+++ b/mindspore/lite/test/CMakeLists.txt
@@ -100,6 +100,7 @@ set(TEST_LITE_SRC
         ${KERNEL_OP_SRC}
         ${LITE_DIR}/src/runtime/inner_allocator.cc
         ${LITE_DIR}/src/runtime/infer_manager.cc
+        ${LITE_DIR}/src/runtime/runtime_pass.cc
         ${LITE_DIR}/src/tensor.cc
         ${LITE_DIR}/src/ms_tensor.cc
         ${LITE_DIR}/src/tensorlist.cc
@@ -208,7 +209,6 @@ if(MSLITE_ENABLE_CONVERTER)
             ${LITE_DIR}/tools/optimizer/common/gllo_utils.cc
             ${LITE_DIR}/tools/optimizer/common/format_utils.cc
             ${LITE_DIR}/tools/optimizer/common/multiple_pattern_process_pass.cc
-            ${LITE_DIR}/tools/optimizer/format/conv_weight_format.cc
             ${LITE_DIR}/tools/optimizer/format/delete_redundant_transpose.cc
             ${LITE_DIR}/tools/optimizer/format/to_format_base.cc
             ${LITE_DIR}/tools/optimizer/format/to_nchw_format.cc
@@ -301,6 +301,7 @@ file(GLOB_RECURSE TEST_CASE_KERNEL_SRC
         ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc
         ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc
         ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc
+        ${TEST_DIR}/ut/src/runtime/runtime_pass_tests.cc
         ${TEST_DIR}/ut/nnacl/infer/*.cc
         )
 
diff --git a/mindspore/lite/test/config/models_ms_train.cfg b/mindspore/lite/test/config/models_ms_train.cfg
index 7ec90491da0..b8d65866245 100644
--- a/mindspore/lite/test/config/models_ms_train.cfg
+++ b/mindspore/lite/test/config/models_ms_train.cfg
@@ -40,4 +40,7 @@ mobilenetv1 vb 0.5
 mobilenetv2 vb 0.5
 mobilenetv3 vb 0.5
 emnist transfer
+unified_api code_example
+train_lenet code_example
+train_lenet_java code_example
 # LAST
diff --git a/mindspore/lite/test/config/models_npu.cfg b/mindspore/lite/test/config/models_npu.cfg
index f1d90c4e9b6..fff1d7aaf65 100644
--- a/mindspore/lite/test/config/models_npu.cfg
+++ b/mindspore/lite/test/config/models_npu.cfg
@@ -88,3 +88,7 @@ ml_video_edit_hair_dyeing_segmodel_v2 0.5
 ml_video_edit_makeup_mobilenetv203.onnx 2
 ml_video_edit_hairline_segmentation;3 0.5
 ml_video_edit_hair_dyeing_migrate_v2.onnx;4 0.5
+ml_audio_kit_encoder_v5.pb;6;1,32:1,32:1,32:1,32:1:1
+fsr_270_mindspore.pb 1
+fsr_360_mindspore.pb 1
+fsr_720_mindspore.pb 1
diff --git a/mindspore/lite/test/config/models_onnx.cfg b/mindspore/lite/test/config/models_onnx.cfg
index e86513bc650..bd303c24687 100644
--- a/mindspore/lite/test/config/models_onnx.cfg
+++ b/mindspore/lite/test/config/models_onnx.cfg
@@ -100,3 +100,8 @@ gender_lstm_vad.onnx
 gender_resnet34_lzl.onnx
 # cur acc for tiny-yolov3-11 is 2.5% because the Unsqueeze_concat_7:0's output of the last op has very small numbers.
 tiny-yolov3-11.onnx;2;1,224,224,3:1,2 3
+# cur acc for ml_video_edit_art_transfer is 2+%
+ml_video_edit_art_transfer.onnx;3
+ssd-10.onnx;;;;calib_only
+Q888_CV_face_recognition_self.onnx
+ml_video_edit_dimming_tech_model_styleGan.onnx;2
diff --git a/mindspore/lite/test/config/models_onnx_fp16.cfg b/mindspore/lite/test/config/models_onnx_fp16.cfg
index 4c23284359c..d9ec6e16a45 100644
--- a/mindspore/lite/test/config/models_onnx_fp16.cfg
+++ b/mindspore/lite/test/config/models_onnx_fp16.cfg
@@ -102,3 +102,4 @@ ml_asr_decoder_202103.onnx;2;1,64,512:1,64 0.5
 ml_video_edit_makeup_mobilenetv203.onnx 4
 # The input of ml_video_edit_hair_dyeing_migrate_v2.onnx should be between [0, 1]
 ml_video_edit_hair_dyeing_migrate_v2.onnx;4 2.5
+Q888_CV_face_recognition_self.onnx 3.5
diff --git a/mindspore/lite/test/config/models_posttraining.cfg b/mindspore/lite/test/config/models_posttraining.cfg
index 5c997283099..f684576a709 100644
--- a/mindspore/lite/test/config/models_posttraining.cfg
+++ b/mindspore/lite/test/config/models_posttraining.cfg
@@ -1,5 +1,5 @@
 ml_face_mnet 105
 ml_face_landmark_2 2
 mobilenet.tflite 0.5
-#transformer_20200831_encoder_fp32.tflite;36 70
-#transformer_20200831_decoder_fp32.tflite;11 35
+transformer_20200831_encoder_fp32.tflite;36 70
+transformer_20200831_decoder_fp32.tflite;11 35
diff --git a/mindspore/lite/test/config/models_tf.cfg b/mindspore/lite/test/config/models_tf.cfg
index 3aa1a4e24a8..999fb519b56 100644
--- a/mindspore/lite/test/config/models_tf.cfg
+++ b/mindspore/lite/test/config/models_tf.cfg
@@ -104,4 +104,6 @@ hiai_nlu_model_v1.pb;3;1,16:1,16:1,16 2.0
 hiai_nlu_model_v2.pb;7;1,5:1,6:1,174:1,98:1,5:1,5:1,5
 hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32
 hiai_nlu_model_single.pb;3;1,32:1,32:1,32
-
+fsr_270_mindspore.pb
+fsr_360_mindspore.pb
+fsr_720_mindspore.pb
diff --git a/mindspore/lite/test/config/models_tf_fp16.cfg b/mindspore/lite/test/config/models_tf_fp16.cfg
index 3ce1b398e3b..3196d0697e3 100644
--- a/mindspore/lite/test/config/models_tf_fp16.cfg
+++ b/mindspore/lite/test/config/models_tf_fp16.cfg
@@ -88,3 +88,6 @@ hiai_transformer_encoder.pb;15 4
 decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 1.2
 hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32 25
 hiai_nlu_model_single.pb;3;1,32:1,32:1,32 2470
+fsr_270_mindspore.pb 6.0
+fsr_360_mindspore.pb 6.5
+fsr_720_mindspore.pb 2.0
diff --git a/mindspore/lite/test/runtest.sh b/mindspore/lite/test/runtest.sh
index 4bd7a81d1b1..91a33f61883 100644
--- a/mindspore/lite/test/runtest.sh
+++ b/mindspore/lite/test/runtest.sh
@@ -84,3 +84,6 @@ echo 'run mindrt parallel ut test'
 
 echo 'user set output tensors st test'
 ./lite-test --gtest_filter="GraphTest.UserSetGraphOutput*"
+
+echo 'runtime pass'
+./lite-test --gtest_filter="RuntimePass.*"
diff --git a/mindspore/lite/test/st/run_benchmark_nets.sh b/mindspore/lite/test/st/run_benchmark_nets.sh
index 3a5de9cbfaa..7a6db1a6e40 100644
--- a/mindspore/lite/test/st/run_benchmark_nets.sh
+++ b/mindspore/lite/test/st/run_benchmark_nets.sh
@@ -119,11 +119,10 @@ if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86" || $backen
 fi
 
 if [[ $backend == "all" || $backend == "arm32_3516D" ]]; then
-    exit 0
-#    sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend
-#    hi3516_status=$?
-#    if [[ $hi3516_status -ne 0 ]]; then
-#      echo "Run nnie hi3516 failed"
-#      exit 1
-#    fi
+    sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend
+    hi3516_status=$?
+    if [[ $hi3516_status -ne 0 ]]; then
+      echo "Run nnie hi3516 failed"
+      exit 1
+    fi
 fi
diff --git a/mindspore/lite/test/st/scripts/base_functions.sh b/mindspore/lite/test/st/scripts/base_functions.sh
index 1cc37e0e18f..480009512fe 100644
--- a/mindspore/lite/test/st/scripts/base_functions.sh
+++ b/mindspore/lite/test/st/scripts/base_functions.sh
@@ -146,18 +146,21 @@ function Run_Benchmark() {
       if [[ $6 == "arm64" && $7 == "CPU" && ! ${cfg_file_name} =~ "fp16" ]]; then
         benchmark_mode="calib+loop"
       fi
-      # adjust file name
-      infix=""
+      # adjust precision mode
       mode="fp32"
       if [[ ${cfg_file_name} =~ "fp16" ]]; then
         mode="fp16"
-      elif [[ ${cfg_file_name} =~ "bit" ]]; then
+      fi
+      # adjust file name
+      infix=""
+      if [[ ${cfg_file_name} =~ "bit" ]]; then
         infix="_${cfg_file##*_}"
         infix=${infix%.*}
       elif [[ ${cfg_file_name} =~ "_train" ]]; then
         infix="_train"
       elif [[ ${cfg_file_name} =~ "_weightquant" ]]; then
         infix="_weightquant"
+        benchmark_mode="calib"
       elif [[ ${cfg_file_name} =~ "_posttraining" ]]; then
         model_name=${model_name}"_posttraining"
       elif [[ ${cfg_file_name} =~ "_process_only" ]]; then
@@ -198,6 +201,9 @@ function Run_Benchmark() {
       if [[ ${mode} == "fp16" ]]; then
         enableFp16="true"
       fi
+      if [[ ${extra_info} =~ "calib_only" ]]; then
+        benchmark_mode="calib"
+      fi
       # start running benchmark
       echo "---------------------------------------------------------" >> "$4"
       if [[ ${benchmark_mode} = "calib" || ${benchmark_mode} = "calib+loop" ]]; then
diff --git a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh
index 85659162783..96b47a91080 100755
--- a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh
+++ b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh
@@ -64,8 +64,8 @@ function Run_Hi3516() {
   # cp files to nfs shared folder
   echo "start push files to hi3516"
   echo ${device_ip}
-  sshpass -p "mindspore@123" scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1
-  sshpass -p "mindspore@123" ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh"
+  scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1
+  ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh"
   if [ $? = 0 ]; then
     run_result='hi3516: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file};
   else
diff --git a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh
index e020b15f03c..e1b807887c0 100644
--- a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh
+++ b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh
@@ -15,9 +15,9 @@ function Run_Converter() {
     mkdir -p ${ms_models_path}
     # Prepare the config file list
     local fp32_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                              "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                              "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \
                               "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                              "$models_weightquant_9bit_config" "$models_for_process_only_config")
+                              "$models_weightquant_9bit_config" "$models_process_only_config")
 
     local fp16_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config")
     # Convert models:
@@ -34,9 +34,9 @@ function Run_Converter() {
 function Run_arm64() {
     # Prepare the config file list
     local arm64_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                              "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                              "$models_mindspore_train_config" "$models_posttraining_config" "$models_compatibility_config" \
                               "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                              "$models_weightquant_9bit_config" "$models_for_process_only_config" "$models_compatibility_config")
+                              "$models_weightquant_9bit_config" "$models_process_only_config" "$models_process_only_fp16_config")
     # Run converted models:
     # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId;
     Run_Benchmark "${arm64_cfg_file_list[*]}" . '/data/local/tmp' $run_arm64_fp32_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id
@@ -44,7 +44,8 @@ function Run_arm64() {
 
 # Run on arm64-fp16 platform:
 function Run_arm64_fp16() {
-    local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config")
+    local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config" \
+                               "$models_process_only_fp16_config")
     # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId;
     Run_Benchmark "${arm64_cfg_file_list[*]}" . '/data/local/tmp' $run_arm64_fp16_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id
 }
@@ -90,8 +91,7 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg
 models_tf_config=${basepath}/../config/models_tf.cfg
 models_caffe_config=${basepath}/../config/models_caffe.cfg
 models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg
-models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg
-models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg
+models_posttraining_config=${basepath}/../config/models_posttraining.cfg
 models_onnx_config=${basepath}/../config/models_onnx.cfg
 models_onnx_fp16_config=${basepath}/../config/models_onnx_fp16.cfg
 models_caffe_fp16_config=${basepath}/../config/models_caffe_fp16.cfg
@@ -103,7 +103,8 @@ models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg
 models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg
 models_weightquant_config=${basepath}/../config/models_weightquant.cfg
 models_compatibility_config=${basepath}/../config/models_compatibility.cfg
-models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg
+models_process_only_config=${basepath}/../config/models_process_only.cfg
+models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg
 
 ms_models_path=${basepath}/ms_models
 
diff --git a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh
index e1e137e8b1e..dd61c255e70 100644
--- a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh
+++ b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh
@@ -51,9 +51,9 @@ function Run_Converter() {
 
     # Prepare the config file list
     local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                             "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                             "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_config" \
                              "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                             "$models_weightquant_9bit_config" "$models_for_process_only_config")
+                             "$models_weightquant_9bit_config")
     # Convert models:
     # $1:cfgFileList; $2:inModelPath; $3:outModelPath; $4:logFile; $5:resultFile;
     Convert "${x86_cfg_file_list[*]}" $models_path $ms_models_path $run_converter_log_file $run_converter_result_file
@@ -102,9 +102,9 @@ function Run_x86() {
 
     # Prepare the config file list
     local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                             "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                             "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \
                              "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                             "$models_weightquant_9bit_config" "$models_for_process_only_config")
+                             "$models_weightquant_9bit_config" "$models_process_only_config")
     # Run converted models:
     # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId;
     Run_Benchmark "${x86_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_log_file $run_benchmark_result_file 'x86' 'CPU' ''
@@ -120,9 +120,9 @@ function Run_x86_sse() {
 
     # Prepare the config file list
     local sse_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                             "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                             "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \
                              "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                             "$models_weightquant_9bit_config" "$models_for_process_only_config")
+                             "$models_weightquant_9bit_config" "$models_process_only_config")
     # Run converted models:
     # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId;
     Run_Benchmark "${sse_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_sse_log_file $run_benchmark_result_file 'x86' 'CPU' ''
@@ -138,9 +138,9 @@ function Run_x86_avx() {
 
     # Prepare the config file list
     local avx_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \
-                             "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \
+                             "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \
                              "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \
-                             "$models_weightquant_9bit_config" "$models_for_process_only_config")
+                             "$models_weightquant_9bit_config" "$models_process_only_config")
     # Run converted models:
     # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; $9:benchmark_mode
     Run_Benchmark "${avx_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_avx_log_file $run_benchmark_result_file 'x86' 'CPU' ''
@@ -219,15 +219,15 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg
 models_tf_config=${basepath}/../config/models_tf.cfg
 models_caffe_config=${basepath}/../config/models_caffe.cfg
 models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg
-models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg
-models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg
+models_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg
 models_onnx_config=${basepath}/../config/models_onnx.cfg
 models_mindspore_config=${basepath}/../config/models_mindspore.cfg
 models_mindspore_train_config=${basepath}/../config/models_mindspore_train.cfg
 models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg
 models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg
 models_weightquant_config=${basepath}/../config/models_weightquant.cfg
-models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg
+models_process_only_config=${basepath}/../config/models_process_only.cfg
+models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg
 
 ms_models_path=${basepath}/ms_models
 
diff --git a/mindspore/lite/test/st/scripts/run_net_train.sh b/mindspore/lite/test/st/scripts/run_net_train.sh
index 86121ab8283..cde5ff984d4 100755
--- a/mindspore/lite/test/st/scripts/run_net_train.sh
+++ b/mindspore/lite/test/st/scripts/run_net_train.sh
@@ -49,8 +49,8 @@ function Run_Converter() {
     # Convert mindspore train models:
     while read line; do
         LFS=" " read -r -a line_array <<< ${line}
-        parse_line convert
         local model_prefix=${line_array[0]}_train
+        parse_line convert
         if [[ "$?" == "1" ]]; then continue; fi
         if [[ $model_name == \#* ]]; then
           continue
@@ -93,6 +93,23 @@ function Run_Converter() {
     return ${fail}
 }
 
+function should_run_example() {
+  ret=0
+  while read line; do
+    LFS=" " read -r -a line_array <<< ${line}
+    model_name=${line_array[0]}
+    if [[ $model_name == \#* ]]; then
+      continue
+    fi
+    if [[ $model_name == "$1" ]]; then
+      if [[ ${line_array[1]} == "code_example" ]]; then
+        ret=1
+      fi
+    fi
+  done < ${models_ms_train_config}
+  return $ret
+}
+
 function parse_line() {
     i=1
     loss_name=
@@ -138,6 +155,9 @@ function parse_line() {
              fi
              check_convert=1
             ;;
+          "code_example")
+             ret=1
+             ;;
           *)
             check=`echo "${line_array[i]}" | grep -E '^\-?[0-9]*\.?[0-9]+$'`
             if [ "${check}" != "" ] ; then
@@ -208,9 +228,9 @@ function Run_x86() {
             --virtualBatch=${virtual_batch} \
             --lossName=${loss_name} >> "${run_x86_log_file}"
         if [ $? = 0 ]; then
-            run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
+            run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file}
         else
-            run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file}
+            run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file}
             fail=1
         fi
     done < ${models_ms_train_config}
@@ -351,7 +371,6 @@ ENDM
 }
 
 function Run_CodeExamples() {
-    ls ${basepath}/../../
     fail=0
     target="x86"
     tarball_path=${x86_path}/mindspore-lite-${version}-linux-x64.tar.gz
@@ -360,10 +379,13 @@ function Run_CodeExamples() {
       tarball_path=${arm64_path}/mindspore-lite-${version_arm64}-android-aarch64.tar.gz
       export ANDROID_SERIAL=${device_id}
     fi
+    should_run_example "train_lenet_java"
+    should_run=$?
+
     export PATH=${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/converter/:$PATH
     export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/lib/:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/third_party/glog/lib
 
-    if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java" ]]; then
+    if [[ "$should_run" == "1" && ($backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java") ]]; then
       cd ${basepath}/../../examples/train_lenet_java || exit 1
       chmod 777 ./prepare_and_run.sh
       ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -m ${models_path}/code_example.mindir >> ${run_code_examples_log_file}
@@ -378,37 +400,46 @@ function Run_CodeExamples() {
     fi
 
     if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" || $backend == "arm64_train" ]]; then
-      cd ${basepath}/../../examples/unified_api || exit 1
-      chmod 777 ./prepare_and_run.sh
-      chmod 777 ./*/*.sh
-      ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file}
-      accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }')
-      if [ $accurate -eq 1 ]; then
-        echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file}
-        echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file}
-      else
-        echo "Unified API demo failure" >> ${run_code_examples_log_file}
-        echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file}
-        fail=1
-      fi
-      rm -rf package*/dataset
-      cd -
 
-      cd ${basepath}/../../examples/train_lenet || exit 1
-      chmod 777 ./prepare_and_run.sh
-      chmod 777 ./*/*.sh
-      ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file}
-      accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }')
-      if [ $accurate -eq 1 ]; then
-        echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file}
-        echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file}
-      else
-        echo "Train Lenet demo failure" >> ${run_code_examples_log_file}
-        echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file}
-        fail=1
+      should_run_example "unified_api"
+      should_run=$?
+      if [[ "$should_run" == "1" ]]; then
+        cd ${basepath}/../../examples/unified_api || exit 1
+        chmod 777 ./prepare_and_run.sh
+        chmod 777 ./*/*.sh
+        ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file}
+        accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }')
+        if [ $accurate -eq 1 ]; then
+          echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file}
+          echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file}
+        else
+          echo "Unified API demo failure" >> ${run_code_examples_log_file}
+          echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file}
+          fail=1
+        fi
+        rm -rf package*/dataset
+        cd -
+      fi
+
+      should_run_example "train_lenet"
+      should_run=$?
+      if [[ "$should_run" == "1" ]]; then
+        cd ${basepath}/../../examples/train_lenet || exit 1
+        chmod 777 ./prepare_and_run.sh
+        chmod 777 ./*/*.sh
+        ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file}
+        accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }')
+        if [ $accurate -eq 1 ]; then
+          echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file}
+          echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file}
+        else
+          echo "Train Lenet demo failure" >> ${run_code_examples_log_file}
+          echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file}
+          fail=1
+        fi
+        rm -rf package*/dataset
+        cd -
       fi
-      rm -rf package*/dataset
-      cd -
     fi
     return ${fail}
 }
@@ -596,7 +627,7 @@ echo "Push files to benchmark_train_test folder and run benchmark_train"
 benchmark_train_test_path=${basepath}/benchmark_train_test
 rm -rf ${benchmark_train_test_path}
 mkdir -p ${benchmark_train_test_path}
-cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path} || exit 1
+cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path}
 
 isFailed=0
 if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" ]]; then
diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc
index 3062d4f59d6..cd1ded3f5fd 100644
--- a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/custom_extract_features_infer.h"
+#include "nnacl/infer/string/custom_extract_features_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc
index 9b932f28492..1c84fdd7215 100644
--- a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/custom_normalize_infer.h"
+#include "nnacl/infer/string/custom_normalize_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc
index 62cf10fa8aa..b908aa7a344 100644
--- a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/custom_predict_infer.h"
+#include "nnacl/infer/string/custom_predict_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc
index b6dbf4b6085..4768bedf7e4 100644
--- a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/hashtable_lookup_infer.h"
+#include "nnacl/infer/string/hashtable_lookup_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc
index 9b27f538cbd..33717760b18 100644
--- a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/lsh_projection_infer.h"
+#include "nnacl/infer/string/lsh_projection_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc
index 469b0934498..ef7adebb898 100644
--- a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/skip_gram_infer.h"
+#include "nnacl/infer/string/skip_gram_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc
index eeefae7073a..6c03371fe05 100644
--- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc
@@ -15,7 +15,7 @@
  */
 #include "common/common_test.h"
 #include "src/common/tensor_util.h"
-#include "nnacl/infer/tensorlist_fromtensor_infer.h"
+#include "nnacl/infer/control/tensorlist_fromtensor_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc
index d92851cd325..05872d6b741 100644
--- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc
@@ -15,7 +15,7 @@
  */
 #include "common/common_test.h"
 #include "src/common/tensor_util.h"
-#include "nnacl/infer/tensorlist_getitem_infer.h"
+#include "nnacl/infer/control/tensorlist_getitem_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc
index 37f93257529..a8c877b72f6 100644
--- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/tensorlist_reserve_infer.h"
+#include "nnacl/infer/control/tensorlist_reserve_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc
index 5626e5b9719..9c43909aef9 100644
--- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/tensorlist_setitem_infer.h"
+#include "nnacl/infer/control/tensorlist_setitem_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc
index bf020b5e5d1..e7e4a27b30b 100644
--- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc
+++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include "common/common_test.h"
-#include "nnacl/infer/tensorlist_stack_infer.h"
+#include "nnacl/infer/control/tensorlist_stack_infer.h"
 
 namespace mindspore {
 
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
index 200b9f49f19..7bbc852b1b3 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc
@@ -15,7 +15,7 @@
  */
 
 #include <iostream>
-#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h"
+#include "src/runtime/kernel/arm/string/skip_gram.h"
 #include "nnacl/skip_gram_parameter.h"
 #include "src/common/file_utils.h"
 #include "common/common_test.h"
diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
index 1c86a856853..202ae5a2f87 100644
--- a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
+++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc
@@ -15,7 +15,7 @@
  */
 
 #include <iostream>
-#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h"
+#include "src/runtime/kernel/arm/string/skip_gram.h"
 #include "src/runtime/kernel/arm/string/normalize.h"
 #include "mindspore/lite/src/kernel_registry.h"
 #include "nnacl/skip_gram_parameter.h"
diff --git a/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc b/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc
index 0f05beb6208..ab961bb8b87 100644
--- a/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc
+++ b/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc
@@ -59,52 +59,23 @@ void Nc4hw4PassConstruct(std::vector<kernel::LiteKernel *> *kernels, std::vector
                                                  transpose_param, &transpose_kernel, nullptr);
   kernels->push_back(transpose_kernel);
 
-  lite::Tensor *in_param_tensor = new lite::Tensor();
-  tensors->push_back(in_param_tensor);
-  lite::Tensor *in_out_tensor = new lite::Tensor();
-  tensors->push_back(in_out_tensor);
-  OpParameter *in_param = new OpParameter();
-  kernel::KernelKey in_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_InstanceNorm};
-  kernel::LiteKernel *in_kernel = nullptr;
-  std::vector<lite::Tensor *> in_in = {transpose_out_tensor, in_param_tensor};
-  std::vector<lite::Tensor *> in_out = {in_out_tensor};
-  lite::KernelRegistry::GetInstance()->GetKernel(in_in, in_out, ctx, nullptr, in_desc, in_param, &in_kernel, nullptr);
-  kernels->push_back(in_kernel);
-
-  lite::Tensor *transpose2_param_tensor = new lite::Tensor();
-  tensors->push_back(transpose_param_tensor);
-  lite::Tensor *transpose2_out_tensor = new lite::Tensor();
-  tensors->push_back(transpose_param_tensor);
-  OpParameter *transpose2_param = new OpParameter();
-  kernel::KernelKey transpose2_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose};
-  kernel::LiteKernel *transpose2_kernel = nullptr;
-  std::vector<lite::Tensor *> transpose2_in = {in_out_tensor, transpose2_param_tensor};
-  std::vector<lite::Tensor *> transpose2_out = {transpose2_out_tensor};
-  lite::KernelRegistry::GetInstance()->GetKernel(transpose2_in, transpose2_out, ctx, nullptr, transpose2_desc,
-                                                 transpose2_param, &transpose2_kernel, nullptr);
-  kernels->push_back(transpose2_kernel);
-
-  lite::Tensor *conv2_weight = new lite::Tensor();
-  tensors->push_back(conv2_weight);
-  lite::Tensor *conv2_out_tensor = new lite::Tensor();
-  tensors->push_back(conv2_out_tensor);
-  std::vector<lite::Tensor *> conv2_in = {transpose2_out_tensor, conv_weight};
-  std::vector<lite::Tensor *> conv2_out = {conv2_out_tensor};
-  OpParameter *conv2_param = new OpParameter();
-  kernel::KernelKey conv2_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Conv2DFusion};
-  kernel::LiteKernel *conv2_kernel = nullptr;
-  lite::KernelRegistry::GetInstance()->GetKernel(conv2_in, conv2_out, ctx, nullptr, conv2_desc, conv2_param,
-                                                 &conv2_kernel, nullptr);
-  kernels->push_back(conv2_kernel);
+  lite::Tensor *pad_param_tensor = new lite::Tensor();
+  tensors->push_back(pad_param_tensor);
+  lite::Tensor *pad_out_tensor = new lite::Tensor();
+  tensors->push_back(pad_out_tensor);
+  OpParameter *pad_param = new OpParameter();
+  kernel::KernelKey pad_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_PadFusion};
+  kernel::LiteKernel *pad_kernel = nullptr;
+  std::vector<lite::Tensor *> pad_in = {transpose_out_tensor, pad_param_tensor};
+  std::vector<lite::Tensor *> pad_out = {pad_out_tensor};
+  lite::KernelRegistry::GetInstance()->GetKernel(pad_in, pad_out, ctx, nullptr, pad_desc, pad_param, &pad_kernel,
+                                                 nullptr);
+  kernels->push_back(pad_kernel);
 
   conv_kernel->set_out_kernels({transpose_kernel});
   transpose_kernel->set_in_kernels({conv_kernel});
-  transpose_kernel->set_out_kernels({in_kernel});
-  in_kernel->set_in_kernels({transpose_kernel});
-  in_kernel->set_out_kernels({transpose2_kernel});
-  transpose2_kernel->set_in_kernels({in_kernel});
-  transpose2_kernel->set_out_kernels({conv2_kernel});
-  conv2_kernel->set_in_kernels({transpose2_kernel});
+  transpose_kernel->set_out_kernels({pad_kernel});
+  pad_kernel->set_in_kernels({transpose_kernel});
   return;
 }
 
@@ -114,12 +85,11 @@ TEST_F(RuntimePass, Nc4hw4Pass1) {
   std::vector<lite::Tensor *> tensors;
   Nc4hw4PassConstruct(&kernels, &tensors, ctx.get());
 
-  ASSERT_EQ(kernels.size(), 5);
-
   /* runtime pass */
   lite::Nc4hw4PassReplace(&kernels, &tensors, 0);
 
-  ASSERT_EQ(kernels.size(), 3);
+  ASSERT_EQ(kernels.size(), 2);
+  ASSERT_EQ(tensors.size(), 5);
 
   for (auto tensor : tensors) {
     delete tensor;
diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc
index 77c5004bc35..abb7f1deada 100644
--- a/mindspore/lite/tools/benchmark/benchmark_base.cc
+++ b/mindspore/lite/tools/benchmark/benchmark_base.cc
@@ -208,10 +208,10 @@ void BenchmarkFlags::InitInputDataList() {
 void BenchmarkFlags::InitResizeDimsList() {
   std::string content = this->resize_dims_in_;
   std::vector<int> shape;
-  auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
+  auto shape_strs = StrSplit(content, std::string(DELIM_COLON));
   for (const auto &shape_str : shape_strs) {
     shape.clear();
-    auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
+    auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA));
     std::cout << "Resize Dims: ";
     for (const auto &dim_str : dim_strs) {
       std::cout << dim_str << " ";
diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc
index c06e39505bd..76164f076a3 100644
--- a/mindspore/lite/tools/benchmark_train/net_train.cc
+++ b/mindspore/lite/tools/benchmark_train/net_train.cc
@@ -603,7 +603,7 @@ int NetTrain::InitCallbackParameter() {
     }
     op_call_times_total_++;
     op_begin_ = GetTimeUs();
-    if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign")) {
+    if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign") || callParam.node_type == "SGD") {
       for (auto tensor : before_outputs) {
         std::fill(reinterpret_cast<int8_t *>(tensor->MutableData()),
                   reinterpret_cast<int8_t *>(tensor->MutableData()) + tensor->Size(), 0);
@@ -646,10 +646,10 @@ int NetTrain::InitCallbackParameter() {
 void NetTrainFlags::InitResizeDimsList() {
   std::string content = this->resize_dims_in_;
   std::vector<int> shape;
-  auto shape_strs = StringSplit(content, std::string(DELIM_COLON));
+  auto shape_strs = StrSplit(content, std::string(DELIM_COLON));
   for (const auto &shape_str : shape_strs) {
     shape.clear();
-    auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA));
+    auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA));
     std::cout << "Resize Dims: ";
     for (const auto &dim_str : dim_strs) {
       std::cout << dim_str << " ";
diff --git a/mindspore/lite/tools/common/flag_parser.cc b/mindspore/lite/tools/common/flag_parser.cc
index 1c4ed26b791..58fc4d139a3 100644
--- a/mindspore/lite/tools/common/flag_parser.cc
+++ b/mindspore/lite/tools/common/flag_parser.cc
@@ -24,6 +24,10 @@ Option<std::string> FlagParser::ParseFlags(int argc, const char *const *argv, bo
                                            bool supportDuplicate) {
   MS_ASSERT(argv != nullptr);
   const int FLAG_PREFIX_LEN = 2;
+  if (argc <= 0) {
+    MS_LOG(ERROR) << "The arguments number is out of range";
+    return Option<std::string>("Failed: flags is not valid");
+  }
   binName = GetFileName(argv[0]);
 
   std::multimap<std::string, Option<std::string>> keyValues;
diff --git a/mindspore/lite/tools/common/flag_parser.h b/mindspore/lite/tools/common/flag_parser.h
index 7a69333ee5f..26d881b3257 100644
--- a/mindspore/lite/tools/common/flag_parser.h
+++ b/mindspore/lite/tools/common/flag_parser.h
@@ -280,8 +280,11 @@ void FlagParser::AddFlag(Option<T> Flags::*t, const std::string &flagName, const
   ConstructFlag(t, flagName, helpInfo, &flagItem);
   flagItem.isRequired = false;
   flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option<Nothing> {
+    if (base == nullptr) {
+      return Option<Nothing>(Nothing());
+    }
     auto *flag = dynamic_cast<Flags *>(base);
-    if (base != nullptr) {
+    if (flag != nullptr) {
       Option<T> ret = Option<std::string>(GenericParseValue<T>(value));
       if (ret.IsNone()) {
         return Option<Nothing>(None());
diff --git a/mindspore/lite/tools/common/func_graph_subgraph.cc b/mindspore/lite/tools/common/func_graph_subgraph.cc
index a507353a63b..79d900fa277 100644
--- a/mindspore/lite/tools/common/func_graph_subgraph.cc
+++ b/mindspore/lite/tools/common/func_graph_subgraph.cc
@@ -482,9 +482,7 @@ void SubGraph::CreateCNodeForPartialSubGraph(
   // move cnode from belong_graph to subgraph
   for (auto &node : this->GetNodes()) {
     sub_graph->AddNode(node);
-    if (!utils::isa<ValueNodePtr>(node)) {
-      node->set_func_graph(sub_graph);
-    }
+    node->set_func_graph(sub_graph);
     for (size_t i = 0; i < node->inputs().size(); i++) {
       if (node == nullptr || node->inputs().at(i)) {
         continue;
diff --git a/mindspore/lite/tools/common/graph_util.cc b/mindspore/lite/tools/common/graph_util.cc
index 9e9c1ba552c..2e6407a63cf 100644
--- a/mindspore/lite/tools/common/graph_util.cc
+++ b/mindspore/lite/tools/common/graph_util.cc
@@ -26,6 +26,7 @@
 #include "tools/common/node_util.h"
 #include "src/common/log_adapter.h"
 #include "src/common/utils.h"
+#include "tools/converter/ops/ops_def.h"
 
 namespace mindspore {
 namespace lite {
@@ -33,6 +34,29 @@ namespace {
 enum QuantBitNum { QuantBitNum_INT8 = 8, QuantBitNum_INT16 = 16 };
 const int kZeroPointGap = 128;
 }  // namespace
+int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &outputs) {
+  if (graph == nullptr || outputs.empty()) {
+    MS_LOG(DEBUG) << "Input graph is nullptr or outputs is empty";
+    return RET_INPUT_PARAM_INVALID;
+  }
+  if (outputs.size() == 1) {
+    graph->set_output(outputs.front(), false);
+    return RET_OK;
+  }
+  auto make_tuple_prim_ptr = std::make_shared<lite::MakeTuple>();
+  if (make_tuple_prim_ptr == nullptr) {
+    MS_LOG(DEBUG) << "new MakeTuple failed";
+    return lite::RET_NULL_PTR;
+  }
+  auto make_tuple_cnode = graph->NewCNode(make_tuple_prim_ptr, outputs);
+  if (make_tuple_prim_ptr == nullptr) {
+    MS_LOG(DEBUG) << "new cnode failed";
+    return lite::RET_NULL_PTR;
+  }
+  make_tuple_cnode->set_fullname_with_scope("return tuple");
+  graph->set_output(make_tuple_cnode, false);
+  return RET_OK;
+}
 
 OpDefCopyer GetSimpleOpCopyer() {
   return [](CNodeT *inCNode) -> std::unique_ptr<CNodeT> {
diff --git a/mindspore/lite/tools/common/graph_util.h b/mindspore/lite/tools/common/graph_util.h
index 1fc3f60dbf0..720b9111085 100644
--- a/mindspore/lite/tools/common/graph_util.h
+++ b/mindspore/lite/tools/common/graph_util.h
@@ -46,6 +46,8 @@ using OpDefCopyer = std::function<std::unique_ptr<schema::CNodeT>(schema::CNodeT
 
 OpDefCopyer GetSimpleOpCopyer();
 
+int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector<AnfNodePtr> &outputs);
+
 std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int inputIndexIdx = -1);
 
 std::vector<size_t> GetInputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node,
diff --git a/mindspore/lite/tools/common/node_util.cc b/mindspore/lite/tools/common/node_util.cc
index 57ec131fd7d..65d6a8659e9 100644
--- a/mindspore/lite/tools/common/node_util.cc
+++ b/mindspore/lite/tools/common/node_util.cc
@@ -28,147 +28,19 @@
 namespace mindspore {
 namespace lite {
 constexpr size_t kInitialSize = 1024;
-
-static const std::vector<schema::PrimitiveType> nhwcOpList = {schema::PrimitiveType_Conv2DBackpropFilterFusion,
-                                                              schema::PrimitiveType_Conv2DBackpropInputFusion,
-                                                              schema::PrimitiveType_AvgPoolGrad,
-                                                              schema::PrimitiveType_MaxPoolGrad,
-                                                              schema::PrimitiveType_BiasAddGrad,
-                                                              schema::PrimitiveType_BatchNormGrad,
-                                                              schema::PrimitiveType_ApplyMomentum,
-                                                              schema::PrimitiveType_SGD,
-                                                              schema::PrimitiveType_Adam,
-                                                              schema::PrimitiveType_ResizeGrad,
-                                                              schema::PrimitiveType_AvgPoolFusion,
-                                                              schema::PrimitiveType_MaxPoolFusion,
-                                                              schema::PrimitiveType_Conv2DFusion,
-                                                              schema::PrimitiveType_Conv2dTransposeFusion,
-                                                              schema::PrimitiveType_LRN,
-                                                              schema::PrimitiveType_Resize,
-                                                              schema::PrimitiveType_BatchNorm,
-                                                              schema::PrimitiveType_FusedBatchNorm,
-                                                              schema::PrimitiveType_PReLUFusion,
-                                                              schema::PrimitiveType_BiasAdd,
-                                                              schema::PrimitiveType_SpaceToDepth,
-                                                              schema::PrimitiveType_DepthToSpace,
-                                                              schema::PrimitiveType_TopKFusion,
-                                                              schema::PrimitiveType_BatchToSpace,
-                                                              schema::PrimitiveType_SpaceToBatch,
-                                                              schema::PrimitiveType_SpaceToBatchND};
-
-static const std::vector<schema::PrimitiveType> nchwOpList = {schema::PrimitiveType_InstanceNorm};
-
-static const std::vector<schema::PrimitiveType> nhwcOpAllInputList = {
-  schema::PrimitiveType_AvgPoolGrad,    schema::PrimitiveType_MaxPoolGrad,
-  schema::PrimitiveType_ActivationGrad, schema::PrimitiveType_Conv2DBackpropFilterFusion,
-  schema::PrimitiveType_BatchNormGrad,  schema::PrimitiveType_ResizeGrad};
-
-// index {} mean all inputs need insert
-static std::unordered_map<schema::PrimitiveType, std::vector<int>> extNhwcInsertIndex = {
-  {schema::PrimitiveType_BatchNormGrad, {0, 1}},
-  {schema::PrimitiveType_Conv2DBackpropFilterFusion, {0, 1}},
-  {schema::PrimitiveType_ApplyMomentum, {3}},
-  {schema::PrimitiveType_SGD, {1}},
-  {schema::PrimitiveType_Adam, {9}}};
-
-static const std::vector<schema::PrimitiveType> fp32FullOpList = {
-  schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion,
-  schema::PrimitiveType_Floor};  // fp32 ops support C4 and nhwc in fp32
-
-static const std::vector<schema::PrimitiveType> int8NeedNhwcOpList = {};
-
-static const std::vector<schema::PrimitiveType> int8OpList = {schema::PrimitiveType_Conv2DFusion,
-                                                              schema::PrimitiveType_Conv2dTransposeFusion,
-                                                              schema::PrimitiveType_AddFusion,
-                                                              schema::PrimitiveType_Transpose,
-                                                              schema::PrimitiveType_AvgPoolFusion,
-                                                              schema::PrimitiveType_MaxPoolFusion,
-                                                              schema::PrimitiveType_Concat,
-                                                              schema::PrimitiveType_Softmax,
-                                                              schema::PrimitiveType_Reshape,
-                                                              schema::PrimitiveType_Activation,
-                                                              schema::PrimitiveType_Resize,
-                                                              schema::PrimitiveType_FullConnection,
-                                                              schema::PrimitiveType_ArgMaxFusion,
-                                                              schema::PrimitiveType_ArgMinFusion,
-                                                              schema::PrimitiveType_BatchNorm,
-                                                              schema::PrimitiveType_FusedBatchNorm,
-                                                              schema::PrimitiveType_BiasAdd,
-                                                              schema::PrimitiveType_DivFusion,
-                                                              schema::PrimitiveType_MulFusion,
-                                                              schema::PrimitiveType_SliceFusion,
-                                                              schema::PrimitiveType_Split,
-                                                              schema::PrimitiveType_Squeeze,
-                                                              schema::PrimitiveType_SubFusion,
-                                                              schema::PrimitiveType_StridedSlice,
-                                                              schema::PrimitiveType_TopKFusion,
-                                                              schema::PrimitiveType_Unsqueeze,
-                                                              schema::PrimitiveType_MatMul,
-                                                              schema::PrimitiveType_PadFusion,
-                                                              schema::PrimitiveType_ScaleFusion,
-                                                              schema::PrimitiveType_Cast,
-                                                              schema::PrimitiveType_Shape,
-                                                              schema::PrimitiveType_ExpandDims,
-                                                              schema::PrimitiveType_BatchToSpace,
-                                                              schema::PrimitiveType_BatchToSpaceND,
-                                                              schema::PrimitiveType_ReduceFusion,
-                                                              schema::PrimitiveType_Round,
-                                                              schema::PrimitiveType_Floor,
-                                                              schema::PrimitiveType_Ceil,
-                                                              schema::PrimitiveType_Abs,
-                                                              schema::PrimitiveType_Sin,
-                                                              schema::PrimitiveType_Cos,
-                                                              schema::PrimitiveType_Log,
-                                                              schema::PrimitiveType_Sqrt,
-                                                              schema::PrimitiveType_Rsqrt,
-                                                              schema::PrimitiveType_Square,
-                                                              schema::PrimitiveType_LogicalNot,
-                                                              schema::PrimitiveType_SpaceToBatch,
-                                                              schema::PrimitiveType_SpaceToBatchND,
-                                                              schema::PrimitiveType_DepthToSpace,
-                                                              schema::PrimitiveType_PowFusion,
-                                                              schema::PrimitiveType_GatherNd,
-                                                              schema::PrimitiveType_LeakyRelu,
-                                                              schema::PrimitiveType_Gather,
-                                                              schema::PrimitiveType_Equal,
-                                                              schema::PrimitiveType_NotEqual,
-                                                              schema::PrimitiveType_LessEqual,
-                                                              schema::PrimitiveType_Greater,
-                                                              schema::PrimitiveType_GreaterEqual,
-                                                              schema::PrimitiveType_Eltwise,
-                                                              schema::PrimitiveType_DetectionPostProcess,
-                                                              schema::PrimitiveType_Crop,
-                                                              schema::PrimitiveType_PriorBox,
-                                                              schema::PrimitiveType_QuantDTypeCast,
-                                                              schema::PrimitiveType_LayerNormFusion,
-                                                              schema::PrimitiveType_L2NormalizeFusion};
-
-static const std::vector<schema::PrimitiveType> needInsertOpList = {
-  schema::PrimitiveType_Eltwise,       schema::PrimitiveType_Activation,   schema::PrimitiveType_Concat,
-  schema::PrimitiveType_PowFusion,     schema::PrimitiveType_StridedSlice, schema::PrimitiveType_AddFusion,
-  schema::PrimitiveType_AddN,          schema::PrimitiveType_Split,        schema::PrimitiveType_SliceFusion,
-  schema::PrimitiveType_Crop,          schema::PrimitiveType_MulFusion,    schema::PrimitiveType_Maximum,
-  schema::PrimitiveType_ActivationGrad};
-
-static const std::unordered_map<int, int> nc2NhAxisMap = {{0, 0}, {1, -1}, {2, 1}, {3, 2}};
-
-std::unordered_map<int, int> GetNc2NhAxisMap() { return nc2NhAxisMap; }
-
-std::vector<schema::PrimitiveType> GetInsertOpList() { return needInsertOpList; }
-
-std::vector<schema::PrimitiveType> Getfp32FullOpList() { return fp32FullOpList; }
-
-std::vector<schema::PrimitiveType> GetNhwcOpList() { return nhwcOpList; }
-
-std::vector<schema::PrimitiveType> GetNchwOpList() { return nchwOpList; }
-
-std::unordered_map<schema::PrimitiveType, std::vector<int>> GetExtNhwcIndexes() { return extNhwcInsertIndex; }
-
-std::vector<schema::PrimitiveType> GetNhwcAllInputOpList() { return nhwcOpAllInputList; }
-
-std::vector<schema::PrimitiveType> GetUint8NhwcOpList() { return int8NeedNhwcOpList; }
-
-std::vector<schema::PrimitiveType> GetInt8OpList() { return int8OpList; }
+std::vector<CNodePtr> GetInputCNode(const CNodePtr &cnode) {
+  if (cnode == nullptr) {
+    return {};
+  }
+  std::vector<CNodePtr> inputs;
+  for (const auto &input : cnode->inputs()) {
+    if (input == nullptr || !utils::isa<CNodePtr>(input)) {
+      continue;
+    }
+    inputs.emplace_back(utils::cast<CNodePtr>(input));
+  }
+  return inputs;
+}
 
 const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb) {
   if (primitive_t == nullptr || fbb == nullptr) {
@@ -463,6 +335,5 @@ size_t GetCNodeOutputsSize(const std::shared_ptr<AnfNode> &anf_node, bool train_
     return 1;
   }
 }
-
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/common/node_util.h b/mindspore/lite/tools/common/node_util.h
index 7fcba451927..6a2f1a560ae 100644
--- a/mindspore/lite/tools/common/node_util.h
+++ b/mindspore/lite/tools/common/node_util.h
@@ -31,6 +31,8 @@
 
 namespace mindspore {
 namespace lite {
+std::vector<CNodePtr> GetInputCNode(const CNodePtr &cnode);
+
 template <typename T>
 int CreateOperator(const std::unique_ptr<schema::PrimitiveT> &primitive, schema::PrimitiveType type) {
   auto attr = std::make_unique<T>();
diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt
index 1d33d5dc863..52e3e50abe3 100644
--- a/mindspore/lite/tools/converter/CMakeLists.txt
+++ b/mindspore/lite/tools/converter/CMakeLists.txt
@@ -27,6 +27,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         ${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../common/string_util.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../common/protobuf_utils.cc
+        ${CMAKE_CURRENT_SOURCE_DIR}/../common/func_graph_subgraph.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../common/storage.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/../../src/ir/primitive_t_value.cc
@@ -47,7 +48,6 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         ../optimizer/common/gllo_utils.cc
         ../optimizer/common/format_utils.cc
         ../optimizer/common/multiple_pattern_process_pass.cc
-        ../optimizer/format/conv_weight_format.cc
         ../optimizer/format/delete_redundant_transpose.cc
         ../optimizer/format/to_format_base.cc
         ../optimizer/format/to_nchw_format.cc
@@ -113,6 +113,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
         ../optimizer/graph/transpose_strategy.cc
         ../optimizer/graph/reduce_same_act_pass.cc
         ../optimizer/graph/split_one_pass.cc
+        ../optimizer/graph/find_const_subgraph_pass.cc
         )
 
 add_subdirectory(../anf_exporter anf_exporter)
@@ -136,11 +137,14 @@ set(LITE_SRC
         ${SRC_DIR}/common/tensor_util.cc
         ${SRC_DIR}/runtime/inner_allocator.cc
         ${SRC_DIR}/runtime/infer_manager.cc
+        ${SRC_DIR}/runtime/runtime_pass.cc
         ${SRC_DIR}/inner_context.cc
         ${SRC_DIR}/tensor.cc
         ${SRC_DIR}/ms_tensor.cc
         ${SRC_DIR}/tensorlist.cc
         ${SRC_DIR}/registry/kernel_interface_registry.cc
+        ${SRC_DIR}/registry/register_utils.cc
+        ${SRC_DIR}/registry/register_kernel_impl.cc
         ${SRC_DIR}/registry/kernel_interface.cc
         ${SRC_DIR}/kernel_registry.cc
         ${SRC_DIR}/inner_kernel.cc
diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc
index 02c0aef1a7f..65b50868f20 100644
--- a/mindspore/lite/tools/converter/anf_transform.cc
+++ b/mindspore/lite/tools/converter/anf_transform.cc
@@ -72,7 +72,6 @@
 #include "tools/optimizer/format/delete_redundant_transpose.h"
 #include "tools/optimizer/format/to_nchw_format.h"
 #include "tools/optimizer/format/to_nhwc_format.h"
-#include "tools/optimizer/format/conv_weight_format.h"
 
 using std::string;
 namespace mindspore::lite {
@@ -389,8 +388,6 @@ FuncGraphPtr AnfTransform::TransformFuncGraph(const FuncGraphPtr &old_graph, con
 void AnfTransform::AppendPassToStoreRoom(const converter::Flags *config) {
   auto fmk = config->fmk;
   auto is_train = config->trainModel;
-  opt::PassRegistry("ConvWeightToKHWC", std::make_shared<opt::ConvWeightToKHWC>());
-  opt::PassRegistry("ConvWeightToKCHW", std::make_shared<opt::ConvWeightToKCHW>());
   opt::PassRegistry("DecreaseTransposeAlgo", std::make_shared<opt::DecreaseTransposeAlgo>(fmk, is_train));
   opt::PassRegistry("DeleteRedundantTranspose", std::make_shared<opt::DeleteRedundantTranspose>());
   opt::PassRegistry("InferShapePass", std::make_shared<opt::InferShapePass>(fmk, is_train));
diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc
index d97136bdf45..c17fe9a2814 100644
--- a/mindspore/lite/tools/converter/converter_flags.cc
+++ b/mindspore/lite/tools/converter/converter_flags.cc
@@ -32,6 +32,7 @@ namespace converter {
 namespace {
 constexpr int kBase = 10;
 constexpr int kQuantBitNumInt16 = 16;
+constexpr int kPathLengthUpperLimit = 1024;
 }  // namespace
 Flags::Flags() {
   AddFlag(&Flags::fmkIn, "fmk", "Input model framework type. TF | TFLITE | CAFFE | MINDIR | ONNX", "");
@@ -211,10 +212,10 @@ int Flags::InitTrainModel() {
 int Flags::InitInTensorShape() {
   std::string content = this->inTensorShape;
   std::vector<int64_t> shape;
-  auto shape_strs = StringSplit(content, std::string(";"));
+  auto shape_strs = StrSplit(content, std::string(";"));
   for (const auto &shape_str : shape_strs) {
     shape.clear();
-    auto string_split = StringSplit(shape_str, std::string(":"));
+    auto string_split = StrSplit(shape_str, std::string(":"));
     auto name = string_split[0];
     if (name.empty()) {
       MS_LOG(ERROR) << "input tensor name is empty";
@@ -223,7 +224,7 @@ int Flags::InitInTensorShape() {
     if (dim_strs.empty()) {
       MS_LOG(ERROR) << "input tensor dim string is empty";
     }
-    auto dims = StringSplit(dim_strs, std::string(","));
+    auto dims = StrSplit(dim_strs, std::string(","));
     if (dims.empty()) {
       MS_LOG(ERROR) << "input tensor dim is empty";
     }
@@ -428,7 +429,7 @@ std::string GetStrFromConfigFile(const std::string &file, const std::string &tar
   }
 
 #ifdef _WIN32
-  char *real_path = _fullpath(resolved_path.get(), file.c_str(), 1024);
+  char *real_path = _fullpath(resolved_path.get(), file.c_str(), kPathLengthUpperLimit);
 #else
   char *real_path = realpath(file.c_str(), resolved_path.get());
 #endif
@@ -486,7 +487,6 @@ std::vector<std::string> SplitStringToVector(const std::string &raw_str, const c
   }
   return res;
 }
-
 }  // namespace converter
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/converter/export_model.cc b/mindspore/lite/tools/converter/export_model.cc
index 3fe4924c7db..6789e67afba 100644
--- a/mindspore/lite/tools/converter/export_model.cc
+++ b/mindspore/lite/tools/converter/export_model.cc
@@ -193,7 +193,7 @@ STATUS ExportModel(const FuncGraphPtr &graph) {
     return RET_ERROR;
   }
   (void)Manage(mirror_graph, true);
-  if (!opt::RunOptimizerPass(mirror_graph, {"InferShapePass", "DecreaseTransposeAlgo"})) {
+  if (!opt::RunOptimizerPass(mirror_graph, {"InferShapePass", "DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) {
     MS_LOG(ERROR) << "Run transpose opt pass failed.";
     return RET_ERROR;
   }
diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.cc b/mindspore/lite/tools/converter/import/mindspore_importer.cc
index bcc6e40885f..7dbacf58af4 100644
--- a/mindspore/lite/tools/converter/import/mindspore_importer.cc
+++ b/mindspore/lite/tools/converter/import/mindspore_importer.cc
@@ -16,6 +16,7 @@
 
 #include "tools/converter/import/mindspore_importer.h"
 #include <memory>
+#include <set>
 #include <vector>
 #include <regex>
 #include "tools/converter/parser/parser_utils.h"
@@ -49,96 +50,6 @@ STATUS MindsporeImporter::Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const
   return RET_OK;
 }
 
-STATUS MindsporeImporter::WeightFormatTransform(const FuncGraphPtr &graph) {
-  MS_ASSERT(graph != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    auto conv_cnode = node->cast<CNodePtr>();
-    if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-        !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-        !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-      continue;
-    }
-    MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex);
-    int status = HardCodeMindir(conv_cnode, graph);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-STATUS MindsporeImporter::HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph) {
-  MS_ASSERT(conv_cnode != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(conv_node->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
-    return lite::RET_ERROR;
-  }
-  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
-  auto weight_node = conv_node->input(kConvWeightIndex);
-  schema::Format weight_dst_format = schema::Format::Format_KHWC;
-  STATUS status = RET_OK;
-  schema::Format weight_src_format = schema::Format::Format_NUM_OF_FORMAT;
-  switch (quant_type_) {
-    case QuantType_AwareTraining:
-    case QuantType_PostTraining:
-    case QuantType_WeightQuant:
-    case QuantType_QUANT_NONE: {
-      if (format == schema::Format::Format_KHWC) {
-        weight_src_format = schema::Format::Format_KHWC;
-      } else {
-        weight_src_format = schema::Format::Format_KCHW;
-      }
-    } break;
-    default: {
-      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_)
-                    << ", node: " << conv_node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  if (utils::isa<CNodePtr>(weight_node)) {
-    status = HandleWeightConst(graph, conv_node, weight_node->cast<CNodePtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-const failed.";
-      return RET_ERROR;
-    }
-  }
-  weight_node = conv_node->input(kConvWeightIndex);
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value != nullptr) {
-    status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To"
-                    << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope()
-                    << "quant type:" << quant_type_;
-      return RET_ERROR;
-    }
-    prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-    auto type_id = static_cast<TypeId>(weight_value->data_type());
-    auto shape = weight_value->shape();
-    std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-    auto abstract = lite::CreateTensorAbstract(shape_vector, type_id);
-    if (abstract == nullptr) {
-      MS_LOG(ERROR) << "Create tensor abstarct failed";
-      return RET_ERROR;
-    }
-    weight_node->set_abstract(abstract);
-  }
-  if (utils::isa<ParameterPtr>(weight_node)) {
-    status = HandleWeightSharing(graph, KHWC, weight_node->cast<ParameterPtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-sharing failed.";
-      return RET_ERROR;
-    }
-  }
-  return lite::RET_OK;
-}
-
 size_t MindsporeImporter::Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len) {
   std::regex r("[0-9a-fA-F]+");
   if (!std::regex_match(hex_str, r)) {
@@ -208,16 +119,11 @@ FuncGraphPtr MindsporeImporter::ImportMindIR(const converter::Flags &flag) {
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
     return nullptr;
   }
-  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_MS, flag.trainModel);
+  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_MS, flag.trainModel, flag.quantType);
   if (!unify_format->Run(func_graph)) {
     MS_LOG(ERROR) << "Run insert transpose failed.";
     return nullptr;
   }
-  if ((status = WeightFormatTransform(func_graph)) != RET_OK) {
-    MS_LOG(ERROR) << "WeightFormatTransform failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
-    return nullptr;
-  }
   return func_graph;
 }
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.h b/mindspore/lite/tools/converter/import/mindspore_importer.h
index ca74b4b8fd6..96bcabe1d7f 100644
--- a/mindspore/lite/tools/converter/import/mindspore_importer.h
+++ b/mindspore/lite/tools/converter/import/mindspore_importer.h
@@ -17,6 +17,7 @@
 #ifndef MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_
 #define MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_
 
+#include <set>
 #include <string>
 #include "tools/converter/converter_flags.h"
 #include "load_mindir/load_model.h"
@@ -30,8 +31,6 @@ class MindsporeImporter {
 
  private:
   STATUS Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const converter::Flags &flag);
-  STATUS WeightFormatTransform(const FuncGraphPtr &graph);
-  STATUS HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph);
   QuantType quant_type_ = schema::QuantType_QUANT_NONE;
   size_t Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len);
 };
diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc
index 40292965fd7..2f6a27ed99b 100644
--- a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc
+++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc
@@ -230,15 +230,8 @@ STATUS BatchNormConvertScalePass::GetTransParam(MetaGraphT *graph, const std::un
   return RET_OK;
 }
 
-// BatchNorm weight Tensor definition:
-// caffe
-//   estimated_mean  --0
-//   estimated_variance  --1
-// tensorflow
-//   scale    -- 0
-//   bias        --1
-//   estimated_mean  --2
-//   estimated_variance  --3
+// caffe:estimated_mean:0 estimated_variance:1
+// tensorflow scale:0,bias:1,estimated_mean:2,estimated_variance:3
 STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeightTensors *bnWeightTensors,
                                                      const std::unique_ptr<CNodeT> &bnNode) {
   MS_ASSERT(graph != nullptr);
@@ -250,19 +243,6 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight
   if (fmkType == converter::FmkType_CAFFE) {
     bnWeightTensors->meanTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_MEAN_INDEX]).get();
     bnWeightTensors->varianceTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_VARIANCE_INDEX]).get();
-    auto scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_SCALE_INDEX]).get();
-
-    // calibrate mean and variance
-    float scale_factor_data = (reinterpret_cast<float *>(scaleTensor->data.data()))[0];
-    float scale_factor = scale_factor_data == 0 ? 0 : 1 / scale_factor_data;
-    auto mean_data = reinterpret_cast<float *>(bnWeightTensors->meanTensor->data.data());
-    auto variance_data = reinterpret_cast<float *>(bnWeightTensors->varianceTensor->data.data());
-    for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->meanTensor); i++) {
-      mean_data[i] *= scale_factor;
-    }
-    for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->varianceTensor); i++) {
-      variance_data[i] *= scale_factor;
-    }
   } else {
     bnWeightTensors->scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_SCALE_INDEX]).get();
     bnWeightTensors->biasTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_BIAS_INDEX]).get();
@@ -274,11 +254,24 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight
     MS_LOG(ERROR) << "BatchNorm's mean tensor is nullptr";
     return RET_ERROR;
   }
-
   if (bnWeightTensors->varianceTensor == nullptr) {
     MS_LOG(ERROR) << "BatchNorm's variance tensor is nullptr";
     return RET_ERROR;
   }
+  if (fmkType == converter::FmkType_CAFFE) {
+    auto scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_SCALE_INDEX]).get();
+    // calibrate mean and variance
+    float scale_factor_data = (reinterpret_cast<float *>(scaleTensor->data.data()))[0];
+    float scale_factor = scale_factor_data == 0 ? 0 : 1 / scale_factor_data;
+    auto mean_data = reinterpret_cast<float *>(bnWeightTensors->meanTensor->data.data());
+    auto variance_data = reinterpret_cast<float *>(bnWeightTensors->varianceTensor->data.data());
+    for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->meanTensor); i++) {
+      mean_data[i] *= scale_factor;
+    }
+    for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->varianceTensor); i++) {
+      variance_data[i] *= scale_factor;
+    }
+  }
   bnChannel = bnWeightTensors->meanTensor->data.size() * sizeof(uint8_t) / sizeof(float);
   if (bnChannel <= 0) {
     MS_LOG(ERROR) << "BatchNorm's channel less or equal 0";
@@ -289,14 +282,12 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight
     MS_LOG(ERROR) << "conv kernel num expected to be equal to variance size";
     return RET_ERROR;
   }
-
   if (bnWeightTensors->scaleTensor != nullptr) {
     if (bnChannel != bnWeightTensors->scaleTensor->data.size() * sizeof(uint8_t) / sizeof(float)) {
       MS_LOG(ERROR) << "conv kernel num  expected to be equal to scale size";
       return RET_ERROR;
     }
   }
-
   if (bnWeightTensors->biasTensor != nullptr) {
     if (bnChannel != bnWeightTensors->biasTensor->data.size() * sizeof(uint8_t) / sizeof(float)) {
       MS_LOG(ERROR) << "conv kernel num expected to be equal to bias size";
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc
index f73367307d3..18564ad112f 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc
@@ -39,6 +39,11 @@ namespace {
 namespace {
 constexpr size_t kConvWeightIndex = 2;
 constexpr size_t kConvWeightShapeSize = 4;
+constexpr size_t kFcWeightFirstShapeIndex = 0;
+constexpr size_t kFcWeightSecondShapeIndex = 1;
+constexpr size_t kFcBiasFirstShapeIndex = 0;
+constexpr size_t kFcBiasSecondShapeIndex = 1;
+constexpr size_t kFcBiasThirdShapeIndex = 2;
 }  // namespace
 bool IsSkipedLayer(const caffe::LayerParameter &layer) {
   if (layer.type() == "Input" || layer.type() == "Dropout" || layer.type() == "Split") {
@@ -50,12 +55,14 @@ bool IsSkipedLayer(const caffe::LayerParameter &layer) {
 void FcSqueezeWeightBias(const caffe::LayerParameter &layer, int blob_index, std::vector<int32_t> *shape) {
   if (layer.type() == "InnerProduct") {
     if (blob_index == 0) {
-      if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1) {
+      if (shape->size() == kConvWeightShapeSize && shape->at(kFcWeightFirstShapeIndex) == 1 &&
+          shape->at(kFcWeightSecondShapeIndex) == 1) {
         shape->erase(shape->begin());
         shape->erase(shape->begin());
       }
     } else if (blob_index == 1) {
-      if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1 && shape->at(2) == 1) {
+      if (shape->size() == kConvWeightShapeSize && shape->at(kFcBiasFirstShapeIndex) == 1 &&
+          shape->at(kFcBiasSecondShapeIndex) == 1 && shape->at(kFcBiasThirdShapeIndex) == 1) {
         shape->erase(shape->begin());
         shape->erase(shape->begin());
         shape->erase(shape->begin());
@@ -105,112 +112,14 @@ FuncGraphPtr CaffeModelParser::Parse(const converter::ConverterParameters &flag)
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
     return nullptr;
   }
-  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_CAFFE, false);
+  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_CAFFE, false, quant_type_);
   if (!unify_format->Run(res_graph_)) {
     MS_LOG(ERROR) << "Run insert transpose failed.";
     return nullptr;
   }
-  if ((status = WeightFormatTransform(res_graph_)) != RET_OK) {
-    MS_LOG(ERROR) << "WeightFormatTransform failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
-    return nullptr;
-  }
   return res_graph_;
 }
 
-STATUS CaffeModelParser::WeightFormatTransform(const FuncGraphPtr &graph) {
-  MS_ASSERT(graph != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    auto conv_cnode = node->cast<CNodePtr>();
-    if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-        !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-        !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-      continue;
-    }
-    MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex);
-    auto weight_node = conv_cnode->input(kConvWeightIndex);
-    MS_ASSERT(weight_node != nullptr);
-    auto tensor_info = opt::GetTensorInfo(weight_node);
-    if (tensor_info == nullptr) {
-      MS_LOG(ERROR) << "weight node must param value";
-      return RET_OK;
-    }
-    auto status = HardCodeCaffe(conv_cnode, tensor_info, graph);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-STATUS CaffeModelParser::HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info,
-                                       const FuncGraphPtr &graph) {
-  MS_ASSERT(conv_cnode != nullptr);
-  MS_ASSERT(tensor_info != nullptr);
-  auto weight_node = conv_node->input(kConvWeightIndex);
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value == nullptr) {
-    MS_LOG(DEBUG) << "weight node must param value";
-    return RET_OK;
-  }
-  schema::Format weight_dst_format = schema::Format::Format_KHWC;
-  STATUS status = RET_OK;
-  schema::Format weight_src_format = Format_NUM_OF_FORMAT;
-  switch (quant_type_) {
-    case QuantType_PostTraining:
-    case QuantType_WeightQuant:
-    case QuantType_QUANT_NONE: {
-      weight_src_format = schema::Format::Format_KCHW;
-    } break;
-    default: {
-      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_)
-                    << ", node: " << conv_node->fullname_with_scope();
-      return lite::RET_ERROR;
-    }
-  }
-  if (utils::isa<CNodePtr>(weight_node)) {
-    auto status =
-      HandleWeightConst(graph, conv_node, weight_node->cast<CNodePtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-const failed.";
-      return RET_ERROR;
-    }
-  }
-  weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value != nullptr) {
-    status = opt::TransFilterFormat(weight_value, schema::Format::Format_KCHW, weight_dst_format);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To"
-                    << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope()
-                    << "quant type:" << quant_type_;
-      return RET_ERROR;
-    }
-    auto type_id = static_cast<TypeId>(weight_value->data_type());
-    auto shape = weight_value->shape();
-    std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-    auto abstract = lite::CreateTensorAbstract(shape_vector, type_id);
-    if (abstract == nullptr) {
-      MS_LOG(ERROR) << "Create tensor abstarct failed";
-      return RET_ERROR;
-    }
-    weight_node->set_abstract(abstract);
-  }
-  if (utils::isa<ParameterPtr>(weight_node)) {
-    auto status =
-      HandleWeightSharing(graph, KHWC, weight_node->cast<ParameterPtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-sharing failed.";
-      return RET_ERROR;
-    }
-  }
-  return lite::RET_OK;
-}
-
 STATUS CaffeModelParser::ConvertLayers() {
   STATUS status = RET_OK;
   std::map<std::string, caffe::LayerParameter> weight_layers;
diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h
index 57b265e8c57..91a6c28a303 100644
--- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h
+++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h
@@ -56,10 +56,6 @@ class CaffeModelParser : public ModelParser {
 
   std::string GetOriginLayerName(const std::string &layer_name);
 
-  STATUS WeightFormatTransform(const FuncGraphPtr &graph);
-
-  STATUS HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph);
-
   STATUS ConvertGraphInputsOfLayer();
 
   STATUS ConvertGraphInputsOfDim();
diff --git a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc
index 92f306af4f3..bc35c5f055e 100644
--- a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc
+++ b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc
@@ -123,9 +123,11 @@ bool Conv1DInOutAdjust::Run(const FuncGraphPtr &func_graph) {
     std::vector<int64_t> axis;
     switch (conv2d_node->get_format()) {
       case mindspore::Format::NWC:
+        conv2d_node->set_format(mindspore::NHWC);
         axis = {1};
         break;
       case mindspore::Format::NCW:
+        conv2d_node->set_format(mindspore::NCHW);
         axis = {2};
         break;
       default:
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc
index 4412e0d992a..a9235f6a0a6 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc
@@ -77,24 +77,27 @@ ops::PrimitiveC *OnnxDeConvParser::Parse(const onnx::GraphProto &onnx_graph, con
     std::find_if(onnx_graph.initializer().begin(), onnx_graph.initializer().end(),
                  [onnx_conv_weight](const onnx::TensorProto &proto) { return proto.name() == onnx_conv_weight; });
   if (node_iter == onnx_graph.initializer().end()) {
-    MS_LOG(ERROR) << "not find node: " << onnx_conv_weight.c_str();
-    return nullptr;
-  }
-  std::vector<int> weight_shape;
-  auto size = (*node_iter).dims_size();
-  weight_shape.reserve(size);
-  for (int i = 0; i < size; ++i) {
-    weight_shape.emplace_back((*node_iter).dims(i));
-  }
-  if (weight_shape.size() != 4) {
-    MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size();
-    return nullptr;
-  }
-  prim->set_in_channel(weight_shape[0]);
-  prim->set_out_channel(weight_shape[1] * group);
+    // in_channel and out_channnel is set to 1 by default.
+    prim->set_in_channel(1);
+    prim->set_out_channel(1);
+    MS_LOG(WARNING) << "parsing of channelIn/Out is delayed.";
+  } else {
+    std::vector<int> weight_shape;
+    auto size = (*node_iter).dims_size();
+    weight_shape.reserve(size);
+    for (int i = 0; i < size; ++i) {
+      weight_shape.emplace_back((*node_iter).dims(i));
+    }
+    if (weight_shape.size() != 4) {
+      MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size();
+      return nullptr;
+    }
+    prim->set_in_channel(weight_shape[0]);
+    prim->set_out_channel(weight_shape[1] * group);
 
-  if (group != 1 && weight_shape[1] == 1) {
-    prim->AddAttr(ops::kIsDepthWise, MakeValue<bool>(true));
+    if (group != 1 && weight_shape[1] == 1) {
+      prim->AddAttr(ops::kIsDepthWise, MakeValue<bool>(true));
+    }
   }
 
   return prim.release();
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc
index 188a6a3600e..155f5330dd3 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc
@@ -196,6 +196,7 @@ STATUS OnnxInputAdjust::ReplaceTransposeWithGraphInput(const FuncGraphPtr &func_
   auto shape_ptr = param_node->abstract()->GetShapeTrack()->cast<abstract::ShapePtr>();
   if (shape_ptr == nullptr) {
     MS_LOG(ERROR) << "shape is nullptr.";
+    return lite::RET_ERROR;
   }
   auto shape_vector = shape_ptr->shape();
   if (shape_vector.size() != opt::kInputSizeFour) {
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc
index d343245b488..948cb8fbf48 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc
@@ -95,154 +95,14 @@ FuncGraphPtr OnnxModelParser::Parse(const converter::ConverterParameters &flag)
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
     return nullptr;
   }
-  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_ONNX, false);
+  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_ONNX, false, quant_type_);
   if (!unify_format->Run(res_graph_)) {
     MS_LOG(ERROR) << "Run insert transpose failed.";
     return nullptr;
   }
-  if ((status = WeightFormatTransform(all_func_graphs)) != RET_OK) {
-    MS_LOG(ERROR) << "WeightFormatTransform failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
-    return nullptr;
-  }
   return res_graph_;
 }
 
-STATUS OnnxModelParser::WeightFormatTransform(const std::set<FuncGraphPtr> &all_func_graphs) {
-  for (const auto &graph : all_func_graphs) {
-    MS_ASSERT(graph != nullptr);
-    auto node_list = TopoSort(graph->get_return());
-    for (auto &node : node_list) {
-      if (!utils::isa<CNodePtr>(node)) {
-        continue;
-      }
-      auto conv_cnode = node->cast<CNodePtr>();
-      if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-          !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-          !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-        continue;
-      }
-      MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex);
-      auto weight_node = conv_cnode->input(kConvWeightIndex);
-      MS_ASSERT(weight_node != nullptr);
-      auto tensor_info = opt::GetTensorInfo(weight_node);
-      auto status = HardCodeONNX(conv_cnode, tensor_info, graph);
-      if (status != lite::RET_OK) {
-        MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope();
-        return RET_ERROR;
-      }
-    }
-  }
-  return RET_OK;
-}
-
-lite::STATUS OnnxModelParser::HardCodeONNX(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info,
-                                           const FuncGraphPtr &graph) {
-  MS_ASSERT(conv_cnode != nullptr);
-  MS_ASSERT(tensor_info != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(conv_node->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
-    return lite::RET_ERROR;
-  }
-  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
-  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
-  schema::Format weight_dst_format = schema::Format::Format_KHWC;
-  STATUS status = RET_OK;
-  schema::Format weight_src_format = Format_NUM_OF_FORMAT;
-  auto weight_node = conv_node->input(kConvWeightIndex);
-  switch (quant_type_) {
-    case QuantType_AwareTraining: {
-      // sum up from current onnx quant models
-      if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) {
-        if (!is_depth_wise) {
-          weight_src_format = schema::Format::Format_KHWC;
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-        } else {
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-          weight_src_format = schema::Format::Format_CHWK;
-        }
-      } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
-        prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-        weight_src_format = schema::Format::Format_KCHW;
-      } else {
-        MS_LOG(ERROR) << "Unsupported op: " << conv_node->fullname_with_scope();
-        return lite::RET_ERROR;
-      }
-    } break;
-    case QuantType_PostTraining:
-    case QuantType_WeightQuant:
-    case QuantType_QUANT_NONE: {
-      // conv (K x C/group x kH x kW) group = 1
-      // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W)
-      // deconv (C x K/group x kH x kW) group = 1
-      // dedepth (C x K/group x kH x kW) group = channelIn ==> (C, multiplier, H, W)
-      if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion) ||
-          opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion)) {
-        if (format == schema::Format::Format_NHWC) {
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(Format_NHWC));
-          weight_src_format = schema::Format::Format_KHWC;
-        } else if (format == schema::Format::Format_KHWC) {
-          weight_src_format = schema::Format::Format_KHWC;
-        } else {
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-          weight_src_format = schema::Format::Format_KCHW;
-        }
-      }
-    } break;
-    default: {
-      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_)
-                    << ", node: " << conv_node->fullname_with_scope();
-      return lite::RET_ERROR;
-    }
-  }
-  status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format);
-  if (status != RET_OK) {
-    return RET_ERROR;
-  }
-  return lite::RET_OK;
-}
-int OnnxModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node,
-                                             const FuncGraphPtr &graph, schema::Format weight_src_format,
-                                             schema::Format weight_dst_format) {
-  if (utils::isa<CNodePtr>(weight_node)) {
-    auto status =
-      HandleWeightConst(graph, conv_node, weight_node->cast<CNodePtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-const failed.";
-      return RET_ERROR;
-    }
-  }
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value != nullptr) {
-    auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_src_format]) << "To"
-                    << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope()
-                    << "quant type:" << quant_type_;
-      return RET_ERROR;
-    }
-    auto type_id = static_cast<TypeId>(weight_value->data_type());
-    auto shape = weight_value->shape();
-    std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-    auto abstract = lite::CreateTensorAbstract(shape_vector, type_id);
-    if (abstract == nullptr) {
-      MS_LOG(ERROR) << "Create tensor abstarct failed";
-      return RET_ERROR;
-    }
-    weight_node->set_abstract(abstract);
-  }
-  if (utils::isa<ParameterPtr>(weight_node)) {
-    auto status =
-      HandleWeightSharing(graph, KHWC, weight_node->cast<ParameterPtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-sharing failed.";
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
 STATUS OnnxModelParser::InitOriginModel(const std::string &model_file) {
   auto status = ValidateFileStr(model_file, ".onnx");
   if (status != RET_OK) {
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h
index d4a170069ae..10ea0de5781 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h
@@ -92,10 +92,7 @@ class OnnxModelParser : public ModelParser {
   STATUS ConvertIfSubgraph(const onnx::GraphProto &onnx_graph, const FuncGraphPtr &anf_graph,
                            const std::string &subgrah_name, const std::string &if_node_name,
                            const std::string &root_node_name);
-  STATUS WeightFormatTransform(const std::set<FuncGraphPtr> &all_func_graphs);
-  STATUS HardCodeONNX(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph);
-  int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph,
-                              schema::Format weight_src_format, schema::Format weight_dst_format);
+
   onnx::ModelProto onnx_model_;
   onnx::GraphProto onnx_root_graph_;
   std::vector<FuncGraphPtr> all_subgraphs_;
diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc
index ea01385c10e..d48cce87626 100644
--- a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc
+++ b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc
@@ -98,8 +98,8 @@ bool OnnxPadAdjust::Run(const FuncGraphPtr &func_graph) {
     if (!input_node->isa<CNode>()) {
       continue;
     }
-    // reshape the padding of pad operator to 2 x 4.
-    std::vector<int> shape_pre = {2, 4};
+    // reshape the padding of pad operator to 2 x i.
+    std::vector<int> shape_pre = {2, -1};
     auto reshape_pre = NewReshapeOpNode(func_graph, input_node, shape_pre);
     if (reshape_pre == nullptr) {
       MS_LOG(ERROR) << "create reshape failed.";
diff --git a/mindspore/lite/tools/converter/parser/parser_utils.cc b/mindspore/lite/tools/converter/parser/parser_utils.cc
index 5e3d9cbb8e1..6d00a18da3b 100644
--- a/mindspore/lite/tools/converter/parser/parser_utils.cc
+++ b/mindspore/lite/tools/converter/parser/parser_utils.cc
@@ -17,6 +17,7 @@
 #include <memory>
 #include <algorithm>
 #include <vector>
+#include <set>
 #include <string>
 #include "tools/converter/parser/tf_bidirection_gru_cf_fusion.h"
 #include "tools/converter/parser/unused_node_remove_pass.h"
@@ -30,7 +31,15 @@
 namespace mindspore::lite {
 namespace {
 constexpr size_t kNumWeightIndex = 2;
+bool IsWeightNodeSensitive(const AnfNodePtr &node) {
+  return opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) ||
+         opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) ||
+         opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion) ||
+         opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) ||
+         opt::CheckPrimitiveType(node, prim::kPrimAdam);
 }
+}  // namespace
+
 void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set<FuncGraphPtr> *all_func_graphs) {
   if (all_func_graphs->find(func_graph) == all_func_graphs->end()) {
     all_func_graphs->insert(func_graph);
@@ -106,6 +115,7 @@ int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std::
   }
   return lite::RET_OK;
 }
+
 int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector<int> *perm) {
   MS_ASSERT(perm != nullptr);
   auto src_format_str = std::string(schema::EnumNameFormat(src_format));
@@ -125,112 +135,74 @@ int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format
   return lite::RET_OK;
 }
 
-int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format,
-                                    const ParameterPtr &weight_node, std::vector<int> perm) {
-  MS_ASSERT(graph != nullptr);
-  MS_ASSERT(weight_node != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  std::vector<CNodePtr> adjust_nodes;
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    if (opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) ||
-        opt::CheckPrimitiveType(node, prim::kPrimAdam)) {
-      continue;
-    }
-    auto cnode = node->cast<CNodePtr>();
-    auto inputs = cnode->inputs();
-    if (std::any_of(inputs.begin(), inputs.end(),
-                    [&](const AnfNodePtr &anf_node) { return weight_node == anf_node; })) {
-      if (opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) ||
-          opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) ||
-          opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-        auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-        prim->AddAttr(ops::kFormat, MakeValue<int64_t>(format));
-        continue;
-      }
-      adjust_nodes.push_back(cnode);
+AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode) {
+  MS_ASSERT(graph != nullptr && cnode != nullptr);
+  if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) &&
+      !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) &&
+      !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) {
+    MS_LOG(ERROR) << "cnode is not a member of convolution's family.";
+    return nullptr;
+  }
+  auto weight_node = cnode->input(opt::kInputIndexTwo);
+  bool is_real_weight =
+    !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) && !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad);
+  while (!is_real_weight) {
+    if (!utils::isa<CNode>(weight_node)) {
+      MS_LOG(ERROR) << "weight node is invalid.";
+      return nullptr;
     }
+    auto weight_cnode = weight_node->cast<CNodePtr>();
+    weight_node = weight_cnode->input(1);
+    is_real_weight = !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) &&
+                     !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad);
   }
-  if (adjust_nodes.empty()) {
-    MS_LOG(DEBUG) << "do not need to adjust nodes.";
-    return lite::RET_OK;
-  }
-  auto perm_node = opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_sharing_perm");
-  auto prim = std::make_shared<ops::Transpose>();
-  prim->AddAttr("quant_params", std::make_shared<QuantParamHolder>(1, 1));
-  prim->AddAttr(ops::kFormat, MakeValue<int64_t>(dst_format));
-  auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node});
-  if (!weight_node->has_default()) {
-    MS_LOG(DEBUG) << "Weight parameter should has default parameter.";
-    return lite::RET_ERROR;
-  }
-  auto weight_tensor = weight_node->default_param()->cast<tensor::TensorPtr>();
-  if (weight_tensor == nullptr) {
-    MS_LOG(DEBUG) << "Default parameter of weight parameter should be a tensor.";
-    return lite::RET_ERROR;
-  }
-  auto abstract = CreateTensorAbstract(weight_tensor->shape_c(), weight_tensor->data_type());
-  if (abstract == nullptr) {
-    MS_LOG(ERROR) << "Create tensor abstarct failed";
-    return RET_ERROR;
-  }
-  transpose_node->set_abstract(abstract);
-  transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_sharing_post");
-  for (auto &adjust_node : adjust_nodes) {
-    auto inputs = adjust_node->inputs();
-    std::replace_if(
-      inputs.begin(), inputs.end(), [&weight_node](const AnfNodePtr &anf_node) { return weight_node == anf_node; },
-      transpose_node);
-    adjust_node->set_inputs(inputs);
-  }
-  return lite::RET_OK;
+  auto manager = Manage(graph);
+  MS_ASSERT(manager != nullptr);
+  manager->Replace(cnode->input(opt::kInputIndexTwo), weight_node);
+  return weight_node;
 }
 
-int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node,
-                        schema::Format src_format, schema::Format dst_format) {
-  MS_ASSERT(graph != nullptr);
-  MS_ASSERT(weight_node != nullptr);
+int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format,
+                          schema::Format dst_format, std::set<AnfNodePtr> *has_visited) {
+  MS_ASSERT(graph != nullptr && cnode != nullptr && has_visited != nullptr);
   if (src_format == dst_format) {
     return lite::RET_OK;
   }
-  std::vector<int> perm;
-  auto status = GetTransposePermSharing(src_format, dst_format, &perm);
-  if (status != lite::RET_OK) {
-    MS_LOG(ERROR) << "get perm failed.";
-    return status;
+  if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) &&
+      !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) &&
+      !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) {
+    MS_LOG(ERROR) << "cnode is not a member of convolution's family.";
+    return RET_ERROR;
   }
-  status = TransposeInsertForWeightSharing(graph, dst_format, format, weight_node, perm);
-  if (status != lite::RET_OK) {
-    MS_LOG(ERROR) << "transpose insert failed.";
+  if (GetRealConvWeightNode(graph, cnode) == nullptr) {
+    MS_LOG(ERROR) << "current conv node is invalid, node name is " << cnode->fullname_with_scope();
+    return RET_ERROR;
+  }
+  bool is_const_weight = true;
+  auto weight_node = cnode->input(opt::kInputIndexTwo);
+  if (utils::isa<CNode>(weight_node)) {
+    is_const_weight = false;
+  } else if (utils::isa<Parameter>(weight_node)) {
+    auto weight_param_node = weight_node->cast<ParameterPtr>();
+    if (!weight_param_node->has_default()) {
+      is_const_weight = false;
+    }
+  }
+  int status;
+  if (is_const_weight) {
+    status = UnifyConstConvWeight(graph, weight_node, src_format, dst_format, has_visited);
+  } else {
+    status = UnifyVariableConvWeight(graph, weight_node, src_format, dst_format, has_visited);
+  }
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "unfiy coneight failed, cnode name is " << cnode->fullname_with_scope();
   }
   return status;
 }
 
-int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node,
-                                  std::vector<int> perm) {
-  MS_ASSERT(graph != nullptr);
-  MS_ASSERT(weight_node != nullptr);
-  auto manager = Manage(graph);
-  if (opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) ||
-      opt::CheckPrimitiveType(weight_node, prim::kPrimLoad)) {
-    manager->Replace(weight_node, weight_node->input(1));
-    return RET_OK;
-  }
-  auto perm_node = opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_const_perm");
-  auto prim = std::make_shared<ops::Transpose>();
-  prim->AddAttr("quant_params", std::make_shared<QuantParamHolder>(1, 1));
-  auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node});
-  transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_const_post");
-  conv_node->set_input(kNumWeightIndex, transpose_node);
-  return lite::RET_OK;
-}
-
-int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node,
-                      schema::Format src_format, schema::Format dst_format) {
-  MS_ASSERT(graph != nullptr);
-  MS_ASSERT(weight_node != nullptr);
+int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                            schema::Format dst_format, std::set<AnfNodePtr> *has_visited) {
+  MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr);
   if (src_format == dst_format) {
     return lite::RET_OK;
   }
@@ -240,10 +212,142 @@ int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, cons
     MS_LOG(ERROR) << "get perm failed.";
     return status;
   }
-  status = TransposeInsertForWeightConst(graph, conv_node, weight_node, perm);
-  if (status != lite::RET_OK) {
-    MS_LOG(ERROR) << "transpose insert failed.";
+  auto manager = Manage(graph);
+  MS_ASSERT(manager != nullptr);
+  CNodePtr trans_cnode = nullptr;
+  auto weight_node_users = manager->node_users()[weight_node];
+  for (auto &weight_node_user : weight_node_users) {
+    auto post_node = weight_node_user.first;
+    if (!utils::isa<CNodePtr>(post_node)) {
+      MS_LOG(ERROR) << "post node is invalid.";
+      return RET_ERROR;
+    }
+    if (!IsWeightNodeSensitive(post_node)) {
+      continue;
+    }
+    has_visited->insert(post_node);
+    if (trans_cnode == nullptr) {
+      trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm");
+      MS_ASSERT(trans_cnode != nullptr);
+      auto abstract = weight_node->abstract();
+      ShapeVector shape;
+      if (abstract != nullptr) {
+        ShapeVector weight_shape;
+        if (opt::FetchShapeFromAbstract(abstract, &weight_shape) != RET_OK) {
+          MS_LOG(ERROR) << "fetch shape from abstract failed.";
+          return RET_ERROR;
+        }
+        if (!weight_shape.empty()) {
+          if (weight_shape.size() != opt::kInputSizeFour) {
+            MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size();
+            return RET_ERROR;
+          }
+          std::transform(perm.begin(), perm.end(), std::back_inserter(shape),
+                         [&weight_shape](const int index) { return weight_shape[index]; });
+        }
+        abstract = abstract->Clone();
+      } else {
+        abstract = CreateTensorAbstract(shape, TypeId::kNumberTypeFloat32);
+        MS_ASSERT(abstract != nullptr);
+      }
+      abstract->set_shape(std::make_shared<abstract::Shape>(shape));
+      trans_cnode->set_abstract(abstract);
+    }
+    auto post_cnode = post_node->cast<CNodePtr>();
+    auto tr = manager->Transact();
+    tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode);
+    tr.Commit();
   }
-  return status;
+  return RET_OK;
+}
+
+int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                         schema::Format dst_format, std::set<AnfNodePtr> *has_visited) {
+  MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr);
+  if (src_format == dst_format) {
+    return lite::RET_OK;
+  }
+  auto weight_value = opt::GetTensorInfo(weight_node);
+  if (weight_value == nullptr) {
+    MS_LOG(ERROR) << "conv weight is non-const.";
+    return RET_ERROR;
+  }
+  auto status = opt::TransFilterFormat(weight_value, src_format, dst_format);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(src_format) << "To" << EnumNameFormat(dst_format)
+                  << " failed, node : " << weight_node->fullname_with_scope();
+    return RET_ERROR;
+  }
+  auto type_id = static_cast<TypeId>(weight_value->data_type());
+  auto shape = weight_value->shape();
+  auto abstract = CreateTensorAbstract(shape, type_id);
+  if (abstract == nullptr) {
+    MS_LOG(ERROR) << "Create tensor abstarct failed";
+    return RET_ERROR;
+  }
+  weight_node->set_abstract(abstract);
+  if (HandleConstConvWeightShared(graph, weight_node, src_format, dst_format, has_visited) != RET_OK) {
+    MS_LOG(ERROR) << "handle const conv weight-shared failed, node name is " << weight_node->fullname_with_scope();
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                                schema::Format dst_format, std::set<AnfNodePtr> *has_visited) {
+  MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr);
+  if (src_format == dst_format) {
+    return RET_OK;
+  }
+  std::vector<int> perm;
+  auto status = GetTransposePermSharing(src_format, dst_format, &perm);
+  if (status != RET_OK) {
+    MS_LOG(ERROR) << "get perm failed.";
+    return status;
+  }
+  auto manager = Manage(graph);
+  MS_ASSERT(manager != nullptr);
+  CNodePtr trans_cnode = nullptr;
+  auto weight_node_users = manager->node_users()[weight_node];
+  for (auto &weight_node_user : weight_node_users) {
+    auto post_node = weight_node_user.first;
+    if (!utils::isa<CNodePtr>(post_node)) {
+      MS_LOG(ERROR) << "post node is invalid.";
+      return RET_ERROR;
+    }
+    if (IsWeightNodeSensitive(post_node)) {
+      has_visited->insert(post_node);
+      continue;
+    }
+    if (trans_cnode == nullptr) {
+      trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm");
+      MS_ASSERT(trans_cnode != nullptr);
+      auto prim = GetValueNode<PrimitivePtr>(trans_cnode->input(0));
+      MS_ASSERT(prim != nullptr);
+      prim->AddAttr(ops::kFormat, MakeValue<int64_t>(dst_format));
+      auto weight_value = opt::GetTensorInfo(weight_node);
+      MS_ASSERT(weight_value != nullptr);
+      auto weight_shape = weight_value->shape();
+      ShapeVector shape;
+      if (!weight_shape.empty()) {
+        if (weight_shape.size() != opt::kInputSizeFour) {
+          MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size();
+          return RET_ERROR;
+        }
+        std::transform(perm.begin(), perm.end(), std::back_inserter(shape),
+                       [&weight_shape](const int index) { return weight_shape[index]; });
+      }
+      auto abstract = weight_node->abstract();
+      MS_ASSERT(abstract != nullptr);
+      abstract = abstract->Clone();
+      abstract->set_shape(std::make_shared<abstract::Shape>(shape));
+      trans_cnode->set_abstract(abstract);
+    }
+    auto post_cnode = post_node->cast<CNodePtr>();
+    auto tr = manager->Transact();
+    tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode);
+    tr.Commit();
+  }
+  return RET_OK;
 }
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/tools/converter/parser/parser_utils.h b/mindspore/lite/tools/converter/parser/parser_utils.h
index d34379367d2..913ff3d8c10 100644
--- a/mindspore/lite/tools/converter/parser/parser_utils.h
+++ b/mindspore/lite/tools/converter/parser/parser_utils.h
@@ -30,14 +30,15 @@ void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set<FuncGraphPtr> *all
 int CommonAnfAdjust(const std::set<FuncGraphPtr> &all_func_graphs);
 int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std::vector<int> *perm);
 int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector<int> *perm);
-int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node,
-                                  std::vector<int> perm);
-int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node,
-                      schema::Format src_format, schema::Format dst_format);
-int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format,
-                                    const ParameterPtr &weight_node, std::vector<int> perm);
-int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node,
-                        schema::Format src_format, schema::Format dst_format);
+AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode);
+int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format,
+                          schema::Format dst_format, std::set<AnfNodePtr> *has_visited);
+int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                            schema::Format dst_format, std::set<AnfNodePtr> *has_visited);
+int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                         schema::Format dst_format, std::set<AnfNodePtr> *has_visited);
+int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format,
+                                schema::Format dst_format, std::set<AnfNodePtr> *has_visited);
 }  // namespace lite
 }  // namespace mindspore
 
diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
index 14d4718c1c1..bea44401e44 100644
--- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
+++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc
@@ -576,150 +576,16 @@ FuncGraphPtr TFModelParser::Parse(const converter::ConverterParameters &flag) {
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
     return nullptr;
   }
-  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_TF, false);
+  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_TF, false, quant_type_);
   if (!unify_format->Run(res_graph_)) {
     MS_LOG(ERROR) << "Run insert transpose failed.";
     return nullptr;
   }
-  if ((status = WeightFormatTransform(res_graph_)) != RET_OK) {
-    MS_LOG(ERROR) << "WeightFormatTransform failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
-    return nullptr;
-  }
   res_graph_->set_manager(nullptr);
   static auto root_func_manager = Manage(res_graph_);
   return res_graph_;
 }
 
-STATUS TFModelParser::WeightFormatTransform(const FuncGraphPtr &graph) {
-  MS_ASSERT(graph != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    auto conv_cnode = node->cast<CNodePtr>();
-    if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-        !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-        !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-      continue;
-    }
-    MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex);
-    auto weight_node = conv_cnode->input(kConvWeightIndex);
-    MS_ASSERT(weight_node != nullptr);
-    auto tensor_info = opt::GetTensorInfo(weight_node);
-    auto status = HardCodeTF(conv_cnode, tensor_info, graph);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-STATUS TFModelParser::HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info,
-                                 const FuncGraphPtr &graph) {
-  MS_ASSERT(conv_cnode != nullptr);
-  MS_ASSERT(tensor_info != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(conv_node->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
-    return RET_ERROR;
-  }
-  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
-  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
-  schema::Format weight_dst_format = schema::Format::Format_KHWC;
-  STATUS status = RET_OK;
-  schema::Format weight_src_format = Format_NUM_OF_FORMAT;
-  auto weight_node = conv_node->input(kConvWeightIndex);
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  switch (quant_type_) {
-    case QuantType_AwareTraining:
-    case QuantType_PostTraining:
-    case QuantType_WeightQuant:
-    case QuantType_QUANT_NONE: {
-      if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) {
-        if (!is_depth_wise) {
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-          weight_src_format = schema::Format::Format_HWCK;
-        } else {
-          prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-          weight_src_format = schema::Format::Format_HWKC;
-        }
-      } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
-        prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-        weight_src_format = schema::Format::Format_HWCK;
-      }
-      if (format == Format_NCHW) {
-        prim->AddAttr(ops::kFormat, MakeValue<int64_t>(Format_NCHW));
-      } else if (format == Format_KHWC) {
-        prim->AddAttr(ops::kFormat, MakeValue<int64_t>(weight_dst_format));
-        weight_src_format = schema::Format::Format_KHWC;
-      }
-    } break;
-    default: {
-      MS_LOG(ERROR) << "Unsupported op: " << conv_node->fullname_with_scope();
-      return lite::RET_ERROR;
-    }
-  }
-  status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format);
-  if (status != RET_OK) {
-    return RET_ERROR;
-  }
-  if (format == Format_NCHW) {
-    prim->AddAttr(ops::kFormat, MakeValue<int64_t>(Format_NCHW));
-  }
-  return RET_OK;
-}
-
-int TFModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node,
-                                           const FuncGraphPtr &graph, schema::Format weight_src_format,
-                                           schema::Format weight_dst_format) {
-  auto prim = GetValueNode<PrimitivePtr>(conv_node->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
-    return RET_ERROR;
-  }
-  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
-
-  if (utils::isa<CNodePtr>(weight_node)) {
-    auto status =
-      HandleWeightConst(graph, conv_node, weight_node->cast<CNodePtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-const failed.";
-      return RET_ERROR;
-    }
-  }
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value != nullptr) {
-    auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To"
-                    << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope()
-                    << "quant type:" << quant_type_;
-      return RET_ERROR;
-    }
-    auto type_id = static_cast<TypeId>(weight_value->data_type());
-    auto shape = weight_value->shape();
-    std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-    auto abstract = CreateTensorAbstract(shape_vector, type_id);
-    if (abstract == nullptr) {
-      MS_LOG(ERROR) << "Create tensor abstarct failed";
-      return RET_ERROR;
-    }
-    weight_node->set_abstract(abstract);
-  }
-  if (utils::isa<ParameterPtr>(weight_node)) {
-    auto status =
-      HandleWeightSharing(graph, format, weight_node->cast<ParameterPtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-sharing failed.";
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
 STATUS TFModelParser::ConvertSubgraphInputs(std::map<std::string, const tensorflow::NodeDef *> *tf_sub_node_map,
                                             std::unordered_map<std::string, AnfNodePtr> *anf_sub_node_map,
                                             const tensorflow::FunctionDef &tf_sub_fuction, const CNodePtr &cnode,
diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h
index 2a63210d61f..f0ecc57a254 100644
--- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h
+++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h
@@ -95,13 +95,6 @@ class TFModelParser : public ModelParser {
 
   STATUS ConnectNullInput();
 
-  STATUS WeightFormatTransform(const FuncGraphPtr &graph);
-
-  STATUS HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph);
-
-  int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph,
-                              schema::Format weight_src_format, schema::Format weight_dst_format);
-
   std::unique_ptr<tensorflow::GraphDef> tf_root_graph_;                     // tf root graph def
   std::map<std::string, const tensorflow::NodeDef *> tf_root_graph_nodes_;  // tf root graph node map
   std::unordered_map<std::string, AnfNodePtr> anf_root_node_map_;
diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc
index 79e127b2001..7d29bfcb66b 100644
--- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc
+++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc
@@ -105,128 +105,13 @@ FuncGraphPtr TfliteModelParser::Parse(const converter::ConverterParameters &flag
     ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
     return nullptr;
   }
-  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_TFLITE, false);
+  auto unify_format = std::make_shared<UnifyFormatToNHWC>(lite::converter::FmkType_TFLITE, false, quant_type_);
   if (!unify_format->Run(res_graph_)) {
     MS_LOG(ERROR) << "Run insert transpose failed.";
     return nullptr;
   }
-  if ((status = WeightFormatTransform(res_graph_)) != RET_OK) {
-    MS_LOG(ERROR) << "WeightFormatTransform failed.";
-    ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status);
-    return nullptr;
-  }
   return res_graph_;
 }
-STATUS TfliteModelParser::WeightFormatTransform(const FuncGraphPtr &graph) {
-  MS_ASSERT(graph != nullptr);
-  auto node_list = TopoSort(graph->get_return());
-  for (auto &node : node_list) {
-    if (!utils::isa<CNodePtr>(node)) {
-      continue;
-    }
-    auto conv_cnode = node->cast<CNodePtr>();
-    if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
-        !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
-        !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
-      continue;
-    }
-    MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex);
-    auto weight_node = conv_cnode->input(kConvWeightIndex);
-    MS_ASSERT(weight_node != nullptr);
-    auto tensor_info = opt::GetTensorInfo(weight_node);
-    auto status = HardCodeTflite(conv_cnode, tensor_info, graph);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
-
-STATUS TfliteModelParser::HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info,
-                                         const FuncGraphPtr &graph) {
-  MS_ASSERT(conv_cnode != nullptr);
-  auto prim = GetValueNode<PrimitivePtr>(conv_node->input(0));
-  if (prim == nullptr) {
-    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
-    return lite::RET_ERROR;
-  }
-  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
-  schema::Format weight_dst_format = schema::Format::Format_KHWC;
-  STATUS status = RET_OK;
-  schema::Format weight_src_format = Format_NUM_OF_FORMAT;
-  auto weight_node = conv_node->input(kConvWeightIndex);
-  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
-  switch (quant_type_) {
-    case QuantType_AwareTraining:
-    case QuantType_PostTraining:
-    case QuantType_WeightQuant:
-    case QuantType_QUANT_NONE: {
-      if (format == KHWC) {
-        weight_src_format = schema::Format::Format_KHWC;
-      } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) {
-        if (!is_depth_wise) {
-          weight_src_format = schema::Format::Format_KHWC;
-        } else {
-          weight_src_format = schema::Format::Format_CHWK;
-        }
-      } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
-        weight_src_format = schema::Format::Format_CHWK;
-      }
-    } break;
-    default: {
-      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_)
-                    << ", node: " << conv_node->fullname_with_scope();
-      return RET_ERROR;
-    }
-  }
-  status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format);
-  if (status != RET_OK) {
-    return RET_ERROR;
-  }
-  return lite::RET_OK;
-}
-
-int TfliteModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node,
-                                               const FuncGraphPtr &graph, schema::Format weight_src_format,
-                                               schema::Format weight_dst_format) {
-  if (utils::isa<CNodePtr>(weight_node)) {
-    auto status =
-      HandleWeightConst(graph, conv_node, weight_node->cast<CNodePtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-const failed.";
-      return RET_ERROR;
-    }
-  }
-  auto weight_value = opt::GetTensorInfo(weight_node);
-  if (weight_value != nullptr) {
-    auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format);
-    if (status != RET_OK) {
-      MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To"
-                    << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope()
-                    << "quant type:" << quant_type_;
-      return RET_ERROR;
-    }
-    auto type_id = static_cast<TypeId>(weight_value->data_type());
-    auto shape = weight_value->shape();
-    std::vector<int64_t> shape_vector(shape.begin(), shape.end());
-    auto abstract = lite::CreateTensorAbstract(shape_vector, type_id);
-    if (abstract == nullptr) {
-      MS_LOG(ERROR) << "Create tensor abstarct failed";
-      return RET_ERROR;
-    }
-    weight_node->set_abstract(abstract);
-  }
-  if (utils::isa<ParameterPtr>(weight_node)) {
-    auto status =
-      HandleWeightSharing(graph, KHWC, weight_node->cast<ParameterPtr>(), weight_src_format, weight_dst_format);
-    if (status != lite::RET_OK) {
-      MS_LOG(ERROR) << "handle weight-sharing failed.";
-      return RET_ERROR;
-    }
-  }
-  return RET_OK;
-}
 
 std::string GetTensorName(size_t index, const tflite::BuiltinOperator &op_type, const std::string &op_name) {
   std::string tensor_name = op_name + "/input-" + std::to_string(index);
diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h
index b45c2ee033c..78d8b22d2e8 100644
--- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h
+++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h
@@ -52,10 +52,6 @@ class TfliteModelParser : public ModelParser {
   STATUS ConvertGraphOutputs();
   static STATUS SetTensorQuantParam(const tflite::TensorT *tflite_tensor, std::vector<QuantParamT> *quant_params,
                                     int round_type = 1);
-  int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph,
-                              schema::Format weight_src_format, schema::Format weight_dst_format);
-  STATUS WeightFormatTransform(const FuncGraphPtr &graph);
-  STATUS HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph);
   QuantType quant_type_ = schema::QuantType_QUANT_NONE;
 };
 }  // namespace lite
diff --git a/mindspore/lite/tools/converter/parser/unify_format.cc b/mindspore/lite/tools/converter/parser/unify_format.cc
index 29ea6005d41..f3a07842db2 100644
--- a/mindspore/lite/tools/converter/parser/unify_format.cc
+++ b/mindspore/lite/tools/converter/parser/unify_format.cc
@@ -15,14 +15,162 @@
  */
 
 #include "tools/converter/parser/unify_format.h"
+#include <map>
 
 namespace mindspore {
 namespace lite {
 namespace {
 constexpr int kInputChannal = 3;
+STATUS DecideMINDIRConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type,
+                                       schema::Format *src_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
+    return lite::RET_ERROR;
+  }
+  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
+  if (format == schema::Format_NHWC) {
+    *src_format = schema::Format_KHWC;
+  } else if (format == schema::Format_NCHW) {
+    *src_format = schema::Format_KCHW;
+  } else {
+    MS_LOG(ERROR) << "cnode format is invalid.";
+    return RET_ERROR;
+  }
+  return RET_OK;
 }
-void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
-  MS_ASSERT(cnode != nullptr);
+
+STATUS DecideTFConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
+    return lite::RET_ERROR;
+  }
+  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
+  switch (quant_type) {
+    case QuantType_AwareTraining:
+    case QuantType_PostTraining:
+    case QuantType_WeightQuant:
+    case QuantType_QUANT_NONE: {
+      if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) {
+        if (!is_depth_wise) {
+          *src_format = schema::Format_HWCK;
+        } else {
+          *src_format = schema::Format_HWKC;
+        }
+      } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
+        *src_format = schema::Format::Format_HWCK;
+      } else {
+        MS_LOG(ERROR) << "depthwise-conv2dTranspose need to check.";
+        return RET_ERROR;
+      }
+    } break;
+    default: {
+      MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope();
+      return lite::RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+
+STATUS DecideTFLITEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type,
+                                       schema::Format *src_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
+    return lite::RET_ERROR;
+  }
+  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
+  switch (quant_type) {
+    case QuantType_AwareTraining:
+    case QuantType_PostTraining:
+    case QuantType_WeightQuant:
+    case QuantType_QUANT_NONE: {
+      if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) {
+        if (!is_depth_wise) {
+          *src_format = schema::Format_KHWC;
+        } else {
+          *src_format = schema::Format_CHWK;
+        }
+      } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
+        *src_format = schema::Format_CHWK;
+      } else {
+        MS_LOG(ERROR) << "cannot decide weight format, current situation need to check.";
+        return RET_NOT_SUPPORT;
+      }
+    } break;
+    default: {
+      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type)
+                    << ", node: " << cnode->fullname_with_scope();
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+
+STATUS DecideCAFFEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr);
+  *src_format = schema::Format_KCHW;
+  return RET_OK;
+}
+
+STATUS DecideONNXConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive.";
+    return lite::RET_ERROR;
+  }
+  bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue<bool>(prim->GetAttr(ops::kIsDepthWise));
+  int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue<int64_t>(prim->GetAttr(ops::kFormat)) : 0;
+  switch (quant_type) {
+    case QuantType_AwareTraining: {
+      if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) {
+        if (!is_depth_wise) {
+          *src_format = schema::Format_KHWC;
+        } else {
+          *src_format = schema::Format_CHWK;
+        }
+      } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) {
+        *src_format = schema::Format_KCHW;
+      } else {
+        MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope();
+        return lite::RET_ERROR;
+      }
+    } break;
+    case QuantType_PostTraining:
+    case QuantType_WeightQuant:
+    case QuantType_QUANT_NONE: {
+      if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) ||
+          opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) {
+        if (format == schema::Format_NHWC) {
+          *src_format = schema::Format_KHWC;
+        } else if (format == schema::Format_NCHW) {
+          *src_format = schema::Format_KCHW;
+        } else {
+          MS_LOG(ERROR) << "format is invalid, format is " << format;
+          return RET_ERROR;
+        }
+      } else {
+        MS_LOG(ERROR) << "d an unsupported op type, which need to check. the type is " << prim->name();
+        return RET_NOT_SUPPORT;
+      }
+    } break;
+    default: {
+      MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type)
+                    << ", node: " << cnode->fullname_with_scope();
+      return lite::RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+}  // namespace
+
+STATUS UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
+  MS_ASSERT(cnode != nullptr && trans_info != nullptr);
   auto prim_node = cnode->input(0);
   auto prim = GetValueNode<PrimitivePtr>(prim_node);
   MS_ASSERT(prim != nullptr);
@@ -30,7 +178,7 @@ void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::Trans
   auto &specify_nchw_op_map = opt::GetNCHWOpMap();
   if (fmk_type_ == lite::converter::FmkType_TFLITE) {
     if (specify_nchw_op_map.find(prim->name()) == specify_nchw_op_map.end()) {
-      return;
+      return lite::RET_OK;
     }
     trans_info->pre_ = opt::kNHWC2NCHW;
     trans_info->post_ = opt::kNCHW2NHWC;
@@ -47,12 +195,13 @@ void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::Trans
     if (specify_nhwc_op_map.find(prim->name()) != specify_nhwc_op_map.end()) {
       if (fmk_type_ == lite::converter::FmkType_ONNX && prim->GetAttr(ops::kFormat) != nullptr &&
           GetValue<int64_t>(prim->GetAttr(ops::kFormat)) == NHWC) {
-        return;
+        return lite::RET_OK;
       }
       trans_info->pre_ = opt::kNCHW2NHWC;
       trans_info->post_ = opt::kNHWC2NCHW;
     }
   }
+  return lite::RET_OK;
 }
 
 void UnifyFormatToNHWC::SetSensitiveOps() {
@@ -63,6 +212,7 @@ void UnifyFormatToNHWC::SetSensitiveOps() {
 }
 
 bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) {
+  MS_ASSERT(func_graph != nullptr);
   if (fmk_type_ == converter::FmkType_TF || fmk_type_ == converter::FmkType_TFLITE) {
     return false;
   }
@@ -74,5 +224,29 @@ bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_g
 }
 
 bool UnifyFormatToNHWC::DecideWhetherInferShapeForNewNode() { return false; }
+
+STATUS UnifyFormatToNHWC::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                                          schema::Format *dst_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr);
+  *dst_format = schema::Format_KHWC;
+  std::map<converter::FmkType, std::function<int(const CNodePtr &, schema::QuantType, schema::Format *)>>
+    decide_functions = {{converter::FmkType_MS, DecideMINDIRConvWeightSrcFormat},
+                        {converter::FmkType_TF, DecideTFConvWeightSrcFormat},
+                        {converter::FmkType_TFLITE, DecideTFLITEConvWeightSrcFormat},
+                        {converter::FmkType_CAFFE, DecideCAFFEConvWeightSrcFormat},
+                        {converter::FmkType_ONNX, DecideONNXConvWeightSrcFormat}};
+  auto iter = decide_functions.find(fmk_type_);
+  if (iter == decide_functions.end()) {
+    MS_LOG(ERROR) << "current fmk don't support, please check.";
+    return RET_NOT_SUPPORT;
+  }
+  auto decide_func = iter->second;
+  MS_ASSERT(decide_func != nullptr);
+  if (decide_func(cnode, quant_type_, src_format) != RET_OK) {
+    MS_LOG(ERROR) << "run decide function failed, cannot decide conv weight format.";
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/converter/parser/unify_format.h b/mindspore/lite/tools/converter/parser/unify_format.h
index 49f20f44c4f..1ef43187504 100644
--- a/mindspore/lite/tools/converter/parser/unify_format.h
+++ b/mindspore/lite/tools/converter/parser/unify_format.h
@@ -24,15 +24,19 @@ namespace mindspore {
 namespace lite {
 class UnifyFormatToNHWC : public opt::ToFormatBase {
  public:
-  explicit UnifyFormatToNHWC(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false)
-      : ToFormatBase(fmk_type, train_flag) {}
+  explicit UnifyFormatToNHWC(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false,
+                             schema::QuantType quant_type = schema::QuantType_QUANT_NONE)
+      : ToFormatBase(fmk_type, train_flag), quant_type_(quant_type) {}
   ~UnifyFormatToNHWC() override = default;
 
  private:
-  void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
+  STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
   void SetSensitiveOps() override;
   bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) override;
   bool DecideWhetherInferShapeForNewNode() override;
+  STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                         schema::Format *dst_format) override;
+  schema::QuantType quant_type_{schema::QuantType_QUANT_NONE};
 };
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
index 6e5376b90dd..6e4bce06232 100644
--- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc
@@ -1454,7 +1454,10 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) {
         auto tensor = beforeInputs[0];
         MS_ASSERT(tensor != nullptr);
         auto lite_tensor = dynamic_cast<mindspore::lite::Tensor *>(tensor);
-        MS_ASSERT(lite_tensor != nullptr);
+        if (lite_tensor == nullptr) {
+          MS_LOG(ERROR) << "Before inputs is not a lite::Tensor";
+          return false;
+        }
         if (tensor->data_type() != kNumberTypeInt8) {
           MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type();
           return false;
@@ -1513,7 +1516,10 @@ KernelCallBack PostTrainingQuantizer::GetInt8AfterCallBack() {
       auto tensor = afterOutputs[0];
       MS_ASSERT(tensor != nullptr);
       auto lite_tensor = dynamic_cast<mindspore::lite::Tensor *>(tensor);
-      MS_ASSERT(lite_tensor != nullptr);
+      if (lite_tensor == nullptr) {
+        MS_LOG(ERROR) << "Before inputs is not a lite::Tensor";
+        return false;
+      }
       if (tensor->data_type() != kNumberTypeInt8) {
         MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type();
         return false;
diff --git a/mindspore/lite/tools/converter/quantizer/quant_cast.cc b/mindspore/lite/tools/converter/quantizer/quant_cast.cc
index 6e05fdcced2..82dca0ec3c5 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_cast.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_cast.cc
@@ -41,14 +41,13 @@ STATUS QuantCast::Run(const FuncGraphPtr &graph) {
   auto cnodes = graph->GetOrderedCnodes();
   for (auto &cnode : cnodes) {
     auto primitive_c = GetValueNode<std::shared_ptr<ops::PrimitiveC>>(cnode->input(0));
-    auto primitive_quant_param_holder = GetCNodeQuantHolder(primitive_c);
-    MS_ASSERT(primitive_quant_param_holder != nullptr);
-    auto curnode_quant_type = schema::QuantType_QUANT_NONE;
     if (primitive_c == nullptr) {
       MS_LOG(WARNING) << "primitive_c is nullptr: " << cnode->fullname_with_scope();
-    } else {
-      curnode_quant_type = primitive_quant_param_holder->quant_type();
+      continue;
     }
+    auto primitive_quant_param_holder = GetCNodeQuantHolder(primitive_c);
+    MS_ASSERT(primitive_quant_param_holder != nullptr);
+    auto curnode_quant_type = primitive_quant_param_holder->quant_type();
     if (primitive_c->name() == ops::kNameGather) {
       continue;
     }
diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc
index 980e88dcf2f..f9cf72ca306 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc
@@ -19,14 +19,19 @@
 #include "mindspore/core/utils/log_adapter.h"
 #include "mindspore/core/ir/dtype/type_id.h"
 namespace mindspore::lite {
+const size_t kWeightQueryIndex = 4;
+const size_t kWeightKeyIndex = 5;
+const size_t kWeightValueIndex = 6;
+const size_t kWeightOutputIndex = 10;
+
 bool AttentionQuantTypeDeterminer::DetermineQuantWeight(const mindspore::schema::MetaGraphT &graph,
                                                         mindspore::schema::CNodeT *node) {
   MS_ASSERT(node->inputIndex.size() >= 2);
   auto &input_tensor = graph.allTensors.at(node->inputIndex.at(kInputIndex));
-  auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(4));
-  auto &weight_key_tensor = graph.allTensors.at(node->inputIndex.at(5));
-  auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(6));
-  auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(10));
+  auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(kWeightQueryIndex));
+  auto &weight_key_tensor = graph.allTensors.at(node->inputIndex.at(kWeightKeyIndex));
+  auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(kWeightValueIndex));
+  auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(kWeightOutputIndex));
 
   if (!quant::TensorQuantParamsInited(*input_tensor) && quant::TensorQuantParamsInited(*weight_query_tensor) &&
       quant::TensorQuantParamsInited(*weight_key_tensor) && quant::TensorQuantParamsInited(*weight_value_tensor) &&
diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc
index 006871f0fb5..3e6ae8a22dd 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc
@@ -58,5 +58,4 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra
   }
   return RET_OK;
 }
-
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc
index 2783bb08929..40a676c3f8f 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc
@@ -16,7 +16,6 @@
 #include "tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.h"
 
 namespace mindspore::lite {
-
 bool DefaultQuantAllQuantTypeDeterminer::DetermineQuantAll(const schema::MetaGraphT &graph, schema::CNodeT *node) {
   return true;
 }
diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc
index b32b338efed..bae725fa398 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc
@@ -16,7 +16,6 @@
 #include "tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.h"
 
 namespace mindspore::lite {
-
 bool OnlyNeedInputsQuantTypeDeterminer::DetermineQuantAll(const schema::MetaGraphT &graph, schema::CNodeT *node) {
   UpdateQuantParamsNum(graph, *node);
   if (input_inited_quant_params_ == node->inputIndex.size()) {
diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc
index fea5dab604a..283ef442e03 100644
--- a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc
+++ b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc
@@ -142,5 +142,4 @@ QuantHelperRegister::~QuantHelperRegister() {
   }
   this->register_map_.clear();
 }
-
 }  // namespace mindspore::lite
diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
index 7d3b6f5be16..53ff184e15e 100644
--- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
+++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc
@@ -143,7 +143,7 @@ STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) {
 
           auto status = RET_ERROR;
           auto per_channel = true;
-          if (i == 3) {
+          if (i == kInputSize2) {
             per_channel = false;
           }
           if (type_id_ == kNumberTypeInt8) {
diff --git a/mindspore/lite/tools/converter/registry/model_parser_registry.cc b/mindspore/lite/tools/converter/registry/model_parser_registry.cc
index 93796131f3c..975df7a09e0 100644
--- a/mindspore/lite/tools/converter/registry/model_parser_registry.cc
+++ b/mindspore/lite/tools/converter/registry/model_parser_registry.cc
@@ -46,6 +46,5 @@ int ModelParserRegistry::RegParser(const FmkType fmk, ModelParserCreator creator
   instance->parsers_[fmk] = creator;
   return RET_OK;
 }
-
 }  // namespace lite
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/cropper/build_cropper_config.sh b/mindspore/lite/tools/cropper/build_cropper_config.sh
index 832c17ca656..feba431a9ab 100644
--- a/mindspore/lite/tools/cropper/build_cropper_config.sh
+++ b/mindspore/lite/tools/cropper/build_cropper_config.sh
@@ -3,7 +3,8 @@
 CURRENT_PATH=$(pwd)
 MINDSPORE_HOME="${CURRENT_PATH}/../../../.."
 echo "MINDSPORE_HOME path is ${MINDSPORE_HOME}"
-CROPPER_OUTPUT_DIR=${MINDSPORE_HOME}/mindspore/lite/build/tools/cropper
+cd "${MINDSPORE_HOME}" || exit 1
+CROPPER_OUTPUT_DIR=mindspore/lite/build/tools/cropper
 mkdir -p ${CROPPER_OUTPUT_DIR}
 MAPPING_OUTPUT_FILE_NAME_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_tmp.cfg
 CPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu.cfg
@@ -22,7 +23,7 @@ if [ ${MSLIBS_CACHE_PATH} ]; then
   FLATBUFFERS=${FLATBUFFERS_LIST[0]}
   echo "FLATBUFFERS path is ${FLATBUFFERS}"
 else
-  FLATBUFFERS=$(ls -d ${MINDSPORE_HOME}/mindspore/lite/build/.mslib/flatbuffers_*/include)
+  FLATBUFFERS=$(ls -d mindspore/lite/build/.mslib/flatbuffers_*/include)
   echo "FLATBUFFERS path is ${FLATBUFFERS}"
 fi
 
@@ -103,7 +104,6 @@ getOpsFile() {
 
 getCommonFile() {
   echo "start get common files"
-  cd "${MINDSPORE_HOME}" || exit 1
   include_h=()
   while IFS='' read -r line; do include_h+=("$line"); done < <(ls mindspore/lite/include/*.h)
   regist_include_h=()
@@ -142,33 +142,33 @@ getCommonFile() {
   done
 
   cxx_api_files=()
-  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/graph/*.cc)
-  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/model/*.cc)
-  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/tensor/*.cc)
-  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/*.cc)
+  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/graph/*.cc)
+  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/model/*.cc)
+  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/tensor/*.cc)
+  while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/*.cc)
   mindrt_files=()
-  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/*.cc)
-  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/async/*.cc)
-  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/actor/*.cc)
+  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/*.cc)
+  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/async/*.cc)
+  while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/actor/*.cc)
   src_files=()
-  while IFS='' read -r line; do src_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/*.cc)
+  while IFS='' read -r line; do src_files+=("$line"); done < <(ls mindspore/lite/src/*.cc)
   regist_files=()
-  while IFS='' read -r line; do regist_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/registry/*.cc)
+  while IFS='' read -r line; do regist_files+=("$line"); done < <(ls mindspore/lite/src/registry/*.cc)
   common_files=()
-  while IFS='' read -r line; do common_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/common/*.cc)
+  while IFS='' read -r line; do common_files+=("$line"); done < <(ls mindspore/lite/src/common/*.cc)
   runtime_files_cc=()
-  while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/*.cc)
+  while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls mindspore/lite/src/runtime/*.cc)
   # sava all assembly files
   assembly_files=()
-  while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S)
+  while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S)
   others_files_c=(
-    "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c
-    "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc
-    "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/infer_manager.cc
-    "${MINDSPORE_HOME}"/mindspore/lite/src/ops/populate/populate_register.cc
-    "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
-    "${MINDSPORE_HOME}"/mindspore/core/utils/status.cc
-    "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c
+    mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c
+    mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc
+    mindspore/lite/src/runtime/infer_manager.cc
+    mindspore/lite/src/ops/populate/populate_register.cc
+    mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c
+    mindspore/core/utils/status.cc
+    mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c
   )
   all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}"
     "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}"
@@ -238,11 +238,11 @@ getOpsFileWithNoDeepSearch() {
       local depend_file=("${ret}" "${ret_h}")
       for array_file in ${depend_file[@]}; do
         # only add existing files
-        if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}cc ]]; then
+        if [[ -e mindspore/lite/${array_file%h*}cc ]]; then
           array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
           echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
         fi
-        if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}c ]]; then
+        if [[ -e mindspore/lite/${array_file%h*}c ]]; then
           array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}')
           echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
         fi
@@ -255,7 +255,7 @@ getOpsFileWithNoDeepSearch() {
 generateOpsList() {
   echo "start generate operator list"
   ops_list=()
-  while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "${MINDSPORE_HOME}/mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}')
+  while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}')
   ops_num=$((${#ops_list[@]}))
   echo "ops nums:${ops_num}"
 }
@@ -263,15 +263,16 @@ echo "Start getting all file associations."
 generateOpsList
 getCommonFile
 wait
+sleep 1
 # get src/ops
-getOpsFile "REG_POPULATE\(PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/ops/populate" "prototype" &
-getOpsFile "REG_INFER\(.*?, PrimType_" "${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" &
+getOpsFile "REG_POPULATE\(PrimitiveType_" "mindspore/lite/src/ops/populate" "prototype" &
+getOpsFile "REG_INFER\(.*?, PrimType_" "mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" &
 # support for cpu
-getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" &
-getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" &
-getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" &
-getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" &
-getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" &
+getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" &
+getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" &
+getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" &
+getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" &
+getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" &
 wait
 sleep 1
 # remove duplicate files
@@ -280,12 +281,12 @@ chmod 444 ${CPU_MAPPING_OUTPUT_FILE}
 
 # support for gpu
 opencl_files=()
-while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/*.cc)
-while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/*.cc)
-while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/opencl/*.cc)
+while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/kernel/opencl/*.cc)
+while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/*.cc)
+while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/opencl/*.cc)
 opencl_others_files=(
-  "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc"
-  "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc"
+  "mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc"
+  "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc"
 )
 opencl_files=("${opencl_files[@]}" "${opencl_others_files[@]}")
 # shellcheck disable=SC2068
@@ -294,11 +295,11 @@ for file in ${opencl_files[@]}; do
   echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP}
 done
 
-getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" &
-getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" &
-getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" &
-getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" &
-getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" &
+getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" &
+getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" &
+getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" &
+getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" &
+getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" &
 sleep 1
 wait
 sort ${MAPPING_OUTPUT_FILE_NAME_TMP} | uniq >${GPU_MAPPING_OUTPUT_FILE}
@@ -306,10 +307,10 @@ chmod 444 ${GPU_MAPPING_OUTPUT_FILE}
 
 # support for npu
 npu_files=()
-while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/delegate.cc)
-while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/*.cc)
-while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/op/*.cc)
-while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/pass/*.cc)
+while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/delegate.cc)
+while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/*.cc)
+while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/op/*.cc)
+while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/pass/*.cc)
 
 # shellcheck disable=SC2068
 for file in ${npu_files[@]}; do
diff --git a/mindspore/lite/tools/cropper/cropper.cc b/mindspore/lite/tools/cropper/cropper.cc
index 07d6d0a4b1a..e121cd40d6c 100644
--- a/mindspore/lite/tools/cropper/cropper.cc
+++ b/mindspore/lite/tools/cropper/cropper.cc
@@ -126,7 +126,7 @@ int Cropper::GetModelOps() {
 
 int Cropper::GetModelFiles() {
   if (!this->flags_->model_file_.empty()) {
-    auto files = StringSplit(this->flags_->model_file_, std::string(kDelimComma));
+    auto files = StrSplit(this->flags_->model_file_, std::string(kDelimComma));
     for (const auto &file : files) {
       if (ValidFileSuffix(file, "ms") != RET_OK) {
         return RET_INPUT_PARAM_INVALID;
@@ -177,7 +177,7 @@ int Cropper::GetOpMatchFiles() {
   while (!in_file.eof()) {
     in_file.getline(buf, kBufSize);
     std::string buf_str = buf;
-    auto mapping = StringSplit(buf_str, kDelimComma);
+    auto mapping = StrSplit(buf_str, kDelimComma);
     if (!mapping.empty()) {
       std::string primitive = mapping.at(0);
       std::string type = mapping.at(1);
diff --git a/mindspore/lite/tools/dataset/cropper/build_lib.py b/mindspore/lite/tools/dataset/cropper/build_lib.py
index 8d34137bfcc..ba295b2245b 100644
--- a/mindspore/lite/tools/dataset/cropper/build_lib.py
+++ b/mindspore/lite/tools/dataset/cropper/build_lib.py
@@ -124,7 +124,8 @@ def main():
     if not user_ops:
         warnings.warn('No MindData Ops detected in your code...')
         remove_unused_objects([], [], all_object_files)
-        with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as _:
+        with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660),
+                       "w+") as _:
             pass
         exit(0)
 
@@ -141,7 +142,8 @@ def main():
     remove_unused_objects(final_deps, ESSENTIAL_OBJECTS, all_object_files)
 
     # write all dependencies to the file (for extracting external ones)
-    with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as fout:
+    with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660),
+                   "w+") as fout:
         fout.write("\n".join(unique_deps) + '\n')
 
 
diff --git a/mindspore/lite/tools/dataset/cropper/cropper_configure.py b/mindspore/lite/tools/dataset/cropper/cropper_configure.py
index 864928dc9d6..440b2e9dc1c 100644
--- a/mindspore/lite/tools/dataset/cropper/cropper_configure.py
+++ b/mindspore/lite/tools/dataset/cropper/cropper_configure.py
@@ -362,13 +362,15 @@ def main():
     dependencies.update(other_dependencies)
     errors += err
 
-    with open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), "w") as f:
+    with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660),
+                   "w+") as f:
         json.dump(dependencies, f)
 
-    with open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), "w") as f:
+    with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660),
+                   "w+") as f:
         json.dump(all_associations, f)
 
-    with open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), "w") as f:
+    with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), "w+") as f:
         f.write(errors)
 
 
diff --git a/mindspore/lite/tools/optimizer/common/gllo_utils.cc b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
index ebb43ea0f87..c92ccc899ee 100644
--- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc
+++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc
@@ -530,6 +530,9 @@ tensor::TensorPtr GetTensorInfo(const AnfNodePtr &node) {
   }
   auto param = node->cast<ParameterPtr>();
   MS_ASSERT(param != nullptr);
+  if (!param->has_default()) {
+    return nullptr;
+  }
   auto tensor_info = std::dynamic_pointer_cast<tensor::Tensor>(param->default_param());
   return tensor_info;
 }
@@ -1493,10 +1496,14 @@ CNodePtr GenTransposeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &inpu
   MS_ASSERT(trans_prim != nullptr);
   auto cnode = func_graph->NewCNode(trans_prim, {input_node, perm_node});
   MS_ASSERT(cnode != nullptr);
+  auto manager = Manage(func_graph);
+  MS_ASSERT(manager != nullptr);
+  auto tr = manager->Transact();
+  tr.SetEdge(cnode, 1, input_node);
+  tr.SetEdge(cnode, kInputIndexTwo, perm_node);
+  tr.Commit();
   cnode->set_fullname_with_scope(cnode_name);
-  size_t input_size = 2;
-  size_t output_size = 1;
-  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(input_size, output_size);
+  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(kInputSizeTwo, 1);
   auto trans_insert_prim = GetValueNode<PrimitivePtr>(cnode->input(0));
   trans_insert_prim->AddAttr("quant_params", quant_params_holder);
   return cnode;
diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
index dbf191e73fe..d9390489d95 100644
--- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
+++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc
@@ -71,6 +71,10 @@ STATUS DeleteRedundantTranspose::DeleteNot4DTranspose(const FuncGraphPtr &func_g
     }
     if (!shape.empty() && shape.size() != perm.size()) {
       MS_LOG(DEBUG) << "transpose node need to be deleted.";
+      if (UpdateNodeFormat(func_graph, cnode) != lite::RET_OK) {
+        MS_LOG(ERROR) << "update cnode format failed.";
+        return lite::RET_ERROR;
+      }
       manager->Replace(node, cnode->input(1));
     }
   }
@@ -129,6 +133,33 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph
   return lite::RET_OK;
 }
 
+STATUS DeleteRedundantTranspose::UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
+  MS_ASSERT(func_graph != nullptr && cnode != nullptr);
+  auto manager = func_graph->manager();
+  MS_ASSERT(manager != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  MS_ASSERT(prim != nullptr);
+  if (prim->GetAttr(ops::kFormat) == nullptr) {
+    return lite::RET_OK;
+  }
+  auto format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
+  auto node_users = manager->node_users()[cnode];
+  for (auto &node_user : node_users) {
+    if (node_user.second != 1) {
+      continue;
+    }
+    if (!utils::isa<CNode>(node_user.first)) {
+      MS_LOG(ERROR) << "post node is not cnode, which is invalid.";
+      return lite::RET_ERROR;
+    }
+    auto post_cnode = node_user.first->cast<CNodePtr>();
+    auto post_prim = GetValueNode<PrimitivePtr>(post_cnode->input(0));
+    MS_ASSERT(post_prim != nullptr);
+    post_prim->AddAttr(ops::kFormat, MakeValue<int64_t>(format));
+  }
+  return lite::RET_OK;
+}
+
 bool DeleteRedundantTranspose::Run(const FuncGraphPtr &func_graph) {
   MS_ASSERT(func_graph != nullptr);
   auto manager = Manage(func_graph, true);
diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
index 41894313d44..71d89e14555 100644
--- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
+++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h
@@ -31,6 +31,7 @@ class DeleteRedundantTranspose : public Pass {
  private:
   STATUS DeleteNot4DTranspose(const FuncGraphPtr &func_graph);
   STATUS TransTransFusion(const FuncGraphPtr &func_graph);
+  STATUS UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &node);
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.cc b/mindspore/lite/tools/optimizer/format/to_format_base.cc
index ec39ce5739b..5e46a31c170 100644
--- a/mindspore/lite/tools/optimizer/format/to_format_base.cc
+++ b/mindspore/lite/tools/optimizer/format/to_format_base.cc
@@ -15,10 +15,12 @@
  */
 
 #include "tools/optimizer/format/to_format_base.h"
+#include <set>
 #include "ops/op_utils.h"
 #include "src/common/common.h"
 #include "src/common/utils.h"
 #include "tools/common/tensor_util.h"
+#include "tools/converter/parser/parser_utils.h"
 
 using mindspore::lite::NHWC_SHAPE;
 namespace mindspore {
@@ -67,8 +69,17 @@ STATUS ToFormatBase::GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr
   return lite::RET_OK;
 }
 
-STATUS ToFormatBase::ModifyCNodeAbstract(const CNodePtr &cnode) {
+STATUS ToFormatBase::ModifyCNode(const CNodePtr &cnode) {
   MS_ASSERT(cnode != nullptr);
+  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
+  if (prim == nullptr) {
+    MS_LOG(ERROR) << "current node's prim is nullptr, " << cnode->fullname_with_scope();
+    return lite::RET_ERROR;
+  }
+  auto insert_pos = sensitive_ops_[prim->name()];
+  if (insert_pos.empty() || std::find(insert_pos.begin(), insert_pos.end(), 1) != insert_pos.end()) {
+    prim->AddAttr(ops::kFormat, MakeValue<int64_t>(format_));
+  }
   auto abstract_base = cnode->abstract();
   std::vector<AbstractBasePtr> abstracts;
   if (utils::isa<abstract::AbstractTuple>(abstract_base)) {
@@ -216,7 +227,10 @@ STATUS ToFormatBase::HandleGraphInput(const FuncGraphPtr &func_graph) {
 STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode) {
   MS_ASSERT(func_graph != nullptr && cnode != nullptr);
   opt::TransTypePair trans_info;
-  GetTransNodeFormatType(cnode, &trans_info);
+  if (GetTransNodeFormatType(cnode, &trans_info) != lite::RET_OK) {
+    MS_LOG(ERROR) << "obtain node's transferring format type failed, " << cnode->fullname_with_scope();
+    return lite::RET_ERROR;
+  }
   if (trans_info.pre_ == opt::kNONE || trans_info.post_ == opt::kNONE) {
     return lite::RET_NO_CHANGE;
   }
@@ -229,7 +243,7 @@ STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNode
   if (opt::CheckPrimitiveType(cnode, prim::kPrimAdam) || opt::CheckPrimitiveType(cnode, prim::kPrimSGD)) {
     return lite::RET_OK;
   }
-  if (ModifyCNodeAbstract(cnode) != lite::RET_OK) {
+  if (ModifyCNode(cnode) != lite::RET_OK) {
     MS_LOG(ERROR) << "adjust cnode's output shape failed, " << cnode->fullname_with_scope();
     return lite::RET_ERROR;
   }
@@ -281,6 +295,59 @@ bool ToFormatBase::BasicProcess(const FuncGraphPtr &func_graph, bool main_graph)
   return true;
 }
 
+STATUS ToFormatBase::ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set<AnfNodePtr> *has_visited) {
+  MS_ASSERT(graph != nullptr && has_visited != nullptr);
+  auto node_list = TopoSort(graph->get_return());
+  schema::Format src_format = schema::Format_NUM_OF_FORMAT;
+  schema::Format dst_format = schema::Format_NUM_OF_FORMAT;
+  for (auto &node : node_list) {
+    if (!utils::isa<CNodePtr>(node)) {
+      continue;
+    }
+    auto cnode = node->cast<CNodePtr>();
+    if (CheckPrimitiveType(node, prim::kPrimIf) || CheckPrimitiveType(node, prim::kPrimWhile)) {
+      auto sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(1));
+      if (sub_func_graph == nullptr) {
+        lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
+        return false;
+      }
+      if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) {
+        MS_LOG(ERROR) << "transform conv weight format failed.";
+        return lite::RET_ERROR;
+      }
+      sub_func_graph = GetValueNode<FuncGraphPtr>(cnode->input(kInputIndexTwo));
+      if (sub_func_graph == nullptr) {
+        lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR);
+        return false;
+      }
+      if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) {
+        MS_LOG(ERROR) << "transform conv weight format failed.";
+        return lite::RET_ERROR;
+      }
+      continue;
+    }
+    if (!CheckPrimitiveType(node, prim::kPrimConv2DFusion) &&
+        !CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) &&
+        !CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) {
+      continue;
+    }
+    if (has_visited->find(node) != has_visited->end()) {
+      continue;
+    }
+    has_visited->insert(node);
+    if (DecideConvWeightSrcAndDstFormat(cnode, &src_format, &dst_format) != lite::RET_OK) {
+      MS_LOG(ERROR) << "weight's src format and dst format get failed.";
+      return lite::RET_ERROR;
+    }
+    auto status = lite::UnifyConvWeightFormat(graph, cnode, src_format, dst_format, has_visited);
+    if (status != lite::RET_OK) {
+      MS_LOG(ERROR) << "unify conv weight failed, current node name is " << cnode->fullname_with_scope();
+      return status;
+    }
+  }
+  return lite::RET_OK;
+}
+
 bool ToFormatBase::Run(const FuncGraphPtr &func_graph) {
   MS_ASSERT(func_graph != nullptr);
   if (format_ != mindspore::NHWC && format_ != mindspore::NCHW) {
@@ -297,6 +364,12 @@ bool ToFormatBase::Run(const FuncGraphPtr &func_graph) {
     MS_LOG(ERROR) << "create NodeInferShape object failed.";
     return false;
   }
+  std::set<AnfNodePtr> has_visited;
+  auto status = ConvWeightFormatTrans(func_graph, &has_visited);
+  if (status != lite::RET_OK) {
+    MS_LOG(ERROR) << "Conv2D weight FormatTrans failed: " << status;
+    return false;
+  }
   SetSensitiveOps();
   auto node_list = TopoSort(func_graph->get_return());
   for (auto &node : node_list) {
diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.h b/mindspore/lite/tools/optimizer/format/to_format_base.h
index fc1aeea487e..03a214697f5 100644
--- a/mindspore/lite/tools/optimizer/format/to_format_base.h
+++ b/mindspore/lite/tools/optimizer/format/to_format_base.h
@@ -18,6 +18,7 @@
 #define MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_TO_FORMAT_BASE_H_
 
 #include <memory>
+#include <set>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -45,13 +46,16 @@ class ToFormatBase : public Pass {
   STATUS InsertPreTransNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std::vector<int> &perm);
   STATUS GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, std::vector<int> perm, bool before,
                      size_t index = 0);
-  STATUS ModifyCNodeAbstract(const CNodePtr &cnode);
+  STATUS ModifyCNode(const CNodePtr &cnode);
+  STATUS ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set<AnfNodePtr> *has_visited);
 
  protected:
-  virtual void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0;
+  virtual STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0;
   virtual void SetSensitiveOps() { sensitive_ops_ = opt::GetNHWCOpMap(); }
   virtual bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) { return true; }
   virtual bool DecideWhetherInferShapeForNewNode() { return true; }
+  virtual STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                                 schema::Format *dst_format) = 0;
   FmkType fmk_type_{lite::converter::FmkType_MS};
   bool train_flag_{false};
   mindspore::Format format_{mindspore::NHWC};
diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
index dc5b23f37a5..b7d853e5e13 100644
--- a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
+++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc
@@ -18,16 +18,36 @@
 
 namespace mindspore {
 namespace opt {
-
-void ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
+STATUS ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
   MS_ASSERT(cnode != nullptr);
   auto prim_node = cnode->input(0);
   auto prim = GetValueNode<PrimitivePtr>(prim_node);
   MS_ASSERT(prim != nullptr);
+  if (prim->GetAttr(ops::kFormat) != nullptr) {
+    auto node_format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
+    if (node_format == mindspore::NCHW) {
+      MS_LOG(DEBUG) << "node's format has been nchw, no need to transfer, " << cnode->fullname_with_scope();
+      return lite::RET_OK;
+    }
+    if (node_format != mindspore::NHWC) {
+      MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format
+                    << ", node name is " << cnode->fullname_with_scope();
+      return lite::RET_ERROR;
+    }
+  }
   if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) {
     trans_info->pre_ = opt::kNHWC2NCHW;
     trans_info->post_ = opt::kNCHW2NHWC;
   }
+  return lite::RET_OK;
+}
+
+STATUS ToNCHWFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                                     schema::Format *dst_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr);
+  *src_format = schema::Format_KHWC;
+  *dst_format = schema::Format_KCHW;
+  return lite::RET_OK;
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.h b/mindspore/lite/tools/optimizer/format/to_nchw_format.h
index 43de093698a..d2e2d000ea6 100644
--- a/mindspore/lite/tools/optimizer/format/to_nchw_format.h
+++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.h
@@ -30,7 +30,9 @@ class ToNCHWFormat : public ToFormatBase {
   ~ToNCHWFormat() = default;
 
  private:
-  void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
+  STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
+  STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                         schema::Format *dst_format) override;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
index 7bf2c613792..33f786772db 100644
--- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
+++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc
@@ -18,15 +18,36 @@
 
 namespace mindspore {
 namespace opt {
-void ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
+STATUS ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) {
   MS_ASSERT(cnode != nullptr);
   auto prim_node = cnode->input(0);
   auto prim = GetValueNode<PrimitivePtr>(prim_node);
   MS_ASSERT(prim != nullptr);
+  if (prim->GetAttr(ops::kFormat) != nullptr) {
+    auto node_format = GetValue<int64_t>(prim->GetAttr(ops::kFormat));
+    if (node_format == mindspore::NHWC) {
+      MS_LOG(DEBUG) << "node's format has been nhwc, no need to transfer, " << cnode->fullname_with_scope();
+      return lite::RET_OK;
+    }
+    if (node_format != mindspore::NCHW) {
+      MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format
+                    << ", node name is " << cnode->fullname_with_scope();
+      return lite::RET_ERROR;
+    }
+  }
   if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) {
     trans_info->pre_ = opt::kNCHW2NHWC;
     trans_info->post_ = opt::kNHWC2NCHW;
   }
+  return lite::RET_OK;
+}
+
+STATUS ToNHWCFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                                     schema::Format *dst_format) {
+  MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr);
+  *src_format = schema::Format_KCHW;
+  *dst_format = schema::Format_KHWC;
+  return lite::RET_OK;
 }
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
index c9c36fff4d4..d16b861b6fc 100644
--- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
+++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h
@@ -28,7 +28,9 @@ class ToNHWCFormat : public ToFormatBase {
   ~ToNHWCFormat() = default;
 
  private:
-  void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
+  STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override;
+  STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format,
+                                         schema::Format *dst_format) override;
 };
 }  // namespace opt
 }  // namespace mindspore
diff --git a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
index 795e2845f98..05ce9dd9846 100644
--- a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc
@@ -148,7 +148,6 @@ std::shared_ptr<ops::MatMul> BuildMatMulPrim(const CNodePtr &stack_cnode) {
   matmul_cvalue->AddAttr("quant_params", quant_params_holder);
   return matmul_cvalue;
 }
-
 }  // namespace
 const BaseRef BatchMatMulFusion::DefinePattern() const {
   auto pack_var = std::make_shared<CondVar>(IsStackNode);
diff --git a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
index d2cf34f00de..1163e76ad20 100644
--- a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc
@@ -55,6 +55,10 @@ bool IsCommonConvNode(const BaseRef &n) {
 }
 STATUS GenNewConvBias(const ParameterPtr &down_bias_node, const ParameterPtr &down_weight_node,
                       const ParameterPtr &up_bias_node, const ParameterPtr &new_bias_node) {
+  if (down_weight_node == nullptr || up_bias_node == nullptr || new_bias_node == nullptr) {
+    MS_LOG(ERROR) << "Input  down_weight_node or up_bias_node or new_bias_node is nullptr";
+    return RET_FAILED;
+  }
   float *down_bias_data = nullptr;
   if (down_bias_node != nullptr) {
     auto down_bias_param = std::dynamic_pointer_cast<tensor::Tensor>(down_bias_node->default_param());
diff --git a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
index eb48e8c14c8..bf37a8395d0 100644
--- a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc
@@ -21,6 +21,7 @@
 namespace mindspore::opt {
 namespace {
 const auto &p1 = std::placeholders::_1;
+const size_t kWeightShapeSize = 2;
 }  // namespace
 
 MultiHeadAttentionFusion::MultiHeadAttentionFusion(const string &name, bool multigraph)
@@ -244,7 +245,8 @@ std::shared_ptr<ops::Attention> MultiHeadAttentionFusion::BuildAttentionPrim(con
     MS_LOG(ERROR) << "Get reshape k data failed";
     return nullptr;
   }
-  if (shape_k.size() < 2 || shape_v.size() < 2 || shape_k.at(shape_k.size() - 2) != shape_v.at(shape_v.size() - 2)) {
+  if (shape_k.size() < kWeightShapeSize || shape_v.size() < kWeightShapeSize ||
+      shape_k.at(shape_k.size() - kWeightShapeSize) != shape_v.at(shape_v.size() - kWeightShapeSize)) {
     MS_LOG(ERROR) << "Shape k or shape v is invalid.";
     return nullptr;
   }
diff --git a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
index 0e01129ec9c..619f7a5d3f5 100644
--- a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
+++ b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc
@@ -23,6 +23,14 @@
 namespace mindspore::opt {
 namespace {
 const auto &p1 = std::placeholders::_1;
+const size_t kWeightQueryIndex = 4;
+const size_t kWeightKeyIndex = 5;
+const size_t kWeightValueIndex = 6;
+const size_t kWeightPosIndex = 7;
+const size_t kWeightOutputIndex = 10;
+const size_t kStackParamSize = 2;
+const size_t kInputSize = 16;
+const size_t kOutputSize = 2;
 }  // namespace
 
 TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const string &name, bool multigraph)
@@ -37,7 +45,7 @@ TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const
   output_prim_ = std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimFullConnection));
   pos_prim_ = std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimFullConnection));
 
-  for (size_t i = 0; i < 2; i++) {
+  for (size_t i = 0; i < kStackParamSize; i++) {
     query_stack_params_.emplace_back(std::make_shared<Var>());
     key_stack_params_.emplace_back(std::make_shared<Var>());
     value_stack_params_.emplace_back(std::make_shared<Var>());
@@ -157,38 +165,38 @@ CNodePtr TfliteRelPosMultiHeadAttentionFusion::CreateRelPosMultiHeadAttentionNod
     MS_LOG(ERROR) << "Build attention primitive failed.";
     return nullptr;
   }
-  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(16, 1);
+  auto quant_params_holder = std::make_shared<lite::QuantParamHolder>(kInputSize, kOutputSize);
   auto query_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[query_prim_]));
   auto query_quant_param_holder = query_prim->GetAttr("quant_params");
   if (query_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      4, query_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      kWeightQueryIndex, query_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
   auto key_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[key_prim_]));
   auto key_quant_param_holder = key_prim->GetAttr("quant_params");
   if (key_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      5, key_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      kWeightKeyIndex, key_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
   auto value_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[value_prim_]));
   auto value_quant_param_holder = value_prim->GetAttr("quant_params");
   if (value_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      6, value_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      kWeightValueIndex, value_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto pos_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[pos_prim_]));
   auto pos_quant_param_holder = pos_prim->GetAttr("quant_params");
   if (pos_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      7, pos_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      kWeightPosIndex, pos_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   auto output_prim = GetValueNode<PrimitivePtr>(utils::cast<AnfNodePtr>((*equiv)[output_prim_]));
   auto output_quant_param_holder = output_prim->GetAttr("quant_params");
   if (output_quant_param_holder != nullptr) {
     quant_params_holder->set_input_quant_param(
-      10, output_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
+      kWeightOutputIndex, output_quant_param_holder->cast<lite::QuantParamHolderPtr>()->get_input_quant_params().at(1));
   }
 
   attention_prim->AddAttr("quant_params", quant_params_holder);
@@ -273,7 +281,7 @@ const VectorRef TfliteRelPosMultiHeadAttentionFusion::DefineProcessInputPattern(
     result = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimAddFusion)), result, bias});
   }
 
-  MS_ASSERT(stack_params.size() == 2);
+  MS_ASSERT(stack_params.size() == kStackParamSize);
   auto stack = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimStack)), std::make_shared<Var>(),
                           std::make_shared<Var>(), stack_params.at(0), stack_params.at(1)});
   result = VectorRef({std::make_shared<CondVar>(std::bind(IsOpType, p1, prim::kPrimReshape)), result, stack});
diff --git a/mindspore/lite/tools/optimizer/graph/node_infershape.cc b/mindspore/lite/tools/optimizer/graph/node_infershape.cc
index 6e11780ff7b..c34d8bc8c56 100644
--- a/mindspore/lite/tools/optimizer/graph/node_infershape.cc
+++ b/mindspore/lite/tools/optimizer/graph/node_infershape.cc
@@ -43,20 +43,6 @@ void FreeTensors(std::vector<lite::Tensor *> *tensors) {
   tensors->resize(0);
 }
 
-void SetConvWeightFormat(const CNodePtr &cnode, const std::vector<lite::Tensor *> &inputs) {
-  MS_ASSERT(cnode != nullptr);
-  if (!CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) &&
-      !CheckPrimitiveType(cnode, kPrimConv2DBackpropInputFusion) &&
-      !CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) {
-    return;
-  }
-  auto prim = GetValueNode<PrimitivePtr>(cnode->input(0));
-  MS_ASSERT(prim != nullptr);
-  if (prim->GetAttr(ops::kFormat) != nullptr && inputs.size() > 1) {
-    inputs[1]->set_format(static_cast<mindspore::Format>(GetValue<int64_t>(prim->GetAttr(ops::kFormat))));
-  }
-}
-
 void RectifyFormat(const CNodePtr &cnode, const std::vector<lite::Tensor *> &inputs, FmkType fmk_type) {
   MS_ASSERT(cnode != nullptr);
   if (fmk_type != lite::converter::FmkType_ONNX) {
@@ -114,7 +100,6 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) {
     MS_LOG(ERROR) << "get inputs failed.";
     return lite::RET_ERROR;
   }
-  SetConvWeightFormat(cnode, inputs);
   if (GetCNodeOutputTensors(cnode, &outputs) != lite::RET_OK) {
     FreeTensors(&inputs);
     FreeTensors(&outputs);
diff --git a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
index 0881ec0af45..ad862310a21 100644
--- a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
+++ b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc
@@ -401,6 +401,10 @@ bool SlicePreposePass::SiblingsAreSameSlice(const FuncGraphPtr &graph, const Nod
 
   auto first_slice_cnode = slices.front();
   auto first_slice_node = GetSlice(first_slice_cnode);
+  if (first_slice_node == nullptr) {
+    MS_LOG(ERROR) << "GetSlice return nullptr";
+    return false;
+  }
   auto first_axes = first_slice_node->get_axes();
   auto first_begin = GetSliceBeginAndSize(first_slice_cnode, SliceBeginIndex);
   auto first_size = GetSliceBeginAndSize(first_slice_cnode, SliceSizeIndex);
diff --git a/mindspore/nn/acc/base.py b/mindspore/nn/acc/base.py
index a0be25582d6..b8c5587d7c3 100644
--- a/mindspore/nn/acc/base.py
+++ b/mindspore/nn/acc/base.py
@@ -133,56 +133,62 @@ class ParameterProcess:
 
         if isinstance(origin_params_copy[0], Parameter):
             group_params = [{"params": parameters}]
-        else:
-            group_params = []
-            params_name = [param.name for param in parameters]
-            new_params_count = copy.deepcopy(params_name)
-            new_params_clone = {}
-            max_key_number = 0
-            for group_param in origin_params_copy:
-                if 'order_params' in group_param.keys():
-                    new_group_param = copy.deepcopy(group_param)
-                    new_group_param['order_params'] = parameters
-                    group_params.append(new_group_param)
-                    continue
-                params_value = []
-                for param in group_param['params']:
-                    if param.name in params_name:
-                        index = params_name.index(param.name)
-                        params_value.append(parameters[index])
-                        new_params_count.remove(param.name)
+            return group_params
+
+        group_params = []
+        params_name = [param.name for param in parameters]
+        new_params_count = copy.deepcopy(params_name)
+        new_params_clone = {}
+        max_key_number = 0
+        for group_param in origin_params_copy:
+            if 'order_params' in group_param.keys():
                 new_group_param = copy.deepcopy(group_param)
-                new_group_param['params'] = params_value
+                new_group_param['order_params'] = parameters
                 group_params.append(new_group_param)
-                if len(group_param.keys()) > max_key_number:
-                    max_key_number = len(group_param.keys())
-                    new_params_clone = copy.deepcopy(group_param)
-            if new_params_count:
-                params_value = []
-                for param in new_params_count:
-                    index = params_name.index(param)
+                continue
+            params_value = []
+            for param in group_param['params']:
+                if param.name in params_name:
+                    index = params_name.index(param.name)
                     params_value.append(parameters[index])
-                if new_params_clone:
-                    new_params_clone['params'] = params_value
-                    group_params.append(new_params_clone)
-                else:
-                    group_params.append({"params": params_value})
+                    new_params_count.remove(param.name)
+            new_group_param = copy.deepcopy(group_param)
+            new_group_param['params'] = params_value
+            group_params.append(new_group_param)
+            if len(group_param.keys()) > max_key_number:
+                max_key_number = len(group_param.keys())
+                new_params_clone = copy.deepcopy(group_param)
+        if new_params_count:
+            params_value = []
+            for param in new_params_count:
+                index = params_name.index(param)
+                params_value.append(parameters[index])
+            if new_params_clone:
+                new_params_clone['params'] = params_value
+                group_params.append(new_params_clone)
+            else:
+                group_params.append({"params": params_value})
         return group_params
 
+
 _gradient_accumulation_op = C.MultitypeFuncGraph("gradient_accumulation_op")
 
+
 @_gradient_accumulation_op.register("Int64", "Tensor", "Tensor")
 def _cumulative_grad(accumulation_step, cumulative_grad, grad):
     """Apply gradient accumulation to cumulative grad."""
     return P.AssignAdd()(cumulative_grad, grad / accumulation_step)
 
+
 _gradient_clear_op = C.MultitypeFuncGraph("gradient_clear_op")
 
+
 @_gradient_clear_op.register("Tensor")
 def  _clear_grad(cumulative_grad):
     zero_grad = P.ZerosLike()(cumulative_grad)
     return F.assign(cumulative_grad, zero_grad)
 
+
 class GradientAccumulation(Cell):
     """
     After accumulating the gradients of multiple steps, call to optimize its update.
diff --git a/mindspore/nn/acc/grad_freeze.py b/mindspore/nn/acc/grad_freeze.py
index dd8835953ec..8e84d4f12ab 100644
--- a/mindspore/nn/acc/grad_freeze.py
+++ b/mindspore/nn/acc/grad_freeze.py
@@ -243,6 +243,7 @@ class GradientFreeze:
 
         return network, optimizer
 
+
 def freeze_cell(reducer_flag, network, optimizer, sens, grad, use_grad_accumulation, mean=None, degree=None,
                 max_accumulation_step=1):
     """Provide freeze network cell."""
diff --git a/mindspore/nn/acc/less_batch_normalization.py b/mindspore/nn/acc/less_batch_normalization.py
index c2c6683afef..d1d35b4a94d 100644
--- a/mindspore/nn/acc/less_batch_normalization.py
+++ b/mindspore/nn/acc/less_batch_normalization.py
@@ -81,6 +81,7 @@ class CommonHeadLastFN(Cell):
         x = self.multiplier * x
         return x
 
+
 class LessBN(Cell):
     """
     Reduce the number of BN automatically to improve the network performance
diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py
index 46ed2ce34d5..8ab61f3a042 100755
--- a/mindspore/nn/cell.py
+++ b/mindspore/nn/cell.py
@@ -1247,17 +1247,18 @@ class Cell(Cell_):
         for param in params:
             param.set_param_ps(init_in_server)
 
-    def set_param_fl(self, push_to_server=False, pull_from_server=False):
+    def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True):
         """
         Set the way of parameter and server interaction.
 
         Args:
             push_to_server (bool): Whether the parameter should be pushed to server. Default: False.
             pull_from_server (bool): Whether the parameter should be pulled from server. Default: False.
+            requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True.
         """
         params = self.parameters_and_names()
         for param in params:
-            param[1].set_param_fl(push_to_server, pull_from_server)
+            param[1].set_param_fl(push_to_server, pull_from_server, requires_aggr)
 
     def set_comm_fusion(self, fusion_type, recurse=True):
         """
@@ -1403,8 +1404,7 @@ class GraphCell(Cell):
     Examples:
         >>> import numpy as np
         >>> import mindspore.nn as nn
-        >>> from mindspore import Tensor
-        >>> from mindspore.train import export, load
+        >>> from mindspore import Tensor, export, load
         >>>
         >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones")
         >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32))
diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py
index b947e5eb873..634d3d0ee07 100644
--- a/mindspore/nn/layer/activation.py
+++ b/mindspore/nn/layer/activation.py
@@ -40,6 +40,7 @@ __all__ = ['Softmax',
            'ELU',
            'LogSigmoid',
            'SoftShrink',
+           'HShrink',
            ]
 
 
@@ -803,6 +804,51 @@ class SoftShrink(Cell):
         output = self.softshrink(input_x)
         return output
 
+class HShrink(Cell):
+    r"""
+    Applies the hard shrinkage function element-wise, each element complies the follow function:
+
+    .. math::
+        \text{HardShrink}(x) =
+        \begin{cases}
+        x, & \text{ if } x > \lambda \\
+        x, & \text{ if } x < -\lambda \\
+        0, & \text{ otherwise }
+        \end{cases}
+
+    Args:
+        lambd (float): The value for the HardShrink formulation. Default: 0.5
+
+    Inputs:
+        - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32.
+
+    Outputs:
+        Tensor, the same shape and data type as the input.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Raises:
+        TypeError: If `lambd` is not a float.
+        TypeError: If dtype of `input_x` is neither float16 nor float32.
+
+    Examples:
+        >>> input_x = Tensor(np.array([[ 0.5,  1,  2.0],[0.0533,0.0776,-2.1233]]),mstype.float32)
+        >>> hshrink = nn.HShrink()
+        >>> output = hshrink(input_x)
+        >>> print(output)
+        [[ 0.      1.      2.    ]
+        [ 0.      0.     -2.1233]]
+    """
+
+    def __init__(self, lambd=0.5):
+        super(HShrink, self).__init__()
+        self.hshrink = P.HShrink(lambd)
+
+    def construct(self, input_x):
+        return self.hshrink(input_x)
+
+
 _activation = {
     'softmax': Softmax,
     'logsoftmax': LogSoftmax,
@@ -819,6 +865,7 @@ _activation = {
     'hsigmoid': HSigmoid,
     'logsigmoid': LogSigmoid,
     'softshrink': SoftShrink,
+    'hshrink': HShrink,
 }
 
 
diff --git a/mindspore/nn/loss/__init__.py b/mindspore/nn/loss/__init__.py
index d0c87236362..1bd4bc7714d 100644
--- a/mindspore/nn/loss/__init__.py
+++ b/mindspore/nn/loss/__init__.py
@@ -19,13 +19,13 @@ Cells of loss function. Loss function in machine learning is the target of the m
 It shows how well the model works on a dataset and the optimization target which the optimizer is searching.
 """
 
-from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, FocalLoss,\
+from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, SoftMarginLoss, FocalLoss,\
     SoftmaxCrossEntropyWithLogits, BCELoss, CosineEmbeddingLoss, \
     SampledSoftmaxLoss, DiceLoss, BCEWithLogitsLoss, MultiClassDiceLoss,\
     RMSELoss, MAELoss
 
 
-__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'FocalLoss',
+__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'SoftMarginLoss', 'FocalLoss',
            'SoftmaxCrossEntropyWithLogits', 'BCELoss', 'BCEWithLogitsLoss',
            'CosineEmbeddingLoss', 'SampledSoftmaxLoss', 'DiceLoss', 'MultiClassDiceLoss',
            'RMSELoss', 'MAELoss']
diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py
index bbf0adfe61f..29acf71030f 100644
--- a/mindspore/nn/loss/loss.py
+++ b/mindspore/nn/loss/loss.py
@@ -436,6 +436,53 @@ class SmoothL1Loss(LossBase):
         return self.smooth_l1_loss(base, target)
 
 
+class SoftMarginLoss(LossBase):
+    r"""
+    A loss class for two-class classification problems.
+
+    SoftMarginLoss creates a criterion that optimizes a two-class classification
+    logistic loss between input tensor :math:`x` and target tensor :math:`y`
+    (containing 1 or -1).
+
+    .. math::
+        \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
+
+    Args:
+        reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
+
+    Inputs:
+        - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
+        - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
+
+    Outputs:
+        Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`.
+        Otherwise, a scalar value will be returned.
+
+    Raises:
+        TypeError: If `logits` or `labels` is not a Tensor.
+        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
+        ValueError: If shape of `logits` is not the same as `labels`.
+        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> loss = ops.SoftMarginLoss()
+        >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
+        >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
+        >>> output = loss(logits, labels)
+        >>> print(output)
+        0.6764238
+    """
+    def __init__(self, reduction='mean'):
+        super(SoftMarginLoss, self).__init__()
+        self.soft_margin_loss = P.SoftMarginLoss(reduction)
+
+    def construct(self, base, target):
+        return self.soft_margin_loss(base, target)
+
+
 class SoftmaxCrossEntropyWithLogits(LossBase):
     r"""
     Computes softmax cross entropy between logits and labels.
@@ -1282,10 +1329,10 @@ class FocalLoss(LossBase):
                 convert_weight = self.squeeze(convert_weight)
             log_probability = log_probability * convert_weight
 
-        weight = F.pows(-probability + 1.0, self.gamma)
+        weight = F.pows(-1 * probability + 1.0, self.gamma)
         if target.shape[1] == 1:
-            loss = (-weight * log_probability).mean(axis=1)
+            loss = (-1 * weight * log_probability).mean(axis=1)
         else:
-            loss = (-weight * targets * log_probability).mean(axis=-1)
+            loss = (-1 * weight * targets * log_probability).mean(axis=-1)
 
         return self.get_loss(loss)
diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py
index f10e9e640f1..07f363da3d0 100644
--- a/mindspore/nn/wrap/grad_reducer.py
+++ b/mindspore/nn/wrap/grad_reducer.py
@@ -101,7 +101,6 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra
 
 
 @reduce_opt.register("Tensor", "Bool", "Bool", "Tensor")
-
 def _tensors_allreduce_post(degree, mean, allreduce_filter, grad):
     """
     Apply allreduce on gradient in PyNative mode.
@@ -125,7 +124,6 @@ def _tensors_allreduce_post(degree, mean, allreduce_filter, grad):
 
 
 @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "Tensor", "Bool")
-
 def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter):
     """
     Apply allreduce on gradient.
@@ -154,7 +152,6 @@ def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter,
 
 
 @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor")
-
 def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad):
     """
     Apply allgather on gradient instead of allreduce for sparse feature.
@@ -181,7 +178,6 @@ def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce
 
 
 @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor", "Bool")
-
 def _tensors_allreduce_with_sparse_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter):
     """
     Apply allgather on gradient instead of allreduce for sparse feature.
@@ -215,7 +211,6 @@ _get_datatype = C.MultitypeFuncGraph("_get_datatype")
 
 
 @_get_datatype.register("Tensor")
-
 def _tensors_get_datatype(grad):
     """
     Acquire gradient datatype.
@@ -230,7 +225,6 @@ def _tensors_get_datatype(grad):
 
 
 @_get_datatype.register("RowTensor")
-
 def _tensors_get_datatype_with_sparse(grad):
     """
     Acquire gradient datatype.
@@ -248,7 +242,6 @@ _cast_datatype = C.MultitypeFuncGraph("_cast_datatype")
 
 
 @_cast_datatype.register("TypeType", "Tensor")
-
 def _tensors_cast_datatype(datatype, grad):
     """
     Cast gradient to datatype.
@@ -264,7 +257,6 @@ def _tensors_cast_datatype(datatype, grad):
 
 
 @_cast_datatype.register("TypeType", "RowTensor")
-
 def _tensors_cast_datatype_with_sparse(datatype, grad):
     """
     Cast gradient to datatype.
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py
index 735ef2edcec..eeecc30d60c 100644
--- a/mindspore/nn/wrap/loss_scale.py
+++ b/mindspore/nn/wrap/loss_scale.py
@@ -30,12 +30,11 @@ reciprocal = P.Reciprocal()
 
 
 @_grad_scale.register("Tensor", "Tensor")
-
 def tensor_grad_scale(scale, grad):
     return grad * F.cast(reciprocal(scale), F.dtype(grad))
 
-@_grad_scale.register("Tensor", "RowTensor")
 
+@_grad_scale.register("Tensor", "RowTensor")
 def tensor_grad_scale_row_tensor(scale, grad):
     return RowTensor(grad.indices,
                      grad.values * F.cast(reciprocal(scale), F.dtype(grad.values)),
@@ -46,12 +45,11 @@ grad_overflow = P.FloatStatus()
 
 
 @_grad_overflow.register("Tensor")
-
 def _tensor_grad_overflow(grad):
     return grad_overflow(grad)
 
-@_grad_overflow.register("RowTensor")
 
+@_grad_overflow.register("RowTensor")
 def _tensor_grad_overflow_row_tensor(grad):
     return grad_overflow(grad.values)
 
@@ -88,15 +86,14 @@ class DynamicLossScaleUpdateCell(Cell):
     Examples:
         >>> import numpy as np
         >>> from mindspore import Tensor, Parameter, nn
-        >>> from mindspore.ops import operations as P
-        >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell
+        >>> import mindspore.ops as ops
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self, in_features, out_features):
         ...         super(Net, self).__init__()
         ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
         ...                                 name='weight')
-        ...         self.matmul = P.MatMul()
+        ...         self.matmul = ops.MatMul()
         ...
         ...     def construct(self, x):
         ...         output = self.matmul(x, self.weight)
@@ -106,7 +103,7 @@ class DynamicLossScaleUpdateCell(Cell):
         >>> net = Net(in_features, out_features)
         >>> loss = nn.MSELoss()
         >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
-        >>> net_with_loss = WithLossCell(net, loss)
+        >>> net_with_loss = nn.WithLossCell(net, loss)
         >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)
         >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
         >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32)
@@ -179,15 +176,14 @@ class FixedLossScaleUpdateCell(Cell):
     Examples:
         >>> import numpy as np
         >>> from mindspore import Tensor, Parameter, nn
-        >>> from mindspore.ops import operations as P
-        >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell
+        >>> from mindspore.ops as ops
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self, in_features, out_features):
         ...         super(Net, self).__init__()
         ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
         ...                                 name='weight')
-        ...         self.matmul = P.MatMul()
+        ...         self.matmul = ops.MatMul()
         ...
         ...     def construct(self, x):
         ...         output = self.matmul(x, self.weight)
@@ -197,7 +193,7 @@ class FixedLossScaleUpdateCell(Cell):
         >>> net = Net(in_features, out_features)
         >>> loss = nn.MSELoss()
         >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
-        >>> net_with_loss = WithLossCell(net, loss)
+        >>> net_with_loss = nn.WithLossCell(net, loss)
         >>> manager = nn.FixedLossScaleUpdateCell(loss_scale_value=2**12)
         >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
         >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32)
@@ -253,16 +249,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
     Examples:
         >>> import numpy as np
         >>> from mindspore import Tensor, Parameter, nn
-        >>> from mindspore.ops import operations as P
-        >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell
-        >>> from mindspore.common import dtype as mstype
+        >>> from mindspore.ops as ops
+        >>> from mindspore import dtype as mstype
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self, in_features, out_features):
         ...         super(Net, self).__init__()
         ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
         ...                                 name='weight')
-        ...         self.matmul = P.MatMul()
+        ...         self.matmul = ops.MatMul()
         ...
         ...     def construct(self, x):
         ...         output = self.matmul(x, self.weight)
@@ -273,7 +268,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         >>> net = Net(in_features, out_features)
         >>> loss = nn.MSELoss()
         >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
-        >>> net_with_loss = WithLossCell(net, loss)
+        >>> net_with_loss = nn.WithLossCell(net, loss)
         >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000)
         >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager)
         >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32)
@@ -284,7 +279,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell):
         >>> net = Net(in_features, out_features)
         >>> loss = nn.MSELoss()
         >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
-        >>> net_with_loss = WithLossCell(net, loss)
+        >>> net_with_loss = nn.WithLossCell(net, loss)
         >>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32))
         >>> label = Tensor(np.zeros([size, out_features]).astype(np.float32))
         >>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
diff --git a/mindspore/numpy/array_creations.py b/mindspore/numpy/array_creations.py
index 3861a35b06c..b6c0bc4f537 100644
--- a/mindspore/numpy/array_creations.py
+++ b/mindspore/numpy/array_creations.py
@@ -49,6 +49,7 @@ _reduce_min_keepdims = P.ReduceMin(True)
 _reduce_max_keepdims = P.ReduceMax(True)
 _reduce_mean_keepdims = P.ReduceMean(True)
 
+
 def array(obj, dtype=None, copy=True, ndmin=0):
     """
     Creates a tensor.
@@ -89,7 +90,7 @@ def array(obj, dtype=None, copy=True, ndmin=0):
             _raise_value_error("Empty tensor cannot be expanded beyond the current dimension.")
         res = _expand(res, ndmin)
 
-    if copy:
+    if copy and isinstance(obj, Tensor):
         res = copy_(res)
     elif dtype is not None and dtype != res.dtype:
         res = res.astype(dtype)
@@ -2208,17 +2209,14 @@ def _pad_linear(arr, pad_width, end_values):
     dtype = arr.dtype
     end_values = _convert_pad_to_nd(end_values, ndim)
     for i in range(ndim):
-        # shape [..., 1, ...]
         left_value = _slice_along_axis(arr, i, 0, 1)
         right_value = _slice_along_axis(arr, i, shape[i]-1, shape[i])
         pad_before = ()
         pad_after = ()
         if pad_width[i][0] > 0:
-            # shape [..., pad_width[i][0], ...]
             pad_before = (linspace(end_values[i][0], left_value, num=pad_width[i][0],
                                    endpoint=False, dtype=dtype, axis=i).squeeze(i+1),)
         if pad_width[i][1] > 0:
-            # shape [..., pad_width[i][1], ...]
             pad_after = linspace(right_value, end_values[i][1], num=pad_width[i][1]+1,
                                  endpoint=True, dtype=dtype, axis=i).squeeze(i+1)
             pad_after = (_slice_along_axis(pad_after, i, 1, pad_width[i][1]+1),)
@@ -2227,6 +2225,58 @@ def _pad_linear(arr, pad_width, end_values):
     return arr
 
 
+def _add_pads_before(arr, pad_args, mode):
+    """handle pads before the array"""
+    idx, array_length, times_to_pad_before, additional_pad_before, reflect_type = pad_args
+    curr_pad = None
+    endpoint_adder = None
+    edge_before = _slice_along_axis(arr, idx, 0, 1)
+    if mode == "reflect":
+        endpoint_adder = 1
+    else:
+        endpoint_adder = 0
+    # Deal with paddings before the original array
+    for times in range(times_to_pad_before):
+        if times < times_to_pad_before - 1:
+            endpoint = array_length
+        else:
+            endpoint = additional_pad_before + endpoint_adder
+        if endpoint != endpoint_adder:
+            curr_pad = _slice_along_axis(arr, idx, endpoint_adder, endpoint)
+            curr_pad = flip(curr_pad, axis=idx)
+            if reflect_type == "odd":
+                curr_pad = 2 * edge_before - curr_pad
+            arr = P.Concat(idx)((curr_pad, arr))
+            edge_before = _slice_along_axis(arr, idx, 0, 1)
+    return arr
+
+
+def _add_pads_after(arr, pad_args, mode):
+    """handle pads after the array"""
+    idx, array_length, times_to_pad_after, additional_pad_after, reflect_type = pad_args
+    curr_pad = None
+    endpoint_adder = None
+    edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx])
+    if mode == "reflect":
+        endpoint_adder = 1
+    else:
+        endpoint_adder = 0
+    # Deal with paddings after the original array
+    for times in range(times_to_pad_after):
+        if times < times_to_pad_after - 1:
+            startpoint = arr.shape[idx] - array_length
+        else:
+            startpoint = arr.shape[idx] - additional_pad_after - endpoint_adder
+        if startpoint != arr.shape[idx] - endpoint_adder:
+            curr_pad = _slice_along_axis(arr, idx, startpoint, arr.shape[idx] - endpoint_adder)
+            curr_pad = flip(curr_pad, axis=idx)
+            if reflect_type == "odd":
+                curr_pad = 2 * edge_end - curr_pad
+            arr = P.Concat(idx)((arr, curr_pad))
+            edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx])
+    return arr
+
+
 def _pad_symmetric(arr, pad_width, reflect_type):
     """pad the array with symmetric paddings"""
     for i in range(arr.ndim):
@@ -2235,41 +2285,18 @@ def _pad_symmetric(arr, pad_width, reflect_type):
         has_pad_before = (pad_width[i][0] > 0)
         has_pad_after = (pad_width[i][1] > 0)
 
-        edge_before = _slice_along_axis(arr, i, 0, 1)
-        edge_end = _slice_along_axis(arr, i, array_length-1, array_length)
         times_to_pad_before = pad_width[i][0] // array_length + 1
         additional_pad_before = pad_width[i][0] % array_length
         times_to_pad_after = pad_width[i][1] // array_length + 1
         additional_pad_after = pad_width[i][1] % array_length
-        curr_pad = None
         if has_pad_before:
             # Deal with paddings before the original array
-            for times in range(times_to_pad_before):
-                if times < times_to_pad_before - 1:
-                    endpoint = array_length
-                else:
-                    endpoint = additional_pad_before
-                if endpoint != 0:
-                    curr_pad = _slice_along_axis(arr, i, 0, endpoint)
-                    curr_pad = flip(curr_pad, axis=i)
-                    if reflect_type == "odd":
-                        curr_pad = 2 * edge_before - curr_pad
-                    arr = P.Concat(i)((curr_pad, arr))
-                    edge_before = _slice_along_axis(arr, i, 0, 1)
+            pad_args = (i, array_length, times_to_pad_before, additional_pad_before, reflect_type)
+            arr = _add_pads_before(arr, pad_args, "symmetric")
         if has_pad_after:
             # Deal with paddings after the original array
-            for times in range(times_to_pad_after):
-                if times < times_to_pad_after - 1:
-                    startpoint = arr.shape[i] - array_length
-                else:
-                    startpoint = arr.shape[i] - additional_pad_after
-                if startpoint != arr.shape[i]:
-                    curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i])
-                    curr_pad = flip(curr_pad, axis=i)
-                    if reflect_type == "odd":
-                        curr_pad = 2 * edge_end - curr_pad
-                    arr = P.Concat(i)((arr, curr_pad))
-                    edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i])
+            pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type)
+            arr = _add_pads_after(arr, pad_args, "symmetric")
     return arr
 
 
@@ -2278,7 +2305,6 @@ def _pad_reflect(arr, pad_width, reflect_type):
     pad the array with reflect paddings, this is very similar to symmetric paddings,
     but differs at how edges are selected.
     """
-    # pylint: disable=too-many-nested-blocks
     for i in range(arr.ndim):
         array_length = arr.shape[i]
         if array_length == 1:
@@ -2288,42 +2314,19 @@ def _pad_reflect(arr, pad_width, reflect_type):
             has_pad_before = (pad_width[i][0] > 0)
             has_pad_after = (pad_width[i][1] > 0)
 
-            edge_before = _slice_along_axis(arr, i, 0, 1)
-            edge_end = _slice_along_axis(arr, i, array_length-1, array_length)
             pad_size = array_length - 1
             times_to_pad_before = pad_width[i][0] // pad_size + 1
             additional_pad_before = pad_width[i][0] % pad_size
             times_to_pad_after = pad_width[i][1] // pad_size + 1
             additional_pad_after = pad_width[i][1] % pad_size
-            curr_pad = None
             if has_pad_before:
                 # Deal with paddings before the original array
-                for times in range(times_to_pad_before):
-                    if times < times_to_pad_before - 1:
-                        endpoint = array_length
-                    else:
-                        endpoint = additional_pad_before + 1
-                    if endpoint != 1:
-                        curr_pad = _slice_along_axis(arr, i, 1, endpoint)
-                        curr_pad = flip(curr_pad, axis=i)
-                        if reflect_type == "odd":
-                            curr_pad = 2 * edge_before - curr_pad
-                        arr = P.Concat(i)((curr_pad, arr))
-                        edge_before = _slice_along_axis(arr, i, 0, 1)
+                pad_args = (i, array_length, times_to_pad_before, additional_pad_before, reflect_type)
+                arr = _add_pads_before(arr, pad_args, "reflect")
             if has_pad_after:
                 # Deal with paddings after the original array
-                for times in range(times_to_pad_after):
-                    if times < times_to_pad_after - 1:
-                        startpoint = arr.shape[i] - array_length
-                    else:
-                        startpoint = arr.shape[i] - additional_pad_after - 1
-                    if startpoint != arr.shape[i]-1:
-                        curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i]-1)
-                        curr_pad = flip(curr_pad, axis=i)
-                        if reflect_type == "odd":
-                            curr_pad = 2 * edge_end - curr_pad
-                        arr = P.Concat(i)((arr, curr_pad))
-                        edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i])
+                pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type)
+                arr = _add_pads_after(arr, pad_args, "reflect")
     return arr
 
 
@@ -2476,7 +2479,7 @@ def pad(arr, pad_width, mode="constant", stat_length=None, constant_values=0,
         constant_values = _convert_pad_to_nd(constant_values, arr.ndim)
         return _pad_constant(arr, pad_width, constant_values)
     if mode in ("maximum", "minimum", "mean", "median"):
-        # TODO: support median mode once P.Sort/P.Median is supported on GPU/CPU
+        # support median mode once P.Sort/P.Median is supported on GPU/CPU
         if mode == "median":
             _raise_unimplemented_error("median mode is not supported yet")
         return _pad_statistic(arr, pad_width, stat_length, stat_func[mode])
diff --git a/mindspore/numpy/array_ops.py b/mindspore/numpy/array_ops.py
index e7f01776ab3..92189ae52c0 100644
--- a/mindspore/numpy/array_ops.py
+++ b/mindspore/numpy/array_ops.py
@@ -773,12 +773,12 @@ def atleast_1d(*arys):
         >>> output = np.atleast_1d(a, b, c)
         >>> print(output)
             [Tensor(shape=[2, 3], dtype=Float32, value=
-            [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000],
-            [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]),
-            Tensor(shape=[1], dtype=Float32, value= [1.00000000e+000]),
+            [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
+            [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]),
+            Tensor(shape=[1], dtype=Float32, value= [1.00000000e+00]),
             Tensor(shape=[5], dtype=Float32,
-            value= [1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
-            1.00000000e+000, 1.00000000e+000])]
+            value= [1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
+            1.00000000e+00, 1.00000000e+00])]
     """
     return _atleast_xd(1, arys)
 
@@ -810,12 +810,12 @@ def atleast_2d(*arys):
         >>> output = np.atleast_2d(a, b, c)
         >>> print(output)
             [Tensor(shape=[2, 3], dtype=Float32, value=
-            [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000],
-            [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]),
-            Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+000]]),
+            [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00],
+            [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]),
+            Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+00]]),
             Tensor(shape=[1, 5], dtype=Float32,
-            value= [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000,
-            1.00000000e+000, 1.00000000e+000]])]
+            value= [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
+            1.00000000e+00, 1.00000000e+00]])]
     """
     return _atleast_xd(2, arys)
 
@@ -850,12 +850,12 @@ def atleast_3d(*arys):
         >>> output = np.atleast_3d(a, b, c)
         >>> print(output)
             [Tensor(shape=[2, 3, 1], dtype=Float32, value=
-            [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]],
-            [[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]]]),
-            Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+000]]]),
+            [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]],
+            [[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]]]),
+            Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+00]]]),
             Tensor(shape=[1, 5, 1], dtype=Float32,
-            value= [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000],
-            [1.00000000e+000], [1.00000000e+000]]])]
+            value= [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00],
+            [1.00000000e+00], [1.00000000e+00]]])]
     """
     res = []
     for arr in arys:
@@ -1444,6 +1444,7 @@ def _split(x, indices_or_sections, opname, axis=0):
             should be integer, tuple(int) or list(int), but got", indices_or_sections)
     return res
 
+
 @constexpr
 def convert_neg_indices(indices, ndim):
     """converts negative values in tuple/list indices"""
@@ -1452,6 +1453,7 @@ def convert_neg_indices(indices, ndim):
     indices = tuple([canonicalizer(axis) for axis in indices])
     return indices
 
+
 def _split_sub_tensors(x, indices, axis):
     """
     Splits the input tensor `x` into multiple sub-tensors
diff --git a/mindspore/numpy/math_ops.py b/mindspore/numpy/math_ops.py
index ed25813e789..031e4716993 100644
--- a/mindspore/numpy/math_ops.py
+++ b/mindspore/numpy/math_ops.py
@@ -2234,7 +2234,7 @@ def convolve(a, v, mode='full'):
         a, v = v, a
         a_size, v_size = v_size, a_size
     v = v[::-1]
-    return _compute_1D_conv(a, v, mode).astype(final_dtype)
+    return _compute_1d_conv(a, v, mode).astype(final_dtype)
 
 
 def _handle_weights(weights, num_samples):
@@ -3923,6 +3923,23 @@ def _gradient_along_axis(f, h, axis):
     return a_grad / h
 
 
+def check_gradient_arguments(f, axis, edge_order):
+    """check arguments for gradient"""
+    if edge_order != 1:
+        _raise_unimplemented_error("edge_order != 1 not implemented")
+    if not isinstance(f, Tensor):
+        f = asarray_const(f)
+    if f.dtype != mstype.float64:
+        f = f.astype(mstype.float32)
+    if axis is None:
+        axis = F.make_range(f.ndim)
+    else:
+        _check_axis_type(axis, True, True, True)
+        axis = _canonicalize_axis(axis, f.ndim)
+        axis = (axis,) if isinstance(axis, int) else axis
+    return f, axis, edge_order
+
+
 def gradient(f, *varargs, axis=None, edge_order=1):
     """
     Returns the gradient of a N-dimensional array.
@@ -3969,18 +3986,7 @@ def gradient(f, *varargs, axis=None, edge_order=1):
         [1.  1.  1. ]]
     """
     # This implementation was adapted from Numpy and jax.numpy
-    if edge_order != 1:
-        _raise_unimplemented_error("edge_order != 1 not implemented")
-    if not isinstance(f, Tensor):
-        f = asarray_const(f)
-    if f.dtype != mstype.float64:
-        f = f.astype(mstype.float32)
-    if axis is None:
-        axis = F.make_range(f.ndim)
-    else:
-        _check_axis_type(axis, True, True, True)
-        axis = _canonicalize_axis(axis, f.ndim)
-        axis = (axis,) if isinstance(axis, int) else axis
+    f, axis, edge_order = check_gradient_arguments(f, axis, edge_order)
 
     len_axes = len(axis)
     n = len(varargs)
@@ -4370,7 +4376,7 @@ def interp(x, xp, fp, left=None, right=None):
         >>> print(np.interp(3.14, xp, fp, right=UNDEF))
         -99.0
     """
-    # TODO implement period once sort is supported
+    # implement period once sort is supported
     x, xp, fp = _to_tensor(x, xp, fp)
     if F.rank(xp) != 1 or F.rank(fp) != 1:
         _raise_value_error('xp and fp must be 1-d sequences')
@@ -4378,7 +4384,6 @@ def interp(x, xp, fp, left=None, right=None):
     if fp.size != size:
         _raise_value_error('the y-coordinates must have the same length as `xp`')
 
-    shape = F.shape(x)
     xp = xp.astype(mstype.float32)
     fp = fp.astype(mstype.float32)
 
@@ -4392,20 +4397,17 @@ def interp(x, xp, fp, left=None, right=None):
     y_1 = F.gather_nd(fp, indices_1)
     res = (y_0*(x_1 - x) + y_1*(x - x_0))/(x_1 - x_0)
     res = F.select(F.equal(x_0, x_1), y_0, res)
-    # where x < xp[0], y = left or xp[0]
-    # where x > xp[-1], y = right or xp[-1]
+
     idx_0 = _to_tensor([0])
     idx_last = _to_tensor([size - 1])
     if left is None:
         left = F.gather_nd(fp, idx_0)
-    left = full(shape, left, mstype.float32)
+    left = full(F.shape(x), left, mstype.float32)
     if right is None:
         right = F.gather_nd(fp, idx_last)
-    right = full(shape, right, mstype.float32)
-    choose_left = F.tensor_lt(x, F.gather_nd(xp, idx_0))
-    choose_right = F.tensor_gt(x, F.gather_nd(xp, idx_last))
-    res = F.select(choose_left, left, res)
-    res = F.select(choose_right, right, res)
+    right = full(F.shape(x), right, mstype.float32)
+    res = F.select(F.tensor_lt(x, F.gather_nd(xp, idx_0)), left, res)
+    res = F.select(F.tensor_gt(x, F.gather_nd(xp, idx_last)), right, res)
     return res
 
 
@@ -4723,6 +4725,31 @@ def _factor_flattened_hist(nbin):
     return factor
 
 
+def _get_histogramdd_count(ndim, bin_edges, sample, weights):
+    """Returns count for histogramdd."""
+    data_indices = []
+    nbin = ()
+    flattened_bin_size = 1
+    for i in F.make_range(ndim):
+        data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right')
+        bin_size = _type_convert(int, bin_edges[i].size)
+        data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins)
+        data_indices.append(data_to_bins)
+        nbin += (bin_size + 1,)
+        flattened_bin_size *= (bin_size + 1)
+
+    factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1))
+    stacked_indices = stack(data_indices) * factor
+    if _get_device() == 'Ascend':
+        stacked_indices = F.cast(stacked_indices, mstype.float32)
+    flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0)
+    count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size)
+    count = F.reshape(count, nbin)
+    slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True)
+    count = count[slices]
+    return count
+
+
 def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pylint: disable=redefined-builtin
     """
     Computes the multidimensional histogram of some data.
@@ -4823,26 +4850,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pyl
         bin_edges.append(edges)
         dedges.append(diff(edges))
 
-    data_indices = []
-    nbin = ()
-    flattened_bin_size = 1
-    for i in F.make_range(ndim):
-        data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right')
-        bin_size = _type_convert(int, bin_edges[i].size)
-        data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins)
-        data_indices.append(data_to_bins)
-        nbin += (bin_size + 1,)
-        flattened_bin_size *= (bin_size + 1)
-
-    factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1))
-    stacked_indices = stack(data_indices) * factor
-    if _get_device() == 'Ascend':
-        stacked_indices = F.cast(stacked_indices, mstype.float32)
-    flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0)
-    count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size)
-    count = F.reshape(count, nbin)
-    slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True)
-    count = count[slices]
+    count = _get_histogramdd_count(ndim, bin_edges, sample, weights)
 
     if density:
         s = F.reduce_sum(count.astype(mstype.float32))
@@ -5079,7 +5087,7 @@ def polysub(a1, a2):
         >>> print(np.polysub([2, 10, -2], [3, 10, -4]))
         [-1  0  2]
     """
-    return polyadd(a1, -_to_tensor(a2))
+    return polyadd(a1, F.neg_tensor(_to_tensor(a2)))
 
 
 def polyval(p, x):
@@ -5485,51 +5493,48 @@ def ravel_multi_index(multi_index, dims, mode='clip', order='C'):
     return sum_((multi_index * strides).astype('float32'), axis=0)
 
 
-def _vector_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin
+def _vector_norm(x, _ord, axis, keepdims):
     """Returns norm of a vector."""
-    if _in(ord, ('fro', 'nuc')):
+    if _in(_ord, ('fro', 'nuc')):
         _raise_value_error('Frobenius norm and nuclear norm are only defined for vectors')
-    if ord is None:
-        ord = 2
-    if ord == inf:
+    if _ord is None:
+        _ord = 2
+    if _ord == inf:
         res = P.ReduceMax(keepdims)(absolute(x), axis)
-    elif ord == -inf:
+    elif _ord == -inf:
         res = P.ReduceMin(keepdims)(absolute(x), axis)
-    elif ord == 0:
+    elif _ord == 0:
         res = P.ReduceSum(keepdims)(F.not_equal(x, 0).astype(mstype.float32), axis)
     else:
-        res = power(P.ReduceSum(keepdims)(power(absolute(x), ord), axis), 1./ord)
+        res = power(P.ReduceSum(keepdims)(power(absolute(x), _ord), axis), 1./_ord)
     return res
 
 
-def _matrix_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin
+def _matrix_norm(x, _ord, axis, keepdims):
     """Returns norm of a matrix."""
-    if ord == 0:
+    if _ord == 0:
         _raise_value_error('for 0 axis, norm is defined only for 2-D matrices')
-    if ord == 'nuc':
+    if _ord == 'nuc':
         _raise_unimplemented_error('nuclear norm is not implemented')
-    if _in(ord, (2, -2)):
+    if _in(_ord, (2, -2)):
         _raise_unimplemented_error('2-norm is not implemented for matrices')
-    if _in(ord, (None, 'fro')):
-        res = F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis))
-    else:
-        axis0, axis1 = axis
-        if not keepdims:
-            if _check_is_inf(_abs(ord)) and axis0 > axis1:
-                axis0 -= 1
-            elif _abs(ord) == 1 and axis1 > axis0:
-                axis1 -= 1
-        if _check_is_inf(ord):
-            res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0)
-        elif _check_is_inf(ord, True):
-            res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0)
-        elif ord == 1:
-            res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1)
-        elif ord == -1:
-            res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1)
-        else:
-            return _raise_value_error('invalid norm order for matrices')
-    return res
+    if _in(_ord, (None, 'fro')):
+        return F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis))
+    axis0, axis1 = axis
+    if not keepdims:
+        if _check_is_inf(_abs(_ord)) and axis0 > axis1:
+            axis0 -= 1
+        elif _abs(_ord) == 1 and axis1 > axis0:
+            axis1 -= 1
+    if _check_is_inf(_ord):
+        return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0)
+    if _check_is_inf(_ord, True):
+        return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0)
+    if _ord == 1:
+        return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1)
+    if _ord == -1:
+        return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1)
+    return _raise_value_error('invalid norm order for matrices')
 
 
 def norm(x, ord=None, axis=None, keepdims=False): # pylint: disable=redefined-builtin
@@ -5827,11 +5832,11 @@ def correlate(a, v, mode='valid'):
     v = v.astype(promote_dtype)
     if a.size < v.size:
         a, v = v, a
-        return _compute_1D_conv(a, v, mode)[::-1]
-    return _compute_1D_conv(a, v, mode)
+        return _compute_1d_conv(a, v, mode)[::-1]
+    return _compute_1d_conv(a, v, mode)
 
 
-def _compute_1D_conv(a, v, mode):
+def _compute_1d_conv(a, v, mode):
     """Returns a 1-D sequence which is the cross-correlate of two 1-D sequences (`a` and `v`)."""
     v_size = F.shape_mul(v.shape)
     if mode not in ('same', 'full', 'valid'):
diff --git a/mindspore/numpy/utils_const.py b/mindspore/numpy/utils_const.py
index da55b9e45df..dbb5edd91b3 100644
--- a/mindspore/numpy/utils_const.py
+++ b/mindspore/numpy/utils_const.py
@@ -136,6 +136,8 @@ def _can_broadcast(*shapes):
         _infer_out_shape(*shapes)
     except ValueError:
         return False
+    finally:
+        pass
     return True
 
 
diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py
index 4fcff7b163b..d025f7a8ec1 100644
--- a/mindspore/ops/_grad/grad_array_ops.py
+++ b/mindspore/ops/_grad/grad_array_ops.py
@@ -264,9 +264,13 @@ def get_bprop_embedding_lookup(self):
     def bprop_sparse(x, indices, offset, out, dout):
         x_shp = shape_op(x)
         new_indices = sub_op(indices, offset)
-        # Reshape the 'new_indices'
-        new_indices_shape_changed = (size_op(new_indices),)
-        new_indices = reshape_op(new_indices, new_indices_shape_changed)
+        indices_size = size_op(new_indices)
+        if indices_size > 0:
+            # Reshape the 'new_indices'
+            new_indices_shape_changed = (indices_size,)
+            new_indices = reshape_op(new_indices, new_indices_shape_changed)
+        else:
+            new_indices_shape_changed = ()
         x_shp_tail = x_shp[1:]
         actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail
         # Reshape the 'actual_dout' on device
diff --git a/mindspore/ops/_grad_experimental/grad_nn_ops.py b/mindspore/ops/_grad_experimental/grad_nn_ops.py
index acb3f84dc31..56e25b989e8 100644
--- a/mindspore/ops/_grad_experimental/grad_nn_ops.py
+++ b/mindspore/ops/_grad_experimental/grad_nn_ops.py
@@ -34,6 +34,19 @@ def get_bprop_ctc_loss_v2(self):
     return bprop
 
 
+@bprop_getters.register(P.SoftMarginLoss)
+def get_bprop_soft_margin_loss(self):
+    """Grad definition for `SoftMarginLoss` operation."""
+    grad = G.SoftMarginLossGrad(reduction=self.reduction)
+
+    def bprop(predict, label, out, dout):
+        dx = grad(predict, label, dout)
+        dy = grad(label, predict, dout)
+        return dx, dy
+
+    return bprop
+
+
 @bprop_getters.register(P.SoftShrink)
 def get_bprop_softshrink(self):
     """Grad definition for `SoftShrink` operation."""
@@ -44,3 +57,15 @@ def get_bprop_softshrink(self):
         return (dx,)
 
     return bprop
+
+
+@bprop_getters.register(P.HShrink)
+def get_bprop_hshrink(self):
+    """Grad definition for `HShrinkGrad` operation."""
+    grad = G.HShrinkGrad(self.lambd)
+
+    def bprop(features, out, gradients):
+        dx = grad(gradients, features)
+        return (dx,)
+
+    return bprop
diff --git a/mindspore/ops/_op_impl/akg/ascend/__init__.py b/mindspore/ops/_op_impl/akg/ascend/__init__.py
index 61e9dea9db4..41127a2806a 100644
--- a/mindspore/ops/_op_impl/akg/ascend/__init__.py
+++ b/mindspore/ops/_op_impl/akg/ascend/__init__.py
@@ -44,5 +44,6 @@ from .sqrt import _sqrt_akg
 from .square import _square_akg
 from .sub import _sub_akg
 from .prod_force_se_a import _prod_force_se_a_akg
+from .load_im2col import _load_im2col_akg
 
 # Please insert op register in lexicographical order of the filename.
diff --git a/mindspore/ops/_op_impl/cpu/__init__.py b/mindspore/ops/_op_impl/cpu/__init__.py
index 6ac57186a4b..0b1f418ecf8 100644
--- a/mindspore/ops/_op_impl/cpu/__init__.py
+++ b/mindspore/ops/_op_impl/cpu/__init__.py
@@ -64,3 +64,4 @@ from .one_hot import _one_hot_cpu
 from .pad import _pad_cpu
 from .range import _range_cpu
 from .tensor_copy_slices import _tensor_copy_slices_cpu
+from .l2loss import _l2loss_cpu
diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py
index a017bc4d416..7108c57a533 100644
--- a/mindspore/ops/_op_impl/tbe/__init__.py
+++ b/mindspore/ops/_op_impl/tbe/__init__.py
@@ -150,6 +150,7 @@ from .logical_or import _logical_or_tbe
 from .reduce_max import _reduce_max_tbe
 from .reduce_min import _reduce_min_tbe
 from .reduce_sum import _reduce_sum_tbe
+from .reduce_sum_ds import _reduce_sum_ds_tbe
 from .round import _round_tbe
 from .tanh import _tanh_tbe
 from .tanh_grad import _tanh_grad_tbe
@@ -219,6 +220,8 @@ from .arg_max_with_value import _arg_max_with_value_tbe
 from .arg_min_with_value import _arg_min_with_value_tbe
 from .smooth_l1_loss import _smooth_l1_loss_tbe
 from .smooth_l1_loss_grad import _smooth_l1_loss_grad_tbe
+from .soft_margin_loss import _soft_margin_loss_tbe
+from .soft_margin_loss_grad import _soft_margin_loss_grad_tbe
 from .fused_mul_add import _fused_mul_add_tbe
 from .fused_mul_add_n import _fused_mul_add_n_tbe
 from .fused_mul_apply_momentum import _fused_mul_apply_momentum_tbe
@@ -394,3 +397,5 @@ from .soft_shrink import _soft_shrink_tbe
 from .soft_shrink_grad import _soft_shrink_grad_tbe
 from .hsigmoid_grad import _hsigmoid_grad_tbe
 from .hsigmoid import _hsigmoid_tbe
+from .hshrink import _hshrink_tbe
+from .hshrink_grad import _hshrink_grad_tbe
diff --git a/mindspore/ops/bprop_mindir/Identity_bprop.mindir b/mindspore/ops/bprop_mindir/Identity_bprop.mindir
index ad7f1ccef67..39bfa0862c2 100644
--- a/mindspore/ops/bprop_mindir/Identity_bprop.mindir
+++ b/mindspore/ops/bprop_mindir/Identity_bprop.mindir
@@ -1,9 +1,9 @@
 
-0.1.0	MindSpore*1.1.0:�
+0.1.0	MindSpore*1.4.0:�
 �
-bprop.10:doutbprop.10:[CNode]12:2bprop.10:[CNode]11:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op15bprop.10*
+bprop.15:doutbprop.15:[CNode]17:2bprop.15:[CNode]16:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op15bprop.15*
 
-bprop.10:x*
-bprop.10:out*
-bprop.10:dout2
-bprop.10:[CNode]12:2:�027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
\ No newline at end of file
+bprop.15:x*
+bprop.15:out*
+bprop.15:dout2
+bprop.15:[CNode]17:2:�027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4ec99802e8da0efad2a3f80e99bfdcc99c4d54f2769de69733086a4722cb141371ba6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
\ No newline at end of file
diff --git a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir
index 56ae56bfac1..728be19742d 100644
--- a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir
+++ b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir
@@ -1,11 +1,11 @@
 
-0.1.0	MindSpore*1.1.0:�
+0.1.0	MindSpore*1.4.0:�
 �
-bprop.2:dout
-bprop.2:outbprop.2:dx:1bprop.2:dx:1"S-Prim-ReluGrad:>Gradients/Default/network-NetRelu/gradReLU/S-Prim-ReluGrad-op5
+bprop.4:dout
+bprop.4:outbprop.4:dx:1bprop.4:dx:1"S-Prim-ReluGrad:>Gradients/Default/network-NetRelu/gradReLU/S-Prim-ReluGrad-op5
 �
-bprop.2:dx:1bprop.2:[CNode]4:3bprop.2:[CNode]3:2"S-Prim-MakeTuple:?Gradients/Default/network-NetRelu/gradReLU/S-Prim-MakeTuple-op6bprop.2*
-	bprop.2:x*
-bprop.2:out*
-bprop.2:dout2
-bprop.2:[CNode]4:3:�027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
\ No newline at end of file
+bprop.4:dx:1bprop.4:[CNode]6:3bprop.4:[CNode]5:2"S-Prim-MakeTuple:?Gradients/Default/network-NetRelu/gradReLU/S-Prim-MakeTuple-op6bprop.4*
+	bprop.4:x*
+bprop.4:out*
+bprop.4:dout2
+bprop.4:[CNode]6:3:�027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4ec99802e8da0efad2a3f80e99bfdcc99c4d54f2769de69733086a4722cb141371ba6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260
\ No newline at end of file
diff --git a/mindspore/ops/composite/random_ops.py b/mindspore/ops/composite/random_ops.py
index 2d29a362c36..f3edf17e973 100644
--- a/mindspore/ops/composite/random_ops.py
+++ b/mindspore/ops/composite/random_ops.py
@@ -251,7 +251,7 @@ def gamma(shape, alpha, beta, seed=None):
         >>> output = ops.gamma(shape, alpha, beta, seed=5)
         >>> result = output.shape
         >>> print(output)
-       [[[ 2.2132034  5.8855834]]
+        [[[ 2.2132034  5.8855834]]
          [ 3.3981476  7.5805717]
         [[ 3.3981476  7.5805717]]
          [ 3.7190282 19.941492]
@@ -264,7 +264,7 @@ def gamma(shape, alpha, beta, seed=None):
         >>> output = ops.gamma(shape, alpha, beta, seed=5)
         >>> result = output.shape
         >>> print(output)
-       [[[ 5.6085486  7.8280783]]
+        [[[ 5.6085486  7.8280783]]
          [ 15.97684  16.116285]
         [[ 1.8347423  1.713663]]
          [ 3.2434065 15.667398]
diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py
index 66bb25e84b2..22173821784 100644
--- a/mindspore/ops/functional.py
+++ b/mindspore/ops/functional.py
@@ -116,6 +116,10 @@ bitwise_and = P.BitwiseAnd()
 bitwise_or = P.BitwiseOr()
 bitwise_xor = P.BitwiseXor()
 invert = P.Invert()
+erf = P.Erf()
+erfc = P.Erfc()
+sort = P.Sort()
+tensor_range = P.Range()
 
 scalar_to_array = P.ScalarToArray()
 scalar_to_tensor = P.ScalarToTensor()
diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py
index bf79430ed1c..2237f7f0f5f 100644
--- a/mindspore/ops/operations/__init__.py
+++ b/mindspore/ops/operations/__init__.py
@@ -76,9 +76,9 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam
                      MaxPool, DataFormatDimMap,
                      AvgPool, Conv2DBackpropInput, ComputeAccidentalHits,
                      MaxPoolWithArgmax, OneHot, Pad, MirrorPad, Mish, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid,
-                     ResizeBilinear, Sigmoid, SeLU,
+                     ResizeBilinear, Sigmoid, SeLU, HShrink,
                      SigmoidCrossEntropyWithLogits, NLLLoss, BCEWithLogitsLoss,
-                     SmoothL1Loss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2,
+                     SmoothL1Loss, SoftMarginLoss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2,
                      SoftmaxCrossEntropyWithLogits, ROIAlign,
                      SparseSoftmaxCrossEntropyWithLogits, Tanh,
                      TopK, BinaryCrossEntropy, KLDivLoss, SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl,
@@ -96,7 +96,8 @@ from .other_ops import (Assign, InplaceAssign, IOU, BoundingBoxDecode, BoundingB
 from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg2Col, CusMatMulCubeDenseLeft,
                         CusMatMulCubeFraczRightMul, CusMatMulCube, CusMatrixCombine, CusTranspose02314,
                         CusMatMulCubeDenseRight,
-                        CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, DetTriangle,
+                        CusMatMulCubeFraczLeftCast, Im2Col, LoadIm2Col, UpdateThorGradient, Cholesky, CholeskyTrsm,
+                        DetTriangle,
                         ProdForceSeA)
 from .sparse_ops import (SparseToDense, SparseTensorDenseMatmul)
 from ._embedding_cache_ops import (CacheSwapTable, UpdateCache, MapCacheIdx, SubAndFilter,
@@ -107,9 +108,18 @@ from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAto
                          AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce,
                          LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce,
                          Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy,
-                         Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog,
-                         GetCenterOfGeometry, MDTemperature, NeighborListUpdate, MDIterationLeapFrogLiujian,
+                         Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy,
+                         GetCenterOfGeometry, MDTemperature, MDIterationLeapFrogLiujian,
                          CrdToUintCrd, MDIterationSetupRandState, TransferCrd, FFT3D, IFFT3D)
+from .sponge_update_ops import (v0coordinaterefresh, v1coordinaterefresh, v2coordinaterefresh, v3coordinaterefresh,
+                                v0forceredistribute, v1forceredistribute, v2forceredistribute, v3forceredistribute,
+                                restrainenergy, restrainforcewithatomenergyandvirial, constrainforcecyclewithvirial,
+                                refreshuintcrd, lastcrdtodr, refreshcrdvel, calculatenowrapcrd, refreshboxmaptimes,
+                                totalc6get, copyfrctosystemgrad, CrdToUintCrdQuarter,
+                                MDIterationLeapFrogLiujianWithMaxVel, GetCenterOfMass, MapCenterOfMass,
+                                NeighborListUpdate, MDIterationLeapFrog,
+                                MDIterationLeapFrogWithMaxVel, MDIterationGradientDescent,
+                                BondForceWithAtomEnergyAndVirial, ConstrainForceCycle)
 
 __all__ = [
     'Unique',
@@ -276,6 +286,7 @@ __all__ = [
     'FloatStatus',
     'Reciprocal',
     'SmoothL1Loss',
+    'SoftMarginLoss',
     'L2Loss',
     'CTCLoss',
     'CTCGreedyDecoder',
@@ -485,7 +496,34 @@ __all__ = [
     "TensorScatterSub",
     "SoftShrink",
     "FFT3D",
-    "IFFT3D"
+    "IFFT3D",
+    "HShrink",
+    "v0coordinaterefresh",
+    "v1coordinaterefresh",
+    "v2coordinaterefresh",
+    "v3coordinaterefresh",
+    "v0forceredistribute",
+    "v1forceredistribute",
+    "v2forceredistribute",
+    "v3forceredistribute",
+    "restrainenergy",
+    "restrainforcewithatomenergyandvirial",
+    "constrainforcecyclewithvirial",
+    "refreshuintcrd",
+    "lastcrdtodr",
+    "refreshcrdvel",
+    "calculatenowrapcrd",
+    "refreshboxmaptimes",
+    "totalc6get",
+    "copyfrctosystemgrad",
+    "CrdToUintCrdQuarter",
+    "MDIterationLeapFrogLiujianWithMaxVel",
+    "GetCenterOfMass",
+    "MapCenterOfMass",
+    "MDIterationLeapFrogWithMaxVel",
+    "MDIterationGradientDescent",
+    "BondForceWithAtomEnergyAndVirial",
+    "ConstrainForceCycle",
 ]
 
 __all__.sort()
diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py
index 22f361f7060..208604b368d 100644
--- a/mindspore/ops/operations/_grad_ops.py
+++ b/mindspore/ops/operations/_grad_ops.py
@@ -1831,6 +1831,15 @@ class SmoothL1LossGrad(PrimitiveWithInfer):
         return dloss
 
 
+class SoftMarginLossGrad(Primitive):
+    """Computes gradient for prediction on SoftMarginLoss."""
+
+    @prim_attr_register
+    def __init__(self, reduction="mean"):
+        self.init_prim_io_names(inputs=['predict', 'label', "dout"], outputs=['gradient'])
+        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
+
+
 class StridedSliceGrad(PrimitiveWithInfer):
     """
     Performs grad of StridedSlice operation.
@@ -2212,3 +2221,37 @@ class SoftShrinkGrad(Primitive):
         self.init_prim_io_names(inputs=['input_grad', 'input_x'], outputs=['output'])
         validator.check_value_type("lambd", lambd, [float], self.name)
         validator.check_number("lambd", lambd, 0, Rel.GE, self.name)
+
+
+class HShrinkGrad(Primitive):
+    """
+    Computes gradients for HShrinkGrad operation.
+
+    Args:
+        Lambd (float): the λ value for the Hardshrink formulation. Default: 0.5
+
+    Inputs:
+        - **Gradients** (Tensor) - the gradients of loss to output of HShrink function.
+          Currently gradients data type only support float16 and float32.
+        - **Features** (Tensor) - Must be the input `input_x` of the forward operator HSHrink.
+          Currently features data type only support float16 and float32.
+
+    Outputs:
+        backprops - Tensor, with the same shape and data type as `features`.
+
+    Rasise:
+        ValueError: If `lambd` is not a float.
+        ValueError: If shape of `gradients` is not the same as `features`.
+        TypeError: If dtype of `gradients` is not the same as `features`.
+        TypeError: If dtype of `gradients` or `features` is neither float16 nor float32.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self, lambd=0.5):
+        validator.check_value_type("lambd", lambd, [float], self.name)
+        if lambd < 0.0:
+            lambd = 0.0
+            self.add_prim_attr('lambd', lambd)
diff --git a/mindspore/ops/operations/_thor_ops.py b/mindspore/ops/operations/_thor_ops.py
index 537560d0ca2..8627f4c40bc 100644
--- a/mindspore/ops/operations/_thor_ops.py
+++ b/mindspore/ops/operations/_thor_ops.py
@@ -31,6 +31,7 @@ __all__ = ["CusBatchMatMul",
            "CusTranspose02314",
            "CusMatMulCubeDenseRight",
            "CusMatMulCubeFraczLeftCast",
+           "LoadIm2Col"
            ]
 
 
@@ -362,6 +363,7 @@ class CusTranspose02314(PrimitiveWithInfer):
 
     def get_bprop(self):
         """Get backprop for CusTranspose02314."""
+
         def bprop(x, out, dout):
             return (C.zeros_like(x),)
 
@@ -529,6 +531,55 @@ class Im2Col(PrimitiveWithInfer):
         return x_dtype
 
 
+class LoadIm2Col(PrimitiveWithInfer):
+    """
+    extracts image patches from image.
+
+    The rank of input_x1 must be `4`, data_format is "NCHW".
+    Only supports when C is divisible by 16.
+
+    Inputs:
+        - **input_x1** (Tensor) - The feature map.
+          The shape of the tensor is :math:`(N, C, H, W)`.
+    Outputs:
+        Tensor.
+    Examples:
+        >>> input_x = Tensor(np.random.rand(32, 16, 224, 224).astype(np.float16))
+        >>> img2col = ops.LoadIm2Col(kernel_size=(7,7), stride=(2,2))
+        >>> output = img2col(input_x)
+    """
+
+    @prim_attr_register
+    def __init__(self,
+                 ksizes,
+                 strides,
+                 pad_mode="same",
+                 dilates=(1, 1, 1, 1)):
+        """Initialize LoadIm2Col"""
+
+        self.init_prim_io_names(inputs=['x1'], outputs=['y'])
+        self.ksizes = ksizes
+        self.strides = strides
+        self.pad_mode = validator.check_string(pad_mode, ['same'], 'pad_mode', self.name)
+        self.dilation = dilates
+
+    def infer_shape(self, data1_shape):
+        bs, c, h, w = data1_shape
+        stride_h, stride_w = self.strides
+        k_w, k_h = self.ksizes
+        h_out = math.ceil(h / stride_h)
+        w_out = math.ceil(w / stride_w)
+        m = h_out * w_out
+        if m % 16 != 0:
+            shape = [(bs * m) // 16, (c * k_h * k_w) // 16, 16, 16]
+        else:
+            shape = [bs, m // 16, (c * k_h * k_w) // 16, 16, 16]
+        return shape
+
+    def infer_dtype(self, data1_dtype):
+        return data1_dtype
+
+
 class UpdateThorGradient(PrimitiveWithInfer):
     """
     Updates Thor Gradient with Approximate Fisher info matrix(for GPU backend).
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index e80ab1c250c..50afe154728 100755
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -739,6 +739,7 @@ class Unique(Primitive):
 
     Inputs:
         - **input_x** (Tensor) - The input tensor.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tuple, containing Tensor objects `(y, idx), `y` is a tensor with the
@@ -1202,7 +1203,7 @@ class Size(PrimitiveWithInfer):
         else:
             size = functools.reduce(lambda x, y: x * y, x['shape'])
         out = {'shape': None,
-               'dtype': mstype.int32,
+               'dtype': mstype.int64,
                'value': size}
         return out
 
@@ -1285,7 +1286,6 @@ class Ones(PrimitiveWithInfer):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> from mindspore.ops import operations as ops
         >>> ones = ops.Ones()
         >>> output = ones((2, 2), mindspore.float32)
         >>> print(output)
@@ -1347,7 +1347,6 @@ class Zeros(Primitive):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> from mindspore.ops import operations as ops
         >>> zeros = ops.Zeros()
         >>> output = zeros((2, 2), mindspore.float32)
         >>> print(output)
@@ -1369,6 +1368,7 @@ class OnesLike(Primitive):
 
     Inputs:
         - **input_x** (Tensor) - Input tensor.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, has the same shape and type as `input_x` but filled with ones.
@@ -1401,6 +1401,7 @@ class ZerosLike(Primitive):
 
     Inputs:
         - **input_x** (Tensor) - Input tensor. The data type is int32, int64, float16 or float32.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, has the same shape and data type as `input_x` but filled with zeros.
@@ -1655,7 +1656,7 @@ class Argmax(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - Input tensor. :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
-        Support data type list as follows:
+          Support data type list as follows:
 
           - Ascend: Float16, Float32.
           - GPU: Float16, Float32.
@@ -1716,6 +1717,7 @@ class Argmin(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - Input tensor.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, indices of the min value of input tensor across the axis.
@@ -1860,7 +1862,7 @@ class ArgMinWithValue(PrimitiveWithInfer):
         >>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32)
         >>> output = ops.ArgMinWithValue()(input_x)
         >>> print(output)
-        (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0.0))
+        (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0))
         >>> output = ops.ArgMinWithValue(keep_dims=True)(input_x)
         >>> print(output)
         (Tensor(shape=[1], dtype=Int32, value= [0]), Tensor(shape=[1], dtype=Float32, value= [ 0.00000000e+00]))
@@ -2299,13 +2301,14 @@ class Concat(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (tuple, list) - A tuple or a list of input tensors.
-          `input_x`, `input_y` should has same data type.
-        - **input_y** (tuple, list) - A tuple or a list of input tensors.
-          `input_x`, `input_y` should has same data type.
+          Suppose there are two tensors in this tuple or list, namely x1 and x2.
+          To perform `Concat` in the axis 0 direction, except for the 0th axis, all other axes should be equal,
+          that is, :math:`x1.shape[1] == x2.shape[1], x1.shape[2] == x2.shape[2], ..., x1.shape[R] == x2.shape[R]',
+          where the :math:`R' indicates the last axis.
 
     Outputs:
         Tensor, the shape is :math:`(x_1, x_2, ..., \sum_{i=1}^Nx_{mi}, ..., x_R)`.
-          The data type is the same with `input_X` and `input_y`.
+          The data type is the same with `input_x`.
 
     Raises:
         TypeError: If `axis` is not an int.
@@ -2314,17 +2317,17 @@ class Concat(PrimitiveWithInfer):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> input_x = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
-        >>> input_y = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
+        >>> input_x1 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
+        >>> input_x2 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32))
         >>> op = ops.Concat()
-        >>> output = op((input_x, input_y))
+        >>> output = op((input_x1, input_x2))
         >>> print(output)
         [[0. 1.]
          [2. 1.]
          [0. 1.]
          [2. 1.]]
         >>> op = ops.Concat(1)
-        >>> output = op((input_x, input_y))
+        >>> output = op((input_x1, input_x2))
         >>> print(output)
         [[0. 1. 0. 1.]
          [2. 1. 2. 1.]]
@@ -2658,6 +2661,7 @@ class Slice(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor): The target tensor.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
         - **begin** (Union[tuple, list]): The beginning of the slice. Only constant value(>=0) is allowed.
         - **size** (Union[tuple, list]): The size of the slice. Only constant value is allowed.
 
@@ -2733,6 +2737,7 @@ class ReverseV2(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - The target tensor. The data type is Number except float64.
+          The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, has the same shape and type as `input_x`.
@@ -2795,7 +2800,7 @@ class Rint(PrimitiveWithInfer):
 
     Inputs:
         - **input_x** (Tensor) - The target tensor, which must be one of the following types:
-          float16, float32.
+          float16, float32. The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions.
 
     Outputs:
         Tensor, has the same shape and type as `input_x`.
diff --git a/mindspore/ops/operations/inner_ops.py b/mindspore/ops/operations/inner_ops.py
index d21cb5d4be4..16fbe1993ae 100755
--- a/mindspore/ops/operations/inner_ops.py
+++ b/mindspore/ops/operations/inner_ops.py
@@ -502,8 +502,7 @@ class AdamWeightDecay(PrimitiveWithInfer):
     Examples:
         >>> import numpy as np
         >>> import mindspore.nn as nn
-        >>> from mindspore import Tensor, Parameter
-        >>> from mindspore.ops import operations as ops
+        >>> from mindspore import Tensor, Parameter, ops
         >>> class Net(nn.Cell):
         ...     def __init__(self):
         ...         super(Net, self).__init__()
diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py
index 7d4dd49cee4..dd036edc5c4 100644
--- a/mindspore/ops/operations/math_ops.py
+++ b/mindspore/ops/operations/math_ops.py
@@ -371,10 +371,43 @@ class _Reduce(PrimitiveWithInfer):
         input_shp = input_x['shape']
         args = {'input_x': input_x['dtype']}
         validator.check_tensors_dtypes_same_and_valid(args, valid_dtype, self.name)
-
-        if axis_v is None:
+        if not isinstance(axis, mstype.tensor_type) and axis_v is None:
             raise ValueError(f"For {self.name}, axis must be const.")
-        input_shp = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name)
+        out_shape = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name)
+        if -1 in input_shp:
+            if axis_v is None:
+                max_v = max(input_shp)
+                if 'max_shape' and 'min_shape' in input_x:
+                    input_max_shp = input_x['max_shape']
+                    max_v = max(input_max_shp)
+                axis_shape_list = axis['shape']
+                if len(axis_shape_list) != 1:
+                    raise ValueError("axis_shape must be 1-D, but got ", len(axis_shape_list))
+                axis_shape = axis_shape_list[0]
+                if len(axis_shape) == 1 and axis_shape[0] == -1 and not self.keep_dims:
+                    out_shape = np.array([-2]).tolist()
+                    output_min_shape = np.ones_like(input_shp).tolist()
+                    output_max_shape = max_v * np.ones_like(input_shp)
+                    output_max_shape = output_max_shape.tolist()
+                elif not self.keep_dims:
+                    out_shape = -1 * np.ones_like(input_shp[:-axis_shape])
+                    out_shape = out_shape.tolist()
+                    output_min_shape = np.ones_like(out_shape).tolist()
+                    output_max_shape = max_v * np.ones_like(out_shape)
+                    output_max_shape = output_max_shape.tolist()
+                else:
+                    out_shape = -1 * np.ones_like(input_shp)
+                    out_shape = out_shape.tolist()
+                    output_min_shape = np.ones_like(input_shp).tolist()
+                    output_max_shape = max_v * np.ones_like(input_shp)
+                    output_max_shape = output_max_shape.tolist()
+            else:
+                output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name)
+                output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name)
+        else:
+            output_max_shape = out_shape
+            output_min_shape = out_shape
+
         value = None
         if input_x['value'] is not None:
             prim_map = {
@@ -386,20 +419,13 @@ class _Reduce(PrimitiveWithInfer):
 
             if np_reduce_func is not None:
                 value = input_x['value'].asnumpy()
-                if not axis_v and axis_v != 0:
+                if not axis_v:
                     axis_v = [i for i in range(len(input_x['shape']))]
                     axis_v = tuple(axis_v)
                 value = np_reduce_func(value, axis_v, keepdims=self.keep_dims)
                 value = np.array(value)
                 value = Tensor(value)
-        if 'max_shape' and 'min_shape' in input_x:
-            output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name)
-            output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name)
-        else:
-            output_max_shape = input_shp
-            output_min_shape = input_shp
-
-        return {'shape': input_shp,
+        return {'shape': out_shape,
                 'min_shape': output_min_shape,
                 'max_shape': output_max_shape,
                 'dtype': input_x['dtype'],
@@ -1013,7 +1039,7 @@ class MatMul(PrimitiveWithCheck):
 
      .. math::
 
-        (Output)_{i j}=\\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\\cdots+a_{i p} b_{p j}, p\\in N
+        (Output)_{i j}=\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\cdots+a_{i p} b_{p j}, p\in N
 
     where the :math:`i,j` indicates the output of the i-th row and j-th column element.
 
@@ -3248,10 +3274,10 @@ class ApproximateEqual(_LogicBinaryOp):
 
     .. math::
 
-    out_i = \begin{cases}
-      & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True\  \\
-      & \text{ if } \left | x_{i} - y_{i} \right | \ge  \text{tolerance},\ \ False\
-    \end{cases}
+        out_i = \begin{cases}
+        & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True  \\
+        & \text{ if } \left | x_{i} - y_{i} \right | \ge \text{tolerance},\ \  False
+        \end{cases}
 
     where :math:`\text{tolerance}` indicates Acceptable maximum tolerance.
 
@@ -3759,10 +3785,10 @@ class IsNan(PrimitiveWithInfer):
 
     .. math::
 
-    out_i = \begin{cases}
-      & \text{ if } x_{i} = \text{Nan},\ \ True\  \\
-      & \text{ if } x_{i} \ne  \text{Nan},\ \ False\
-    \end{cases}
+        out_i = \begin{cases}
+          & \text{ if } x_{i} = \text{Nan},\ \ True \\
+          & \text{ if } x_{i} \ne  \text{Nan},\ \ False
+        \end{cases}
 
     where :math:`Nan` means not a number.
 
@@ -3805,10 +3831,10 @@ class IsInf(PrimitiveWithInfer):
 
     .. math::
 
-    out_i = \begin{cases}
-      & \text{ if } x_{i} = \text{Inf},\ \ True\  \\
-      & \text{ if } x_{i} \ne  \text{Inf},\ \ False\
-    \end{cases}
+        out_i = \begin{cases}
+        & \text{ if } x_{i} = \text{Inf},\ \ True \\
+        & \text{ if } x_{i} \ne \text{Inf},\ \ False
+        \end{cases}
 
     where :math:`Inf` means not a number.
 
@@ -3851,10 +3877,10 @@ class IsFinite(PrimitiveWithInfer):
 
     .. math::
 
-    out_i = \begin{cases}
-      & \text{ if } x_{i} = \text{Finite},\ \ True\  \\
-      & \text{ if } x_{i} \ne  \text{Finite},\ \ False\
-    \end{cases}
+        out_i = \begin{cases}
+          & \text{ if } x_{i} = \text{Finite},\ \ True\  \\
+          & \text{ if } x_{i} \ne \text{Finite},\ \ False
+        \end{cases}
 
     Inputs:
         - **x** (Tensor) - The input tensor.
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py
index 6c60d2a1d0b..46b6ce6ffd7 100755
--- a/mindspore/ops/operations/nn_ops.py
+++ b/mindspore/ops/operations/nn_ops.py
@@ -2076,6 +2076,7 @@ class Conv2DBackpropInput(Primitive):
         self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output'])
         self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
         self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name)
+        self.add_prim_attr('kernel_size', self.kernel_size)
         self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
         if context.get_context("device_target") != "GPU" and self.format == "NHWC":
             raise ValueError("NHWC format only support in GPU target.")
@@ -2658,6 +2659,53 @@ class SmoothL1Loss(PrimitiveWithInfer):
         return prediction
 
 
+class SoftMarginLoss(Primitive):
+    r"""
+    SoftMarginLoss operation.
+
+    Creates a criterion that optimizes a two-class classification
+    logistic loss between input tensor :math:`x` and target tensor :math:`y`
+    (containing 1 or -1).
+
+    .. math::
+        \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
+
+    Args:
+        reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean".
+
+    Inputs:
+        - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
+        - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
+
+    Outputs:
+        Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`.
+        Otherwise, a scalar value will be returned.
+
+    Raises:
+        TypeError: If `logits` or `labels` is not a Tensor.
+        TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
+        ValueError: If shape of `logits` is not the same as `labels`.
+        ValueError: If `reduction` is not one of 'none', 'mean', 'sum'.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> loss = ops.SoftMarginLoss()
+        >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
+        >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
+        >>> output = loss(logits, labels)
+        >>> print(output)
+        0.6764238
+    """
+
+    @prim_attr_register
+    def __init__(self, reduction="mean"):
+        """Initialize SoftMarginLoss"""
+        self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss'])
+        self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
+
+
 class L2Loss(PrimitiveWithInfer):
     """
     Calculates half of the L2 norm of a tensor without using the `sqrt`.
@@ -2678,7 +2726,7 @@ class L2Loss(PrimitiveWithInfer):
         TypeError: If dtype of `input_x` is neither float16 nor float32.
 
     Supported Platforms:
-        ``Ascend`` ``GPU``
+        ``Ascend`` ``GPU`` ``CPU``
 
     Examples
         >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16)
@@ -4097,7 +4145,7 @@ class MirrorPad(PrimitiveWithInfer):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        # case1: mode="REFLECT"
+        >>> # case1: mode="REFLECT"
         >>> class Net(nn.Cell):
         ...    def __init__(self, mode):
         ...        super(Net, self).__init__()
@@ -8606,7 +8654,6 @@ class SoftShrink(Primitive):
         x + \lambda, & \text{ if } x < -\lambda \\
         0, & \text{ otherwise }
         \end{cases}
-
     Args:
         lambd: the :math:`\lambda` must be no less than zero value for the Softshrink formulation. Default: 0.5.
 
@@ -8640,3 +8687,49 @@ class SoftShrink(Primitive):
         """Initialize SoftShrink"""
         validator.check_value_type("lambd", lambd, [float], self.name)
         validator.check_number("lambd", lambd, 0, Rel.GE, self.name)
+
+
+class HShrink(Primitive):
+    r"""
+    Applies the hard shrinkage function element-wise, each element complies the follow function:
+
+    .. math::
+        \text{HardShrink}(x) =
+        \begin{cases}
+        x, & \text{ if } x > \lambda \\
+        x, & \text{ if } x < -\lambda \\
+        0, & \text{ otherwise }
+        \end{cases}
+
+    Args:
+        lambd (float): The value for the HardShrink formulation. Default: 0.5
+
+    Inputs:
+        - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32.
+
+    Outputs:
+        Tensor, the same shape and data type as the input.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Raises:
+        TypeError: If `lambd` is not a float.
+        TypeError: If dtype of `input_x` is neither float16 nor float32.
+
+    Examples:
+        >>> input_x = Tensor(np.array([[ 0.5,  1,  2.0],[0.0533,0.0776,-2.1233]]),mstype.float32)
+        >>> hshrink = P.HShrink()
+        >>> output = hshrink(input_x)
+        >>> print(output)
+        [[ 0.      1.      2.    ]
+        [ 0.      0.     -2.1233]]
+    """
+
+    @prim_attr_register
+    def __init__(self, lambd=0.5):
+        """Initialize HShrink"""
+        validator.check_value_type('lambd', lambd, [float], self.name)
+        if lambd < 0.0:
+            lambd = 0.0
+            self.add_prim_attr('lambd', lambd)
diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py
index 9c44f386a09..4e746ac04a7 100644
--- a/mindspore/ops/operations/other_ops.py
+++ b/mindspore/ops/operations/other_ops.py
@@ -291,8 +291,7 @@ class CheckValid(PrimitiveWithInfer):
         >>> import mindspore
         >>> import mindspore.nn as nn
         >>> import numpy as np
-        >>> from mindspore import Tensor
-        >>> from mindspore.ops import operations as ops
+        >>> from mindspore import Tensor, ops
         >>> class Net(nn.Cell):
         ...     def __init__(self):
         ...         super(Net, self).__init__()
diff --git a/mindspore/ops/operations/sponge_ops.py b/mindspore/ops/operations/sponge_ops.py
index af6fff4e60c..8e9773ba5af 100644
--- a/mindspore/ops/operations/sponge_ops.py
+++ b/mindspore/ops/operations/sponge_ops.py
@@ -1950,95 +1950,6 @@ class Dihedral14CFAtomEnergy(PrimitiveWithInfer):
         return charge_dtype
 
 
-class MDIterationLeapFrog(PrimitiveWithInfer):
-    """
-    One step of classical leap frog algorithm to solve the finite difference
-    Hamiltonian equations of motion for certain system, using Langevin dynamics
-    with Liu's thermostat scheme. Assume the number of atoms is n and the target
-    control temperature is T.
-
-    Detailed iteration formula can be found in this paper: A unified thermostat
-    scheme for efficient configurational sampling for classical/quantum canonical
-    ensembles via molecular dynamics. DOI: 10.1063/1.4991621.
-
-    Because there is a large amount of inputs and each of them are related,
-    there is no way to construct `Examples` using random methods. For details, refer the webpage `SPONGE in MindSpore
-    <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/hpc/sponge>`_.
-
-    Args:
-        float4_numbers(int32): total length to store random numbers.
-        atom_numbers(int32): the number of atoms n.
-        dt(float32): time step for finite difference.
-        half_dt(float32): half of time step for finite difference.
-        exp_gamma(float32): parameter in Liu's dynamic, equals exp(-gamma_ln * dt),
-                            where gamma_ln is the firction factor in Langvin dynamics.
-        max_velocity(float32): the upper limit of velocity, when the veclocity overflows,
-                               scale it to the upper limit.
-        is_max_velocity(int32): whether the max velocity control is open or not.
-
-    Inputs:
-        - **mass_inverse** (Tensor) - The inverse value of mass of each atom.
-          The data type is float32 and the shape is :math:`(n,]`
-        - **sqrt_mass** (Tensor) - The inverse square root value
-          of effect mass in Liu's dynamics of each atom. The data type is float32 and the shape is :math:`(n,]`
-
-    Outputs:
-        - **vel** (Tensor) - The velocity of each atom.
-          The data type is float32 and the shape is :math:`(n, 3]`
-        - **crd** (Tensor) - The coordinate of each atom.
-          The data type is float32 and the shape is :math:`(n, 3]`
-        - **frc** (Tensor) - The force felt by each atom.
-          The data type is float32 and the shape is :math:`(n, 3]`
-        - **acc** (Tensor) - The acceleration of each atom.
-          The data type is float32 and the shape is :math:`(n, 3]`
-
-    Supported Platforms:
-        ``GPU``
-    """
-
-    @prim_attr_register
-    def __init__(self, float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity):
-        """Initialize MDIterationLeapFrog."""
-        validator.check_value_type('float4_numbers', float4_numbers, int, self.name)
-        validator.check_value_type('atom_numbers', atom_numbers, int, self.name)
-        validator.check_value_type('half_dt', half_dt, float, self.name)
-        validator.check_value_type('dt', dt, float, self.name)
-        validator.check_value_type('exp_gamma', exp_gamma, float, self.name)
-        validator.check_value_type('is_max_velocity', is_max_velocity, int, self.name)
-        validator.check_value_type('max_velocity', max_velocity, float, self.name)
-        self.float4_numbers = float4_numbers
-        self.atom_numbers = atom_numbers
-        self.half_dt = half_dt
-        self.dt = dt
-        self.exp_gamma = exp_gamma
-        self.is_max_velocity = is_max_velocity
-        self.max_velocity = max_velocity
-
-        self.init_prim_io_names(
-            inputs=['mass_inverse', 'sqrt_mass'],
-            outputs=['vel', 'crd', 'frc', 'acc'])
-        self.add_prim_attr('float4_numbers', self.float4_numbers)
-        self.add_prim_attr('atom_numbers', self.atom_numbers)
-        self.add_prim_attr('half_dt', self.half_dt)
-        self.add_prim_attr('dt', self.dt)
-        self.add_prim_attr('exp_gamma', self.exp_gamma)
-        self.add_prim_attr('is_max_velocity', self.is_max_velocity)
-        self.add_prim_attr('max_velocity', self.max_velocity)
-
-    def infer_shape(self, mass_inverse_shape, sqrt_mass_shape):
-        cls_name = self.name
-        n = self.atom_numbers
-        validator.check_int(mass_inverse_shape[0], n, Rel.EQ, "mass_inverse", cls_name)
-        validator.check_int(sqrt_mass_shape[0], n, Rel.EQ, "sqrt_mass", cls_name)
-        return [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3]
-
-    def infer_dtype(self, mass_inverse_dtype, sqrt_mass_dtype):
-        validator.check_tensor_dtype_valid('mass_inverse', mass_inverse_dtype, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('sqrt_mass', sqrt_mass_dtype, [mstype.float32], self.name)
-
-        return mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype
-
-
 class PMEReciprocalForce(PrimitiveWithInfer):
     """
     Calculate the reciprocal part of long-range Coulumb force using
@@ -2710,219 +2621,6 @@ class MDTemperature(PrimitiveWithInfer):
         validator.check_tensor_dtype_valid('atom_mass', atom_mass_dtype, [mstype.float32], self.name)
         return atom_mass_dtype
 
-
-class NeighborListUpdate(PrimitiveWithInfer):
-    """
-    Update (or construct if first time) the Verlet neighbor list for the
-    calculation of short-ranged force. Assume the number of atoms is n,
-    the number of grids divided is G, the maximum number of atoms in one
-    grid is m, the maximum number of atoms in single atom's neighbor list
-    is L, and the number of total atom in excluded list is E.
-
-    Because there is a large amount of inputs and each of them are related,
-    there is no way to construct `Examples` using random methods. For details, refer the webpage `SPONGE in MindSpore
-    <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/hpc/sponge>`_.
-
-    Args:
-        grid_numbers(int32): the total number of grids divided.
-        not_first_time(int32): whether to construct the neighbor
-          list first time or not.
-        nxy(int32): the total number of grids divided in xy plane.
-        excluded_atom_numbers(int32): the total atom numbers in the excluded list.
-        cutoff(float32): the cutoff distance for short-range force calculation. Default: 10.0.
-        skin(float32): the overflow value of cutoff to maintain a neighbor list. Default: 2.0.
-        cutoff_square(float32): the square value of cutoff.
-        half_skin_square(float32): skin*skin/4, indicates the maximum
-          square value of the distance atom allowed to move between two updates.
-        cutoff_with_skin(float32): cutoff + skin, indicates the
-          radius of the neighbor list for each atom.
-        half_cutoff_with_skin(float32): cutoff_with_skin/2.
-        cutoff_with_skin_square(float32): the square value of cutoff_with_skin.
-        refresh_interval(int32): the number of iteration steps between two updates of neighbor list. Default: 20.
-        max_atom_in_grid_numbers(int32): the maximum number of atoms in one grid. Default: 64.
-        max_neighbor_numbers(int32): The maximum number of neighbors. Default: 800.
-
-    Inputs:
-        - **atom_numbers_in_grid_bucket** (Tensor) - The number of atoms in each grid bucket.
-          The data type is int32 and the shape is :math:`(G,)`.
-        - **bucket** (Tensor) - The atom indices in each grid bucket.
-          The data type is int32 and the shape is :math:`(G, m)`.
-        - **crd** (Tensor) - The coordinates of each atom.
-          The data type is float32 and the shape is :math:`(n,)`.
-        - **box_length** (Tensor) - The length of 3 dimensions of the simulation box.
-          The data type is float32 and the shape is :math:`(3,)`.
-        - **grid_n** (Tensor) - The number of grids divided of 3 dimensions of the simulation box.
-          The data type is float32 and the shape is :math:`(3,)`.
-        - **grid_length_inverse** (Scalar) - the inverse value of grid length.
-          The data type is float32 and the shape is :math:`(n,)`.
-        - **atom_in_grid_serial** (Tensor) - The grid index for each atom.
-          The data type is int32 and the shape is :math:`(n,)`.
-        - **old_crd** (Tensor) - The coordinates before update of each atom.
-          The data type is float32 and the shape is :math:`(n, 3)`.
-        - **crd_to_uint_crd_cof** (Tensor) - The scale factor
-          between the unsigned int value and the real space coordinates.
-          The data type is float32 and the shape is :math:`(3,)`.
-        - **uint_crd** (Tensor) - The unsigned int coordinates value fo each atom.
-          The data type is uint32 and the shape is :math:`(n, 3)`.
-        - **gpointer** (Tensor) - The 125 nearest neighbor grids (including self) of each grid.
-          G is the number of nearest neighbor grids. The data type is int32 and the shape is :math:`(G, 125)`.
-        - **nl_atom_numbers** (Tensor) - The number of atoms in neighbor list of each atom.
-          The data type is int32 and the shape is :math:`(n,)`.
-        - **nl_atom_serial** (Tensor) - The indices of atoms in neighbor list of each atom.
-          The data type is int32 and the shape is :math:`(n, L)`.
-        - **uint_dr_to_dr_cof** (Tensor) - The scale factor between
-          the real space coordinates and the unsigned int value. The data type is float32 and the shape is :math:`(3,)`.
-        - **excluded_list_start** (Tensor) - The start excluded index in excluded list for each atom.
-          The data type is int32 and the shape is :math:`(n,)`.
-        - **excluded_numbers** (Tensor) - The number of atom excluded in excluded list for each atom.
-          The data type is int32 and the shape is :math:`(n,)`.
-        - **excluded_list** (Tensor) - The contiguous join of excluded list of each atom.
-          The data type is int32 and the shape is :math:`(E,)`.
-        - **need_refresh_flag** (Tensor) - Whether the neighbor list of each atom need update or not.
-          The data type is int32 and the shape is :math:`(n,)`.
-        - **refresh_count** (Tensor) - Count how many iteration steps have passed since last update.
-          The data type is int32 and the shape is :math:`(n,)`.
-
-    Outputs:
-        - **res** (Scalar)
-          The data type is float32.
-
-    Supported Platforms:
-        ``GPU``
-    """
-
-    @prim_attr_register
-    def __init__(self, grid_numbers, atom_numbers, not_first_time, nxy, excluded_atom_numbers,
-                 cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square,
-                 refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800):
-        """Initialize NeighborListUpdate."""
-        self.grid_numbers = grid_numbers
-        self.atom_numbers = atom_numbers
-        self.refresh_interval = refresh_interval
-        self.not_first_time = not_first_time
-        self.cutoff = cutoff
-        self.skin = skin
-        self.max_atom_in_grid_numbers = max_atom_in_grid_numbers
-        self.nxy = nxy
-        self.excluded_atom_numbers = excluded_atom_numbers
-        self.cutoff_square = cutoff_square
-        self.half_skin_square = half_skin_square
-        self.cutoff_with_skin = cutoff_with_skin
-        self.half_cutoff_with_skin = half_cutoff_with_skin
-        self.cutoff_with_skin_square = cutoff_with_skin_square
-        self.max_neighbor_numbers = max_neighbor_numbers
-        self.init_prim_io_names(
-            inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_n', 'grid_length_inverse',
-                    'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers',
-                    'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers',
-                    'need_refresh_flag', 'refresh_count'], outputs=['res'])
-
-        self.add_prim_attr('grid_numbers', self.grid_numbers)
-        self.add_prim_attr('atom_numbers', self.atom_numbers)
-        self.add_prim_attr('refresh_interval', self.refresh_interval)
-        self.add_prim_attr('not_first_time', self.not_first_time)
-        self.add_prim_attr('cutoff', self.cutoff)
-        self.add_prim_attr('skin', self.skin)
-        self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers)
-        self.add_prim_attr('nxy', self.nxy)
-        self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers)
-        self.add_prim_attr('cutoff_square', self.cutoff_square)
-        self.add_prim_attr('half_skin_square', self.half_skin_square)
-        self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin)
-        self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin)
-        self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square)
-
-    def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_n_shape,
-                    grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape,
-                    uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape,
-                    uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape,
-                    need_refresh_flag_shape, refresh_count_shape):
-        validator.check_int(len(atom_numbers_in_grid_bucket_shape), 1, Rel.EQ,
-                            "atom_numbers_in_grid_bucket_dim", self.name)
-        validator.check_int(len(bucket_shape), 2, Rel.EQ, "bucket_dim", self.name)
-        validator.check_int(len(crd_shape), 2, Rel.EQ, "crd_dim", self.name)
-        validator.check_int(len(box_length_shape), 1, Rel.EQ, "box_length_dim", self.name)
-        validator.check_int(len(grid_n_shape), 1, Rel.EQ, "grid_n_dim", self.name)
-        validator.check_int(len(grid_length_inverse_shape), 1, Rel.EQ, "grid_length_inverse_dim", self.name)
-        validator.check_int(len(atom_in_grid_serial_shape), 1, Rel.EQ, "atom_in_grid_serial_dim", self.name)
-        validator.check_int(len(old_crd_shape), 2, Rel.EQ, "old_crd_dim", self.name)
-        validator.check_int(len(crd_to_uint_crd_cof_shape), 1, Rel.EQ, "crd_to_uint_crd_cof_dim", self.name)
-        validator.check_int(len(uint_crd_shape), 2, Rel.EQ, "uint_crd_dim", self.name)
-        validator.check_int(len(gpointer_shape), 2, Rel.EQ, "gpointer_dim", self.name)
-        validator.check_int(len(nl_atom_numbers_shape), 1, Rel.EQ, "nl_atom_numbers_dim", self.name)
-        validator.check_int(len(nl_atom_serial_shape), 2, Rel.EQ, "nl_atom_serial_dim", self.name)
-        validator.check_int(len(uint_dr_to_dr_cof_shape), 1, Rel.EQ, "uint_dr_to_dr_cof_dim", self.name)
-        validator.check_int(len(excluded_list_start_shape), 1, Rel.EQ, "excluded_list_start_dim", self.name)
-        validator.check_int(len(excluded_list_shape), 1, Rel.EQ, "excluded_list_dim", self.name)
-        validator.check_int(len(excluded_numbers_shape), 1, Rel.EQ, "excluded_numbers_dim", self.name)
-        validator.check_int(len(need_refresh_flag_shape), 1, Rel.EQ, "need_refresh_flag_dim", self.name)
-
-        validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ,
-                            "atom_numbers_in_grid_bucket", self.name)
-        validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket", self.name)
-        validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket", self.name)
-        validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd", self.name)
-        validator.check_int(crd_shape[1], 3, Rel.EQ, "crd", self.name)
-        validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length", self.name)
-        validator.check_int(grid_n_shape[0], 3, Rel.EQ, "grid_n", self.name)
-        validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse", self.name)
-        validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial",
-                            self.name)
-        validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd", self.name)
-        validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd", self.name)
-        validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof", self.name)
-        validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd", self.name)
-        validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd", self.name)
-        validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer", self.name)
-        validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer", self.name)
-        validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers", self.name)
-        validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_serial", self.name)
-        validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial",
-                            self.name)
-        validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof", self.name)
-        validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start",
-                            self.name)
-        validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list",
-                            self.name)
-        validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers", self.name)
-        validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag", self.name)
-
-        return [1,]
-
-    def infer_dtype(self, atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_n_dtype,
-                    grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype,
-                    uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype,
-                    uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype,
-                    need_refresh_flag_dtype, refresh_count_dtype):
-        validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket', atom_numbers_in_grid_bucket_dtype,
-                                           [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('bucket', bucket_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('crd', crd_dtype, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('box_length', box_length_dtype, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('grid_n', grid_n_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('grid_length_inverse', grid_length_inverse_dtype, [mstype.float32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('atom_in_grid_serial', atom_in_grid_serial_dtype, [mstype.int32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('old_crd', old_crd_dtype, [mstype.float32], self.name)
-        validator.check_tensor_dtype_valid('crd_to_uint_crd_cof', crd_to_uint_crd_cof_dtype, [mstype.float32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('uint_crd', uint_crd_dtype, [mstype.uint32], self.name)
-        validator.check_tensor_dtype_valid('gpointer', gpointer_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('nl_atom_numbers', nl_atom_numbers_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('nl_atom_serial', nl_atom_serial_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('uint_dr_to_dr_cof', uint_dr_to_dr_cof_dtype, [mstype.float32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start_dtype, [mstype.int32],
-                                           self.name)
-        validator.check_tensor_dtype_valid('excluded_list', excluded_list_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('excluded_numbers', excluded_numbers_dtype, [mstype.int32], self.name)
-        validator.check_tensor_dtype_valid('need_refresh_flag', need_refresh_flag_dtype, [mstype.int32],
-                                           self.name)
-
-        return mstype.float32
-
-
 class MDIterationLeapFrogWithRF(PrimitiveWithInfer):
     """
     One step of classical leap frog algorithm to solve the finite difference
diff --git a/mindspore/ops/operations/sponge_update_ops.py b/mindspore/ops/operations/sponge_update_ops.py
index 85f6b33e848..1c1be718ef5 100644
--- a/mindspore/ops/operations/sponge_update_ops.py
+++ b/mindspore/ops/operations/sponge_update_ops.py
@@ -998,7 +998,7 @@ class MapCenterOfMass(PrimitiveWithInfer):
         return mstype.float32
 
 
-class NeighborListUpdateNew(PrimitiveWithInfer):
+class NeighborListUpdate(PrimitiveWithInfer):
     """
     Update (or construct if first time) the Verlet neighbor list for the
     calculation of short-ranged force. Assume the number of atoms is n,
diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py
index b47752b753e..2c94d657245 100644
--- a/mindspore/ops/primitive.py
+++ b/mindspore/ops/primitive.py
@@ -101,8 +101,8 @@ class Primitive(Primitive_):
             value (Any): Attribute value.
 
         Examples:
-            >>> import mindspore.ops as P
-            >>> a = P.Add()
+            >>> import mindspore.ops as ops
+            >>> a = ops.Add()
             >>> a = a.add_prim_attr("attr",1)
             >>> out = a.attrs["attr"]
             >>> print(out)
@@ -120,8 +120,8 @@ class Primitive(Primitive_):
         Args:
             name (str): Attribute Name.
         Examples:
-            >>> import mindspore.ops as P
-            >>> a = P.Add()
+            >>> import mindspore.ops as ops
+            >>> a = ops.Add()
             >>> a = a.add_prim_attr("attr",1)
             >>> a = a.del_prim_attr("attr")
             >>> print(a.attrs)
@@ -143,8 +143,8 @@ class Primitive(Primitive_):
         Args:
             stage (int): The stage id for the current operation.
         Examples:
-            >>> from mindspore.ops import operations as P
-            >>> add = P.Add()
+            >>> from mindspore.ops as ops
+            >>> add = ops.Add()
             >>> print(add.set_stage(0))
             Prim[Add]<stage=0>
         """
@@ -162,8 +162,8 @@ class Primitive(Primitive_):
         Args:
             strategy (tuple): Strategy describes the distributed parallel mode of the current primitive.
         Examples:
-            >>> from mindspore.ops import operations as P
-            >>> add = P.Add()
+            >>> from mindspore.ops as ops
+            >>> add = ops.Add()
             >>> print(add.shard(((1, 1), (1, 1))))
             Prim[Add]<strategy=((1, 1), (1, 1))>
         """
@@ -190,8 +190,8 @@ class Primitive(Primitive_):
         Args:
             instance_name (str): Instance name of primitive operator set by user.
         Examples:
-            >>> import mindspore.ops as P
-            >>> a = P.Add()
+            >>> import mindspore.ops as ops
+            >>> a = ops.Add()
             >>> a.set_prim_instance_name("add")
             >>> print(a.instance_name)
             add
@@ -270,8 +270,8 @@ class Primitive(Primitive_):
             inputs (list[str]): list of inputs names.
             outputs (list[str]): list of outputs names.
         Examples:
-            >>> import mindspore.ops as P
-            >>> a = P.Add()
+            >>> import mindspore.ops as ops
+            >>> a = ops.Add()
             >>> a.init_prim_io_names(["x","y"],["sum"])
             >>> print(a.input_names)
             ['x','y']
@@ -631,14 +631,14 @@ def constexpr(fn=None, get_instance=True, name=None):
         >>> def tuple_len(x):
         ...     return len(x)
         ...
-        >>> tuple_len(a)
+        >>> print(tuple_len(a))
         2
         >>> # make an operator class to calculate tuple len
         >>> @constexpr(get_instance=False, name="TupleLen")
         >>> def tuple_len_class(x):
         ...     return len(x)
         ...
-        >>> tuple_len_class()(a)
+        >>> print(tuple_len_class()(a))
         2
     """
 
diff --git a/mindspore/profiler/common/exceptions/error_code.py b/mindspore/profiler/common/exceptions/error_code.py
index 0514f52dab2..a14d8cbba1b 100644
--- a/mindspore/profiler/common/exceptions/error_code.py
+++ b/mindspore/profiler/common/exceptions/error_code.py
@@ -15,7 +15,6 @@
 """Profiler error code and messages."""
 from enum import unique, Enum
 
-
 _GENERAL_MASK = 0b00001 << 7
 _PARSER_MASK = 0b00010 << 7
 _ANALYSER_MASK = 0b00011 << 7
@@ -24,6 +23,7 @@ _ANALYSER_MASK = 0b00011 << 7
 class ProfilerMgrErrors(Enum):
     """Enum definition for profiler errors"""
 
+
 @unique
 class ProfilerErrors(ProfilerMgrErrors):
     """Profiler error codes."""
@@ -53,8 +53,6 @@ class ProfilerErrors(ProfilerMgrErrors):
     PIPELINE_OP_NOT_EXIST_ERROR = 8 | _ANALYSER_MASK
 
 
-
-
 @unique
 class ProfilerErrorMsg(Enum):
     """Profiler error messages."""
diff --git a/mindspore/profiler/common/exceptions/exceptions.py b/mindspore/profiler/common/exceptions/exceptions.py
index d5821d59540..f999fbf8730 100644
--- a/mindspore/profiler/common/exceptions/exceptions.py
+++ b/mindspore/profiler/common/exceptions/exceptions.py
@@ -46,7 +46,6 @@ class ProfilerException(Exception):
         self.message = message
         self.http_code = http_code
 
-
     @property
     def error_code(self):
         """
diff --git a/mindspore/profiler/parser/aicpu_data_parser.py b/mindspore/profiler/parser/aicpu_data_parser.py
index 3f6796f66c6..aee9e2a3307 100644
--- a/mindspore/profiler/parser/aicpu_data_parser.py
+++ b/mindspore/profiler/parser/aicpu_data_parser.py
@@ -45,9 +45,10 @@ class DataPreProcessParser:
         self._source_file_name = self._get_source_file()
         self._ms_kernel_flag = 3
         self._other_kernel_flag = 6
-        self._thread_flag = 7
         self._ms_kernel_run_end_index = 2
         self._other_kernel_run_end_index = 5
+        self._dispatch_time_index = 5
+        self._total_time_index = 6
         self._result_list = []
         self._min_cycle_counter = float('inf')
 
@@ -66,10 +67,10 @@ class DataPreProcessParser:
     def _get_kernel_result(self, number, node_list, thread_list):
         """Get the profiling data form different aicpu kernel"""
         try:
-            if len(node_list) == self._ms_kernel_flag and len(thread_list) == self._thread_flag:
+            if len(node_list) == self._ms_kernel_flag:
                 node_type_name = node_list[0].split(':')[-1]
                 run_end_index = self._ms_kernel_run_end_index
-            elif len(node_list) == self._other_kernel_flag and len(thread_list) == self._thread_flag:
+            elif len(node_list) == self._other_kernel_flag:
                 node_type_name = node_list[0].split(':')[-1].split('/')[-1].split('-')[0]
                 run_end_index = self._other_kernel_run_end_index
             else:
@@ -82,8 +83,8 @@ class DataPreProcessParser:
             run_start = node_list[1].split(':')[-1].split(' ')[0]
             run_end = node_list[run_end_index].split(':')[-1].split(' ')[0]
             exe_time = (float(run_end) - float(run_start)) / self._ms_unit
-            total_time = float(thread_list[-1].split('=')[-1].split()[0]) / self._ms_unit
-            dispatch_time = float(thread_list[-2].split('=')[-1].split()[0]) / self._ms_unit
+            total_time = float(thread_list[self._total_time_index].split('=')[-1].split()[0]) / self._ms_unit
+            dispatch_time = float(thread_list[self._dispatch_time_index].split('=')[-1].split()[0]) / self._ms_unit
 
             return [number, node_type_name, total_time, dispatch_time, exe_time,
                     run_start_counter, run_end_counter]
diff --git a/mindspore/profiler/parser/container.py b/mindspore/profiler/parser/container.py
index 476545dd6d1..a96e1b365bf 100644
--- a/mindspore/profiler/parser/container.py
+++ b/mindspore/profiler/parser/container.py
@@ -23,6 +23,7 @@ class HWTSContainer:
     Args:
         split_list (list): The split list of metadata in HWTS output file.
     """
+
     def __init__(self, split_list):
         self._op_name = ''
         self._duration = None
@@ -79,6 +80,7 @@ class TimelineContainer:
     Args:
         split_list (list): The split list of metadata in op_compute output file.
     """
+
     def __init__(self, split_list):
         self._op_name = split_list[0]
         self._stream_id = str(split_list[1])
@@ -121,6 +123,7 @@ class MemoryGraph:
     Args:
         graph_proto (proto): Graph proto, defined in profiler module.
     """
+
     def __init__(self, graph_proto):
         self._graph_proto = graph_proto
         self.graph_id = graph_proto.graph_id
@@ -153,6 +156,7 @@ class MemoryNode:
     Args:
         node_proto (proto): Node proto.
     """
+
     def __init__(self, node_proto):
         self._node_proto = node_proto
         self.node_id = node_proto.node_id
@@ -192,6 +196,7 @@ class MemoryTensor:
     Args:
         tensor_proto (proto): Tensor proto.
     """
+
     def __init__(self, tensor_proto):
         self._tensor_proto = tensor_proto
         self.tensor_id = tensor_proto.tensor_id
diff --git a/mindspore/profiler/parser/flops_parser.py b/mindspore/profiler/parser/flops_parser.py
index 3d9f3b2441c..f779ba8678d 100644
--- a/mindspore/profiler/parser/flops_parser.py
+++ b/mindspore/profiler/parser/flops_parser.py
@@ -83,6 +83,10 @@ class FlopsParser:
             op_avg_time = op_avg_time_dict[op_name]
             # Time unit of op_avg_time is ms.
             # The unit of gflop_per_second is GFLOPS(1e9).
+            if float(op_avg_time) == 0.0:
+                raise ValueError("All operators take 0 ms.")
+            if peak_flops == 0:
+                raise ValueError("The frequency of an operator is 0.")
             gflop_per_second = task_fops / float(op_avg_time)
             flops_utilization = (gflop_per_second * 1e9 / peak_flops) * 100
             self._flops_summary['FLOPs'] += task_fops
@@ -170,9 +174,9 @@ class FlopsParser:
         # These formula is provided by HISI profiling.
         # a cube_fp16 instruction has (16**3)*2 float point operation.
         # a cube_fp16 instruction has 16*16*32*2 float point operation.
-        cube_fops = cube_fp16_exec*(16**3)*2 + cube_int8_exec*16*16*32*2
-        vec_fops = vec_fp32*32 + vec_fp16_128lane_exec*128 + \
-                   vec_fp16_64lane_exec*64 + vec_int32_exec*64 + vec_misc_exec*32
+        cube_fops = cube_fp16_exec * (16 ** 3) * 2 + cube_int8_exec * 16 * 16 * 32 * 2
+        vec_fops = vec_fp32 * 32 + vec_fp16_128lane_exec * 128 + \
+                   vec_fp16_64lane_exec * 64 + vec_int32_exec * 64 + vec_misc_exec * 32
         task_fops = cube_fops + vec_fops
 
         return task_fops
@@ -231,14 +235,14 @@ class FlopsParser:
             suffix_name = "(recompute_Gradients)"
         else:
             suffix_name = f"({top_level_scope})"
-        scope_list = list(map(lambda x: x+suffix_name, scope_list))
+        scope_list = list(map(lambda x: x + suffix_name, scope_list))
         scope_list[0] = top_level_scope
 
         # Add root node (refers to total flops).
         scope_list.insert(0, "Total")
         scope_depth = len(scope_list)
         for idx in range(scope_depth - 1):
-            key_name = scope_list[idx] + " " + scope_list[idx+1]
+            key_name = scope_list[idx] + " " + scope_list[idx + 1]
             self._flops_each_scope.setdefault(key_name, 0)
             self._flops_each_scope[key_name] += task_fops
 
diff --git a/mindspore/profiler/parser/hccl_parser.py b/mindspore/profiler/parser/hccl_parser.py
index 3077d04681d..d83db58271d 100644
--- a/mindspore/profiler/parser/hccl_parser.py
+++ b/mindspore/profiler/parser/hccl_parser.py
@@ -157,7 +157,7 @@ class HcclParser:
             csv_reader = csv.reader(src_file)
             # index_0:step_num, index_1:start_point, index_2:end_point
             # The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100.
-            step_timestamps_info = [[info[0], float(info[1])/100, float(info[2])/100]
+            step_timestamps_info = [[info[0], float(info[1]) / 100, float(info[2]) / 100]
                                     for info in csv_reader if info[0].isdigit()]
 
         return step_timestamps_info
@@ -219,6 +219,7 @@ class HcclParser:
 
     def _calculate_communication_operator_iter_cost(self, file_path):
         """Calculate the time-consuming of communication operator in one execution round."""
+
         def _inner_calculate_communication_operator_iter_cost(events):
             total_notify_wait = self._calculate_notify_wait_time(events)
             # Divide information by src dst rank_id.
@@ -362,7 +363,7 @@ class HcclParser:
         rdma_communication_size = 0
         rdma_communication_wait_time = 0
         start_index = 0
-        end_index = len(trace_event)-1
+        end_index = len(trace_event) - 1
         while start_index < end_index:
             first_task_type = trace_event[start_index].get("args").get("task type")
             if first_task_type == CommunicationInfo.RDMASEND.value and start_index < end_index - 1:
@@ -386,10 +387,10 @@ class HcclParser:
         # The unit of rdma_communication_wait_time is ms.
         # The unit of rdma_bandwidth is KB/s.
         # The unit of rdma_communication_size is k_byte and The unit of rdma_communication_time is ms.
-        rdma_communication_wait_time = rdma_communication_wait_time/1e3
-        rdma_communication_size = rdma_communication_size/1e3
-        rdma_communication_time = rdma_communication_time/1e3
-        rdma_bandwidth = rdma_communication_size/(rdma_communication_time/1e3) \
+        rdma_communication_wait_time = rdma_communication_wait_time / 1e3
+        rdma_communication_size = rdma_communication_size / 1e3
+        rdma_communication_time = rdma_communication_time / 1e3
+        rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
             if rdma_communication_size else 0
 
         return [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time]
@@ -413,9 +414,9 @@ class HcclParser:
 
         # The unit of sdma_bandwidth is KB/s.
         # The unit of sdma_communication_size is k_byte and The unit of sdma_communication_time is ms.
-        sdma_communication_time = sdma_communication_time/1e3
-        sdma_communication_size = sdma_communication_size/1e3
-        sdma_bandwidth = sdma_communication_size/(sdma_communication_time/1e3) \
+        sdma_communication_time = sdma_communication_time / 1e3
+        sdma_communication_size = sdma_communication_size / 1e3
+        sdma_bandwidth = sdma_communication_size / (sdma_communication_time / 1e3) \
             if sdma_communication_size else 0
         return [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
 
@@ -427,7 +428,7 @@ class HcclParser:
             if task_type == CommunicationInfo.NOTIFY_WAIT.value:
                 total_notify_wait_time += item.get("dur", 0)
         # The unit of total_notify_wait_time is ms.
-        total_notify_wait_time = total_notify_wait_time/1e3
+        total_notify_wait_time = total_notify_wait_time / 1e3
         return total_notify_wait_time
 
     def _calculate_communication_average_value(self, communication_info: list):
@@ -436,8 +437,8 @@ class HcclParser:
         if communication_info_size == 0:
             return []
         # index1: communication_cost,index2:wait_cost,index3:link_info
-        communication_cost_average = sum([i[1] for i in communication_info])/communication_info_size
-        wait_cost_average = sum([i[2] for i in communication_info])/communication_info_size
+        communication_cost_average = sum([i[1] for i in communication_info]) / communication_info_size
+        wait_cost_average = sum([i[2] for i in communication_info]) / communication_info_size
         link_info = [i[3] for i in communication_info]
         calculate_type = 'average'
         link_average_info = self._calculate_link_value(link_info, calculate_type)
diff --git a/mindspore/profiler/parser/hwts_log_parser.py b/mindspore/profiler/parser/hwts_log_parser.py
index ff140ec8e3a..76a3471e6b7 100644
--- a/mindspore/profiler/parser/hwts_log_parser.py
+++ b/mindspore/profiler/parser/hwts_log_parser.py
@@ -20,6 +20,7 @@ from mindspore import log as logger
 from mindspore.profiler.common.validator.validate_path import \
     validate_and_normalize_path
 
+
 class HWTSLogParser:
     """
     The Parser for hwts log files.
@@ -112,8 +113,8 @@ class HWTSLogParser:
 
                 if int(task_id) < 25000:
                     task_id = str(stream_id) + "_" + str(task_id)
-                result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" %(log_type[int(ms_type, 2)], cnt, core_id,
-                                                                         blk_id, task_id, syscnt, stream_id))
+                result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id,
+                                                                          blk_id, task_id, syscnt, stream_id))
 
         fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True)
         fwrite_format(self._output_filename, data_source=self._dst_file_column_title)
diff --git a/mindspore/profiler/parser/integrator.py b/mindspore/profiler/parser/integrator.py
index 472441a254a..916e36a4501 100644
--- a/mindspore/profiler/parser/integrator.py
+++ b/mindspore/profiler/parser/integrator.py
@@ -113,6 +113,8 @@ class Integrator:
                 op_type_time_cache[op_type][0] += op_time
                 op_type_time_cache[op_type][1] += 1
 
+        if self._total_time == 0:
+            raise ValueError("The total time of operations can not be 0.")
         op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
         op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
         with open(op_type_file_path, 'w') as type_file:
@@ -1059,6 +1061,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
             framework_info (dict): The framework metadata.
             aicpu_info (dict): The metadata of AI CPU operator.
             min_cycle_counter (float): The minimum cycle counter of the timeline.
+            source_path (str): The source of file.
         """
         if min_cycle_counter == float('inf'):
             min_cycle_counter = 0
diff --git a/mindspore/profiler/parser/memory_usage_parser.py b/mindspore/profiler/parser/memory_usage_parser.py
index cd68a0de3db..2dccb77ad73 100644
--- a/mindspore/profiler/parser/memory_usage_parser.py
+++ b/mindspore/profiler/parser/memory_usage_parser.py
@@ -34,6 +34,7 @@ GIGABYTES = 1024 * 1024 * 1024
 
 class MemoryUsageParser:
     """MemoryUsageParser to parse memory raw data."""
+
     def __init__(self, profiling_dir, device_id):
         self._profiling_dir = profiling_dir
         self._device_id = device_id
@@ -163,6 +164,7 @@ class MemoryUsageParser:
 
 class GraphMemoryParser:
     """Parse memory usage data for each graph."""
+
     def __init__(self, graph_proto, points, framework):
         self.graph = None
         self.nodes = OrderedDict()
@@ -238,7 +240,7 @@ class GraphMemoryParser:
             if index == 0:
                 node.mem_change = self._mem_change[index] - self.graph.static_mem
             else:
-                node.mem_change = self._mem_change[index] - self._mem_change[index-1]
+                node.mem_change = self._mem_change[index] - self._mem_change[index - 1]
 
             self._update_nodes(node)
             self._update_tensor_source(node)
@@ -308,7 +310,7 @@ class GraphMemoryParser:
             elif life_long == 'LifeLongGraphStart':  # lifetime is from graph start to tensor end
                 if life_end is not None and life_end >= 0:
                     tensor.life_start = 0
-                    self._update_mem_change(size, 0, life_end+1, tensor_id)
+                    self._update_mem_change(size, 0, life_end + 1, tensor_id)
                 else:
                     logger.info('Cannot locate lifetime end for tensor: %s', tensor_id)
             elif life_long == 'LifeLongGraphEnd':  # lifetime is from tensor start to graph end
@@ -319,7 +321,7 @@ class GraphMemoryParser:
                     logger.info('Cannot locate lifetime start for tensor: %s', tensor_id)
             elif life_long == 'LifeLongNone':  # lifetime is from tensor start to tensor end
                 if life_start is not None and life_end is not None and life_start <= life_end:
-                    self._update_mem_change(size, life_start, life_end+1, tensor_id)
+                    self._update_mem_change(size, life_start, life_end + 1, tensor_id)
                 else:
                     logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id)
 
diff --git a/mindspore/profiler/parser/minddata_analyzer.py b/mindspore/profiler/parser/minddata_analyzer.py
index 96352c80fe4..34390da9882 100644
--- a/mindspore/profiler/parser/minddata_analyzer.py
+++ b/mindspore/profiler/parser/minddata_analyzer.py
@@ -304,6 +304,8 @@ class MinddataProfilingAnalyzer:
         if metrics and metrics['output_queue']:
             queue_size = metrics['output_queue']['size']
             queue_length = metrics['output_queue']['length']
+            if queue_length == 0:
+                raise ValueError("The input queue can not be None.")
             queue_average_size = round(sum(queue_size) / len(queue_size), 2) if queue_size else -1
             queue_utilization_pct = round(100 * queue_average_size / queue_length, 2)
             # Compute percentage of time queue is empty
diff --git a/mindspore/profiler/parser/minddata_parser.py b/mindspore/profiler/parser/minddata_parser.py
index a200acc9bcf..805ac5f9906 100644
--- a/mindspore/profiler/parser/minddata_parser.py
+++ b/mindspore/profiler/parser/minddata_parser.py
@@ -20,8 +20,10 @@ from mindspore import log as logger
 from mindspore.profiler.common.validator.validate_path import \
     validate_and_normalize_path
 
+
 class MinddataParser:
     """Minddata Aicpu Parser."""
+
     @staticmethod
     def parse_minddata_aicpu_data(minddata_aicpu_source_path):
         """
diff --git a/mindspore/profiler/parser/minddata_pipeline_parser.py b/mindspore/profiler/parser/minddata_pipeline_parser.py
index d73bfd7c115..94999dbec46 100644
--- a/mindspore/profiler/parser/minddata_pipeline_parser.py
+++ b/mindspore/profiler/parser/minddata_pipeline_parser.py
@@ -262,8 +262,12 @@ class MinddataPipelineParser:
             output_queue = metrics.get('output_queue')
             if output_queue:
                 queue_size = output_queue.get('size')
+                if queue_size is None:
+                    raise ValueError("The queue can not be None.")
                 queue_average_size = sum(queue_size) / len(queue_size)
                 queue_length = output_queue.get('length')
+                if queue_length == 0:
+                    raise ValueError("The length of queue can not be 0.")
                 queue_usage_rate = queue_average_size / queue_length
 
         children_id = op_node.get('children')
diff --git a/mindspore/profiler/parser/optime_parser.py b/mindspore/profiler/parser/optime_parser.py
index 2725d7cc154..bedf25a398a 100644
--- a/mindspore/profiler/parser/optime_parser.py
+++ b/mindspore/profiler/parser/optime_parser.py
@@ -24,6 +24,7 @@ from mindspore.profiler.parser.container import HWTSContainer
 
 TIMELINE_FILE_COLUMN_TITLE = 'op_name, stream_id, start_time(ms), duration(ms)'
 
+
 class OPComputeTimeParser:
     """
     Join hwts info and framework info, get op time info, and output to the result file.
@@ -102,10 +103,12 @@ class OPComputeTimeParser:
         for op_name, time in op_name_time_dict.items():
             if op_name in op_name_stream_dict.keys():
                 stream_id = op_name_stream_dict[op_name]
+                if op_name_count_dict[op_name] == 0:
+                    raise ValueError("The number of operations can not be 0.")
                 avg_time = time / op_name_count_dict[op_name]
                 total_time += avg_time
-                result_data += ("%s %s  %s\n" %(op_name, str(avg_time), stream_id))
-        result_data += ("total op  %s 0" %(str(total_time)))
+                result_data += ("%s %s  %s\n" % (op_name, str(avg_time), stream_id))
+        result_data += ("total op  %s 0" % (str(total_time)))
 
         timeline_data = []
         for op_name, time in op_name_time_dict.items():
@@ -146,8 +149,8 @@ class OPComputeTimeParser:
         Args:
             timeline_data (list): The metadata to be written into the file.
                 [
-                    ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'],
-                    ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'],
+                    ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'],
+                    ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'],
                     [...]
                 ]
         """
diff --git a/mindspore/profiler/parser/step_trace_parser.py b/mindspore/profiler/parser/step_trace_parser.py
index f1755ba3dd7..185b84779fc 100644
--- a/mindspore/profiler/parser/step_trace_parser.py
+++ b/mindspore/profiler/parser/step_trace_parser.py
@@ -348,12 +348,12 @@ class BaseStepTraceParser:
                 csv_writer = csv.writer(file_handle)
                 if not self._is_training_mode:
                     self._header[FP_DURATION] = 'fp'
-                    self._header = self._header[:BP_POINT] + self._header[BP_POINT+1:TAIL]
+                    self._header = self._header[:BP_POINT] + self._header[BP_POINT + 1:TAIL]
                 csv_writer.writerow(self._header)
                 for row_data in self._result:
                     if not self._is_training_mode:
                         row_data[FP_DURATION] += row_data[TAIL]
-                        row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL]
+                        row_data = row_data[:BP_POINT] + row_data[BP_POINT + 1:TAIL]
                     csv_writer.writerow(row_data)
             os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE)
         except (IOError, OSError) as err:
diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py
index 7442fb9eac1..7f8474ddb03 100644
--- a/mindspore/profiler/profiling.py
+++ b/mindspore/profiler/profiling.py
@@ -47,12 +47,14 @@ from mindspore.nn.cell import Cell
 
 INIT_OP_NAME = 'Default/InitDataSetQueue'
 
+
 class ProfileOption(Enum):
     """
     Profile Option Enum which be used in Profiler.profile.
     """
     trainable_parameters = 0
 
+
 class Profiler:
     """
     Performance profiling API.
@@ -67,9 +69,9 @@ class Profiler:
             and analysed,will deal with all op if null; Different op types should be separated by comma.
         ascend_job_id (str): (Ascend only) The directory where the profiling files to be parsed are located;
             This parameter is used to support offline parsing.
-        profile_communication(bool): Whether to collect communication performance data, collect when True.
-            Default is False.
-        profile_memory(bool): Whether to collect tensor memory data, collect when True.Default is False.
+        profile_communication (bool): Whether to collect communication performance data in a multi devices training.
+            collect when True. Default is False. Setting this parameter has no effect during single device training.
+        profile_memory (bool): Whether to collect tensor memory data, collect when True.Default is False.
 
     Examples:
         >>> import numpy as np
@@ -145,29 +147,7 @@ class Profiler:
             if kwargs:
                 logger.warning("Params not be supported yet on GPU.")
         elif self._device_target and self._device_target == "Ascend":
-            optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable")
-            if not isinstance(optypes_not_deal, str):
-                raise TypeError("The parameter optypes_not_deal must be str.")
-            job_dir = kwargs.pop("ascend_job_id", "")
-            if job_dir:
-                job_dir = validate_and_normalize_path(job_dir)
-                if not os.path.exists(job_dir):
-                    msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir"
-                    logger.error(msg)
-                    raise ValueError(msg)
-                self._output_path, _ = os.path.split(job_dir)
-            self._profile_communication = kwargs.pop("profile_communication", False)
-            if not isinstance(self._profile_communication, bool):
-                raise TypeError("The parameter profile_communication must be bool.")
-            if self._profile_communication:
-                hccl_option = {"output": self._output_path, "task_trace": "on"}
-                os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
-            self._profile_memory = kwargs.pop("profile_memory", False)
-            if not isinstance(self._profile_memory, bool):
-                raise TypeError("The parameter profile_memory must be bool")
-            if kwargs:
-                logger.warning("There are invalid params which don't work.")
-
+            self._parse_parameter_for_ascend(**kwargs)
             os.environ['DEVICE_ID'] = self._dev_id
 
             profiling_options = json.dumps(self._construct_profiling_options())
@@ -185,7 +165,6 @@ class Profiler:
             if not os.path.exists(data_path):
                 os.makedirs(data_path, exist_ok=True)
 
-            self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
             # add job id env through user input later
             self._job_id_env = 0
             self._start_time = int(time.time() * 10000000)
@@ -211,10 +190,46 @@ class Profiler:
             "aic_metrics": "PipeUtilization",
             "aicpu": "on",
             "profile_memory": profile_memory
-            }
+        }
 
         return profiling_options
 
+    def _parse_parameter_for_ascend(self, **kwargs):
+        """Parse parameter in Proflier when the device target is Ascend."""
+        optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable")
+        if not isinstance(optypes_not_deal, str):
+            raise TypeError("The parameter optypes_not_deal must be str.")
+        self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else []
+        job_dir = kwargs.pop("ascend_job_id", "")
+        if job_dir:
+            job_dir = validate_and_normalize_path(job_dir)
+            if not os.path.exists(job_dir):
+                msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir"
+                logger.error(msg)
+                raise ValueError(msg)
+            self._output_path, _ = os.path.split(job_dir)
+
+        env_rank_id = os.getenv("RANK_ID")
+        env_table_file = os.getenv("RANK_TABLE_FILE")
+        env_hccl_path = os.getenv("MINDSPORE_HCCL_CONFIG_PATH")
+        # Determine whether it is multi card training.
+        if env_rank_id and (env_table_file or env_hccl_path):
+            self._profile_communication = kwargs.pop("profile_communication", False)
+        if "profile_communication" in kwargs:
+            kwargs.pop("profile_communication")
+            logger.warning("The profile_communication parameter is invalid in single device training "
+                           " which doesn't work.")
+        if not isinstance(self._profile_communication, bool):
+            raise TypeError("The parameter profile_communication must be bool.")
+        if self._profile_communication:
+            hccl_option = {"output": self._output_path, "task_trace": "on"}
+            os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
+        self._profile_memory = kwargs.pop("profile_memory", False)
+        if not isinstance(self._profile_memory, bool):
+            raise TypeError("The parameter profile_memory must be bool")
+        if kwargs:
+            logger.warning("There are invalid params which don't work.")
+
     def analyse(self):
         """
         Collect and analyse performance data, called after training or during training. The example shows above.
@@ -539,7 +554,7 @@ class Profiler:
             for line in f.readlines():
                 if "clock_realtime" in line:
                     # 16 means the first digit of the timestamp, len(line)-3 means the last.
-                    job_start_time = line[16:len(line)-3]
+                    job_start_time = line[16:len(line) - 3]
 
         return job_start_time
 
@@ -651,7 +666,7 @@ class Profiler:
 
             return select_time
 
-        if "output_path" not in kwargs or kwargs.get("output_path") is None:
+        if kwargs.get("output_path") is None:
             if "output_path" in kwargs:
                 kwargs.pop("output_path")
             # Environment variables are mainly set for the convenience of cloud profiler.
@@ -684,6 +699,9 @@ class Profiler:
         if not os.path.exists(hccl_path):
             os.makedirs(hccl_path, exist_ok=True)
             os.chmod(hccl_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+        logger.info("Start call the interface HCCLParseOP parsing hccl info...")
+        logger.info('Warm Prompt: It could take a few minutes if you are training '
+                    'with a complex network or more than 10 steps.')
         # Call the interface HCCLParseOP parsing hccl info.
         try:
             from hccl_parser.entry import hccl_parse_op
@@ -693,11 +711,14 @@ class Profiler:
                          "The hccl_parser-{version}-py3-none-any.whl package is usually located "
                          "in the /usr/local/Ascend/tools Directory", err)
             raise ImportError(err)
+        logger.info("Parse hccl info successfully.")
+        logger.info("Start analyse hccl info.")
         hccl_parse = HcclParser(hccl_path, self._dev_id, self._output_path)
         hccl_parse.parse()
+        logger.info("Analyse hccl info successfully.")
 
     @staticmethod
-    def profile(network=None, profile_option=None):
+    def profile(network, profile_option):
         """
         Get the number of trainable parameters in the training network.
 
diff --git a/mindspore/run_check/_check_version.py b/mindspore/run_check/_check_version.py
index 69d2df67750..fa52264ac12 100644
--- a/mindspore/run_check/_check_version.py
+++ b/mindspore/run_check/_check_version.py
@@ -207,7 +207,7 @@ class AscendEnvChecker(EnvChecker):
     """ascend environment check"""
 
     def __init__(self):
-        self.version = ["1.78.23.3.230"]
+        self.version = ["1.79.T10.0.B100"]
         atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info"
         atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info"
         hisi_fwk_version = "/usr/local/Ascend/fwkacllib/version.info"
diff --git a/mindspore/train/callback/_lr_scheduler_callback.py b/mindspore/train/callback/_lr_scheduler_callback.py
index 2d9c095cfb0..536b5c2202a 100644
--- a/mindspore/train/callback/_lr_scheduler_callback.py
+++ b/mindspore/train/callback/_lr_scheduler_callback.py
@@ -32,9 +32,9 @@ class LearningRateScheduler(Callback):
         learning_rate_function (Function): The function about how to change the learning rate during training.
 
     Examples:
+        >>> from mindspore import Model
         >>> from mindspore.train.callback import LearningRateScheduler
         >>> import mindspore.nn as nn
-        >>> from mindspore.train import Model
         ...
         >>> def learning_rate_function(lr, cur_step_num):
         ...     if cur_step_num%1000 == 0:
diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py
index 8ba5ee457e3..779d30a4f28 100644
--- a/mindspore/train/callback/_summary_collector.py
+++ b/mindspore/train/callback/_summary_collector.py
@@ -150,8 +150,8 @@ class SummaryCollector(Callback):
         >>> import mindspore.nn as nn
         >>> from mindspore import context
         >>> from mindspore.train.callback import SummaryCollector
-        >>> from mindspore.train import Model
-        >>> from mindspore.nn.metrics import Accuracy
+        >>> from mindspore import Model
+        >>> from mindspore.nn import Accuracy
         >>>
         >>> if __name__ == '__main__':
         ...     # If the device_target is GPU, set the device_target to "GPU"
diff --git a/mindspore/train/loss_scale_manager.py b/mindspore/train/loss_scale_manager.py
index 02a134fd590..501aebb5c1c 100644
--- a/mindspore/train/loss_scale_manager.py
+++ b/mindspore/train/loss_scale_manager.py
@@ -115,8 +115,7 @@ class DynamicLossScaleManager(LossScaleManager):
         scale_window (int): Maximum continuous normal steps when there is no overflow. Default: 2000.
 
     Examples:
-        >>> from mindspore import Model, nn
-        >>> from mindspore.train.loss_scale_manager import DynamicLossScaleManager
+        >>> from mindspore import Model, nn, DynamicLossScaleManager
         >>>
         >>> net = Net()
         >>> loss_scale_manager = DynamicLossScaleManager()
diff --git a/mindspore/train/model.py b/mindspore/train/model.py
index d87ec722425..23412cd1f5d 100644
--- a/mindspore/train/model.py
+++ b/mindspore/train/model.py
@@ -274,6 +274,8 @@ class Model:
 
     def _update_metrics(self, outputs):
         """Update metrics local values."""
+        if isinstance(outputs, Tensor):
+            outputs = (outputs,)
         if not isinstance(outputs, tuple):
             raise ValueError("The `outputs` is not tuple.")
 
@@ -365,6 +367,8 @@ class Model:
                                                                         dataset_sink_mode=True,
                                                                         sink_size=sink_size)
             self._train_network = train_network
+            if context.get_auto_parallel_context("pipeline_stages") > 1 and valid_dataset:
+                self._train_network.add_flags_recursive(is_first_iteration=True)
             for inputs in train_dataset_helper:
                 self._train_network.compile(*inputs)
                 break
@@ -378,6 +382,8 @@ class Model:
                                                                        dataset=valid_dataset,
                                                                        dataset_sink_mode=True)
             self._eval_network = eval_network
+            if context.get_auto_parallel_context("pipeline_stages") > 1:
+                self._eval_network.add_flags_recursive(is_first_iteration=False)
             for inputs in valid_dataset_helper:
                 self._eval_network.compile(*inputs)
                 break
@@ -615,8 +621,7 @@ class Model:
                              Default: -1.
 
         Examples:
-            >>> from mindspore import Model, nn
-            >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
+            >>> from mindspore import Model, nn, FixedLossScaleManager
             >>>
             >>> # For details about how to build the dataset, please refer to the tutorial
             >>> # document on the official website.
@@ -872,10 +877,9 @@ class Model:
             >>> # mindspore.cn.
             >>> import numpy as np
             >>> import mindspore as ms
-            >>> from mindspore import Model, context, Tensor, nn
+            >>> from mindspore import Model, context, Tensor, nn, FixedLossScaleManager
             >>> from mindspore.context import ParallelMode
             >>> from mindspore.communication import init
-            >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
             >>>
             >>> context.set_context(mode=context.GRAPH_MODE)
             >>> init()
diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py
index 1e4c96c7b1e..dea204c29ff 100644
--- a/mindspore/train/serialization.py
+++ b/mindspore/train/serialization.py
@@ -27,7 +27,7 @@ from collections import defaultdict
 import numpy as np
 
 import mindspore.nn as nn
-import mindspore.context as context
+from mindspore import context
 from mindspore import log as logger
 from mindspore.train.checkpoint_pb2 import Checkpoint
 from mindspore.train.print_pb2 import Print
@@ -275,8 +275,6 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True,
             data = param["data"].asnumpy().reshape(-1)
             data_list[key].append(data)
 
-    if not isinstance(ckpt_file_name, str):
-        raise ValueError("The ckpt_file_name must be string.")
     ckpt_file_name = os.path.realpath(ckpt_file_name)
     if async_save:
         thr = Thread(target=_exec_save, args=(ckpt_file_name, data_list, enc_key, enc_mode), name="asyn_save_ckpt")
@@ -331,8 +329,7 @@ def load(file_name, **kwargs):
     Examples:
         >>> import numpy as np
         >>> import mindspore.nn as nn
-        >>> from mindspore import Tensor
-        >>> from mindspore.train import export, load
+        >>> from mindspore import Tensor, export, load
         >>>
         >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones")
         >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32))
@@ -602,8 +599,6 @@ def _save_graph(network, file_name):
     """
     logger.info("Execute the process of saving graph.")
 
-    if not isinstance(file_name, str):
-        raise ValueError("The ckpt_file_name must be string.")
     file_name = os.path.realpath(file_name)
     graph_pb = network.get_func_graph_proto()
     if graph_pb:
@@ -719,7 +714,7 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs):
               Default: 127.5.
             - std_dev (float): The variance of input data after preprocessing,
               used for quantizing the first layer of network. Default: 127.5.
-            - enc_key (str): Byte type key used for encryption. Tha valid length is 16, 24, or 32.
+            - enc_key (byte): Byte type key used for encryption. Tha valid length is 16, 24, or 32.
             - enc_mode (str): Specifies the encryption mode, take effect when enc_key is set.
               Option: 'AES-GCM' | 'AES-CBC'. Default: 'AES-GCM'.
 
@@ -733,11 +728,8 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs):
     """
     logger.info("exporting model file:%s format:%s.", file_name, file_format)
     check_input_data(*inputs, data_class=Tensor)
-    if not isinstance(file_name, str):
-        raise ValueError("Args file_name {} must be string, please check it".format(file_name))
-    file_name = os.path.realpath(file_name)
-
     Validator.check_file_name_by_regular(file_name)
+    file_name = os.path.realpath(file_name)
     net = _quant_export(net, *inputs, file_format=file_format, **kwargs)
     if 'enc_key' in kwargs.keys():
         if file_format != 'MINDIR':
@@ -1199,9 +1191,8 @@ def merge_sliced_parameter(sliced_parameters, strategy=None):
 
     Examples:
         >>> import numpy as np
-        >>> from mindspore import Tensor
+        >>> from mindspore import Tensor, merge_sliced_parameter
         >>> from mindspore.common.parameter import Parameter
-        >>> from mindspore.train import merge_sliced_parameter
         >>>
         >>> sliced_parameters = [
         ...                      Parameter(Tensor(np.array([0.00023915, 0.00013939, -0.00098059])),
diff --git a/mindspore/train/train_thor/convert_utils.py b/mindspore/train/train_thor/convert_utils.py
index 34d6166e450..7ce34a94b95 100644
--- a/mindspore/train/train_thor/convert_utils.py
+++ b/mindspore/train/train_thor/convert_utils.py
@@ -195,7 +195,7 @@ class ConvertModelUtils():
         Examples:
             >>> from mindspore.nn.optim import thor
             >>> from mindspore.train.model import Model
-            >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager
+            >>> from mindspore import FixedLossScaleManager
             >>>
             >>> net = Net()
             >>> loss_manager = FixedLossScaleManager(128, drop_overflow_update=False)
diff --git a/model_zoo/official/cv/centerface/src/centerface.py b/model_zoo/official/cv/centerface/src/centerface.py
index aae19169a39..b023ada4c6c 100644
--- a/model_zoo/official/cv/centerface/src/centerface.py
+++ b/model_zoo/official/cv/centerface/src/centerface.py
@@ -310,8 +310,8 @@ class TrainingWrapper(nn.Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
 
-        ret = (loss, cond, sens)
-        return F.depend(ret, self.optimizer(grads))
+        self.optimizer(grads)
+        return (loss, cond, sens)
 
 
 class CenterFaceWithNms(nn.Cell):
diff --git a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
index 3e46d30db0f..60af01aae9f 100644
--- a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
+++ b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py
@@ -135,10 +135,8 @@ class CNNCTCTrainOneStepWithLossScaleCell(nn.Cell):
             #apply grad reducer on grads
             grads = self.grad_reducer(grads)
 
-        success = self.optimizer(grads)
-
-        ret = (loss, scaling_sens)
-        return F.depend(ret, success)
+        self.optimizer(grads)
+        return (loss, scaling_sens)
 
 class CNNCTC_Model(nn.Cell):
 
diff --git a/model_zoo/official/cv/crnn/src/crnn_for_train.py b/model_zoo/official/cv/crnn/src/crnn_for_train.py
index fad288c36f4..90a3d83e659 100644
--- a/model_zoo/official/cv/crnn/src/crnn_for_train.py
+++ b/model_zoo/official/cv/crnn/src/crnn_for_train.py
@@ -108,4 +108,5 @@ class TrainOneStepCellWithGradClip(Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py
index 172867b4b1b..1871eb65f58 100755
--- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py
+++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py
@@ -184,4 +184,5 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py
index 4bd4dc7a951..3c8b2be5e13 100755
--- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py
+++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py
@@ -109,7 +109,7 @@ class AttnDecoderRNN(nn.Cell):
         output = self.relu(output)
 
         gru_hidden = self.squeeze1(hidden)
-        output, hidden, _, _, _, _ = self.gru(output, gru_hidden)
+        output, hidden = self.gru(output, gru_hidden)
         output = self.squeeze1(output)
         output = self.log_softmax(self.out(output))
 
diff --git a/model_zoo/official/cv/ctpn/default_config.yaml b/model_zoo/official/cv/ctpn/default_config.yaml
index 8a0fc80f31a..40958e477d8 100644
--- a/model_zoo/official/cv/ctpn/default_config.yaml
+++ b/model_zoo/official/cv/ctpn/default_config.yaml
@@ -114,13 +114,13 @@ pretraining_dataset_file: ""
 finetune_dataset_file: ""
 
 # pretrain lr
-pre_base_lr: 0.0009
+pre_base_lr: 0.009
 pre_warmup_step: 30000
 pre_warmup_ratio: 1/3
 pre_total_epoch: 100
 
 # finetune lr
-fine_base_lr: 0.0005
+fine_base_lr: 0.005
 fine_warmup_step: 300
 fine_warmup_ratio: 1/3
 fine_total_epoch: 50
diff --git a/model_zoo/official/cv/ctpn/src/ctpn.py b/model_zoo/official/cv/ctpn/src/ctpn.py
index f764a5e4b65..1f1e2826a43 100644
--- a/model_zoo/official/cv/ctpn/src/ctpn.py
+++ b/model_zoo/official/cv/ctpn/src/ctpn.py
@@ -92,8 +92,8 @@ class CTPN(nn.Cell):
         self.num_step = config.num_step
         self.input_size = config.input_size
         self.hidden_size = config.hidden_size
-        self.vgg16_feature_extractor = VGG16FeatureExtraction()
-        self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same')
+        self.vgg16_feature_extractor = VGG16FeatureExtraction().to_float(mstype.float16)
+        self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same').to_float(mstype.float16)
         self.rnn = BiLSTM(self.config, batch_size=self.batch_size).to_float(mstype.float16)
         self.reshape = P.Reshape()
         self.transpose = P.Transpose()
diff --git a/model_zoo/official/cv/ctpn/src/network_define.py b/model_zoo/official/cv/ctpn/src/network_define.py
index e1458bdbac0..c95fbabdaf6 100644
--- a/model_zoo/official/cv/ctpn/src/network_define.py
+++ b/model_zoo/official/cv/ctpn/src/network_define.py
@@ -18,7 +18,6 @@ import time
 import numpy as np
 import mindspore.nn as nn
 from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore import ParameterTuple
 from mindspore.train.callback import Callback
@@ -140,4 +139,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/deeptext/src/network_define.py b/model_zoo/official/cv/deeptext/src/network_define.py
index 2fcd9bb6c44..0895741001b 100644
--- a/model_zoo/official/cv/deeptext/src/network_define.py
+++ b/model_zoo/official/cv/deeptext/src/network_define.py
@@ -18,7 +18,6 @@ import time
 import numpy as np
 import mindspore.nn as nn
 from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore import ParameterTuple
 from mindspore.train.callback import Callback
@@ -150,4 +149,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/faster_rcnn/default_config.yaml b/model_zoo/official/cv/faster_rcnn/default_config.yaml
index a1fc08caf1a..a6ca4fbe2bc 100644
--- a/model_zoo/official/cv/faster_rcnn/default_config.yaml
+++ b/model_zoo/official/cv/faster_rcnn/default_config.yaml
@@ -124,7 +124,7 @@ weight_decay: 0.00001
 epoch_size: 20
 save_checkpoint: True
 save_checkpoint_epochs: 1
-keep_checkpoint_max: 20
+keep_checkpoint_max: 5
 save_checkpoint_path: "./"
 
 # Number of threads used to process the dataset in parallel
diff --git a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml
index b6a16195514..c06337dada2 100644
--- a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml
+++ b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml
@@ -125,7 +125,7 @@ weight_decay: 0.00001
 epoch_size: 20
 save_checkpoint: True
 save_checkpoint_epochs: 1
-keep_checkpoint_max: 20
+keep_checkpoint_max: 5
 save_checkpoint_path: "./"
 
 # Number of threads used to process the dataset in parallel
diff --git a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml
index d2755194040..896c0b02fc5 100644
--- a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml
+++ b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml
@@ -125,7 +125,7 @@ weight_decay: 0.00001
 epoch_size: 20
 save_checkpoint: True
 save_checkpoint_epochs: 1
-keep_checkpoint_max: 20
+keep_checkpoint_max: 5
 save_checkpoint_path: "./"
 
 # Number of threads used to process the dataset in parallel
diff --git a/model_zoo/official/cv/faster_rcnn/src/network_define.py b/model_zoo/official/cv/faster_rcnn/src/network_define.py
index 531cd32c6e5..4219667f84e 100644
--- a/model_zoo/official/cv/faster_rcnn/src/network_define.py
+++ b/model_zoo/official/cv/faster_rcnn/src/network_define.py
@@ -18,7 +18,6 @@ import time
 import numpy as np
 import mindspore.nn as nn
 from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore import ParameterTuple
 from mindspore.train.callback import Callback
@@ -147,4 +146,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/maskrcnn/src/network_define.py b/model_zoo/official/cv/maskrcnn/src/network_define.py
index 662cd99cefb..2269c23db49 100644
--- a/model_zoo/official/cv/maskrcnn/src/network_define.py
+++ b/model_zoo/official/cv/maskrcnn/src/network_define.py
@@ -18,7 +18,6 @@ import time
 import numpy as np
 import mindspore.nn as nn
 from mindspore.common.tensor import Tensor
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore import ParameterTuple
 from mindspore.train.callback import Callback
@@ -146,5 +145,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py
index 7825a19ebcc..4c5b4a89b45 100644
--- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py
+++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py
@@ -177,7 +177,8 @@ class TrainOneStepCell(nn.Cell):
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 class MaskRcnn_Mobilenetv1_Infer(nn.Cell):
     def __init__(self, config):
diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
index f54dc4edeed..39787b928a0 100755
--- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
+++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py
@@ -934,4 +934,5 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/openpose/src/loss.py b/model_zoo/official/cv/openpose/src/loss.py
index 943b033279f..312dba9a633 100644
--- a/model_zoo/official/cv/openpose/src/loss.py
+++ b/model_zoo/official/cv/openpose/src/loss.py
@@ -199,4 +199,5 @@ class TrainOneStepWithClipGradientCell(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/psenet/README.md b/model_zoo/official/cv/psenet/README.md
index 9e22490b416..a8654fc34b1 100644
--- a/model_zoo/official/cv/psenet/README.md
+++ b/model_zoo/official/cv/psenet/README.md
@@ -2,6 +2,7 @@
 
 - [PSENet Description](#PSENet-description)
 - [Dataset](#dataset)
+- [Pretrained Model](#Pretrained-model)
 - [Features](#features)
     - [Mixed Precision](#mixed-precision)
 - [Environment Requirements](#environment-requirements)
@@ -15,6 +16,7 @@
         - [Distributed GPU Training](#distributed-gpu-training)
     - [Evaluation Process](#evaluation-process)
         - [Evaluation](#evaluation)
+        - [Result](#result)
     - [Inference Process](#inference-process)
         - [Export MindIR](#export-mindir)
         - [Infer on Ascend310](#infer-on-ascend310)
@@ -48,6 +50,19 @@ Dataset used: [ICDAR2015](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalizatio
 A training set of 1000 images containing about 4500 readable words
 A testing set containing about 2000 readable words
 
+unzip dataset files and needn't transform to mindrecord.
+
+# [Pretrained Model](#contents)
+
+download pytorch pretrained model: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth)
+transform pytorch model to mindspore model
+
+```shell
+cd src
+
+python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../
+```
+
 # [Environment Requirements](#contents)
 
 - Hardware（Ascend or GPU）
@@ -61,34 +76,100 @@ A testing set containing about 2000 readable words
 - install [pyblind11](https://github.com/pybind/pybind11)
 - install [Opencv3.4](https://docs.opencv.org/3.4.9/)
 
+```shell
+# install pybind11
+pip install pybind11
+
+# install opencv3.4.9
+wget https://github.com/opencv/opencv/archive/3.4.9.zip
+unzip 3.4.9.zip
+cd opencv-3.4.9
+mkdir build
+cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF ..
+make -j4 # -j指定线程数，用户根据机器配置修改参数
+make install
+
+# export environment variables
+export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64
+```
+
 # [Quick Start](#contents)
 
 After installing MindSpore via the official website, you can start training and evaluation as follows:
 
-```python
+```shell
 # run distributed training example
 bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR]
 
-#download opencv library
-download pyblind11, opencv3.4
-
-#install pyblind11 opencv3.4
-setup pyblind11(install the library by the pip command)
-setup opencv3.4(compile source code install the library)
-
-#enter the path ,run Makefile to product file
+#enter the path ,run Makefile
 cd ./src/ETSNET/pse/;make
 
 #run test.py
 python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path]
 
-#download eval method from [here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization).
-#click "My Methods" button,then download Evaluation Scripts
+#go to Evaluation Process for details
 download script.py
 # run evaluation example
 bash scripts/run_eval_ascend.sh
 ```
 
+- running on ModelArts
+- If you want to train the model on modelarts, you can refer to the [official guidance document] of modelarts (https://support.huaweicloud.com/modelarts/)
+
+```python
+#  Example of using distributed training on modelarts :
+#  Data set storage method
+
+#  ├── ICDAR2015                                                    # dir
+#    ├── train                                                      # train dir
+#       ├── ic15                                                    # train_dataset dir
+#           ├── ch4_training_images
+#           ├── ch4_training_localization_transcription_gt
+#       ├── train_predtrained                                       # predtrained dir
+#    ├── eval                                                       # eval dir
+#       ├── ic15                                                    # eval dataset dir
+#           ├── ch4_test_images
+#           ├── challenge4_Test_Task1_GT
+#       ├── checkpoint                                              # ckpt files dir
+
+# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。
+#       a. set "enable_modelarts=True" 。
+#          set "run_distribute=True"
+#          set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/"
+#          set "TRAIN_ROOT_DIR=/cache/data/ic15/"
+#          set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights  train_pretrained=""
+
+#       b. add "enable_modelarts=True" Parameters are on the interface of modearts。
+#          Set the parameters required by method a on the modelarts interface
+#          Note: The path parameter does not need to be quoted
+
+# (2) Set the path of the network configuration file  "_config_path=/The path of config in default_config.yaml/"
+# (3) Set the code path on the modelarts interface "/path/psenet"。
+# (4) Set the model's startup file on the modelarts interface "train.py" 。
+# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) ,
+# The output path of the model "Output file path" and the log path of the model "Job log path" 。
+# (6) start trainning the model。
+
+# Example of using model inference on modelarts
+# (1) Place the trained model to the corresponding position of the bucket。
+# (2) chocie a or b。
+#       a. set "enable_modelarts=True" 。
+#          set "TEST_ROOT_DIR=/cache/data/ic15/"
+#          set "ckpt=/cache/data/checkpoint/ckpt file"
+
+#       b. Add "enable_modelarts=True" parameter on the interface of modearts。
+#          Set the parameters required by method a on the modelarts interface
+#          Note: The path parameter does not need to be quoted
+
+# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/"
+# (4) Set the code path on the modelarts interface "/path/psenet"。
+# (5) Set the model's startup file on the modelarts interface "eval.py" 。
+# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) ,
+# The output path of the model "Output file path" and the log path of the model "Job log path"  。
+# (7) Start model inference。
+```
+
 # [Script Description](#contents)
 
 ## [Script and Sample Code](#contents)
@@ -156,7 +237,7 @@ Major parameters in default_config.yaml are:
   Please follow the instructions in the link below: <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools>.
 
 ```shell
-bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR]
+bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR]
 ```
 
 rank_table_file which is specified by RANK_TABLE_FILE is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
@@ -195,66 +276,27 @@ time: 2021-07-24 04:01:07, epoch: 90, step: 31, loss is 0.58495
 
 ### run test code
 
-```test
+```shell
+python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR]
+
+# click [Here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization) to download evaluation scripts
+# choose My Methods -> Offline evaluation -> Evaluation Scripts
+# download data and put it in /path_to_data
+mkdir eval_ic15
+ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip
+
+cd eval_ic15
+unzip script_test_ch4_t1_e1-1577983151.zip
+cd ..
+
+sh ./script/run_eval_ascend.sh
 python test.py --ckpt [CKPK PATH] --TEST_ROOT_DIR [TEST DATA DIR]
 
 ```
 
-- running on ModelArts
-- If you want to train the model on modelarts, you can refer to the [official guidance document] of modelarts (https://support.huaweicloud.com/modelarts/)
+### [Result](#contents)
 
-```python
-#  Example of using distributed training on modelarts :
-#  Data set storage method
-
-#  ├── ICDAR2015                                                    # dir
-#    ├── train                                                      # train dir
-#       ├── ic15                                                    # train_dataset dir
-#           ├── ch4_training_images
-#           ├── ch4_training_localization_transcription_gt
-#       ├── train_predtrained                                       # predtrained dir
-#    ├── eval                                                       # eval dir
-#       ├── ic15                                                    # eval dataset dir
-#           ├── ch4_test_images
-#           ├── challenge4_Test_Task1_GT
-#       ├── checkpoint                                              # ckpt files dir
-
-# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。
-#       a. set "enable_modelarts=True" 。
-#          set "run_distribute=True"
-#          set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/"
-#          set "TRAIN_ROOT_DIR=/cache/data/ic15/"
-#          set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights  train_pretrained=""
-
-#       b. add "enable_modelarts=True" Parameters are on the interface of modearts。
-#          Set the parameters required by method a on the modelarts interface
-#          Note: The path parameter does not need to be quoted
-
-# (2) Set the path of the network configuration file  "_config_path=/The path of config in default_config.yaml/"
-# (3) Set the code path on the modelarts interface "/path/psenet"。
-# (4) Set the model's startup file on the modelarts interface "train.py" 。
-# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) ,
-# The output path of the model "Output file path" and the log path of the model "Job log path" 。
-# (6) start trainning the model。
-
-# Example of using model inference on modelarts
-# (1) Place the trained model to the corresponding position of the bucket。
-# (2) chocie a or b。
-#       a. set "enable_modelarts=True" 。
-#          set "TEST_ROOT_DIR=/cache/data/ic15/"
-#          set "ckpt=/cache/data/checkpoint/ckpt file"
-
-#       b. Add "enable_modelarts=True" parameter on the interface of modearts。
-#          Set the parameters required by method a on the modelarts interface
-#          Note: The path parameter does not need to be quoted
-
-# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/"
-# (4) Set the code path on the modelarts interface "/path/psenet"。
-# (5) Set the model's startup file on the modelarts interface "eval.py" 。
-# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) ,
-# The output path of the model "Output file path" and the log path of the model "Job log path"  。
-# (7) Start model inference。
-```
+Calculated!{"precision": 0.8147966668299853，"recall"：0.8006740491092923，"hmean"：0.8076736279747451，"AP"：0}
 
 ### Eval Script for ICDAR2015
 
@@ -342,8 +384,9 @@ The `res` folder is generated in the upper-level directory. For details about th
 | Loss Function              | LossCallBack                                                |
 | outputs                    | probability                                                 |
 | Loss                       | 0.35                                                        |
-| Speed                      | 1pc: 444 ms/step;  8pcs: 446 ms/step                        |
-| Total time                 | 1pc: 75.48 h;  8pcs: 7.11 h                                |
+| Parameters                 | batch_size = 4                                              |
+| Speed                      | 1pc: 444 ms/step(fps: 9.0);  8pcs: 446 ms/step(fps: 71)     |
+| Total time                 | 1pc: 75.48 h;  8pcs: 7.11 h                                 |
 | Parameters (M)             | 27.36                                                       |
 | Checkpoint for Fine tuning | 109.44M (.ckpt file)                                        |
 | Scripts                    | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/psenet> |
diff --git a/model_zoo/official/cv/psenet/README_CN.md b/model_zoo/official/cv/psenet/README_CN.md
index 7355e1e44e8..770e9872bac 100644
--- a/model_zoo/official/cv/psenet/README_CN.md
+++ b/model_zoo/official/cv/psenet/README_CN.md
@@ -5,6 +5,7 @@
 - [PSENet示例](#psenet示例)
     - [概述](#概述)
 - [数据集](#数据集)
+- [预训练模型](#预训练模型)
 - [环境要求](#环境要求)
 - [快速入门](#快速入门)
     - [脚本说明](#脚本说明)
@@ -14,9 +15,7 @@
             - [分布式训练](#分布式训练)
         - [评估过程](#评估过程)
             - [运行测试代码](#运行测试代码)
-                - [ICDAR2015评估脚本](#icdar2015评估脚本)
-                - [用法](#用法)
-                - [结果](#结果)
+            - [结果](#结果)
         - [推理过程](#推理过程)
             - [导出MindIR](#导出mindir)
             - [在Ascend310执行推理](#在ascend310执行推理)
@@ -48,6 +47,21 @@
 训练集：包括约4500个可读单词的1000张图像。
 测试集：约2000个可读单词。
 
+下载得到的训练和推理数据解压后备用，不需要转为mindrecord数据
+
+# 预训练模型
+
+下载pytorch的预训练模型: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth)
+将pytorch模型转为mindspore模型
+
+```shell
+cd src
+
+python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../
+```
+
+执行完成，src的上层目录得到文件pretrained_model.ckpt文件，用于接下来的训练
+
 # 环境要求
 
 - 硬件：昇腾处理器（Ascend）
@@ -62,36 +76,100 @@
 - 安装[pyblind11](https://github.com/pybind/pybind11)
 - 安装[Opencv3.4](https://docs.opencv.org/3.4.9/)
 
+```shell
+# 使用pip安装pybind11
+pip install pybind11
+
+# 使用源码安装opencv3.4.9
+wget https://github.com/opencv/opencv/archive/3.4.9.zip
+unzip 3.4.9.zip
+cd opencv-3.4.9
+mkdir build
+cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF ..
+make -j4 # -j指定线程数，用户根据机器配置修改参数
+make install
+
+# opencv安装在/usr/local目录下，将该目录添加到环境变量中
+export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64
+```
+
 # 快速入门
 
 通过官方网站安装MindSpore后，您可以按照如下步骤进行训练和评估：
 
-```python
+```shell
 # 分布式训练运行示例
-bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR]
+# 第一个参数为rank_table文件，第二个参数为生成的预训练模型，第三个参数为下载的训练数据集
+bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR]
 
-# 下载opencv库
-download pyblind11, opencv3.4
-
-# 安装pyblind11 opencv3.4
-setup pyblind11(install the library by the pip command)
-setup opencv3.4(compile source code install the library)
-
-# 单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法
-# 点击"我的方法"按钮，下载评估脚本
-
-# 输入路径，运行Makefile，找到产品文件
+# 进入路径，运行Makefile
 cd ./src/ETSNET/pse/;make clean&&make
 
 # 运行test.py
-python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path]
-
+python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR]
 
+# 具体见评估过程
 download script.py
 # 运行评估示例
 bash scripts/run_eval_ascend.sh
 ```
 
+- 如果要在modelarts上进行模型的训练，可以参考modelarts的[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推理，具体操作如下：
+
+```ModelArts
+#  在ModelArts上使用分布式训练示例:
+#  数据集存放方式
+
+#  ├── ICDAR2015                                                    # dir
+#    ├── train                                                      # train dir
+#       ├── ic15                                                    # train_dataset dir
+#           ├── ch4_training_images
+#           ├── ch4_training_localization_transcription_gt
+#       ├── train_predtrained                                       # predtrained dir
+#    ├── eval                                                       # eval dir
+#       ├── ic15                                                    # eval dataset dir
+#           ├── ch4_test_images
+#           ├── challenge4_Test_Task1_GT
+#       ├── checkpoint                                              # ckpt files dir
+
+# (1) 选择a(修改yaml文件参数)或者b(ModelArts创建训练作业修改参数)其中一种方式。
+#       a. 设置 "enable_modelarts=True"
+#          设置 "run_distribute=True"
+#          设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/"
+#          设置 "TRAIN_ROOT_DIR=/cache/data/ic15/"
+#          设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练权重 pre_trained=""
+
+#       b. 增加 "enable_modelarts=True" 参数在modearts的界面上。
+#          在modelarts的界面上设置方法a所需要的参数
+#          注意：路径参数不需要加引号
+
+# (2)设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/"
+# (3) 在modelarts的界面上设置代码的路径 "/path/psenet"。
+# (4) 在modelarts的界面上设置模型的启动文件 "train.py" 。
+# (5) 在modelarts的界面上设置模型的数据路径 ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) ,
+# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。
+# (6) 开始模型的训练。
+
+# 在modelarts上使用模型推理的示例
+# (1) 把训练好的模型地方到桶的对应位置。
+# (2) 选择a或者b其中一种方式。
+#        a.设置 "enable_modelarts=True"
+#          设置 "TEST_ROOT_DIR=/cache/data/ic15"
+#          设置 "ckpt=/cache/data/checkpoint/ckpt file"
+
+#       b. 增加 "enable_modelarts=True" 参数在modearts的界面上。
+#          在modelarts的界面上设置方法a所需要的参数
+#          注意：路径参数不需要加引号
+
+# (3) 设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/"
+# (4) 在modelarts的界面上设置代码的路径 "/path/psenet"。
+# (5) 在modelarts的界面上设置模型的启动文件 "eval.py" 。
+# (6) 在modelarts的界面上设置模型的数据路径 "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) ,
+# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。
+# (7) 开始模型的推理。
+```
+
 ## 脚本说明
 
 ## 脚本和样例代码
@@ -153,7 +231,8 @@ bash scripts/run_eval_ascend.sh
   请遵循链接中的说明：[链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)
 
 ```shell
-bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR]
+# 第一个参数为rank_table文件，第二个参数为生成的预训练模型，第三个参数为下载的训练数据集
+bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR]
 ```
 
 上述shell脚本将在后台运行分布训练。可以通过`device[X]/test_*.log`文件查看结果。
@@ -173,81 +252,24 @@ device_1/log:epcoh： 2, step: 40，loss is 0.76629
 
 ### 运行测试代码
 
-```test
-python test.py --ckpt [CKPK PATH] --TEST_ROOT_DIR [TEST DATA DIR]
-
-```
-
-- 如果要在modelarts上进行模型的训练，可以参考modelarts的[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推理，具体操作如下：
-
-```ModelArts
-#  在ModelArts上使用分布式训练示例:
-#  数据集存放方式
-
-#  ├── ICDAR2015                                                    # dir
-#    ├── train                                                      # train dir
-#       ├── ic15                                                    # train_dataset dir
-#           ├── ch4_training_images
-#           ├── ch4_training_localization_transcription_gt
-#       ├── train_predtrained                                       # predtrained dir
-#    ├── eval                                                       # eval dir
-#       ├── ic15                                                    # eval dataset dir
-#           ├── ch4_test_images
-#           ├── challenge4_Test_Task1_GT
-#       ├── checkpoint                                              # ckpt files dir
-
-# (1) 选择a(修改yaml文件参数)或者b(ModelArts创建训练作业修改参数)其中一种方式。
-#       a. 设置 "enable_modelarts=True"
-#          设置 "run_distribute=True"
-#          设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/"
-#          设置 "TRAIN_ROOT_DIR=/cache/data/ic15/"
-#          设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练权重 pre_trained=""
-
-#       b. 增加 "enable_modelarts=True" 参数在modearts的界面上。
-#          在modelarts的界面上设置方法a所需要的参数
-#          注意：路径参数不需要加引号
-
-# (2)设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/"
-# (3) 在modelarts的界面上设置代码的路径 "/path/psenet"。
-# (4) 在modelarts的界面上设置模型的启动文件 "train.py" 。
-# (5) 在modelarts的界面上设置模型的数据路径 ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) ,
-# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。
-# (6) 开始模型的训练。
-
-# 在modelarts上使用模型推理的示例
-# (1) 把训练好的模型地方到桶的对应位置。
-# (2) 选择a或者b其中一种方式。
-#        a.设置 "enable_modelarts=True"
-#          设置 "TEST_ROOT_DIR=/cache/data/ic15"
-#          设置 "ckpt=/cache/data/checkpoint/ckpt file"
-
-#       b. 增加 "enable_modelarts=True" 参数在modearts的界面上。
-#          在modelarts的界面上设置方法a所需要的参数
-#          注意：路径参数不需要加引号
-
-# (3) 设置网络配置文件的路径 "_config_path=/The path of config in default_config.yaml/"
-# (4) 在modelarts的界面上设置代码的路径 "/path/psenet"。
-# (5) 在modelarts的界面上设置模型的启动文件 "eval.py" 。
-# (6) 在modelarts的界面上设置模型的数据路径 "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) ,
-# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。
-# (7) 开始模型的推理。
-```
-
-### ICDAR2015评估脚本
-
-#### 用法
-
-第一步：单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法。  
-
-第二步：单击"我的方法"按钮，下载评估脚本。
-
-第三步：建议将评估方法根符号链接到$MINDSPORE/model_zoo/psenet/eval_ic15/。如果您的文件夹结构不同，您可能需要更改评估脚本文件中的相应路径。  
-
 ```shell
-bash ./script/run_eval_ascend.sh.sh  
+# 第一个参数为训练得到的模型文件，第二个参数为下载得到的推理数据集
+python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR]
+
+# 单击[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法
+# 点击"My Methods"按钮，选择Offline evaluation -> Evaluation Scripts
+# 下载完成后，将数据放在/path_to_data路径
+mkdir eval_ic15
+ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip
+
+cd eval_ic15
+unzip script_test_ch4_t1_e1-1577983151.zip
+cd ..
+
+bash ./script/run_eval_ascend.sh
 ```
 
-#### 结果
+### 结果
 
 Calculated!{"precision": 0.8147966668299853，"recall"：0.8006740491092923，"hmean"：0.8076736279747451，"AP"：0}
 
@@ -317,7 +339,8 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]
 | 损失函数 | LossCallBack |
 | 输出 | 概率 |
 | 损失 | 0.35 |
-| 速度 | 1卡：444毫秒/步；8卡：446毫秒/步
+| 训练参数 | batch_size = 4 |
+| 速度 | 1卡：444毫秒/步(fps: 9.0)；8卡：446毫秒/步(fps: 71) |
 | 总时间 | 1卡：75.48小时；8卡：7.11小时|
 | 参数(M) | 27.36 |
 | 微调检查点 | 109.44M （.ckpt file） |
diff --git a/model_zoo/official/cv/psenet/requirements.txt b/model_zoo/official/cv/psenet/requirements.txt
index 9d316731512..bee48e58af9 100644
--- a/model_zoo/official/cv/psenet/requirements.txt
+++ b/model_zoo/official/cv/psenet/requirements.txt
@@ -2,3 +2,5 @@ numpy
 opencv-python
 pillow
 pyyaml
+Polygon3
+pyclipper
diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile
index 541e9ba3b37..eac5bc1e8e6 100644
--- a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile
+++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile
@@ -13,8 +13,7 @@
 # limitations under the License.
 # ============================================================================
 
-mindspore_home = ${MINDSPORE_HOME}
-CXXFLAGS = -I include -I ${mindspore_home}/model_zoo/official/cv/psenet -std=c++11 -O3
+CXXFLAGS = -std=c++11 -O3
 CXX_SOURCES = adaptor.cpp
 opencv_home = ${OPENCV_HOME}
 OPENCV = -I$(opencv_home)/include -L$(opencv_home)/lib64 -lopencv_superres -lopencv_ml -lopencv_objdetect \
diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp
index 8885e848fec..f4e343e9fc3 100644
--- a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp
+++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "src/ETSNET/pse/adaptor.h"
 #include <pybind11/pybind11.h>
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
@@ -26,6 +25,7 @@
 #include <opencv2/core/core.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
+#include "./adaptor.h"
 
 using std::vector;
 using std::queue;
diff --git a/model_zoo/official/cv/psenet/src/network_define.py b/model_zoo/official/cv/psenet/src/network_define.py
index 09ffe610209..3f55a996903 100644
--- a/model_zoo/official/cv/psenet/src/network_define.py
+++ b/model_zoo/official/cv/psenet/src/network_define.py
@@ -23,7 +23,6 @@ from mindspore import ParameterTuple
 from mindspore.common.tensor import Tensor
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.train.callback import Callback
 
 __all__ = ['LossCallBack', 'WithLossCell', 'TrainOneStepCell']
@@ -144,4 +143,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(img, gt_text, gt_kernels, training_mask, self.sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/psenet/train.py b/model_zoo/official/cv/psenet/train.py
index b11e45ecce5..d8519e2fd51 100644
--- a/model_zoo/official/cv/psenet/train.py
+++ b/model_zoo/official/cv/psenet/train.py
@@ -100,7 +100,7 @@ def train():
 
     if config.pre_trained:
         param_dict = load_checkpoint(config.pre_trained)
-        load_param_into_net(net, param_dict)
+        load_param_into_net(net, param_dict, strict_load=True)
         print('Load Pretrained parameters done!')
 
     criterion = DiceLoss(batch_size=config.TRAIN_BATCH_SIZE)
diff --git a/model_zoo/official/cv/resnet/README.md b/model_zoo/official/cv/resnet/README.md
index 2a2271a6bfb..0878db4c40e 100644
--- a/model_zoo/official/cv/resnet/README.md
+++ b/model_zoo/official/cv/resnet/README.md
@@ -202,6 +202,19 @@ If you want to run in modelarts, please check the official documentation of [mod
 .
 └──resnet
   ├── README.md
+  ├── config                               # parameter configuration
+    ├── resnet18_cifar10_config.yaml
+    ├── resnet18_cifar10_config_gpu.yaml
+    ├── resnet18_imagenet2012_config.yaml
+    ├── resnet18_imagenet2012_config_gpu.yaml
+    ├── resnet34_imagenet2012_config.yaml
+    ├── resnet50_cifar10_config.yaml
+    ├── resnet50_imagenet2012_Acc_config.yaml     # High performance version: The performance is improved by more than 10% and the precision decrease less than 1%
+    ├── resnet50_imagenet2012_Ascend_Thor_config.yaml
+    ├── resnet50_imagenet2012_config.yaml
+    ├── resnet50_imagenet2012_GPU_Thor_config.yaml
+    ├── resnet101_imagenet2012_config.yaml
+    └── se-resnet50_imagenet2012_config.yaml
   ├── scripts
     ├── run_distribute_train.sh            # launch ascend distributed training(8 pcs)
     ├── run_parameter_server_train.sh      # launch ascend parameter server training(8 pcs)
@@ -226,16 +239,6 @@ If you want to run in modelarts, please check the official documentation of [mod
        ├──device_adapter.py                # device adapter
        ├──local_adapter.py                 # local adapter
        ├──moxing_adapter.py                # moxing adapter
-  ├── resnet18_cifar10_config.yaml         # parameter configuration
-  ├── resnet18_imagenet2012_config.yaml    # parameter configuration
-  ├── resnet34_imagenet2012_config.yaml    # parameter configuration
-  ├── resnet50_cifar10_config.yaml         # parameter configuration
-  ├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration
-  ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration
-  ├── resnet50_imagenet2012_config.yaml    # parameter configuration
-  ├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration
-  ├── resnet101_imagenet2012_config.yaml   # parameter configuration
-  ├── se-resnet50_imagenet2012_config.yaml # parameter configuration
   ├── export.py                            # export model for inference
   ├── mindspore_hub_conf.py                # mindspore hub interface
   ├── eval.py                              # eval net
@@ -713,42 +716,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
 
 #### ResNet18 on CIFAR-10
 
-| Parameters                 | Ascend 910                                                   |
-| -------------------------- | -------------------------------------- |
-| Model Version              | ResNet18                                                |
-| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8  |
-| uploaded Date              | 02/25/2021 (month/day/year)                          |
-| MindSpore Version          | 1.1.1                                                       |
-| Dataset                    | CIFAR-10                                                    |
-| Training Parameters        | epoch=90, steps per epoch=195, batch_size = 32             |
-| Optimizer                  | Momentum                                                         |
-| Loss Function              | Softmax Cross Entropy                                       |
-| outputs                    | probability                                                 |
-| Loss                       | 0.0002519517                                                    |
-| Speed                      | 13 ms/step（8pcs）                     |
-| Total time                 | 4 mins                          |
-| Parameters (M)             | 11.2                                                        |
-| Checkpoint for Fine tuning | 86M (.ckpt file)                                         |
+| Parameters                 | Ascend 910                                                   | GPU |
+| -------------------------- | -------------------------------------- | -------------------------------------- |
+| Model Version              | ResNet18                                                |  ResNet18 |
+| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8  |  PCIE V100-32G        |
+| uploaded Date              | 02/25/2021 (month/day/year)                          | 07/23/2021 (month/day/year)  |
+| MindSpore Version          | 1.1.1                                                       | 1.3.0 |
+| Dataset                    | CIFAR-10                                                    | CIFAR-10 |
+| Training Parameters        | epoch=90, steps per epoch=195, batch_size = 32             | epoch=90, steps per epoch=195, batch_size = 32      |
+| Optimizer                  | Momentum                                                         | Momentum                                   |
+| Loss Function              | Softmax Cross Entropy                                       | Softmax Cross Entropy                             |
+| outputs                    | probability                                                 | probability               |
+| Loss                       | 0.0002519517                                                    |  0.0015517382    |
+| Speed                      | 13 ms/step（8pcs）                     | 29 ms/step（8pcs） |
+| Total time                 | 4 mins                          | 11 minds    |
+| Parameters (M)             | 11.2                                                        | 11.2          |
+| Checkpoint for Fine tuning | 86M (.ckpt file)                                         | 85.4 (.ckpt file)     |
 | Scripts                    | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
 
 #### ResNet18 on ImageNet2012
 
-| Parameters                 | Ascend 910                                                   |
-| -------------------------- | -------------------------------------- |
-| Model Version              | ResNet18                                                |
-| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8  |
-| uploaded Date              | 02/25/2021 (month/day/year)  ；                        |
-| MindSpore Version          | 1.1.1                                                       |
-| Dataset                    | ImageNet2012                                                    |
-| Training Parameters        | epoch=90, steps per epoch=626, batch_size = 256             |
-| Optimizer                  | Momentum                                                         |
-| Loss Function              | Softmax Cross Entropy                                       |
-| outputs                    | probability                                                 |
-| Loss                       | 2.15702                                                   |
-| Speed                      | 110ms/step（8pcs）  (may need to set_numa_enbale in dataset.py)                    |
-| Total time                 | 110 mins                        |
-| Parameters (M)             | 11.7                                                       |
-| Checkpoint for Fine tuning | 90M (.ckpt file)                                         |
+| Parameters                 | Ascend 910                                                   | GPU |
+| -------------------------- | -------------------------------------- | -------------------------------------- |
+| Model Version              | ResNet18                                                | ResNet18     |
+| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8  | PCIE V100-32G   |
+| uploaded Date              | 02/25/2021 (month/day/year)  ；                        | 07/23/2021 (month/day/year)  |
+| MindSpore Version          | 1.1.1                                                       | 1.3.0 |
+| Dataset                    | ImageNet2012                                                    | ImageNet2012 |
+| Training Parameters        | epoch=90, steps per epoch=626, batch_size = 256             | epoch=90, steps per epoch=625, batch_size = 256             |
+| Optimizer                  | Momentum                                                         | Momentum  |
+| Loss Function              | Softmax Cross Entropy                                       | Softmax Cross Entropy    |
+| outputs                    | probability                                                 | probability              |
+| Loss                       | 2.15702                                                   | 2.168664 |
+| Speed                      | 110ms/step（8pcs）  (may need to set_numa_enbale in dataset.py)                    | 107 ms/step（8pcs）                |
+| Total time                 | 110 mins                        | 130 mins            |
+| Parameters (M)             | 11.7                                                       | 11.7 |
+| Checkpoint for Fine tuning | 90M (.ckpt file)                                         |  90M (.ckpt file)                                         |
 | Scripts                    | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
 
 #### ResNet50 on CIFAR-10
diff --git a/model_zoo/official/cv/resnet/README_CN.md b/model_zoo/official/cv/resnet/README_CN.md
index 18c39d777e6..64a97707f16 100755
--- a/model_zoo/official/cv/resnet/README_CN.md
+++ b/model_zoo/official/cv/resnet/README_CN.md
@@ -188,6 +188,19 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]  [CONFIG_PATH]
 .
 └──resnet
   ├── README.md
+  ├── config                              # 参数配置
+    ├── resnet18_cifar10_config.yaml
+    ├── resnet18_cifar10_config_gpu.yaml
+    ├── resnet18_imagenet2012_config.yaml
+    ├── resnet18_imagenet2012_config_gpu.yaml
+    ├── resnet34_imagenet2012_config.yaml
+    ├── resnet50_cifar10_config.yaml
+    ├── resnet50_imagenet2012_Acc_config.yaml     # 高性能版本：性能提高超过10%而精度下降少于1%
+    ├── resnet50_imagenet2012_Ascend_Thor_config.yaml
+    ├── resnet50_imagenet2012_config.yaml
+    ├── resnet50_imagenet2012_GPU_Thor_config.yaml
+    ├── resnet101_imagenet2012_config.yaml
+    ├── se-resnet50_imagenet2012_config.yaml
   ├── scripts
     ├── run_distribute_train.sh            # 启动Ascend分布式训练（8卡）
     ├── run_parameter_server_train.sh      # 启动Ascend参数服务器训练(8卡)
@@ -209,17 +222,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH]  [CONFIG_PATH]
        ├── device_adapter.py               # 设备配置
        ├── local_adapter.py                # 本地设备配置
        └── moxing_adapter.py               # modelarts设备配置
-  ├── resnet18_cifar10_config.yaml         # 参数配置
-  ├── resnet18_imagenet2012_config.yaml    # 参数配置
-  ├── resnet34_imagenet2012_config.yaml    # 参数配置
-  ├── resnet50_cifar10_config.yaml         # 参数配置
-  ├── resnet50_imagenet2012_Acc_config.yaml # 参数配置
-  ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # 参数配置
-  ├── resnet50_imagenet2012_config.yaml    # 参数配置
-  ├── resnet50_imagenet2012_GPU_Thor_config.yaml # 参数配置
-  ├── resnet101_imagenet2012_config.yaml   # 参数配置
-  ├── se-resnet50_imagenet2012_config.yaml # 参数配置
-  ├── eval.py                              # 评估网络
   ├── eval.py                              # 评估网络
   └── train.py                             # 训练网络
 ```
@@ -674,42 +676,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
 
 #### CIFAR-10上的ResNet18
 
-| 参数                 | Ascend 910                                                   |
-| -------------------------- | -------------------------------------- |
-| 模型版本              | ResNet18                                                |
-| 资源                   | Ascend 910；CPU 2.60GHz，192核；内存 755G；系统 Euler2.8  |
-| 上传日期              | 2021-02-25                          |
-| MindSpore版本          | 1.1.1                                                       |
-| 数据集                    | CIFAR-10                                                    |
-| 训练参数        | epoch=90, steps per epoch=195, batch_size = 32             |
-| 优化器                  | Momentum                                                         |
-| 损失函数              | Softmax交叉熵                                       |
-| 输出                    | 概率                                                 |
-| 损失                       | 0.0002519517                                                   |
-| 速度                      | 13毫秒/步（8卡）                     |
-| 总时长                 | 4分钟                          |
-| 参数(M)             | 11.2                                                         |
+| 参数                 | Ascend 910                                                   | GPU |
+| -------------------------- | -------------------------------------- | -------------------------------------- |
+| 模型版本              | ResNet18                                                | ResNet18 |
+| 资源                   | Ascend 910；CPU 2.60GHz，192核；内存 755G；系统 Euler2.8  | PCIE V100-32G        |
+| 上传日期              | 2021-02-25                          | 2021-07-23     |
+| MindSpore版本          | 1.1.1                                                       | 1.3.0                |
+| 数据集                    | CIFAR-10                                                    | CIFAR-10           |
+| 训练参数        | epoch=90, steps per epoch=195, batch_size = 32             | epoch=90, steps per epoch=195, batch_size = 32  |
+| 优化器                  | Momentum                                                         | Momentum|
+| 损失函数              | Softmax交叉熵                                       | Softmax交叉熵 |
+| 输出                    | 概率                                                 | 概率 |
+| 损失                       | 0.0002519517                                                   | 0.0015517382    |
+| 速度                      | 13毫秒/步（8卡）                     | 29毫秒/步（8卡）       |
+| 总时长                 | 4分钟                          | 11分钟       |
+| 参数(M)             | 11.2                                                         | 11.2                         |
 | 微调检查点 | 86（.ckpt文件）                                         |
 | 脚本                    | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
 
 #### ImageNet2012上的ResNet18
 
-| 参数                 | Ascend 910                                                   |
-| -------------------------- | -------------------------------------- |
-| 模型版本              | ResNet18                                               |
-| 资源                   |  Ascend 910；CPU 2.60GHz，192核；内存 755G；系统 Euler2.8 |
-| 上传日期              | 2020-04-01  ;                        |
-| MindSpore版本          | 1.1.1                                                       |
-| 数据集                    | ImageNet2012                                                    |
-| 训练参数        | epoch=90, steps per epoch=626, batch_size = 256             |
-| 优化器                  | Momentum                                                         |
-| 损失函数              | Softmax交叉熵                                       |
-| 输出                    | 概率                                                 |
-| 损失                       | 2.15702                                                       |
-| 速度                      | 110毫秒/步（8卡） (可能需要在datasetpy中增加set_numa_enbale绑核操作)                    |
-| 总时长                 | 110分钟                          |
-| 参数(M)             | 11.7                                                         |
-| 微调检查点| 90M（.ckpt文件）                                         |
+| 参数                 | Ascend 910                                                   | GPU |
+| -------------------------- | -------------------------------------- | -------------------------------------- |
+| 模型版本              | ResNet18                                               | RESNET18 |
+| 资源                   |  Ascend 910；CPU 2.60GHz，192核；内存 755G；系统 Euler2.8 |  PCIE V100-32G        |
+| 上传日期              | 2020-04-01  ;                        | 2021-07-23 |
+| MindSpore版本          | 1.1.1                                                       | 1.3.0 |
+| 数据集                    | ImageNet2012                                                    | ImageNet2012           |
+| 训练参数        | epoch=90, steps per epoch=626, batch_size = 256             |  epoch=90, steps per epoch=625, batch_size = 256  |
+| 优化器                  | Momentum                                                         |  Momentum|
+| 损失函数              | Softmax交叉熵                                       | Softmax交叉熵 |
+| 输出                    | 概率                                                 |  概率 |
+| 损失                       | 2.15702                                                       | 2.168664 |
+| 速度                      | 110毫秒/步（8卡） (可能需要在datasetpy中增加set_numa_enbale绑核操作)                    | 107毫秒/步（8卡） |
+| 总时长                 | 110分钟                          | 130分钟       |
+| 参数(M)             | 11.7                                                         | 11.7 |
+| 微调检查点| 90M（.ckpt文件）                                         |  90M（.ckpt文件） |
 | 脚本                    | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) |
 
 #### CIFAR-10上的ResNet50
diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh
index 6967dae9a80..c5f3903be96 100755
--- a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh
@@ -35,7 +35,7 @@ get_real_path(){
 
 PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
-CONFIG_FILE=$3
+CONFIG_FILE=$(get_real_path $3)
 
 if [ $# == 4 ]
 then 
@@ -101,7 +101,7 @@ do
     mkdir ./train_parallel$i
     cp ../*.py ./train_parallel$i
     cp *.sh ./train_parallel$i
-    cp -r ../*.yaml ./train_parallel$i
+    cp -r ../config/*.yaml ./train_parallel$i
     cp -r ../src ./train_parallel$i
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
index b44116f9923..39dacf98653 100755
--- a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh
@@ -34,7 +34,7 @@ get_real_path(){
 }
 
 PATH1=$(get_real_path $1)
-CONFIG_FILE=$2
+CONFIG_FILE=$(get_real_path $2)
 
 if [ $# == 3 ]
 then 
@@ -80,7 +80,7 @@ rm -rf ./train_parallel
 mkdir ./train_parallel
 cp ../*.py ./train_parallel
 cp *.sh ./train_parallel
-cp -r ../*.yaml ./train_parallel
+cp -r ../config/*.yaml ./train_parallel
 cp -r ../src ./train_parallel
 cd ./train_parallel || exit
 
diff --git a/model_zoo/official/cv/resnet/scripts/run_eval.sh b/model_zoo/official/cv/resnet/scripts/run_eval.sh
index 85c75682c3b..97a7ba85c71 100755
--- a/model_zoo/official/cv/resnet/scripts/run_eval.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_eval.sh
@@ -30,7 +30,7 @@ get_real_path(){
 
 PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
-CONFIG_FILE=$3
+CONFIG_FILE=$(get_real_path $3)
 
 
 if [ ! -d $PATH1 ]
@@ -58,7 +58,7 @@ fi
 mkdir ./eval
 cp ../*.py ./eval
 cp *.sh ./eval
-cp -r ../*.yaml ./eval
+cp -r ../config/*.yaml ./eval
 cp -r ../src ./eval
 cd ./eval || exit
 env > env.log
diff --git a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh
index ed93cb09c08..97114b7a456 100755
--- a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh
@@ -30,7 +30,7 @@ get_real_path(){
 
 PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
-CONFIG_FILE=$3
+CONFIG_FILE=$(get_real_path $3)
 
 
 if [ ! -d $PATH1 ]
@@ -58,7 +58,7 @@ fi
 mkdir ./eval
 cp ../*.py ./eval
 cp *.sh ./eval
-cp -r ../*.yaml ./eval
+cp -r ../config/*.yaml ./eval
 cp -r ../src ./eval
 cd ./eval || exit
 env > env.log
diff --git a/model_zoo/official/cv/resnet/scripts/run_infer.sh b/model_zoo/official/cv/resnet/scripts/run_infer.sh
index 34ae0fadadc..b73e956c18a 100644
--- a/model_zoo/official/cv/resnet/scripts/run_infer.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_infer.sh
@@ -30,7 +30,7 @@ get_real_path(){
 
 PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
-CONFIG_FILE=$3
+CONFIG_FILE=$(get_real_path $3)
 
 
 if [ ! -d $PATH1 ]
@@ -56,7 +56,7 @@ then
     rm -rf ./infer
 fi
 mkdir ./infer
-cp ../*.yaml ./infer
+cp ../config/*.yaml ./infer
 cp ../*.py ./infer
 cp *.sh ./infer
 cp -r ../src ./infer
diff --git a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh
index d49002a575b..79ff34bb8d3 100644
--- a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh
@@ -87,7 +87,7 @@ function preprocess_data()
     fi
     mkdir preprocess_Result
     BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")")
-    CONFIG_FILE="${BASE_PATH}/$1"
+    CONFIG_FILE="${BASE_PATH}/config/$1"
 
     python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log
 }
diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh
index e3dd2d6372a..0cd85f336cd 100644
--- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh
@@ -30,7 +30,7 @@ get_real_path(){
 
 PATH1=$(get_real_path $1)
 PATH2=$(get_real_path $2)
-CONFIG_FILE=$3
+CONFIG_FILE=$(get_real_path $3)
 
 if [ $# == 4 ]
 then 
@@ -71,7 +71,7 @@ export DEVICE_ID=0
 export RANK_ID=0
 rm -rf ./sched
 mkdir ./sched
-cp ../*.yaml ./sched
+cp ../config/*.yaml ./sched
 cp ../*.py ./sched
 cp *.sh ./sched
 cp -r ../src ./sched
@@ -97,7 +97,7 @@ do
     export RANK_ID=$i
     rm -rf ./server_$i
     mkdir ./server_$i
-    cp ../*.yaml ./server_$i
+    cp ../config/*.yaml ./server_$i
     cp ../*.py ./server_$i
     cp *.sh ./server_$i
     cp -r ../src ./server_$i
@@ -125,7 +125,7 @@ do
     export RANK_ID=$i
     rm -rf ./worker_$i
     mkdir ./worker_$i
-    cp ../*.yaml ./worker_$i
+    cp ../config/*.yaml ./worker_$i
     cp ../*.py ./worker_$i
     cp *.sh ./worker_$i
     cp -r ../src ./worker_$i
diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh
index ba83f209644..38eac825e35 100755
--- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh
@@ -29,7 +29,7 @@ get_real_path(){
 }
 
 PATH1=$(get_real_path $1)
-CONFIG_FILE=$2
+CONFIG_FILE=$(get_real_path $2)
 if [ $# == 3 ]
 then 
     PATH2=$(get_real_path $3)
@@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081
 export MS_ROLE=MS_SCHED
 rm -rf ./sched
 mkdir ./sched
-cp ../*.yaml ./sched 
+cp ../config/*.yaml ./sched 
 cp ../*.py ./sched
 cp *.sh ./sched
 cp -r ../src ./sched
@@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++));
 do
     rm -rf ./server_$i
     mkdir ./server_$i
-    cp ../*.yaml ./server_$i
+    cp ../config/*.yaml ./server_$i
     cp ../*.py ./server_$i
     cp *.sh ./server_$i
     cp -r ../src ./server_$i
@@ -110,7 +110,7 @@ done
 export MS_ROLE=MS_WORKER
 rm -rf ./worker
 mkdir ./worker
-cp ../*.yaml ./worker 
+cp ../config/*.yaml ./worker 
 cp ../*.py ./worker
 cp *.sh ./worker
 cp -r ../src ./worker
diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh
index 402e01a6869..a0381dbeafe 100755
--- a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh
@@ -34,7 +34,7 @@ get_real_path(){
 }
 
 PATH1=$(get_real_path $1)
-CONFIG_FILE=$2
+CONFIG_FILE=$(get_real_path $2)
 if [ $# == 3 ]
 then
     PATH2=$(get_real_path $3)
@@ -80,7 +80,7 @@ then
     rm -rf ./train
 fi
 mkdir ./train
-cp ../*.yaml ./train
+cp ../config/*.yaml ./train
 cp ../*.py ./train
 cp *.sh ./train
 cp -r ../src ./train
diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
index edb85580acb..581d5521911 100755
--- a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
+++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh
@@ -34,7 +34,7 @@ get_real_path(){
 }
 
 PATH1=$(get_real_path $1)
-CONFIG_FILE=$2
+CONFIG_FILE=$(get_real_path $2)
 
 if [ $# == 3 ]
 then
@@ -83,7 +83,7 @@ then
     rm -rf ./train
 fi
 mkdir ./train
-cp ../*.yaml ./train
+cp ../config/*.yaml ./train
 cp ../*.py ./train
 cp *.sh ./train
 cp -r ../src ./train
diff --git a/model_zoo/official/cv/resnet/src/model_utils/config.py b/model_zoo/official/cv/resnet/src/model_utils/config.py
index d8f6518f1ad..19678722f34 100644
--- a/model_zoo/official/cv/resnet/src/model_utils/config.py
+++ b/model_zoo/official/cv/resnet/src/model_utils/config.py
@@ -21,7 +21,7 @@ import argparse
 from pprint import pprint, pformat
 import yaml
 
-_config_path = "./resnet50_cifar10_config.yaml"
+_config_path = "./config/resnet50_cifar10_config.yaml"
 
 class Config:
     """
@@ -118,7 +118,7 @@ def get_config():
     parser = argparse.ArgumentParser(description="default name", add_help=False)
     current_dir = os.path.dirname(os.path.abspath(__file__))
     parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \
-        "../resnet50_cifar10_config.yaml"), help="Config file path")
+        "../config/resnet50_cifar10_config.yaml"), help="Config file path")
     path_args, _ = parser.parse_known_args()
     default, helper, choices = parse_yaml(path_args.config_path)
     pprint(default)
diff --git a/model_zoo/official/cv/resnet/src/resnet.py b/model_zoo/official/cv/resnet/src/resnet.py
index 0405e38cafa..54174d4ad7d 100755
--- a/model_zoo/official/cv/resnet/src/resnet.py
+++ b/model_zoo/official/cv/resnet/src/resnet.py
@@ -23,7 +23,7 @@ from mindspore.ops import functional as F
 from mindspore.common.tensor import Tensor
 
 
-def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size):
+def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size):
     fan_in = in_channel * kernel_size * kernel_size
     scale = 1.0
     scale /= max(1., fan_in)
@@ -108,7 +108,7 @@ def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'
 
 def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3)
     else:
         weight_shape = (out_channel, in_channel, 3, 3)
         weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
@@ -121,7 +121,7 @@ def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False):
 
 def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1)
     else:
         weight_shape = (out_channel, in_channel, 1, 1)
         weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
@@ -134,7 +134,7 @@ def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False):
 
 def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False):
     if use_se:
-        weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
+        weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7)
     else:
         weight_shape = (out_channel, in_channel, 7, 7)
         weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu'))
@@ -207,7 +207,7 @@ class ResidualBlock(nn.Cell):
             self.bn2 = _bn(channel)
 
         self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se)
-        self.bn3 = _bn_last(out_channel)
+        self.bn3 = _bn(out_channel)
         if self.se_block:
             self.se_global_pool = P.ReduceMean(keep_dims=False)
             self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se)
diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py
index e1440dd65e0..7048543b7ac 100755
--- a/model_zoo/official/cv/resnet/train.py
+++ b/model_zoo/official/cv/resnet/train.py
@@ -14,9 +14,10 @@
 # ============================================================================
 """train resnet."""
 import os
+import numpy as np
 from mindspore import context
 from mindspore import Tensor
-from mindspore.nn.optim import Momentum, thor
+from mindspore.nn.optim import Momentum, thor, LARS
 from mindspore.train.model import Model
 from mindspore.context import ParallelMode
 from mindspore.train.train_thor import ConvertModelUtils
@@ -37,6 +38,7 @@ from src.metric import DistAccuracy, ClassifyCorrectCell
 from src.model_utils.config import config
 from src.model_utils.moxing_adapter import moxing_wrapper
 from src.model_utils.device_adapter import get_rank_id, get_device_num
+from src.resnet import conv_variance_scaling_initializer
 
 set_seed(1)
 
@@ -130,13 +132,26 @@ def init_weight(net):
     else:
         for _, cell in net.cells_and_names():
             if isinstance(cell, nn.Conv2d):
-                cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(),
-                                                             cell.weight.shape,
-                                                             cell.weight.dtype))
+                if config.conv_init == "XavierUniform":
+                    cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(),
+                                                                 cell.weight.shape,
+                                                                 cell.weight.dtype))
+                elif config.conv_init == "TruncatedNormal":
+                    weight = conv_variance_scaling_initializer(cell.in_channels,
+                                                               cell.out_channels,
+                                                               cell.kernel_size[0])
+                    cell.weight.set_data(weight)
             if isinstance(cell, nn.Dense):
-                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(),
-                                                             cell.weight.shape,
-                                                             cell.weight.dtype))
+                if config.dense_init == "TruncatedNormal":
+                    cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(),
+                                                                 cell.weight.shape,
+                                                                 cell.weight.dtype))
+                elif config.dense_init == "RandomNormal":
+                    in_channel = cell.in_channels
+                    out_channel = cell.out_channels
+                    weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel)
+                    weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=cell.weight.dtype)
+                    cell.weight.set_data(weight)
 
 def init_lr(step_size):
     """init lr"""
@@ -163,6 +178,21 @@ def init_loss_scale():
         loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
     return loss
 
+
+def init_group_params(net):
+    decayed_params = []
+    no_decayed_params = []
+    for param in net.trainable_params():
+        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
+            decayed_params.append(param)
+        else:
+            no_decayed_params.append(param)
+
+    group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
+                    {'params': no_decayed_params},
+                    {'order_params': net.trainable_params()}]
+    return group_params
+
 def run_eval(target, model, ckpt_save_dir, cb):
     """run_eval"""
     if config.run_eval:
@@ -205,18 +235,11 @@ def train_net():
     init_weight(net=net)
     lr = Tensor(init_lr(step_size=step_size))
     # define opt
-    decayed_params = []
-    no_decayed_params = []
-    for param in net.trainable_params():
-        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
-            decayed_params.append(param)
-        else:
-            no_decayed_params.append(param)
-
-    group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
-                    {'params': no_decayed_params},
-                    {'order_params': net.trainable_params()}]
+    group_params = init_group_params(net)
     opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale)
+    if config.optimizer == "LARS":
+        opt = LARS(opt, epsilon=config.lars_epsilon, coefficient=config.lars_coefficient,
+                   lars_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name)
     loss = init_loss_scale()
     loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
     dist_eval_network = ClassifyCorrectCell(net) if config.run_distribute else None
diff --git a/model_zoo/official/cv/retinaface_resnet50/src/network.py b/model_zoo/official/cv/retinaface_resnet50/src/network.py
index 337a4e9acac..3be88a8da28 100644
--- a/model_zoo/official/cv/retinaface_resnet50/src/network.py
+++ b/model_zoo/official/cv/retinaface_resnet50/src/network.py
@@ -19,7 +19,6 @@ import numpy as np
 
 import mindspore
 import mindspore.nn as nn
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.ops import composite as C
 from mindspore import context, Tensor
@@ -524,4 +523,5 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/retinanet/src/retinanet.py b/model_zoo/official/cv/retinanet/src/retinanet.py
index 6e9c4f312b6..58557d8dbd8 100644
--- a/model_zoo/official/cv/retinanet/src/retinanet.py
+++ b/model_zoo/official/cv/retinanet/src/retinanet.py
@@ -316,7 +316,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 class resnet(nn.Cell):
     """
diff --git a/model_zoo/official/cv/shufflenetv1/eval.py b/model_zoo/official/cv/shufflenetv1/eval.py
index cc267910fb9..9941a8443c6 100644
--- a/model_zoo/official/cv/shufflenetv1/eval.py
+++ b/model_zoo/official/cv/shufflenetv1/eval.py
@@ -39,7 +39,7 @@ def test():
     # step_size = dataset.get_dataset_size()
 
     # define net
-    net = shufflenetv1(model_size=config.model_size)
+    net = shufflenetv1(model_size=config.model_size, n_class=config.num_classes)
 
     # load checkpoint
     param_dict = load_checkpoint(config.ckpt_path)
diff --git a/model_zoo/official/cv/shufflenetv1/export.py b/model_zoo/official/cv/shufflenetv1/export.py
index dec005028b6..5f5709d8c8b 100644
--- a/model_zoo/official/cv/shufflenetv1/export.py
+++ b/model_zoo/official/cv/shufflenetv1/export.py
@@ -38,7 +38,7 @@ if config.device_target == "Ascend":
 
 @moxing_wrapper(pre_process=modelarts_pre_process)
 def model_export():
-    net = ShuffleNetV1(model_size=config.model_size)
+    net = ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes)
 
     param_dict = load_checkpoint(config.ckpt_path)
     load_param_into_net(net, param_dict)
diff --git a/model_zoo/official/cv/shufflenetv1/train.py b/model_zoo/official/cv/shufflenetv1/train.py
index 048f9bf030c..0e591e10b59 100644
--- a/model_zoo/official/cv/shufflenetv1/train.py
+++ b/model_zoo/official/cv/shufflenetv1/train.py
@@ -58,7 +58,7 @@ def train():
         context.set_context(device_id=config.device_id)
 
     # define network
-    net = ShuffleNetV1(model_size=config.model_size)
+    net = ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes)
 
     # define loss
     loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor,
diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py
index 7108240ffc5..171c9178054 100644
--- a/model_zoo/official/cv/ssd/src/ssd.py
+++ b/model_zoo/official/cv/ssd/src/ssd.py
@@ -525,7 +525,8 @@ class TrainingWrapper(nn.Cell):
         if self.use_global_norm:
             grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
             grads = C.clip_by_global_norm(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class SSDWithMobileNetV2(nn.Cell):
diff --git a/model_zoo/official/cv/unet/README.md b/model_zoo/official/cv/unet/README.md
index f369e865a4d..00736eb65cb 100644
--- a/model_zoo/official/cv/unet/README.md
+++ b/model_zoo/official/cv/unet/README.md
@@ -98,12 +98,12 @@ If set `split`=1.0, you should split train dataset and val dataset by directorie
 
 We support script to convert COCO and a Cell_Nuclei dataset used in used in [Unet++ original paper](https://arxiv.org/abs/1912.05074) to mulyi-class dataset format.
 
-1. Select `*yaml` in `unet`.
+1. Select `*.yaml` file under `unet` and modify the parameters as needed.
 
 2. run script to convert to mulyi-class dataset format:
 
 ```shell
-python preprocess_dataset.py -d /data/save_data_path
+python preprocess_dataset.py --config_path path/unet/*.yaml  --data_path /data/save_data_path
 ```
 
 ## [Environment Requirements](#contents)
@@ -481,7 +481,7 @@ Export MindIR on local
 Before exporting, you need to modify the parameter in the configuration — checkpoint_file_path and batch_ Size . checkpoint_ file_ Path is the CKPT file path, batch_ Size is set to 1.
 
 ```shell
-python export.py --config_path=[CONFIG_PATH]
+python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=AIR
 ```
 
 The checkpoint_file_path parameter is required,
diff --git a/model_zoo/official/cv/unet/README_CN.md b/model_zoo/official/cv/unet/README_CN.md
index 7f599f4c491..cd2641c329a 100644
--- a/model_zoo/official/cv/unet/README_CN.md
+++ b/model_zoo/official/cv/unet/README_CN.md
@@ -102,12 +102,12 @@ UNet++是U-Net的增强版本，使用了新的跨层链接方式和深层监督
 
 我们提供了一个脚本来将 COCO 和 Cell_Nuclei 数据集（[Unet++ 原论文](https://arxiv.org/abs/1912.05074) 中使用）转换为multi-class格式。
 
-1. 在`src/model_utils/`下选择对应的yaml文件。
+1. 在unet下选择*.yaml文件，根据需要修改参数。
 
 2. 运行转换脚本:
 
 ```shell
-python preprocess_dataset.py -d /data/save_data_path
+python preprocess_dataset.py --config_path path/unet/*.yaml  --data_path /data/save_data_path
 ```
 
 ## 环境要求
@@ -480,7 +480,7 @@ python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkp
 本地导出mindir
 
 ```shell
-python export.py --config_path=[CONFIG_PATH]
+python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=AIR
 ```
 
 ModelArts导出mindir
diff --git a/model_zoo/official/cv/unet/preprocess_dataset.py b/model_zoo/official/cv/unet/preprocess_dataset.py
index 494b348a83f..a630fa652e8 100644
--- a/model_zoo/official/cv/unet/preprocess_dataset.py
+++ b/model_zoo/official/cv/unet/preprocess_dataset.py
@@ -19,7 +19,7 @@ Images within one folder is an image, the image file named `"image.png"`, the ma
 import os
 import cv2
 import numpy as np
-from model_zoo.official.cv.unet.src.model_utils.config import config
+from src.model_utils.config import config
 
 def annToMask(ann, height, width):
     """Convert annotation to RLE and then to binary mask."""
diff --git a/model_zoo/official/cv/vgg16/README.md b/model_zoo/official/cv/vgg16/README.md
index f59c86dab3a..902fc41e3c9 100644
--- a/model_zoo/official/cv/vgg16/README.md
+++ b/model_zoo/official/cv/vgg16/README.md
@@ -27,6 +27,7 @@
         - [Export MindIR](#export-mindir)
         - [Infer on Ascend310](#infer-on-ascend310)
         - [result](#result)
+        - [Post Training Quantization](#post-training-quantization)
     - [Model Description](#model-description)
         - [Performance](#performance)
             - [Training Performance](#training-performance)
@@ -530,6 +531,40 @@ Inference result is saved in current path, you can find result like this in acc.
 'acc': 0.92
 ```
 
+### [Post Training Quantization](#contents)
+
+Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization.
+Current quantization project bases on CIFAR-10 dataset.
+
+1. Generate data of .bin format required for AIR model inference at Ascend310 platform.
+
+```shell
+python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH]
+```
+
+2. Export quantized AIR model.
+
+Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community).
+
+```shell
+python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH]
+```
+
+The quantized AIR file will be stored as "./results/vgg_quant.air".
+
+3. Implement inference at Ascend310 platform.
+
+```shell
+# Ascend310 quant inference
+bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH]
+```
+
+Inference result is saved in current path, you can find result like this in acc.log file.
+
+```bash
+'acc': 0.91
+```
+
 ## [Model Description](#contents)
 
 ### [Performance](#contents)
diff --git a/model_zoo/official/cv/vgg16/README_CN.md b/model_zoo/official/cv/vgg16/README_CN.md
index 4efbb025f44..740a1d3b57a 100644
--- a/model_zoo/official/cv/vgg16/README_CN.md
+++ b/model_zoo/official/cv/vgg16/README_CN.md
@@ -29,6 +29,7 @@
         - [导出MindIR](#导出mindir)
         - [在Ascend310执行推理](#在ascend310执行推理)
         - [结果](#结果)
+        - [训练后量化推理](#训练后量化推理)
     - [模型描述](#模型描述)
         - [性能](#性能)
             - [训练性能](#训练性能)
@@ -533,6 +534,39 @@ bash run_infer_310.sh [MINDIR_PATH] [DATASET_NAME] [DATASET_PATH] [NEED_PREPROCE
 'acc': 0.92
 ```
 
+### [训练后量化推理](#contents)
+
+训练后量化推理的相关执行脚本文件在"ascend310_quant_infer"目录下，依次执行以下步骤实现训练后量化推理。本训练后量化工程基于CIFAR-10数据集。
+
+1、生成Ascend310平台AIR模型推理需要的.bin格式数据。
+
+```shell
+python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH]
+```
+
+2、导出训练后量化的AIR格式模型。
+
+导出训练后量化模型需要配套的量化工具包，参考[官方地址](https://www.hiascend.com/software/cann/community)
+
+```shell
+python post_quant.py --config_path [YMAL_CONFIG_PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH]
+```
+
+导出的模型会存储在./result/vgg_quant.air。
+
+3、在Ascend310执行推理量化模型。
+
+```shell
+# Ascend310 inference
+bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH]
+```
+
+推理结果保存在脚本执行的当前路径，可以在acc.log中看到精度计算结果。
+
+```bash
+'acc': 0.91
+```
+
 ## 模型描述
 
 ### 性能
diff --git a/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh b/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh
index 31bba45de8e..56f958ea641 100644
--- a/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh
+++ b/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh
@@ -16,7 +16,6 @@
 
 if [ $# -lt 3 ]; then
     echo "Usage: bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH]"
-    echo "Example: bash run_quant_infer.sh ./vgg_quant.air ./00_data ./cifar10_label_ids.npy"
 exit 1
 fi
 
diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
index bc261c01a7e..82671e15e92 100755
--- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
+++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py
@@ -105,4 +105,5 @@ class TrainOneStepCellWithGradClip(Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
index b5cee676427..bd49548c69c 100644
--- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py
@@ -444,4 +444,5 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
index 81a77d855f2..4e9747be0b8 100644
--- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
+++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py
@@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh
index e0ccd093497..804d5dc39f2 100644
--- a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh
+++ b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh
@@ -27,4 +27,4 @@ export RANK_SIZE=1
 export DEVICE_ID=$1
 export RANK_ID=$1
 
-python eval.py --ckpt_path=$2 --mindrecord_dir=$3 --image_dir=$4 --anno_path=$5
+python eval.py --ckpt_path=$2 --eval_mindrecord_dir=$3 --image_dir=$4 --anno_path=$5
diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
index f1bfbe14550..91ac4081e4b 100644
--- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
+++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py
@@ -672,7 +672,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class YoloBoxScores(nn.Cell):
diff --git a/model_zoo/official/cv/yolov4/src/yolo.py b/model_zoo/official/cv/yolov4/src/yolo.py
index f375f750b11..074016abeba 100644
--- a/model_zoo/official/cv/yolov4/src/yolo.py
+++ b/model_zoo/official/cv/yolov4/src/yolo.py
@@ -184,12 +184,12 @@ class YOLOv4(nn.Cell):
         con6 = self.conv6(con5)
         con7 = self.conv7(con6)
 
-        ups1 = P.ResizeNearestNeighbor((img_hight / 16, img_width / 16))(con7)
+        ups1 = P.ResizeNearestNeighbor((img_hight // 16, img_width // 16))(con7)
         con8 = self.conv8(feature_map2)
         con9 = self.concat((ups1, con8))
         con10, _ = self.backblock0(con9)
         con11 = self.conv9(con10)
-        ups2 = P.ResizeNearestNeighbor((img_hight / 8, img_width / 8))(con11)
+        ups2 = P.ResizeNearestNeighbor((img_hight // 8, img_width // 8))(con11)
         con12 = self.conv10(feature_map1)
         con13 = self.concat((ups2, con12))
         con14, small_object_output = self.backblock1(con13)
@@ -515,7 +515,8 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class Giou(nn.Cell):
diff --git a/model_zoo/official/cv/yolov5/README.md b/model_zoo/official/cv/yolov5/README.md
index d666c644179..4203e2debf8 100644
--- a/model_zoo/official/cv/yolov5/README.md
+++ b/model_zoo/official/cv/yolov5/README.md
@@ -378,7 +378,7 @@ YOLOv5 on 118K images(The annotation and data format must be the same as coco201
 | outputs                    | heatmaps                                                    |
 | Loss                       | 53                                                          |
 | Speed                      | 1p 55 img/s 8p 440 img/s(shape=640)                         |
-| Total time                 | 80h                                                         |
+| Total time                 | 24h(8pcs)                                                         |
 | Checkpoint for Fine tuning | 58M (.ckpt file)                                            |
 | Scripts                    | <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/>|
 
diff --git a/model_zoo/official/cv/yolov5/README_CN.md b/model_zoo/official/cv/yolov5/README_CN.md
index d1ac34047eb..c9d7a5861ec 100644
--- a/model_zoo/official/cv/yolov5/README_CN.md
+++ b/model_zoo/official/cv/yolov5/README_CN.md
@@ -45,22 +45,22 @@ YOLOv5作为先进的检测器，它比所有可用的替代检测器更快（FP
 - 目录结构如下，由用户定义目录和文件的名称：
 
     ```shell
-        ©À©¤©¤ dataset
-            ©À©¤©¤ YOLOv5
-                ©À©¤©¤ annotations
-                ©¦   ©À©¤ train.json
-                ©¦   ©¸©¤ val.json
-                ©À©¤ images
-                    ©À©¤ train
-                    ©¦    ©¸©¤images
-                    ©¦       ©À©¤picture1.jpg
-                    ©¦       ©À©¤ ...
-                    ©¦       ©¸©¤picturen.jpg
-                    ©¸©¤ val
-                        ©¸©¤images
-                            ©À©¤picture1.jpg
-                            ©À©¤ ...
-                            ©¸©¤picturen.jpg
+        ├── dataset
+            ├── YOLOv5
+                ├── annotations
+                │   ├─ train.json
+                │   └─ val.json
+                ├─ images
+                    ├─ train
+                    │    └─images
+                    │       ├─picture1.jpg
+                    │       ├─ ...
+                    │       └─picturen.jpg
+                    └─ val
+                        └─images
+                            ├─picture1.jpg
+                            ├─ ...
+                            └─picturen.jpg
     ```
 
 建议用户使用MS COCO数据集来体验模型，
@@ -125,34 +125,34 @@ bash run_eval.sh dataset/xxx checkpoint/xxx.ckpt
 ## [脚本和示例代码](#目录)
 
 ```python
-©¸©¤yolov5
-  ©À©¤README.md
-  ©À©¤mindspore_hub_conf.md             # Mindspore Hub配置
-  ©À©¤ascend310_infer                   # 用于310推理
-  ©À©¤scripts
-    ©À©¤run_standalone_train.sh         # 在Ascend中启动单机训练（1卡）
-    ©À©¤run_distribute_train.sh         # 在Ascend中启动分布式训练（8卡）
-    ©À©¤run_infer_310.sh                # 在Ascend中启动310推理
-    ©¸©¤run_eval.sh                     # 在Ascend中启动评估
-  ©À©¤src
-    ©À©¤__init__.py                     # Python初始化文件
-    ©À©¤config.py                       # 参数配置
-    ©À©¤yolov5_backbone.py              # 网络骨干
-    ©À©¤distributed_sampler.py          # 数据集迭代器
-    ©À©¤initializer.py                  # 参数初始化器
-    ©À©¤logger.py                       # 日志函数
-    ©À©¤loss.py                         # 损失函数
-    ©À©¤lr_scheduler.py                 # 生成学习率
-    ©À©¤transforms.py                   # 预处理数据
-    ©À©¤util.py                         # 工具函数
-    ©À©¤yolo.py                         # YOLOv5网络
-    ©À©¤yolo_dataset.py                 # 为YOLOv5创建数据集
+└─yolov5
+  ├─README.md
+  ├─mindspore_hub_conf.md             # Mindspore Hub配置
+  ├─ascend310_infer                   # 用于310推理
+  ├─scripts
+    ├─run_standalone_train.sh         # 在Ascend中启动单机训练（1卡）
+    ├─run_distribute_train.sh         # 在Ascend中启动分布式训练（8卡）
+    ├─run_infer_310.sh                # 在Ascend中启动310推理
+    ├─run_eval.sh                     # 在Ascend中启动评估
+  ├─src
+    ├─__init__.py                     # Python初始化文件
+    ├─config.py                       # 参数配置
+    ├─yolov5_backbone.py              # 网络骨干
+    ├─distributed_sampler.py          # 数据集迭代器
+    ├─initializer.py                  # 参数初始化器
+    ├─logger.py                       # 日志函数
+    ├─loss.py                         # 损失函数
+    ├─lr_scheduler.py                 # 生成学习率
+    ├─transforms.py                   # 预处理数据
+    ├─util.py                         # 工具函数
+    ├─yolo.py                         # YOLOv5网络
+    ├─yolo_dataset.py                 # 为YOLOv5创建数据集
 
-  ©À©¤eval.py                           # 评估验证结果
-  ©À©¤export.py                         # 将MindSpore模型转换为AIR模型
-  ©À©¤preprocess.py                     # 310推理前处理脚本
-  ©À©¤postprocess.py                    # 310推理后处理脚本
-  ©¸©¤train.py                          # 训练网络
+  ├─eval.py                           # 评估验证结果
+  ├─export.py                         # 将MindSpore模型转换为AIR模型
+  ├─preprocess.py                     # 310推理前处理脚本
+  ├─postprocess.py                    # 310推理后处理脚本
+  ├─train.py                          # 训练网络
 ```
 
 ## [脚本参数](#目录)
@@ -378,7 +378,7 @@ YOLOv5应用于118000张图像上（标注和数据格式必须与COCO 2017相
 |输出|heatmaps                                                    |
 | 损失                       | 53                                                         |
 |速度| 1卡：55 img/s；8卡：440 img/s（shape=640）|
-| 总时长                 | 80小时                                                         |
+| 总时长                 | 24小时(8卡)                                                         |
 | 微调检查点 | 58M （.ckpt文件）                                           |
 |脚本| <https://gitee.com/mindspore/mindspore/tree/master/model_zoo/> |
 
diff --git a/model_zoo/official/cv/yolov5/src/yolo.py b/model_zoo/official/cv/yolov5/src/yolo.py
index c881fd6ce00..c514fb81c28 100644
--- a/model_zoo/official/cv/yolov5/src/yolo.py
+++ b/model_zoo/official/cv/yolov5/src/yolo.py
@@ -427,7 +427,8 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class Giou(nn.Cell):
diff --git a/model_zoo/official/gnn/gat/src/utils.py b/model_zoo/official/gnn/gat/src/utils.py
index c7bae8c8b86..441ef7c48ee 100644
--- a/model_zoo/official/gnn/gat/src/utils.py
+++ b/model_zoo/official/gnn/gat/src/utils.py
@@ -18,7 +18,6 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore import Tensor
 from mindspore.common import dtype as mstype
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 
 
@@ -150,7 +149,8 @@ class TrainOneStepCell(nn.Cell):
         loss = self.network(feature, biases)
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(feature, biases, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class TrainGAT(nn.Cell):
diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md
index ff091011be9..c2753423fad 100644
--- a/model_zoo/official/nlp/bert/README.md
+++ b/model_zoo/official/nlp/bert/README.md
@@ -786,3 +786,15 @@ In run_pretrain.py, we set a random seed to make sure that each node has the sam
 # [ModelZoo Homepage](#contents)
 
 Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
+
+# FAQ
+
+- **Q: How to resolve the continually overflow?**
+
+  **A**: Continually overflow is usually caused by using too high learning rate.
+  You could try lower `learning_rate` to use lower base learning rate or higher `power` to make learning rate decrease faster in config yaml.
+
+- **Q: Why the training process failed with error for the shape can not match?**
+  **A**: This is usually caused by the config `seq_length` of model can't match the dataset. You could check and modified the `seq_length` in yaml config according to the dataset you used.
+  The parameter of model won't change with `seq_length`, the shapes of parameter only depends on model config `max_position_embeddings`.
+
diff --git a/model_zoo/official/nlp/bert/README_CN.md b/model_zoo/official/nlp/bert/README_CN.md
index 76718d870fb..26cb64eb178 100644
--- a/model_zoo/official/nlp/bert/README_CN.md
+++ b/model_zoo/official/nlp/bert/README_CN.md
@@ -744,3 +744,11 @@ run_pretrain.py中设置了随机种子，确保分布式训练中每个节点
 # ModelZoo主页
 
 请浏览官网[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。
+
+# FAQ
+
+- **Q: 运行过程中发生持续溢出怎么办？**
+  **A**： 持续溢出通常是因为使用了较高的学习率导致训练不收敛。可以考虑修改yaml配置文件中的参数，调低`learning_rate`来降低初始学习率或提高`power`加速学习率衰减。
+
+- **Q: 运行报错shape不匹配是什么问题？**
+  **A**： Bert模型中的shape不匹配通常是因为模型参数配置和使用的数据集规格不匹配，主要是句长问题，可以考虑修改`seq_length`参数来匹配所使用的具体数据集。改变该参数不影响权重的规格，权重的规格仅与`max_position_embeddings`参数有关。
\ No newline at end of file
diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
index 210339ccd01..b59f310cbd7 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py
@@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class BertSquadCell(nn.Cell):
     """
@@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class BertCLS(nn.Cell):
     """
diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
index 36fca77faef..433ef03c99b 100644
--- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py
@@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
         if self.enable_clip_grad:
             grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
         grads = self.grad_reducer(grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
         overflow = cond
         if self.loss_scaling_manager is not None:
             overflow = self.loss_scaling_manager(scaling_sens, cond)
-        succ = self.optimizer(grads, overflow)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        self.optimizer(grads, overflow)
+        return (loss, cond, scaling_sens)
 
 cast = P.Cast()
 add_grads = C.MultitypeFuncGraph("add_grads")
@@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
         accu_overflow = self.select(overflow, self.one, self.zero)
         self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
 
-        if is_accu_step:
-            succ = False
-        else:
+        if not is_accu_step:
             # apply grad reducer on grads
             grads = self.grad_reducer(self.accu_grads)
             scaling = scaling_sens * self.degree * self.accumulation_steps
@@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             overflow = self.reshape(overflow, (()))
             if sens is None:
                 overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-            if overflow:
-                succ = False
-            else:
-                succ = self.optimizer(grads)
+            if not overflow:
+                self.optimizer(grads)
 
-        ret = (mean_loss, overflow, scaling_sens)
-        return F.depend(ret, succ)
+        return (mean_loss, overflow, scaling_sens)
 
 
 class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):
diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
index 58770011b75..6b845d28da5 100644
--- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py
@@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
         if self.enable_clip_grad:
             grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
         grads = self.grad_reducer(grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
         overflow = cond
         if self.loss_scaling_manager is not None:
             overflow = self.loss_scaling_manager(scaling_sens, cond)
-        succ = self.optimizer(grads, overflow)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        self.optimizer(grads, overflow)
+        return (loss, cond, scaling_sens)
 
 cast = P.Cast()
 add_grads = C.MultitypeFuncGraph("add_grads")
@@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
         accu_overflow = self.select(overflow, self.one, self.zero)
         self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
 
-        if is_accu_step:
-            succ = False
-        else:
+        if not is_accu_step:
             # apply grad reducer on grads
             grads = self.grad_reducer(self.accu_grads)
             scaling = scaling_sens * self.degree * self.accumulation_steps
@@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             overflow = self.reshape(overflow, (()))
             if sens is None:
                 overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-            if overflow:
-                succ = False
-            else:
-                succ = self.optimizer(grads)
+            if not overflow:
+                self.optimizer(grads)
 
-        ret = (mean_loss, overflow, scaling_sens)
-        return F.depend(ret, succ)
+        return (mean_loss, overflow, scaling_sens)
 
 
 class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):
diff --git a/model_zoo/official/nlp/cpm/src/cpm_train.py b/model_zoo/official/nlp/cpm/src/cpm_train.py
index 3087c3979a0..8c50abe4024 100644
--- a/model_zoo/official/nlp/cpm/src/cpm_train.py
+++ b/model_zoo/official/nlp/cpm/src/cpm_train.py
@@ -254,11 +254,9 @@ class CPMTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell):
 
         cond = self.get_overflow_status(status, grads)
         overflow = self.process_loss_scale(cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        return F.depend(loss, succ), cond, scaling_sens
+        if not overflow:
+            self.optimizer(grads)
+        return loss, cond, scaling_sens
 
 
 cast = P.Cast()
@@ -352,7 +350,6 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell):
         accu_overflow = self.select(overflow, self.one, self.zero)
 
         if self.accumulation:
-            succ = False
             self.accu_overflow = accu_overflow
         else:
             my_zero = F.depend(self.zero, accu_overflow)
@@ -378,9 +375,7 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell):
             overflow = self.reshape(overflow, (()))
             overflow = self.process_loss_scale(overflow)
 
-            if overflow:
-                succ = False
-            else:
-                succ = self.optimizer(grads)
+            if not overflow:
+                self.optimizer(grads)
 
-        return F.depend(loss, succ), overflow, scaling_sens
+        return loss, overflow, scaling_sens
diff --git a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py
index 16a8da5043b..265a6bb7584 100644
--- a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py
+++ b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py
@@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class BertSquadCell(nn.Cell):
     """
@@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class BertCLS(nn.Cell):
     """
diff --git a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py
index c99c9318f4e..e75e928c97c 100644
--- a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py
+++ b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py
@@ -308,8 +308,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell):
                                                            mstype.float32))
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
         grads = self.grad_reducer(grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -397,12 +397,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell):
@@ -472,9 +469,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell)
         overflow = cond
         if self.loss_scaling_manager is not None:
             overflow = self.loss_scaling_manager(scaling_sens, cond)
-        succ = self.optimizer(grads, overflow)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        self.optimizer(grads, overflow)
+        return (loss, cond, scaling_sens)
 
 cast = P.Cast()
 add_grads = C.MultitypeFuncGraph("add_grads")
@@ -631,9 +627,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
         accu_overflow = self.select(overflow, self.one, self.zero)
         self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
 
-        if is_accu_step:
-            succ = False
-        else:
+        if not is_accu_step:
             # apply grad reducer on grads
             grads = self.grad_reducer(self.accu_grads)
             scaling = scaling_sens * self.degree * self.accumulation_steps
@@ -650,13 +644,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             overflow = self.reshape(overflow, (()))
             if sens is None:
                 overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-            if overflow:
-                succ = False
-            else:
-                succ = self.optimizer(grads)
+            if not overflow:
+                self.optimizer(grads)
 
-        ret = (mean_loss, overflow, scaling_sens)
-        return F.depend(ret, succ)
+        return (mean_loss, overflow, scaling_sens)
 
 
 class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell):
diff --git a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py
index a951bc65eb7..93b6010517f 100755
--- a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py
+++ b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py
@@ -172,12 +172,9 @@ class ErnieFinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class ErnieCLS(nn.Cell):
     """
diff --git a/model_zoo/official/nlp/fasttext/src/fasttext_train.py b/model_zoo/official/nlp/fasttext/src/fasttext_train.py
index 86c0d6fbf04..cddd78227f0 100644
--- a/model_zoo/official/nlp/fasttext/src/fasttext_train.py
+++ b/model_zoo/official/nlp/fasttext/src/fasttext_train.py
@@ -138,5 +138,5 @@ class FastTextTrainOneStepCell(nn.Cell):
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
 
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py
index 76d5aa0502f..2ec0b80a033 100644
--- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py
+++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py
@@ -284,9 +284,6 @@ class GNMTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
diff --git a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py
index b995daf283f..615c728f061 100644
--- a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py
+++ b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py
@@ -151,9 +151,6 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
diff --git a/model_zoo/official/nlp/gru/README.md b/model_zoo/official/nlp/gru/README.md
index 652b72de0f2..421ace5a515 100644
--- a/model_zoo/official/nlp/gru/README.md
+++ b/model_zoo/official/nlp/gru/README.md
@@ -46,7 +46,7 @@ In this model, we use the Multi30K dataset as our train and test dataset.As trai
 
 # [Environment Requirements](#content)
 
-- Hardware（Ascend）
+- Hardware（Ascend or GPU）
     - Prepare hardware environment with Ascend processor.
 - Framework
     - [MindSpore](https://gitee.com/mindspore/mindspore)
@@ -81,15 +81,27 @@ nltk.download()
     After dataset preparation, you can start training and evaluation as follows:
 
     ```bash
-    # run training example
     cd ./scripts
-    bash run_standalone_train.sh [TRAIN_DATASET_PATH]
+    # download dataset
+    bash download_dataset.sh
+
+    # preprocess dataset
+    bash preprocess.sh [DATASET_PATH]
+
+    # create mindrecord
+    bash create_dataset.sh [DATASET_PATH] [DATASET_PATH]
+
+    # run training example
+    bash run_standalone_train_{platform}.sh [TRAIN_DATASET_PATH]
 
     # run distributed training example
-    bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH]
+    bash run_distribute_train_{platform}.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH]
+    # platform: ascend or gpu
+    # do not need [RANK_TABLE_FILE] if you use GPU
 
     # run evaluation example
-    bash run_eval.sh [CKPT_FILE] [DATASET_PATH]
+    bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH]
+    # platform: ascend or gpu
     ```
 
 - Running on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows)
@@ -158,7 +170,6 @@ The GRU network script and code result are as follows:
   │   ├──local_adapter.py                    // Local adapter
   │   ├──moxing_adapter.py                   // Moxing adapter for ModelArts
   ├── src
-  |   ├──gru.py                              // gru cell architecture.
   │   ├──create_data.py                      // Dataset preparation.
   │   ├──dataset.py                          // Dataset loader to feed into model.
   │   ├──gru_for_infer.py                    // GRU eval model architecture.
@@ -167,16 +178,24 @@ The GRU network script and code result are as follows:
   │   ├──lr_schedule.py                      // Learning rate scheduler.
   │   ├──parse_output.py                     // Parse output file.
   │   ├──preprocess.py                       // Dataset preprocess.
+  |   ├──rnn_cells.py                        // rnn cell architecture.
+  |   ├──rnns.py                             // rnn layer architecture.
   │   ├──seq2seq.py                          // Seq2seq architecture.
+  |   ├──utils.py                            // utils for rnn.
   │   ├──tokenization.py                     // tokenization for the dataset.
   │   ├──weight_init.py                      // Initialize weights in the net.
   ├── scripts
   │   ├──create_dataset.sh                   // shell script for create dataset.
+  │   ├──download_dataset.sh                 // shell script for download dataset.
   │   ├──parse_output.sh                     // shell script for parse eval output file to calculate BLEU.
   │   ├──preprocess.sh                       // shell script for preprocess dataset.
-  │   ├──run_distributed_train.sh            // shell script for distributed train on ascend.
-  │   ├──run_eval.sh                         // shell script for standalone eval on ascend.
-  │   ├──run_standalone_train.sh             // shell script for standalone eval on ascend.
+  │   ├──run_distributed_train_ascend.sh     // shell script for distributed train on ascend.
+  │   ├──run_distributed_train_gpu.sh        // shell script for distributed train on gpu.
+  │   ├──run_eval_ascend.sh                  // shell script for standalone eval on ascend.
+  │   ├──run_eval_gpu.sh                     // shell script for standalone eval on gpu.
+  │   ├──run_infer_310.sh                    // shell script for 310 inference.
+  │   ├──run_standalone_train_ascend.sh      // shell script for standalone eval on ascend.
+  │   ├──run_standalone_train_gpu.sh         // shell script for standalone eval on gpu.
   ├── default_config.yaml                    // Configurations
   ├── postprocess.py                         // GRU postprocess script.
   ├── preprocess.py                          // GRU preprocess script.
@@ -188,7 +207,14 @@ The GRU network script and code result are as follows:
 
 ## [Dataset Preparation](#content)
 
-Firstly, we should download the dataset from the WMT16 official net.After downloading the Multi30k dataset file, we get six dataset file, which is show as below.And we should in put the in same directory.
+Firstly, we should download the dataset from the WMT16 official net.
+
+```bash
+cd scripts
+bash download_dataset.sh
+```
+
+After downloading the Multi30k dataset file, we get six dataset file, which is show as below.And we should in put the in same directory.
 
 ```text
 train.de
@@ -250,14 +276,17 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
     ```bash
     cd ./scripts
-    bash run_standalone_train.sh [DATASET_PATH]
+    bash run_standalone_train_{platform}.sh [DATASET_PATH]
+    # platform: ascend or gpu
     ```
 
 - Running scripts for distributed training of GRU. Task training on multiple device and run the following command in bash to be executed in `scripts/`:
 
     ``` bash
     cd ./scripts
-    bash run_distributed_train.sh [RANK_TABLE_PATH] [DATASET_PATH]
+    bash run_distributed_train_{platform}.sh [RANK_TABLE_PATH] [DATASET_PATH]
+    # platform: ascend or gpu
+    # do not need [RANK_TABLE_FILE] if you use GPU
     ```
 
 ## [Inference Process](#content)
@@ -266,7 +295,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat
 
     ``` bash
     cd ./scripts
-    bash run_eval.sh [CKPT_FILE] [DATASET_PATH]
+    bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH]
+    # platform: ascend or gpu
     ```
 
 - After evalulation, we will get eval/target.txt and eval/output.txt.Then we can use scripts/parse_output.sh to get the translation.
@@ -354,35 +384,35 @@ perl multi-bleu.perl target.txt.forbleu < output.txt.forbleu
 
 ### Training Performance
 
-| Parameters                 | Ascend                                                         |
-| -------------------------- | -------------------------------------------------------------- |
-| Resource                   | Ascend 910; OS Euler2.8                                                     |
-| uploaded Date              | 01/18/2021 (month/day/year)                                    |
-| MindSpore Version          | 1.1.0                                                          |
-| Dataset                    | Multi30k Dataset                                |
-| Training Parameters        | epoch=30, batch_size=16                                        |
-| Optimizer                  | Adam                                                           |
-| Loss Function              | NLLLoss                                                        |
-| outputs                    | probability                                                    |
-| Speed                      | 50ms/step (1pcs)                                              |
-| Epoch Time                 | 13.4s (1pcs)                                                   |
-| Loss                       | 2.5984                                                          |
-| Params (M)                 | 21                                                            |
-| Checkpoint for inference   | 272M (.ckpt file)                                              |
-| Scripts                    | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |
+| Parameters                 | Ascend                        | GPU                       |
+| -------------------------- | ----------------------------- |---------------------------|
+| Resource                   | Ascend 910; OS Euler2.8       | GTX1080Ti, Ubuntu 18.04   |
+| uploaded Date              | 06/05/2021 (month/day/year)   | 06/05/2021 (month/day/year) |
+| MindSpore Version          | 1.2.0                         |1.2.0                      |
+| Dataset                    | Multi30k Dataset              | Multi30k Dataset          |
+| Training Parameters        | epoch=30, batch_size=16       | epoch=30, batch_size=16   |
+| Optimizer                  | Adam                          | Adam                      |
+| Loss Function              | NLLLoss                       | NLLLoss                   |
+| outputs                    | probability                   | probability               |
+| Speed                      | 35ms/step (1pcs)              | 200ms/step (1pcs)         |
+| Epoch Time                 | 64.4s (1pcs)                  | 361.5s (1pcs)             |
+| Loss                       | 3.86888                       |2.533958                   |
+| Params (M)                 | 21                            | 21                        |
+| Checkpoint for inference   | 272M (.ckpt file)             | 272M (.ckpt file)         |
+| Scripts                    | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |[gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |
 
 ### Inference Performance
 
-| Parameters          | Ascend                      |
-| ------------------- | --------------------------- |
-| Resource            | Ascend 910; OS Euler2.8                   |
-| Uploaded Date       | 01/18/2020 (month/day/year) |
-| MindSpore Version   | 1.1.0                       |
-| Dataset             | Multi30K                    |
-| batch_size          | 1                         |
-| outputs             | label index                 |
-| Accuracy            | BLEU: 30.30                        |
-| Model for inference | 272M (.ckpt file)           |
+| Parameters          | Ascend                      | GPU |
+| ------------------- | --------------------------- |---------------------------|
+| Resource            | Ascend 910; OS Euler2.8     | GTX1080Ti, Ubuntu 18.04   |
+| Uploaded Date       | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year)|
+| MindSpore Version   | 1.2.0                       | 1.2.0                     |
+| Dataset             | Multi30K                    | Multi30K                  |
+| batch_size          | 1                           | 1                         |
+| outputs             | label index                 | label index               |
+| Accuracy            | BLEU: 31.26                 | BLEU: 29.30               |
+| Model for inference | 272M (.ckpt file)           | 272M (.ckpt file)         |
 
 # [Random Situation Description](#content)
 
diff --git a/model_zoo/official/nlp/gru/default_config.yaml b/model_zoo/official/nlp/gru/default_config.yaml
index c8599cce906..4c1ffebb54b 100644
--- a/model_zoo/official/nlp/gru/default_config.yaml
+++ b/model_zoo/official/nlp/gru/default_config.yaml
@@ -36,6 +36,8 @@ scale_factor: 2
 scale_window: 2000
 warmup_ratio: 0.333333
 teacher_force_ratio: 0.5
+compute_type: mstype.float16
+dtype: mstype.float32
 
 run_distribute: False
 dataset_path: ""
diff --git a/model_zoo/official/nlp/gru/model_utils/config.py b/model_zoo/official/nlp/gru/model_utils/config.py
index ad0d7497a8e..42cde250dff 100644
--- a/model_zoo/official/nlp/gru/model_utils/config.py
+++ b/model_zoo/official/nlp/gru/model_utils/config.py
@@ -20,6 +20,8 @@ import ast
 import argparse
 from pprint import pformat
 import yaml
+import mindspore.common.dtype as mstype
+
 
 class Config:
     """
@@ -108,6 +110,24 @@ def merge(args, cfg):
         cfg[item] = args_var[item]
     return cfg
 
+def parse_dtype(dtype):
+    if dtype not in ["mstype.float32", "mstype.float16"]:
+        raise ValueError("Not supported dtype")
+
+    if dtype == "mstype.float32":
+        return mstype.float32
+    if dtype == "mstype.float16":
+        return mstype.float16
+    return None
+
+def extra_operations(cfg):
+    """
+    Do extra work on config
+    Args:
+        config: Object after instantiation of class 'Config'.
+    """
+    cfg.dtype = parse_dtype(cfg.dtype)
+    cfg.compute_type = parse_dtype(cfg.compute_type)
 
 def get_config():
     """
@@ -121,6 +141,8 @@ def get_config():
     default, helper, choices = parse_yaml(path_args.config_path)
     args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path)
     final_config = merge(args, default)
-    return Config(final_config)
+    final_config = Config(final_config)
+    extra_operations(final_config)
+    return final_config
 
 config = get_config()
diff --git a/model_zoo/official/nlp/gru/scripts/create_dataset.sh b/model_zoo/official/nlp/gru/scripts/create_dataset.sh
index 6d6521b9ab3..9626cd7d1d4 100644
--- a/model_zoo/official/nlp/gru/scripts/create_dataset.sh
+++ b/model_zoo/official/nlp/gru/scripts/create_dataset.sh
@@ -17,7 +17,6 @@ echo "==========================================================================
 echo "Please run the script as: "
 echo "sh create_dataset.sh DATASET_PATH OUTPUT_PATH"
 echo "for example: sh create_dataset.sh /path/multi30k/ /path/multi30k/mindrecord/"
-echo "DATASET_NAME including ag, dbpedia, and yelp_p"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
 ulimit -u unlimited
diff --git a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh
index bc99c693497..c5e7f87b48f 100644
--- a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh
+++ b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh
@@ -47,6 +47,7 @@ exit 1
 fi
 
 ulimit -u unlimited
+export DEVICE_TARGET="Ascend"
 export DEVICE_NUM=8
 export RANK_SIZE=8
 export RANK_TABLE_FILE=$PATH1
@@ -65,6 +66,6 @@ do
     cd ./train_parallel$i || exit
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     env > env.log
-    python train.py --run_distribute=True --dataset_path=$DATASET_PATH &> log &
+    python train.py --device_target=$DEVICE_TARGET --run_distribute=True --dataset_path=$DATASET_PATH &> log &
     cd ..
-done
\ No newline at end of file
+done
diff --git a/model_zoo/official/nlp/gru/src/gru_for_train.py b/model_zoo/official/nlp/gru/src/gru_for_train.py
index b60cb2d7e9f..647eed4d101 100644
--- a/model_zoo/official/nlp/gru/src/gru_for_train.py
+++ b/model_zoo/official/nlp/gru/src/gru_for_train.py
@@ -234,9 +234,51 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
+
+class GRUTrainOneStepCell(nn.TrainOneStepCell):
+    """
+    Encapsulation class of GRU network training.
+    Append an optimizer to the training network after that the construct
+    function can be called to create the backward graph.
+    Args:
+        network (Cell): The training network. Note that loss function should have been added.
+        optimizer (Optimizer): Optimizer for updating the weights.
+        sens (Number): The adjust parameter. Default: 1.0.
+        enable_clip_grad (boolean): If True, clip gradients in GRUTrainOneStepCell. Default: True.
+    """
+
+    def __init__(self, network, optimizer, sens=1.0, enable_clip_grad=True):
+        super(GRUTrainOneStepCell, self).__init__(network, optimizer, sens)
+        self.cast = P.Cast()
+        self.hyper_map = C.HyperMap()
+        self.clip_gradients = ClipGradients()
+        self.enable_clip_grad = enable_clip_grad
+
+    def set_sens(self, value):
+        self.sens = value
+
+    def construct(self,
+                  encoder_inputs,
+                  decoder_inputs,
+                  teacher_force,
+                  sens=None):
+        """Defines the computation performed."""
+
+        weights = self.weights
+        loss = self.network(encoder_inputs,
+                            decoder_inputs,
+                            teacher_force)
+
+        grads = self.grad(self.network, weights)(encoder_inputs,
+                                                 decoder_inputs,
+                                                 teacher_force,
+                                                 self.cast(F.tuple_to_array((self.sens,)),
+                                                           mstype.float32))
+        if self.enable_clip_grad:
+            grads = self.clip_gradients(grads, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE)
+        grads = self.grad_reducer(grads)
+        succ = self.optimizer(grads)
+        return F.depend(loss, succ)
diff --git a/model_zoo/official/nlp/gru/src/seq2seq.py b/model_zoo/official/nlp/gru/src/seq2seq.py
index 06ef8daa15f..97c117d0101 100644
--- a/model_zoo/official/nlp/gru/src/seq2seq.py
+++ b/model_zoo/official/nlp/gru/src/seq2seq.py
@@ -18,8 +18,8 @@ from mindspore import Tensor
 import mindspore.nn as nn
 import mindspore.ops.operations as P
 import mindspore.common.dtype as mstype
-from src.gru import BidirectionGRU, GRU
 from src.weight_init import dense_default_state
+from src.rnns import GRU
 
 class Attention(nn.Cell):
     '''
@@ -29,8 +29,8 @@ class Attention(nn.Cell):
         super(Attention, self).__init__()
         self.text_len = config.max_length
         self.attn = nn.Dense(in_channels=config.hidden_size * 3,
-                             out_channels=config.hidden_size).to_float(mstype.float16)
-        self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(mstype.float16)
+                             out_channels=config.hidden_size).to_float(config.compute_type)
+        self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(config.compute_type)
         self.expandims = P.ExpandDims()
         self.tanh = P.Tanh()
         self.softmax = P.Softmax()
@@ -39,6 +39,9 @@ class Attention(nn.Cell):
         self.concat = P.Concat(axis=2)
         self.squeeze = P.Squeeze(axis=2)
         self.cast = P.Cast()
+        self.dtype = config.dtype
+        self.compute_type = config.compute_type
+
     def construct(self, hidden, encoder_outputs):
         '''
         Attention construction
@@ -58,9 +61,9 @@ class Attention(nn.Cell):
         energy = self.tanh(out)
         attention = self.fc(energy)
         attention = self.squeeze(attention)
-        attention = self.cast(attention, mstype.float32)
+        attention = self.cast(attention, self.dtype)
         attention = self.softmax(attention)
-        attention = self.cast(attention, mstype.float16)
+        attention = self.cast(attention, self.compute_type)
         return attention
 
 class Encoder(nn.Cell):
@@ -76,8 +79,9 @@ class Encoder(nn.Cell):
         self.vocab_size = config.src_vocab_size
         self.embedding_size = config.encoder_embedding_size
         self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
-        self.rnn = BidirectionGRU(config, is_training=is_training).to_float(mstype.float16)
-        self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(mstype.float16)
+        self.rnn = GRU(input_size=self.embedding_size, \
+            hidden_size=self.hidden_size, bidirectional=True).to_float(config.compute_type)
+        self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(config.compute_type)
         self.shape = P.Shape()
         self.transpose = P.Transpose()
         self.p = P.Print()
@@ -85,6 +89,8 @@ class Encoder(nn.Cell):
         self.text_len = config.max_length
         self.squeeze = P.Squeeze(axis=0)
         self.tanh = P.Tanh()
+        self.concat = P.Concat(2)
+        self.dtype = config.dtype
 
     def construct(self, src):
         '''
@@ -99,8 +105,10 @@ class Encoder(nn.Cell):
         '''
         embedded = self.embedding(src)
         embedded = self.transpose(embedded, (1, 0, 2))
-        embedded = self.cast(embedded, mstype.float16)
+        embedded = self.cast(embedded, self.dtype)
         output, hidden = self.rnn(embedded)
+        hidden = self.transpose(hidden, (1, 0, 2))
+        hidden = hidden.view(hidden.shape[0], -1)
         hidden = self.fc(hidden)
         hidden = self.tanh(hidden)
         return output, hidden
@@ -118,7 +126,8 @@ class Decoder(nn.Cell):
         self.vocab_size = config.trg_vocab_size
         self.embedding_size = config.decoder_embedding_size
         self.embedding = nn.Embedding(self.vocab_size, self.embedding_size)
-        self.rnn = GRU(config, is_training=is_training).to_float(mstype.float16)
+        self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, \
+            hidden_size=self.hidden_size).to_float(config.compute_type)
         self.text_len = config.max_length
         self.shape = P.Shape()
         self.transpose = P.Transpose()
@@ -130,11 +139,13 @@ class Decoder(nn.Cell):
         self.log_softmax = P.LogSoftmax(axis=1)
         weight, bias = dense_default_state(self.embedding_size+self.hidden_size*3, self.vocab_size)
         self.fc = nn.Dense(self.embedding_size+self.hidden_size*3, self.vocab_size,
-                           weight_init=weight, bias_init=bias).to_float(mstype.float16)
+                           weight_init=weight, bias_init=bias).to_float(config.compute_type)
         self.attention = Attention(config)
         self.bmm = P.BatchMatMul()
         self.dropout = nn.Dropout(0.7)
         self.expandims = P.ExpandDims()
+        self.dtype = config.dtype
+
     def construct(self, inputs, hidden, encoder_outputs):
         '''
         Decoder construction
@@ -150,21 +161,22 @@ class Decoder(nn.Cell):
         '''
         embedded = self.embedding(inputs)
         embedded = self.transpose(embedded, (1, 0, 2))
-        embedded = self.cast(embedded, mstype.float16)
+        embedded = self.cast(embedded, self.dtype)
         attn = self.attention(hidden, encoder_outputs)
         attn = self.expandims(attn, 1)
         encoder_outputs = self.transpose(encoder_outputs, (1, 0, 2))
         weight = self.bmm(attn, encoder_outputs)
         weight = self.transpose(weight, (1, 0, 2))
+        weight = self.cast(weight, self.dtype)
         emd_con = self.concat((embedded, weight))
         output, hidden = self.rnn(emd_con)
+        output = self.cast(output, self.dtype)
         out = self.concat((embedded, output, weight))
         out = self.squeeze(out)
         hidden = self.squeeze(hidden)
         prediction = self.fc(out)
         prediction = self.dropout(prediction)
-        prediction = self.cast(prediction, mstype.float32)
-        prediction = self.cast(prediction, mstype.float32)
+        prediction = self.cast(prediction, self.dtype)
         pred_prob = self.log_softmax(prediction)
         pred_prob = self.expandims(pred_prob, 0)
         return pred_prob, hidden
diff --git a/model_zoo/official/nlp/gru/src/weight_init.py b/model_zoo/official/nlp/gru/src/weight_init.py
index 48a1ad2460e..1f92efc14f0 100644
--- a/model_zoo/official/nlp/gru/src/weight_init.py
+++ b/model_zoo/official/nlp/gru/src/weight_init.py
@@ -15,21 +15,7 @@
 """weight init"""
 import math
 import numpy as np
-from mindspore import Tensor, Parameter
-
-def gru_default_state(batch_size, input_size, hidden_size, num_layers=1, bidirectional=False):
-    '''Weight init for gru cell'''
-    stdv = 1 / math.sqrt(hidden_size)
-    weight_i = Parameter(Tensor(
-        np.random.uniform(-stdv, stdv, (input_size, 3*hidden_size)).astype(np.float32)), name='weight_i')
-    weight_h = Parameter(Tensor(
-        np.random.uniform(-stdv, stdv, (hidden_size, 3*hidden_size)).astype(np.float32)), name='weight_h')
-    bias_i = Parameter(Tensor(
-        np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_i')
-    bias_h = Parameter(Tensor(
-        np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_h')
-    init_h = Tensor(np.zeros((batch_size, hidden_size)).astype(np.float16))
-    return weight_i, weight_h, bias_i, bias_h, init_h
+from mindspore import Tensor
 
 def dense_default_state(in_channel, out_channel):
     '''Weight init for dense cell'''
diff --git a/model_zoo/official/nlp/gru/train.py b/model_zoo/official/nlp/gru/train.py
index de219f93a4b..2d795e9ff77 100644
--- a/model_zoo/official/nlp/gru/train.py
+++ b/model_zoo/official/nlp/gru/train.py
@@ -15,17 +15,19 @@
 """train script"""
 import os
 import time
+import mindspore.common.dtype as mstype
 from mindspore.context import ParallelMode
 from mindspore import context
-from mindspore.communication.management import init
+from mindspore.communication.management import init, get_rank
 from mindspore.train.callback import Callback, CheckpointConfig, ModelCheckpoint, TimeMonitor
 from mindspore.train import Model
 from mindspore.common import set_seed
 from mindspore.train.loss_scale_manager import DynamicLossScaleManager
 from mindspore.nn.optim import Adam
+from mindspore import log as logger
 
 from src.seq2seq import Seq2Seq
-from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell
+from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell, GRUTrainOneStepCell
 from src.dataset import create_gru_dataset
 from src.lr_schedule import dynamic_lr
 
@@ -72,13 +74,20 @@ class LossCallBack(Callback):
                                                                      cb_params.cur_step_num,
                                                                      str(cb_params.net_outputs)))
         with open("./loss_{}.log".format(self.rank_id), "a+") as f:
-            f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format(
-                time_stamp_current - time_stamp_first,
-                cb_params.cur_epoch_num,
-                cb_params.cur_step_num,
-                str(cb_params.net_outputs[0].asnumpy()),
-                str(cb_params.net_outputs[1].asnumpy()),
-                str(cb_params.net_outputs[2].asnumpy())))
+            if context.get_context("device_target") == "Ascend":
+                f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format(
+                    time_stamp_current - time_stamp_first,
+                    cb_params.cur_epoch_num,
+                    cb_params.cur_step_num,
+                    str(cb_params.net_outputs[0].asnumpy()),
+                    str(cb_params.net_outputs[1].asnumpy()),
+                    str(cb_params.net_outputs[2].asnumpy())))
+            else:
+                f.write("time: {}, epoch: {}, step: {}, loss: {}".format(
+                    time_stamp_current - time_stamp_first,
+                    cb_params.cur_epoch_num,
+                    cb_params.cur_step_num,
+                    str(cb_params.net_outputs.asnumpy())))
             f.write('\n')
 
 
@@ -139,13 +148,32 @@ def modelarts_pre_process():
 @moxing_wrapper(pre_process=modelarts_pre_process)
 def run_train():
     """run train."""
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id(), save_graphs=False)
-    rank = get_rank_id()
+    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,
+                        device_id=get_device_id(), save_graphs=False)
+    if config.device_target == "GPU":
+        if config.compute_type != mstype.float32:
+            logger.warning('GPU only support fp32 temporarily, run with fp32.')
+            config.compute_type = mstype.float32
+
     device_num = get_device_num()
     if config.run_distribute:
-        context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
-                                          gradients_mean=True)
-        init()
+        if config.device_target == "Ascend":
+            rank = get_rank_id()
+            context.set_auto_parallel_context(device_num=device_num,
+                                              parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              gradients_mean=True)
+            init()
+        elif config.device_target == "GPU":
+            rank = get_rank()
+            init("nccl")
+            context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
+                                              gradients_mean=True)
+        else:
+            raise ValueError(config.device_target)
+    else:
+        rank = 0
+        device_num = 1
+
     mindrecord_file = config.dataset_path
     if not os.path.exists(mindrecord_file):
         print("dataset file {} not exists, please check!".format(mindrecord_file))
@@ -162,8 +190,10 @@ def run_train():
                                             scale_factor=config.scale_factor,
                                             scale_window=config.scale_window)
     update_cell = scale_manager.get_update_cell()
-    netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell)
-
+    if config.device_target == "Ascend":
+        netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell)
+    else:
+        netwithgrads = GRUTrainOneStepCell(network, opt)
     time_cb = TimeMonitor(data_size=dataset_size)
     loss_cb = LossCallBack(rank_id=rank)
     cb = [time_cb, loss_cb]
@@ -171,10 +201,10 @@ def run_train():
     if config.save_checkpoint:
         ckpt_config = CheckpointConfig(save_checkpoint_steps=config.ckpt_epoch * dataset_size,
                                        keep_checkpoint_max=config.keep_checkpoint_max)
-        save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(get_rank_id()) + '/')
+        save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(rank) + '/')
         ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                   directory=save_ckpt_path,
-                                  prefix='{}'.format(get_rank_id()))
+                                  prefix='{}'.format(rank))
         cb += [ckpt_cb]
     netwithgrads.set_train(True)
     model = Model(netwithgrads)
diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
index 23ff47d1a14..2164e17c1dc 100644
--- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
+++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py
@@ -368,10 +368,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
+        if not overflow:
+            self.optimizer(grads)
 
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        return (loss, cond, scaling_sens)
diff --git a/model_zoo/official/nlp/pangu_alpha/src/dataset.py b/model_zoo/official/nlp/pangu_alpha/src/dataset.py
index b8966d870c4..1ebafc072fd 100644
--- a/model_zoo/official/nlp/pangu_alpha/src/dataset.py
+++ b/model_zoo/official/nlp/pangu_alpha/src/dataset.py
@@ -67,7 +67,7 @@ def get_input_data_batch_slice_map(input_ids, eod_id, rank, dis, eod_reset):
 
 
 def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_batch=False, data_start_index=0,
-                   eod_reset=False, eod_id=9, column_name='input_ids', epoch=1):
+                   eod_reset=False, eod_id=9, column_name='input_ids', epoch=1, num_samples=None):
     """
     Create dataset
 
@@ -99,7 +99,8 @@ def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_
     data.sort()
 
     # Load data files and preprocess
-    dataset = ds.MindDataset(data[data_start_index:], columns_list=[column_name], shuffle=False)
+    dataset = ds.MindDataset(data[data_start_index:], columns_list=[column_name],
+                             shuffle=False, num_samples=num_samples)
     type_cast_op = C.TypeCast(mstype.int32)
     type_cast_op_float = C.TypeCast(mstype.float16)
 
diff --git a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py
index 4ea05370aa2..92d4100ea8a 100644
--- a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py
+++ b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py
@@ -147,11 +147,9 @@ class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell):
         overflow = self.process_loss_scale(cond)
         # If overflow, surpass weights update
         # if not, update weights
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        return F.depend(loss, succ), cond, scaling_sens
+        if not overflow:
+            self.optimizer(grads)
+        return loss, cond, scaling_sens
 
 class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
     """
@@ -255,9 +253,6 @@ class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, overflow, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, overflow, scaling_sens)
diff --git a/model_zoo/official/nlp/pangu_alpha/src/utils.py b/model_zoo/official/nlp/pangu_alpha/src/utils.py
index 63a6a73cd09..83465a8d3f1 100644
--- a/model_zoo/official/nlp/pangu_alpha/src/utils.py
+++ b/model_zoo/official/nlp/pangu_alpha/src/utils.py
@@ -405,6 +405,10 @@ def get_args(inference=False):
                         required=False,
                         default=None,
                         help='Location of data.')
+    parser.add_argument('--eval_data_url',
+                        required=False,
+                        default=None,
+                        help='Location of eval data.')
     parser.add_argument('--train_url',
                         required=False,
                         default=None,
@@ -448,6 +452,14 @@ def get_args(inference=False):
                         type=int,
                         default=0,
                         help="Enable incremental training. Default 0.")
+    parser.add_argument("--train_and_eval_mode",
+                        type=int,
+                        default=0,
+                        help="Enable evaling while training. Default 0.")
+    parser.add_argument("--eval_steps",
+                        type=int,
+                        default=10,
+                        help="The eval step in train and eval mode. Default 10.")
     add_training_params(parser)
     if inference:
         add_inference_params(parser)
diff --git a/model_zoo/official/nlp/pangu_alpha/train.py b/model_zoo/official/nlp/pangu_alpha/train.py
index fd2a83a3784..e184260cc7a 100644
--- a/model_zoo/official/nlp/pangu_alpha/train.py
+++ b/model_zoo/official/nlp/pangu_alpha/train.py
@@ -18,13 +18,12 @@ PanguAlpha train script
 
 import os
 import math
-import time
 from mindspore import context
 from mindspore.train.model import Model
 import mindspore.communication.management as D
 from mindspore.context import ParallelMode
 import mindspore.nn as nn
-from mindspore.train.callback import TimeMonitor, Callback
+from mindspore.train.callback import TimeMonitor
 from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
 import mindspore.common.dtype as mstype
 from mindspore.parallel import set_algo_parameters
@@ -37,40 +36,10 @@ from src.pangu_alpha_wrapcell import PanguAlphaTrainOneStepWithLossScaleCell, Pa
 from src.pangu_alpha_config import PANGUALPHAConfig, set_parse
 from src.utils import LearningRate, get_args, FP32StateAdamWeightDecay
 from src.utils import download_data
+from src.callbacks import EvalCallBack, LossCallBack
+from src.metrics import PPLMetric
 
 
-class LossCallBack(Callback):
-    """
-    Monitor the loss in training.
-    If the loss in NAN or INF terminating training.
-    """
-
-    def __init__(self, dataset_size=-1, local_rank=0, has_trained_epoch=0, has_trained_step=0, micro_size=1):
-        super(LossCallBack, self).__init__()
-        self._dataset_size = dataset_size
-        self.local_rank = local_rank
-        self.has_trained_epoch = has_trained_epoch
-        self.has_trained_step = has_trained_step
-        self.micro_size = micro_size
-        print("load has trained epoch :{} and step: {}".format(has_trained_epoch, has_trained_step), flush=True)
-
-    def step_end(self, run_context):
-        """
-        Print loss after each step
-        """
-        cb_params = run_context.original_args()
-        if self._dataset_size > 0 and self.local_rank % 8 == 0:
-            percent, epoch_num = math.modf(cb_params.cur_step_num /
-                                           self._dataset_size)
-            if percent == 0:
-                epoch_num -= 1
-            date = time.asctime(time.localtime(time.time()))
-            loss_value = cb_params.net_outputs[0].asnumpy() / self.micro_size
-            print("time: {} local_rank: {}, epoch: {}, step: {}, output is {}, overflow is {}, scale is {}".
-                  format(date, int(self.local_rank), int(epoch_num) + int(self.has_trained_epoch),
-                         cb_params.cur_step_num + int(self.has_trained_step), loss_value,
-                         cb_params.net_outputs[1].asnumpy(), cb_params.net_outputs[2].asnumpy()))
-
 
 project_root = os.path.abspath(
     os.path.dirname(os.path.realpath(__file__)) + os.path.sep + "..")
@@ -101,73 +70,59 @@ def run_train(args_opt):
     The main training process.
     """
     # Set execution mode
-    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
-    context.set_context(variable_memory_max_size="31GB")
+    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, variable_memory_max_size="31GB")
     # Set parallel context
     if args_opt.distribute == "true":
         D.init()
         device_num = D.get_group_size()
         rank = D.get_rank()
         print("rank_id is {}, device_num is {}".format(rank, device_num))
-
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
-            gradients_mean=False,
-            full_batch=bool(args_opt.full_batch),
-            strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path,
+            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False,
+            full_batch=bool(args_opt.full_batch), strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path,
             enable_parallel_optimizer=bool(args_opt.optimizer_shard))
         set_algo_parameters(elementwise_op_strategy_follow=True)
         _set_multi_subgraphs()
-
     else:
         rank = 0
         device_num = 1
     context.set_context(save_graphs=False, save_graphs_path="./graphs_of_device_id_" + str(rank))
     # copy data from the cloud to the /cache/Data
     cache_url = '/cache/Data/'
+    eval_cache_url = '/cache/EvalData/'
     if args_opt.offline:
         cache_url = args_opt.data_url
+        eval_cache_url = args_opt.eval_data_url
     else:
         download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank)
+        download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank)
     # Set model property
     model_parallel_num = args_opt.op_level_model_parallel_num
     data_parallel_num = int(device_num / model_parallel_num)
+    if data_parallel_num <= 1 and args_opt.optimizer_shard == 1:
+        raise ValueError("The dp must large than 1 when applying optimizer shard.")
     batch_size = args_opt.per_batch_size * data_parallel_num
     config = PANGUALPHAConfig(
-        data_parallel_num=data_parallel_num,
-        model_parallel_num=model_parallel_num,
-        batch_size=batch_size,
-        seq_length=args_opt.seq_length,
-        vocab_size=args_opt.vocab_size,
-        embedding_size=args_opt.embedding_size,
-        num_layers=args_opt.num_layers,
-        num_heads=args_opt.num_heads,
-        expand_ratio=4,
-        dropout_rate=0.1,
-        compute_dtype=mstype.float16,
-        stage_num=args_opt.stage_num,
-        micro_size=args_opt.micro_size,
-        eod_reset=bool(args_opt.eod_reset),
-        load_ckpt_path=args_opt.load_ckpt_path,
+        data_parallel_num=data_parallel_num, model_parallel_num=model_parallel_num,
+        batch_size=batch_size, seq_length=args_opt.seq_length,
+        vocab_size=args_opt.vocab_size, embedding_size=args_opt.embedding_size,
+        num_layers=args_opt.num_layers, num_heads=args_opt.num_heads,
+        expand_ratio=4, dropout_rate=0.1, compute_dtype=mstype.float16,
+        stage_num=args_opt.stage_num, micro_size=args_opt.micro_size,
+        eod_reset=bool(args_opt.eod_reset), load_ckpt_path=args_opt.load_ckpt_path,
         param_init_type=mstype.float32 if args_opt.param_init_type == 'fp32' else mstype.float16,
         word_emb_dp=bool(args_opt.word_emb_dp))
     print("===config is: ", config, flush=True)
-
     # Define network
     pangu_alpha = PanguAlpha(config)
     loss = CrossEntropyLoss(config)
-    pangu_alpha_with_loss = PanguAlphaWithLoss(config, pangu_alpha, loss)
-    pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss)
-
+    pangu_alpha_with_loss_net = PanguAlphaWithLoss(config, pangu_alpha, loss)
+    pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net)
     print("=====args_opt is: ", args_opt, flush=True)
-
     # Warm-up and cosine decay learning rate
-    lr = LearningRate(learning_rate=args_opt.start_lr,
-                      end_learning_rate=args_opt.end_lr,
-                      warmup_steps=args_opt.warmup_step,
-                      decay_steps=200000)
-
+    lr = LearningRate(learning_rate=args_opt.start_lr, end_learning_rate=args_opt.end_lr,
+                      warmup_steps=args_opt.warmup_step, decay_steps=200000)
     params = pangu_alpha.trainable_params()
     group_params = set_weight_decay(params)
     if args_opt.optimizer == "lamb":
@@ -180,36 +135,37 @@ def run_train(args_opt):
     loss_scale_value = math.pow(2, 32)
     epoch_num = args_opt.epoch_size
     # Dataset loading mindrecord files
-    ds = create_dataset(config.batch_size, data_path=cache_url,
-                        data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch),
-                        eod_id=args_opt.eod_id, device_num=device_num, rank=rank,
-                        column_name=args_opt.data_column_name, epoch=epoch_num)
-    step_per_epoch = ds.get_dataset_size()
-    callback_size = args_opt.sink_size
-    actual_epoch_num = int(epoch_num * step_per_epoch / callback_size)
-    callback = [
-        TimeMonitor(callback_size),
-        LossCallBack(callback_size, rank, 0, 0)
-    ]
+    ds = create_dataset(config.batch_size, data_path=cache_url, data_start_index=0, eod_reset=config.eod_reset,
+                        full_batch=bool(args_opt.full_batch), eod_id=args_opt.eod_id, device_num=device_num,
+                        rank=rank, column_name=args_opt.data_column_name, epoch=epoch_num)
+    actual_epoch_num = int(epoch_num * ds.get_dataset_size() / args_opt.sink_size)
+    callback = [TimeMonitor(args_opt.sink_size), LossCallBack(args_opt.sink_size, rank, 0, 0)]
     update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000)
     pangu_alpha_with_grads = PanguAlphaTrainOneStepWithLossScaleCell(
         pangu_alpha_with_loss, optimizer=optimizer, scale_update_cell=update_cell, enable_global_norm=True,
         config=config)
-    model = Model(pangu_alpha_with_grads)
+    if args_opt.train_and_eval_mode:
+        ds_eval = create_dataset(config.batch_size, data_path=eval_cache_url,
+                                 data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch),
+                                 eod_id=args_opt.eod_id, device_num=device_num, rank=rank,
+                                 column_name=args_opt.data_column_name, epoch=epoch_num,
+                                 num_samples=args_opt.eval_steps * config.batch_size)
+        ppl_metric = PPLMetric(config.seq_length)
+        model = Model(pangu_alpha_with_grads, eval_network=pangu_alpha_with_loss, metrics={"ppl": ppl_metric})
+        callback.append(EvalCallBack(model, ds_eval, ppl_metric))
+    else:
+        model = Model(pangu_alpha_with_grads)
     if args_opt.incremental_training:
         from mindspore.train.serialization import load_distributed_checkpoint
-        strategy = model.infer_train_layout(train_dataset=ds, sink_size=callback_size)
+        strategy = model.infer_train_layout(train_dataset=ds, sink_size=args_opt.sink_size)
         print("======start load_distributed checkpoint", flush=True)
         # For 2.6B and 13B models, the number of ckpt files is 512.
-        ckpt_name = 'filerted'
-        ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"{ckpt_name}_{ckpt_rank}.ckpt") for ckpt_rank in
+        ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"filerted_{ckpt_rank}.ckpt") for ckpt_rank in
                           range(0, 512)]
         print(f"Loading from path {ckpt_file_list[0]}", flush=True)
-        # Load checkpoint files
         load_distributed_checkpoint(model.train_network, ckpt_file_list, strategy)
     print("Dataset size: {}, actual_epoch_num: {}".format(ds.get_dataset_size(), actual_epoch_num), flush=True)
-    model.train(actual_epoch_num, ds, callbacks=callback, sink_size=callback_size, dataset_sink_mode=True)
-
+    model.train(actual_epoch_num, ds, callbacks=callback, sink_size=args_opt.sink_size, dataset_sink_mode=True)
 
 def run_train_pipeline(args_opt):
     r"""
@@ -224,12 +180,9 @@ def run_train_pipeline(args_opt):
         print("rank_id is {}, device_num is {}".format(rank_id, device_num))
         context.reset_auto_parallel_context()
         context.set_auto_parallel_context(
-            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
-            gradients_mean=False,
-            full_batch=bool(args_opt.full_batch),
-            loss_repeated_mean=True,
-            device_num=device_num,
-            enable_parallel_optimizer=bool(args_opt.optimizer_shard),
+            parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False,
+            full_batch=bool(args_opt.full_batch), loss_repeated_mean=True,
+            device_num=device_num, enable_parallel_optimizer=bool(args_opt.optimizer_shard),
             pipeline_stages=args_opt.stage_num)
         set_algo_parameters(elementwise_op_strategy_follow=True)
         _set_multi_subgraphs()
@@ -238,13 +191,18 @@ def run_train_pipeline(args_opt):
         device_num = 1
     # copy data from the cloud to the /cache/Data
     cache_url = '/cache/Data/'
+    eval_cache_url = '/cache/EvalData/'
     if args_opt.offline:
         cache_url = args_opt.data_url
+        eval_cache_url = args_opt.eval_data_url
     else:
         download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank_id)
+        download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank_id)
     model_parallel_num = args_opt.op_level_model_parallel_num
     stage_device_num = int(device_num / args_opt.stage_num)
     data_parallel_num = int(stage_device_num / model_parallel_num)
+    if data_parallel_num <= 1 and args_opt.optimizer_shard == 1:
+        raise ValueError("The dp must large than 1 when applying optimizer shard.")
     per_batch_size = args_opt.per_batch_size
     batch_size = per_batch_size * data_parallel_num * args_opt.micro_size
     config = PANGUALPHAConfig(
@@ -267,8 +225,8 @@ def run_train_pipeline(args_opt):
     print("===config is: ", config, flush=True)
     pangu_alpha = PanguAlpha(config)
     loss = CrossEntropyLoss(config)
-    pangu_alpha_with_loss = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size)
-    pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss)
+    pangu_alpha_with_loss_net = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size)
+    pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net)
     print("=====args_opt is: ", args_opt, flush=True)
     lr = LearningRate(learning_rate=args_opt.start_lr, end_learning_rate=args_opt.end_lr,
                       warmup_steps=args_opt.warmup_step, decay_steps=args_opt.decay_steps)
@@ -294,6 +252,8 @@ def run_train_pipeline(args_opt):
     update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000)
     pangu_alpha_with_grads = PanguAlphaTrainPipelineWithLossScaleCell(
         pangu_alpha_with_loss, optimizer=optimizer, config=config, scale_update_cell=update_cell)
+    if args_opt.train_and_eval_mode:
+        raise ValueError("The pipeline train_and_eval_mode is not supported yet")
     model = Model(pangu_alpha_with_grads)
     model.train(actual_epoch_num, ds, callbacks=callback,
                 sink_size=callback_size, dataset_sink_mode=True)
diff --git a/model_zoo/official/nlp/q8bert/src/q8bert.py b/model_zoo/official/nlp/q8bert/src/q8bert.py
index c6549b30f84..e752e5d97ed 100644
--- a/model_zoo/official/nlp/q8bert/src/q8bert.py
+++ b/model_zoo/official/nlp/q8bert/src/q8bert.py
@@ -212,12 +212,9 @@ class BertTrainWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BertTrainCell(nn.Cell):
@@ -271,8 +268,8 @@ class BertTrainCell(nn.Cell):
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 class BertNetworkWithLoss_td(nn.Cell):
@@ -451,12 +448,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 
 class BertEvaluationCell(nn.Cell):
@@ -507,5 +501,5 @@ class BertEvaluationCell(nn.Cell):
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
index 3b1468fd41d..c2e8f9f91a3 100644
--- a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
+++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py
@@ -285,12 +285,9 @@ class BertTrainWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 class BertTrainCell(nn.Cell):
     """
@@ -343,8 +340,8 @@ class BertTrainCell(nn.Cell):
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 class BertNetworkWithLoss_td(nn.Cell):
     """
@@ -551,12 +548,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BertEvaluationCell(nn.Cell):
@@ -606,5 +600,5 @@ class BertEvaluationCell(nn.Cell):
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
index 05555bf2df6..8fa2ce1a227 100644
--- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py
+++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py
@@ -187,8 +187,8 @@ class TransformerTrainOneStepCell(nn.TrainOneStepCell):
         grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads)
         # apply grad reducer on grads
         grads = self.grad_reducer(grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -277,12 +277,9 @@ class TransformerTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell)
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 cast = P.Cast()
@@ -444,9 +441,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
         accu_overflow = self.select(overflow, self.one, self.zero)
         self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero)
 
-        if is_accu_step:
-            succ = False
-        else:
+        if not is_accu_step:
             # apply grad reducer on grads
             grads = self.grad_reducer(self.accu_grads)
             scaling = scaling_sens * self.degree * self.accumulation_steps
@@ -463,10 +458,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell):
             overflow = self.reshape(overflow, (()))
             if sens is None:
                 overflow = self.loss_scaling_manager(self.loss_scale, overflow)
-            if overflow:
-                succ = False
-            else:
-                succ = self.optimizer(grads)
+            if not overflow:
+                self.optimizer(grads)
 
-        ret = (mean_loss, overflow, scaling_sens)
-        return F.depend(ret, succ)
+        return (mean_loss, overflow, scaling_sens)
diff --git a/model_zoo/official/recommend/ncf/src/ncf.py b/model_zoo/official/recommend/ncf/src/ncf.py
index 6a9bb21059f..c48af973ca7 100644
--- a/model_zoo/official/recommend/ncf/src/ncf.py
+++ b/model_zoo/official/recommend/ncf/src/ncf.py
@@ -20,7 +20,6 @@ from mindspore.nn.layer.activation import get_activation
 import mindspore.common.dtype as mstype
 from mindspore.ops import operations as P
 from mindspore.common.initializer import initializer
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.context import ParallelMode
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@@ -261,7 +260,8 @@ class TrainStepWrap(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class PredictWithSigmoid(nn.Cell):
diff --git a/model_zoo/official/rl/dqn/README.md b/model_zoo/official/rl/dqn/README.md
index 5149708cddd..5731d24741b 100644
--- a/model_zoo/official/rl/dqn/README.md
+++ b/model_zoo/official/rl/dqn/README.md
@@ -34,8 +34,8 @@ The overall network architecture of DQN is show below:
 - Framework
     - [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below：
-    - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html)
-    - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html)
+    - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
 
 - third-party libraries
 
@@ -50,6 +50,7 @@ pip install gym
 ```python
 ├── dqn
   ├── README.md              # descriptions about DQN
+  ├── README_CH.md              # descriptions about DQN in Chinese
   ├── scripts
   │   ├──run_standalone_eval_ascend.sh        # shell script for evaluation with Ascend
   │   ├──run_standalone_eval_gpu.sh         # shell script for evaluation with GPU
@@ -86,7 +87,7 @@ pip install gym
       GPU: python train.py --device_target GPU --ckpt_path ckpt > log.txt 2>&1 &  
 
   shell:
-      Ascend:bash run_standalone_train_ascend.sh ckpt
+      Ascend: bash run_standalone_train_ascend.sh ckpt
       GPU: bash run_standalone_train_gpu.sh ckpt
 ```
 
@@ -95,29 +96,29 @@ pip install gym
 ```shell
 # evaluat example
   python
-      Ascend: python eval.py --device_target Ascend --ckpt_path .ckpt/checkpoint_dqn.ckpt
-      GPU: python eval.py --device_target GPU --ckpt_path .ckpt/checkpoint_dqn.ckpt
+      Ascend: python eval.py --device_target Ascend --ckpt_path ./ckpt/dqn.ckpt
+      GPU: python eval.py --device_target GPU --ckpt_path ./ckpt/dqn.ckpt
 
   shell:
-      Ascend: bash run_standalone_eval_ascend.sh .ckpt/checkpoint_dqn.ckpt
-      GPU: bash run_standalone_eval_gpu.sh .ckpt/checkpoint_dqn.ckpt
+      Ascend: bash run_standalone_eval_ascend.sh ./ckpt/dqn.ckpt
+      GPU: bash run_standalone_eval_gpu.sh ./ckpt/dqn.ckpt
 ```
 
 ## [Performance](#content)
 
 ### Inference Performance
 
-| Parameters                 | DQN                                                         |
-| -------------------------- | ----------------------------------------------------------- |
-| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8             |
-| uploaded Date              | 03/10/2021 (month/day/year)                                 |
-| MindSpore Version          | 1.1.0                                                       |
-| Training Parameters        | batch_size = 512, lr=0.001                                  |
-| Optimizer                  | RMSProp                                                     |
-| Loss Function              | MSELoss                                                     |
-| outputs                    | probability                                                 |
-| Params (M)                 | 7.3k                                                       |
-| Scripts                    | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn |
+| parameter                 | Ascend                                                          |GPU             |
+| -------------------------- | ------------------------------------------------------- | ----------------------------------------------------------- |
+| Resource                   | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8  |GPU             |
+| uploaded Date              | 03/10/2021 (month/day/year)                                 | 07/28/2021 (month/day/year)                   |
+| MindSpore Version          | 1.1.0                                                    | 1.2.0                                                       |
+| Training Parameters        | batch_size = 512, lr=0.001                                  | batch_size = 32, lr=0.01                                  |
+| Optimizer                  | RMSProp                                        |Adam                                      |
+| Loss Function              | MSELoss                                        |MSELoss                                                     |
+| outputs                    | Reward                                                 | Reward                                                 |
+| Params (M)                 | 7.3k                                                       | 7.3k                                                       |
+| Scripts                    | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn |
 
 ## [Description of Random Situation](#content)
 
@@ -125,4 +126,4 @@ We use random seed in train.py.
 
 ## [ModeZoo Homepage](#contents)  
 
-Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
+Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo).
\ No newline at end of file
diff --git a/model_zoo/official/rl/dqn/README_CN.md b/model_zoo/official/rl/dqn/README_CN.md
index 8e014d50cd5..6fca820d493 100644
--- a/model_zoo/official/rl/dqn/README_CN.md
+++ b/model_zoo/official/rl/dqn/README_CN.md
@@ -35,10 +35,10 @@ DQN网络的模型结构见论文：
 - 硬件
     - Ascend或GPU处理器
 - 框架
-    - [MindSpore](https://www.mindspore.cn/install/)
+    - [MindSpore](https://www.mindspore.cn/install/en)
 - 通过下面网址可以获得更多信息：
-    - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
-    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+    - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
 
 - 第三方库
 
@@ -115,7 +115,7 @@ pip install gym
 | 损失函数              | MSELoss                                                | MSELoss                                                |
 | 输出                    | 游戏得分值                                                 | 游戏得分值                                                 |
 | 参数量(M)                 | 7.3k                                                       | 7.3k                                                       |
-| 脚本 | <<<<https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn>>>> | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn |
+| 脚本 | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn |
 
 # 随机情况描述
 
diff --git a/model_zoo/official/rl/dqn/eval.py b/model_zoo/official/rl/dqn/eval.py
index 7f61abf4772..d222f0ea802 100644
--- a/model_zoo/official/rl/dqn/eval.py
+++ b/model_zoo/official/rl/dqn/eval.py
@@ -19,23 +19,30 @@ import gym
 from mindspore import context
 from mindspore.common import set_seed
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
+
+from src.config_gpu import config_dqn as cfg_gpu
 from src.config import config_dqn as cfg
 from src.agent import Agent
 
 parser = argparse.ArgumentParser(description='MindSpore dqn Example')
-parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
+parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
                     help='device where the code will be implemented (default: Ascend)')
 parser.add_argument('--ckpt_path', type=str, default=None, help='if is test, must provide\
                     path where the trained ckpt file')
 args = parser.parse_args()
 set_seed(1)
 
-
 if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
-    env = gym.make('CartPole-v1')
+    if args.device_target == 'GPU':
+        cfg = cfg_gpu
+
+    env = gym.make(cfg.game)
+    env = env.unwrapped
     cfg.state_space_dim = env.observation_space.shape[0]
     cfg.action_space_dim = env.action_space.n
+    cfg.env_a_shape = 0 if isinstance(env.action_space.sample(),
+                                      int) else env.action_space.sample().shape  # to confirm the shape
     agent = Agent(**cfg)
 
     # load checkpoint
@@ -46,22 +53,25 @@ if __name__ == "__main__":
             raise ValueError("Load param into net fail!")
 
     score = 0
-    agent.load_dict()
-    for episode in range(50):
-        s0 = env.reset()
-        total_reward = 1
+    for episode in range(cfg.EPOCH):
+        s = env.reset()
+        ep_r = 0
         while True:
-            a0 = agent.eval_act(s0)
-            s1, r1, done, _ = env.step(a0)
+            a, flag = agent.act(s)
+            s_, r, done, _ = env.step(a)
 
-            if done:
-                r1 = -1
+            # modify the reward
+            x, x_dot, theta, theta_dot = s_
+            r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
+            r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
+            r = r1 + r2
 
+            ep_r += r
             if done:
                 break
+            s = s_
 
-            total_reward += r1
-            s0 = s1
-        score += total_reward
-        print("episode", episode, "total_reward", total_reward)
-    print("mean_reward", score/50)
+        score += ep_r
+        print("episode", episode, "total_reward", ep_r)
+    print("mean_reward", score / cfg.EPOCH)
+    
\ No newline at end of file
diff --git a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh
index f4bc6545126..bab8ab781cf 100755
--- a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh
+++ b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh
@@ -17,5 +17,4 @@
 # an simple tutorial as follows, more parameters can be setting
 script_self=$(readlink -f "$0")
 self_path=$(dirname "${script_self}")
-CKPT_PATH=$1
-python -s ${self_path}/../train.py --device_target="GPU" --ckpt_path=$CKPT_PATH > log.txt 2>&1 &
+python -s ${self_path}/../train.py --device_target="GPU" > log.txt 2>&1 &
diff --git a/model_zoo/official/rl/dqn/src/agent.py b/model_zoo/official/rl/dqn/src/agent.py
index c76841cdb58..16aeb479d75 100644
--- a/model_zoo/official/rl/dqn/src/agent.py
+++ b/model_zoo/official/rl/dqn/src/agent.py
@@ -14,14 +14,14 @@
 # ============================================================================
 """Agent of reinforcement learning network"""
 
-import random
 import math
 import numpy as np
 import mindspore.nn as nn
-from mindspore import Tensor
 import mindspore.common.dtype as mstype
-from src.dqn import DQN, WithLossCell
 
+from mindspore import Tensor, load_param_into_net
+from mindspore.ops import operations as P
+from src.dqn import DQN, WithLossCell
 
 class Agent:
     """
@@ -30,65 +30,93 @@ class Agent:
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
             setattr(self, key, value)
-        self.policy_net = DQN(self.state_space_dim, 256, self.action_space_dim)
-        self.target_net = DQN(self.state_space_dim, 256, self.action_space_dim)
-        self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr)
-        loss_fn = nn.MSELoss()
-        loss_q_net = WithLossCell(self.policy_net, loss_fn)
-        self.policy_net_train = nn.TrainOneStepCell(loss_q_net, self.optimizer)
-        self.policy_net_train.set_train(mode=True)
-        self.buffer = []
+        self.policy_net = DQN(self.state_space_dim, self.hidden_size, self.action_space_dim)
+        self.target_net = DQN(self.state_space_dim, self.hidden_size, self.action_space_dim)
+        self.policy_net.training = True
+        self.policy_net.requires_grad = True
+        self.learn_step_counter = 0  # for target updating
+        self.memory_counter = 0  # for storing memory
+        self.memory = np.zeros((self.memory_capacity, self.state_space_dim * 2 + 2))  # initialize memory
+        if self.dev == 'Ascend':
+            self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr)
+        else:
+            self.optimizer = nn.Adam(self.policy_net.trainable_params(), learning_rate=self.lr)
+        self.loss_func = nn.MSELoss()
+        self.loss_net = WithLossCell(self.policy_net, self.loss_func)
+        self.train_net = nn.TrainOneStepCell(self.loss_net, self.optimizer)
+        self.train_net.set_train()
+
         self.steps = 0
 
-    def act(self, s0):
+        self.cast = P.Cast()
+        self.expand = P.ExpandDims()
+        self.reshape = P.Reshape()
+        self.argmax = P.ArgMaxWithValue(axis=1, keep_dims=True)
+        self.gather = P.GatherD()
+
+    def act(self, x):
         """
-        Agent choose action.
+        get action
         """
         self.steps += 1
-        epsi = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay))
-        if random.random() < epsi:
-            a0 = random.randrange(self.action_space_dim)
+        if self.dev == 'GPU':
+            epsilon = self.epsi_high
         else:
-            s0 = np.expand_dims(s0, axis=0)
-            s0 = Tensor(s0, mstype.float32)
-            a0 = self.policy_net(s0).asnumpy()
-            a0 = np.argmax(a0)
-        return a0
+            epsilon = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay))
+        flag_com = False
+        if np.random.uniform() < epsilon:
+            x = Tensor(x, mstype.float32)
+            x = self.expand(x, 0)
+            actions_value = self.policy_net.construct(x)
+            action = actions_value.asnumpy()
+            action = np.argmax(action)
+            flag_com = True
+        else:  # random
+            action = np.random.randint(0, self.action_space_dim)
+            action = action if self.env_a_shape == 0 else self.reshape(action, self.env_a_shape)
+        return action, flag_com
 
-    def eval_act(self, s0):
-        self.steps += 1
-        s0 = np.expand_dims(s0, axis=0)
-        s0 = Tensor(s0, mstype.float32)
-        a0 = self.policy_net(s0).asnumpy()
-        a0 = np.argmax(a0)
-        return a0
+    def eval_act(self, x):
+        """
+        choose action in eval
+        """
+        x = Tensor(x, mstype.float32)
+        x = self.expand(x, 0)
+        actions_value = self.policy_net.construct(x)
+        action = actions_value.asnumpy()
+        action = np.argmax(action)
+        return action
 
-    def put(self, *transition):
-        if len(self.buffer) == self.capacity:
-            self.buffer.pop(0)
-        self.buffer.append(transition)
-
-    def load_dict(self):
-        for target_item, source_item in zip(self.target_net.parameters_dict(), self.policy_net.parameters_dict()):
-            target_param = self.target_net.parameters_dict()[target_item]
-            source_param = self.policy_net.parameters_dict()[source_item]
-            target_param.set_data(source_param.data)
+    def store_transition(self, s, a, r, s_):
+        """
+        store transition
+        """
+        transition = np.hstack((s, [a, r], s_))
+        index = self.memory_counter % self.memory_capacity
+        self.memory[index, :] = transition
+        self.memory_counter += 1
 
     def learn(self):
         """
         Agent learn from experience data.
         """
-        if (len(self.buffer)) < self.batch_size:
-            return
 
-        samples = random.sample(self.buffer, self.batch_size)
-        s0, a0, r1, s1 = zip(*samples)
-        s1 = Tensor(s1, mstype.float32)
-        s0 = Tensor(s0, mstype.float32)
-        a0 = Tensor(np.expand_dims(a0, axis=1))
-        next_state_values = self.target_net(s1).asnumpy()
-        next_state_values = np.max(next_state_values, axis=1)
+        if self.learn_step_counter % self.target_replace_iter == 0:
+            load_param_into_net(self.target_net, self.policy_net.parameters_dict())
 
-        y_true = r1 + self.gamma * next_state_values
-        y_true = Tensor(np.expand_dims(y_true, axis=1), mstype.float32)
-        self.policy_net_train(s0, a0, y_true)
+        self.learn_step_counter += 1
+
+        sample_index = np.random.choice(self.memory_capacity, self.batch_size)
+
+        b_memory = self.memory[sample_index, :]
+        b_s = Tensor(b_memory[:, :self.state_space_dim], mstype.float32)
+        b_a = Tensor(b_memory[:, self.state_space_dim:self.state_space_dim + 1].astype(int), mstype.int32)
+        b_r = Tensor(b_memory[:, self.state_space_dim + 1:self.state_space_dim + 2], mstype.float32)
+        b_s_ = Tensor(b_memory[:, -self.state_space_dim:], mstype.float32)
+
+        q_next = self.target_net(b_s_)
+        q_next_numpy = q_next.asnumpy()
+        tem_ = Tensor(np.max(q_next_numpy, axis=1).reshape(-1, 1))
+        q_target = b_r + self.gamma * tem_
+        self.train_net(b_s, q_target, b_a)
+        
\ No newline at end of file
diff --git a/model_zoo/official/rl/dqn/src/config.py b/model_zoo/official/rl/dqn/src/config.py
index 6d7a7ef53f4..6f4efaed994 100644
--- a/model_zoo/official/rl/dqn/src/config.py
+++ b/model_zoo/official/rl/dqn/src/config.py
@@ -19,13 +19,20 @@ network config setting, will be used in train.py and eval.py
 from easydict import EasyDict as edict
 
 config_dqn = edict({
+    'dev': 'Ascend',
     'gamma': 0.8,
     'epsi_high': 0.9,
     'epsi_low': 0.05,
-    'decay': 200,
     'lr': 0.001,
     'capacity': 100000,
     'batch_size': 512,
+    'target_replace_iter': 100,
+    'memory_capacity': 2000,
+    'game': 'CartPole-v1',
     'state_space_dim': 4,
-    'action_space_dim': 2
+    'action_space_dim': 2,
+    'env_a_shape': 0,
+    'hidden_size': 256,
+    'decay': 200,
+    'EPOCH': 50
 })
diff --git a/model_zoo/official/rl/dqn/src/dqn.py b/model_zoo/official/rl/dqn/src/dqn.py
index 1a3e0b2dd89..5d5dfd60843 100644
--- a/model_zoo/official/rl/dqn/src/dqn.py
+++ b/model_zoo/official/rl/dqn/src/dqn.py
@@ -17,8 +17,10 @@
 import mindspore.nn as nn
 import mindspore.ops as ops
 
-
-class DQN(nn. Cell):
+class DQN(nn.Cell):
+    """
+    DQN net
+    """
     def __init__(self, input_size, hidden_size, output_size):
         super(DQN, self).__init__()
         self.linear1 = nn.Dense(input_size, hidden_size)
@@ -26,6 +28,9 @@ class DQN(nn. Cell):
         self.relu = nn.ReLU()
 
     def construct(self, x):
+        """
+        model construct
+        """
         x = self.relu(self.linear1(x))
         return self.linear2(x)
 
@@ -40,8 +45,12 @@ class WithLossCell(nn.Cell):
         self._loss_fn = loss_fn
         self.gather = ops.GatherD()
 
-    def construct(self, x, act, label):
+    def construct(self, x, label, index):
+        """
+        compute loss
+        """
         out = self._backbone(x)
-        out = self.gather(out, 1, act)
+        out = self.gather(out, 1, index)
         loss = self._loss_fn(out, label)
         return loss
+        
\ No newline at end of file
diff --git a/model_zoo/official/rl/dqn/train.py b/model_zoo/official/rl/dqn/train.py
index 435c960a171..40a1234028a 100644
--- a/model_zoo/official/rl/dqn/train.py
+++ b/model_zoo/official/rl/dqn/train.py
@@ -16,57 +16,93 @@
 
 import os
 import argparse
+import timeit
 import gym
+import numpy as np
 from mindspore import context
 from mindspore.common import set_seed
 from mindspore.train.serialization import save_checkpoint
 from src.config import config_dqn as cfg
+from src.config_gpu import config_dqn as cfg_gpu
 from src.agent import Agent
 
 parser = argparse.ArgumentParser(description='MindSpore dqn Example')
-parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'],
+parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
                     help='device where the code will be implemented (default: Ascend)')
 parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\
                     path where the trained ckpt file')
 args = parser.parse_args()
 set_seed(1)
 
+def save_ckpt(path, model, ckpt_name):
+    """
+    save ckpt file
+    """
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+    ckpt_name = path + ckpt_name
+    save_checkpoint(model, ckpt_name)
+
 
 if __name__ == "__main__":
     context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
     if args.device_target == 'GPU':
-        # Enable graph kernel
-        context.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion")
-    env = gym.make('CartPole-v1')
+        cfg = cfg_gpu
+        context.set_context(device_id=1)
+
+    env = gym.make(cfg.game)
+    env = env.unwrapped
     cfg.state_space_dim = env.observation_space.shape[0]
     cfg.action_space_dim = env.action_space.n
+    cfg.env_a_shape = 0 if isinstance(env.action_space.sample(),
+                                      int) else env.action_space.sample().shape
     agent = Agent(**cfg)
-    agent.load_dict()
 
-    for episode in range(300):
-        s0 = env.reset()
+    rewards = []
+    count = 0
+    times = []
+
+    print('\nCollecting experience...')
+    for episode in range(400):
+        s = env.reset()
         total_reward = 1
+        ep_r = 0
         while True:
-            a0 = agent.act(s0)
-            s1, r1, done, _ = env.step(a0)
+            start = timeit.default_timer()
+            a, flag = agent.act(s)
+            s_, r, done_, _ = env.step(a)
 
-            if done:
-                r1 = -1
+            # modify the reward
+            x, x_dot, theta, theta_dot = s_
+            r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8
+            r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5
+            r = r1 + r2
 
-            agent.put(s0, a0, r1, s1)
+            if flag:
+                end = timeit.default_timer()
+                differences = end - start
+                times.append(differences)
+                count += 1
+                    # pass
 
-            if done:
+            agent.store_transition(s, a, r, s_)
+            ep_r += r
+            if agent.memory_counter > cfg.memory_capacity:
+                agent.learn()
+                if done_:
+                    print("episode", episode, "total_reward", round(ep_r, 2))
+                    rewards.append(round(ep_r, 2))
+            if done_:
                 break
+            s = s_
+    env.close()
+    save_ckpt(os.path.realpath(args.ckpt_path), agent.policy_net, "/dqn.ckpt")
+    rewards_numpy = np.array(rewards)
 
-            total_reward += r1
-            s0 = s1
-            agent.learn()
-        agent.load_dict()
-        print("episode", episode, "total_reward", total_reward)
+    times.remove(min(times))
+    times.remove(max(times))
+    times_numpy = np.array(times)
 
-    path = os.path.realpath(args.ckpt_path)
-    if not os.path.exists(path):
-        os.makedirs(path)
-
-    ckpt_name = path + "/dqn.ckpt"
-    save_checkpoint(agent.policy_net, ckpt_name)
+    print(rewards_numpy.mean(), times_numpy.mean())
+    
\ No newline at end of file
diff --git a/model_zoo/research/cv/AVA_cifar/src/network_define.py b/model_zoo/research/cv/AVA_cifar/src/network_define.py
index 8e102cd486a..132e7033b34 100644
--- a/model_zoo/research/cv/AVA_cifar/src/network_define.py
+++ b/model_zoo/research/cv/AVA_cifar/src/network_define.py
@@ -15,7 +15,6 @@
 """define network"""
 
 import mindspore.nn as nn
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore import ParameterTuple
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@@ -83,4 +82,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.net_with_loss, weights)(data3, data2, data1, label)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py
index 1084f084168..4ab7d928e6f 100644
--- a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py
+++ b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """define pretrain network"""
 import mindspore.nn as nn
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
 from mindspore import ParameterTuple
@@ -85,4 +84,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.net_with_loss, weights)(data1, data2, data3, label)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py
index d5e4ad32fba..01167b1c6d6 100644
--- a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py
+++ b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py
@@ -14,7 +14,6 @@
 # ============================================================================
 """define training network"""
 import mindspore.nn as nn
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
 from mindspore import ParameterTuple
@@ -84,4 +83,5 @@ class TrainOneStepCell(nn.Cell):
         grads = self.grad(self.net_with_loss, weights)(data, label)
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/research/cv/AttGAN/src/cell.py b/model_zoo/research/cv/AttGAN/src/cell.py
index 5271048c6ea..ec8d9a2928d 100644
--- a/model_zoo/research/cv/AttGAN/src/cell.py
+++ b/model_zoo/research/cv/AttGAN/src/cell.py
@@ -116,7 +116,8 @@ class TrainOneStepCellGen(nn.Cell):
         grads = self.grad(self.network, weights)(img_a, att_a, att_a_, att_b, att_b_, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads)), gf_loss, gc_loss, gr_loss
+        self.optimizer(grads)
+        return loss, gf_loss, gc_loss, gr_loss
 
 
 class TrainOneStepCellDis(nn.Cell):
@@ -152,4 +153,5 @@ class TrainOneStepCellDis(nn.Cell):
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
 
-        return F.depend(loss, self.optimizer(grads)), d_real_loss, d_fake_loss, dc_loss, df_gp
+        self.optimizer(grads)
+        return loss, d_real_loss, d_fake_loss, dc_loss, df_gp
diff --git a/model_zoo/research/cv/FaceDetection/src/network_define.py b/model_zoo/research/cv/FaceDetection/src/network_define.py
index 6a342119c43..0284586929a 100644
--- a/model_zoo/research/cv/FaceDetection/src/network_define.py
+++ b/model_zoo/research/cv/FaceDetection/src/network_define.py
@@ -138,10 +138,8 @@ class TrainOneStepWithLossScaleCell(nn.Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
 
-        opt = self.optimizer(grads)
-
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, opt)
+        self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class BuildTrainNetworkV2(nn.Cell):
diff --git a/model_zoo/research/cv/ICNet/README.md b/model_zoo/research/cv/ICNet/README.md
index c2496b09bd7..8b330c1d874 100644
--- a/model_zoo/research/cv/ICNet/README.md
+++ b/model_zoo/research/cv/ICNet/README.md
@@ -23,7 +23,7 @@
 
 ICNet(Image Cascade Network) propose a full convolution network which incorporates multi-resolution branches under proper label guidance to address the challenge of real-time semantic segmentation.
 
-[paper](https://arxiv.org/abs/1704.08545)ECCV2018
+[paper](https://arxiv.org/abs/1704.08545) from ECCV2018
 
 # [Model Architecture](#Contents)
 
@@ -31,7 +31,7 @@ ICNet takes cascade image inputs (i.e., low-, medium- and high resolution images
 
 # [Dataset](#Content)
 
-used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/)
+used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/) (please download 1st and 3rd zip)
 
 It contains 5,000 finely annotated images split into training, validation and testing sets with 2,975, 500, and 1,525 images respectively.
 
@@ -64,6 +64,16 @@ It contains 5,000 finely annotated images split into training, validation and te
     ├── export.py                                  # export mindir
     ├── postprocess.py                             # 310 infer calculate accuracy
     ├── README.md                                  # descriptions about ICNet
+    ├── Res50V1_PRE                                # scripts for pretrain
+    │   ├── scripts
+    │   │   └── run_distribute_train.sh
+    │   ├── src
+    │   │   ├── config.py
+    │   │   ├── CrossEntropySmooth.py
+    │   │   ├── dataset.py
+    │   │   ├── lr_generator.py
+    │   │   └── resnet50_v1.py
+    │   └── train.py
     ├── scripts
     │   ├── run_distribute_train8p.sh              # multi cards distributed training in ascend
     │   ├── run_eval.sh                            # validation script
@@ -95,7 +105,7 @@ Set script parameters in src/model_utils/icnet.yaml .
 
 ```bash
 name: "icnet"
-backbone: "resnet50"
+backbone: "resnet50v1"
 base_size: 1024    # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0]
 crop_size: 960     # end of augmentation, crop to training
 ```
@@ -116,9 +126,8 @@ valid_batch_size: 1
 cityscapes_root: "/data/cityscapes/" # set dataset path
 epochs: 160
 val_epoch: 1
-ckpt_dir: "./ckpt/"                  # ckpt and training log will be saved here
 mindrecord_dir: ''                   # set mindrecord path
-pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # set the pretrained model path correctly
+pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # use the latest checkpoint file after pre-training
 save_checkpoint_epochs: 5
 keep_checkpoint_max: 10
 ```
@@ -137,18 +146,28 @@ keep_checkpoint_max: 10
 
 [MINDRCORD_PATH] in script should be consistent with 'mindrecord_dir' in config file.
 
-### Distributed Training
+### Pre-training
 
-- Run distributed train in ascend processor environment
+The folder Res50V1_PRE contains the scripts for pre-training and its dataset is [image net](https://image-net.org/). More details in [GENet_Res50](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/GENet_Res50)
+
+- Usage:
 
 ```shell
-    bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PROJECT_PATH]
+    bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH]
 ```
 
 - Notes:
 
 The hccl.json file specified by [RANK_TABLE_FILE] is used when running distributed tasks. You can use [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate this file.
 
+### Distributed Training
+
+- Run distributed train in ascend processor environment
+
+```shell
+    bash scripts/run_distribute_train8p.sh [RANK_TABLE_FILE] [PROJECT_PATH]
+```
+
 ### Training Result
 
 The training results will be saved in the example path, The folder name starts with "ICNet-".You can find the checkpoint file and similar results below in LOG(0-7)/log.txt.
@@ -174,7 +193,7 @@ epoch time: 97117.785 ms, per step time: 1044.277 ms
 Check the checkpoint path used for evaluation before running the following command.
 
 ```shell
-    bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH]
+    bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID]
 ```
 
 ### Evaluation Result
@@ -196,7 +215,7 @@ avgtime 0.19648232793807982
     bash run_infer_310.sh [The path of the MINDIR for 310 infer] [The path of the dataset for 310 infer]  0
 ```
 
-Note:: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]".
+- Note: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]".
 
 # [Model Description](#Content)
 
@@ -204,7 +223,7 @@ Note:: Before executing 310 infer, create the MINDIR/AIR model using "python exp
 
 ### Training Performance
 
-|Parameter              | MaskRCNN                                                   |
+|Parameter              | ICNet                                                   |
 | ------------------- | --------------------------------------------------------- |
 |resources              | Ascend 910；CPU 2.60GHz, 192core；memory：755G |
 |Upload date            |2021.6.1                    |
diff --git a/model_zoo/research/cv/ICNet/eval.py b/model_zoo/research/cv/ICNet/eval.py
index bccbb3ed434..e2ab20fac6e 100644
--- a/model_zoo/research/cv/ICNet/eval.py
+++ b/model_zoo/research/cv/ICNet/eval.py
@@ -74,7 +74,6 @@ class Evaluator:
             mask = self._mask_transform(mask)  # mask shape: (H,w)
 
             image = Tensor(image)
-            print(image)
 
             expand_dims = ops.ExpandDims()
             image = expand_dims(image, 0)
@@ -84,8 +83,8 @@ class Evaluator:
             end_time = time.time()
             step_time = end_time - start_time
 
-            expand_dims = ops.ExpandDims()
-            mask = expand_dims(mask, 0)
+            output = np.array(output)
+            mask = np.expand_dims(mask, axis=0)
             self.metric.update(output, mask)
             list_time.append(step_time)
 
diff --git a/model_zoo/research/cv/ICNet/scripts/run_eval.sh b/model_zoo/research/cv/ICNet/scripts/run_eval.sh
index 74495640f9a..396d49719d2 100644
--- a/model_zoo/research/cv/ICNet/scripts/run_eval.sh
+++ b/model_zoo/research/cv/ICNet/scripts/run_eval.sh
@@ -14,9 +14,9 @@
 # limitations under the License.
 # ============================================================================
 
-if [ $# != 3 ]
+if [ $# != 4 ]
 then
-    echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH]"
+    echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID]"
 exit 1
 fi
 
@@ -53,7 +53,7 @@ fi
 
 ulimit -u unlimited
 export DEVICE_NUM=1
-export DEVICE_ID=0
+export DEVICE_ID=$4
 export RANK_SIZE=1
 export RANK_ID=0
 
@@ -68,6 +68,6 @@ cp -r ../src ./eval
 cd ./eval || exit
 env > env.log
 echo "start evaluation for device $DEVICE_ID"
-python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 &> log &
+python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 --device=$4 &> log &
 
 cd ..
diff --git a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml
index 9fc8d38a8a5..649ff114b8d 100644
--- a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml
+++ b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml
@@ -1,7 +1,7 @@
 ### 1.Model
 model:
   name: "icnet"
-  backbone: "resnet50"
+  backbone: "resnet50v1"
   base_size: 1024    # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0]
   crop_size: 960     # end of augmentation, crop to training
 
diff --git a/model_zoo/research/cv/IPT/src/loss.py b/model_zoo/research/cv/IPT/src/loss.py
index 30ae4ea9f85..11a3a986ae9 100644
--- a/model_zoo/research/cv/IPT/src/loss.py
+++ b/model_zoo/research/cv/IPT/src/loss.py
@@ -144,12 +144,9 @@ class IPTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class SupConLoss(nn.Cell):
diff --git a/model_zoo/research/cv/IPT/src/utils.py b/model_zoo/research/cv/IPT/src/utils.py
index 9928281a0c7..e2d77b0d887 100644
--- a/model_zoo/research/cv/IPT/src/utils.py
+++ b/model_zoo/research/cv/IPT/src/utils.py
@@ -23,7 +23,6 @@ from mindspore.common import dtype as mstype
 from mindspore.context import ParallelMode
 from mindspore.ops import operations as P
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
 from mindspore.parallel._utils import _get_parallel_mode
 from mindspore.train.serialization import save_checkpoint
@@ -82,7 +81,8 @@ class MyTrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def sub_mean(x):
diff --git a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py
index 12f118deb17..428e7ae5819 100644
--- a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py
+++ b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py
@@ -225,11 +225,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell):
 
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
+        if not overflow:
+            self.optimizer(grads)
         self.loss_scalar("loss", loss)
-        ret = (loss, cond, scaling_sens)
-
-        return F.depend(ret, succ)
+        return (loss, cond, scaling_sens)
diff --git a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py
index df7ec1f42fe..15d38021b9e 100644
--- a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py
+++ b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py
@@ -22,7 +22,6 @@ from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits, L1Loss
 from  mindspore.nn import Momentum
 from mindspore.ops import operations as P
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.common.initializer import HeNormal
 from mindspore.common.initializer import Normal
 from mindspore  import Tensor
@@ -382,7 +381,8 @@ class TrainStepWrap(nn.Cell):
         if not self.is_train:
             return loss
         grads = self.grad(self.network, weights)(x, labels1, labels2)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class TestStepWrap(nn.Cell):
diff --git a/model_zoo/research/cv/ProtoNet/README.md b/model_zoo/research/cv/ProtoNet/README.md
index 741c6e3889b..e424789b6a6 100644
--- a/model_zoo/research/cv/ProtoNet/README.md
+++ b/model_zoo/research/cv/ProtoNet/README.md
@@ -29,7 +29,12 @@ Proto-Net contains 2 parts named Encoder and Relation. The former one has 4 conv
 
 Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below.
 
-Dataset used: [omniglot](https://github.com/brendenlake/omniglot)
+The dataset omniglot can be obtained from (https://github.com/orobix/Prototypical-Networks-for-Few-shot-Learning-PyTorch/blob/master/). You can obtain the dataset after running the scripts.
+
+```bash
+cd src
+python train.py
+```
 
 - Dataset size 4.02M，32462 28*28 in 1622 classes
     - Train 1,200 classes  
@@ -39,7 +44,7 @@ Dataset used: [omniglot](https://github.com/brendenlake/omniglot)
 
 - The directory structure is as follows:
 
-```text
+```shell
 └─Data
     ├─raw
     ├─spilts
@@ -67,13 +72,13 @@ Dataset used: [omniglot](https://github.com/brendenlake/omniglot)
 
 After installing MindSpore via the official website, you can start training and evaluation as follows:
 
-```shell
-# enter script dir, train ProtoNet in standalone
-sh run_standalone_train_ascend.sh dataset 1 20 20
-# enter script dir, train ProtoNet in distribution
-sh run_distribution_ascend.sh dataset rank_table dataset 20
+```python
+# enter script dir, train ProtoNet
+sh run_standalone_train_ascend.sh "../dataset" 1 60 500
 # enter script dir, evaluate ProtoNet
-sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20
+sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5
+# enter script dir, train ProtoNet distributed
+sh run_distribution_ascend.sh "./rank_table.json" "../dataset" 60 500
 ```
 
 ## [Script and Sample Code](#contents)
@@ -120,8 +125,7 @@ Major parameters in train.py and config.py as follows:
 ### Training
 
 ```bash
-# enter script dir, train ProtoNet in standalone
-sh run_standalone_train_ascend.sh dataset 1 20 20
+sh run_standalone_train_ascend.sh "../dataset" 1 60 500
 ```
 
 The model checkpoint will be saved in the current directory.
@@ -133,11 +137,11 @@ The model checkpoint will be saved in the current directory.
 Before running the command below, please check the checkpoint path used for evaluation.
 
 ```bash
-# enter script dir, evaluate ProtoNet
-sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20
+sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5
 ```
 
-```text
+```shell
+
 Test Acc: 0.9954400658607483  Loss: 0.02102319709956646
 ```
 
@@ -149,9 +153,9 @@ Test Acc: 0.9954400658607483  Loss: 0.02102319709956646
 
 | Parameters                 | ProtoNet                                                   |
 | -------------------------- | ---------------------------------------------------------- |
-| Resource                   | CentOs 8.2; Ascend 910; CPU 2.60GHz; 192cores; Memory 755G             |
+| Resource                   | CentOs 8.2; Ascend 910 ; CPU 2.60GHz，192cores；Memory 755G             |
 | uploaded Date              | 03/26/2021 (month/day/year)                                 |
-| MindSpore Version          | 1.2.0                                                     |
+| MindSpore Version          | 1.1.1                                                      |
 | Dataset                    | OMNIGLOT                                                    |
 | Training Parameters        | episode=500, class_num = 5, lr=0.001, classes_per_it_tr=60, num_support_tr=5, num_query_tr=5, classes_per_it_val=20, num_support_val=5, num_query_val=15         |
 | Optimizer                  | Adam                                                         |
@@ -161,7 +165,7 @@ Test Acc: 0.9954400658607483  Loss: 0.02102319709956646
 | Speed                      | 215 ms/step                          |
 | Total time                 | 3 h 23m (8p)                |
 | Checkpoint for Fine tuning | 440 KB (.ckpt file)                                         |
-| Scripts                    | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/ProtoNet |
+| Scripts                    | https://gitee.com/mindspore/mindspore/tree/r1.1/model_zoo/research/cv/protonet |
 
 # [ModelZoo Homepage](#contents)
 
diff --git a/model_zoo/research/cv/ProtoNet/eval.py b/model_zoo/research/cv/ProtoNet/eval.py
index 27d7cf3daa7..612fa3ae5b0 100644
--- a/model_zoo/research/cv/ProtoNet/eval.py
+++ b/model_zoo/research/cv/ProtoNet/eval.py
@@ -15,14 +15,13 @@
 """
 ProtoNet evaluation script.
 """
-import os
+import numpy as np
 from mindspore import dataset as ds
 from mindspore import load_checkpoint
 import mindspore.context as context
 from src.protonet import ProtoNet
 from src.parser_util import get_parser
 from src.PrototypicalLoss import PrototypicalLoss
-import numpy as np
 from model_init import init_dataloader
 from train import WithLossCell
 
@@ -67,5 +66,5 @@ if __name__ == '__main__':
                                options.classes_per_it_val, is_train=False)
     Net = WithLossCell(Net, loss_fn)
     val_dataloader = init_dataloader(options, 'val', datapath)
-    load_checkpoint(os.path.join(ckptpath, 'best_ck.ckpt'), net=Net)
+    load_checkpoint(ckptpath, net=Net)
     test(val_dataloader, Net)
diff --git a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh
index ce0977ca511..e44f598945a 100644
--- a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh
+++ b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh
@@ -16,7 +16,7 @@
 # an simple tutorial as follows, more parameters can be setting
 if [ $# != 4 ]
 then
-    echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS]"
+    echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS] [EPOCHS]"
 exit 1
 fi
 
@@ -33,6 +33,7 @@ RANK_TABLE_FILE=$(realpath $1)
 export RANK_TABLE_FILE
 export DATA_PATH=$2
 export TRAIN_CLASS=$3
+export EPOCHS=$4
 echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}"
 
 export SERVER_ID=0
@@ -43,13 +44,16 @@ do
     export RANK_ID=$((rank_start + i))
     rm -rf ./train_parallel$i
     mkdir ./train_parallel$i
-    cp -r ./src ./train_parallel$i
-    cp ./train.py ./train_parallel$i
+    cp -r ../src ./train_parallel$i
+    cp ../train.py ./train_parallel$i
+    cp ../model_init.py ./train_parallel$i
     echo "start training for rank $RANK_ID, device $DEVICE_ID"
     cd ./train_parallel$i ||exit
     env > env.log
-    python train.py --data_path=$DATA_PATH \
+    python train.py --dataset_root=$DATA_PATH \
                     --device_id=$DEVICE_ID --device_target="Ascend" \
-                    --classes_per_it_tr=$TRAIN_CLASS > log 2>&1 &
+                    --classes_per_it_tr=$TRAIN_CLASS\
+                    --experiment_root=./output\
+                    --epochs=$EPOCHS > log 2>&1 &
     cd ..
 done
diff --git a/model_zoo/research/cv/ProtoNet/src/parser_util.py b/model_zoo/research/cv/ProtoNet/src/parser_util.py
index 906d5385bd7..6aa7d6ffb16 100644
--- a/model_zoo/research/cv/ProtoNet/src/parser_util.py
+++ b/model_zoo/research/cv/ProtoNet/src/parser_util.py
@@ -49,7 +49,7 @@ def get_parser():
     parser.add_argument('-exp', '--experiment_root',
                         type=str,
                         help='root where to store models, losses and accuracies',
-                        default='..' + os.sep + 'output')
+                        default='.' + os.sep + 'output')
 
     parser.add_argument('-nep', '--epochs',
                         type=int,
diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py
index 6c7b0792742..59cf30efd0c 100644
--- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py
+++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py
@@ -59,7 +59,8 @@ class TrainOneStepD(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads_d = self.grad_reducer(grads_d)
-        return ops.depend(ld, self.optimizer(grads_d))
+        self.optimizer(grads_d)
+        return ld
 
 class TrainOnestepG(nn.Cell):
     """
@@ -103,4 +104,5 @@ class TrainOnestepG(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads_g = self.grad_reducer(grads_g)
-        return ops.depend(lg, self.optimizer(grads_g))
+        self.optimizer(grads_g)
+        return lg
diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py
index e9182b755e8..620ef823124 100644
--- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py
+++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py
@@ -59,5 +59,6 @@ class TrainOnestepPSNR(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return ops.depend(psnr_loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return psnr_loss
     
\ No newline at end of file
diff --git a/model_zoo/research/cv/STGAN/src/models/networks.py b/model_zoo/research/cv/STGAN/src/models/networks.py
index da83c30c7c8..1cbd4cfd5a3 100644
--- a/model_zoo/research/cv/STGAN/src/models/networks.py
+++ b/model_zoo/research/cv/STGAN/src/models/networks.py
@@ -413,7 +413,8 @@ class TrainOneStepGenerator(nn.Cell):
         grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg,
                                                       attr_diff, sens)
         grads = self.grad_reducer(grads)
-        return (ops.depend(loss_G, self.optimizer(grads)), fake_x, loss_G,
+        self.optimizer(grads)
+        return (loss_G, fake_x, loss_G,
                 loss_fake_G, loss_cls_G, loss_rec_G, loss_adv_G)
 
 
@@ -451,5 +452,6 @@ class TrainOneStepDiscriminator(nn.Cell):
         grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg,
                                                       attr_diff, alpha, sens)
         grads = self.grad_reducer(grads)
-        return (ops.depend(loss_D, self.optimizer(grads)), loss_D, loss_real_D,
+        self.optimizer(grads)
+        return (loss_D, loss_D, loss_real_D,
                 loss_fake_D, loss_cls_D, loss_gp_D, loss_adv_D, attr_diff)
diff --git a/model_zoo/research/cv/SiamFC/readme.md b/model_zoo/research/cv/SiamFC/readme.md
new file mode 100644
index 00000000000..21026f95241
--- /dev/null
+++ b/model_zoo/research/cv/SiamFC/readme.md
@@ -0,0 +1,195 @@
+# Contents
+
+- [SiamFC Description](#SiamFC-Description)
+- [Model Architecture](#SiamFC-Architecture)
+- [Dataset](#SiamFC-dataset)
+- [Environmental requirements](#Environmental)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+    - [Script and Sample Code](#script-and-sample-code)
+    - [Script Parameters](#script-parameters)
+    - [Training Process](#training-process)
+        - [Training](#training)
+    - [Evaluation Process](#evaluation-process)
+        - [Evaluation](#evaluation)
+- [Model Description](#model-description)
+    - [Performance](#performance)
+        - [Evaluation Performance](#evaluation-performance)
+
+# [SiamFC Description](#Contents)
+
+Siamfc proposes a new full convolution twin network as the basic tracking algorithm, which is trained end-to-end on ilsvrc15 target tracking video data set. Our tracker exceeds the real-time requirement in frame rate. Although it is very simple, it achieves the best performance on multiple benchmarks.
+
+[paper](https://arxiv.org/pdf/1606.09549.pdf)  Luca Bertinetto Jack Valmadre Jo˜ao F. Henriques Andrea Vedaldi Philip H. S. Torr
+Department of Engineering Science, University of Oxford
+
+# [Model Architecture](#Contents)
+
+Siamfc first uses full convolution alexnet for feature extraction online and offline, and uses twin network to train the template and background respectively. On line, after getting the box of the first frame, it carries out centrrop, and then loads checkpoint to track the subsequent frames. In order to find the box, it needs to carry out a series of penalties on the score graph, Finally, the final prediction point is obtained by twice trilinear interpolation.
+
+# [Dataset](#Contents)
+
+used Dataset :[ILSVRC2015-VID](http://bvisionweb1.cs.unc.edu/ilsvrc2015/ILSVRC2015_VID.tar.gz)
+
+- Dataset size : 85GB ,total 30 type
+    - Training set: a total of 3862 videos and their corresponding frame pictures and box positions
+    - Verification set: 555 videos and corresponding pictures and box locations
+    - Test set: a total of 973 videos and corresponding pictures and box locations
+- Data format: the image is in h*w*C format, the box position includes the coordinates of the lower left corner and the upper right corner, the format is XML, and the XML needs to be parsed
+
+# [Environmental requirements](#Contents)
+
+- Hardware :(Ascend)
+    - Prepare ascend processor to build hardware environment
+- frame:
+    - [Mindspore](https://www.mindspore.cn/install)
+- For details, please refer to the following resources:
+    - [MindSpore course](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html)
+- more API
+    - got10k toolkit
+    - opencv
+    - lmdb
+
+# [quick start](#Contents)
+
+After installing mindspree through the official website, you can follow the following steps to train and evaluate:
+
+- Run the python script to preprocess the data set
+
+  python src/create_dataset_ILSVRC.py -d data_dir -o output_dir
+
+- Run Python script to create LMDB
+
+  python src/create_lmdb.py -d data_dir -o output_dir
+
+  for example：
+  data_dir = '/data/VID/ILSVRC_VID_CURATION_train'
+  output_dir = '/data/VID/ILSVRC_VID_CURATION_train.lmdb'
+
+  __Remarks:The encrypted pathname is used as the index.Therefore,you cannot change the location of the dataset
+  after creating it, because you need to find the corresponding image according to the index.__
+
+- Run the script for training
+
+  bash run_standalone_train_ascend.sh [Device_ID] [Dataset_path]
+  Remarks:For the training set position after preprocessing
+
+- more
+
+  This example is single card training.
+
+- Run the script for evaluation
+
+  python eval.py,need got10k toolkit,the dataset is OTB2013(50) or OTB2015(100)
+
+# [Script description](#Contents)
+
+## Script and sample code
+
+```python
+    ├── SiamFC
+        ├── README.md                    // Notes on siamfc
+        ├── scripts
+        │   ├──ma-pre-start.sh          // Create environment before modelarts training
+        │   ├──run_standalone_train_ascend.sh             // Single card training in ascend
+        │   ├──run_distribution_ascend.sh          // Multi card distributed training in ascend
+        ├── src
+        │   ├──alexnet.py             // Create dataset
+        │   ├──config.py              // Alexnet architecture
+        │   ├──custom_transforms.py   //Data set processing
+        │   ├──dataset.py            //GeneratorDataset
+        │   ├──Groupconv.py        //Mindpore does not support group convolution at present. This is an alternative
+        │   ├──lr_generator.py       //Dynamic learning rate
+        │   ├──tracker.py           //Trace script
+        │   ├──utils.py             // utils
+        │   ├──create_dataset_ILSVRC.py     // Create dataset
+        │   ├──create_lmdb.py               //Create LMDB
+        ├── train.py               // Training script
+        ├── eval.py               //  Evaluation script
+```
+
+## Script parameters
+
+python train.py and config.py The main parameters are as follows:
+
+- data_path：An absolutely complete path to training and evaluation data sets.
+- epoch_size：Total training rounds
+- batch_size：Training batch size.
+- image_height：The image height is used as the model input.
+- image_width：The image width is used as the model input.
+- exemplar_size：Template size
+- instance_size：Sample size.
+- lr：Learning rate.
+- frame_range：Select the frame interval of the template and sample.
+- response_scale：Scaling factor of score chart.
+
+## Training process
+
+### Training
+
+- Running in ascend processor environment
+
+```python
+  python train.py  --device_id=${DEVICE_ID} --data_path=${DATASET_PATH}
+```
+
+- After training, the loss value is as follows:
+
+```bash
+  grep "loss is " log
+  epoch: 1 step: 1, loss is 1.14123213
+  ...
+  epoch: 1 step: 1536, loss is 0.5234123
+  epoch: 1 step: 1537, loss is 0.4523326
+  epoch: 1 step: 1538, loss is 0.6235748
+ ...
+```
+
+- Model checkpoints are saved in the current directory.
+
+- After training, the loss value is as follows:
+
+```bash
+  grep "loss is " log:
+  epoch: 30 step: 1, loss is 0.12534634
+  ...
+  epoch: 30 step: 1560, loss is 0.2364573
+  epoch: 30 step: 1561, loss is 0.156347
+  epoch: 30 step: 1561, loss is 0.173423
+```
+
+## Evaluation process
+
+Check the checkpoint path used for evaluation before running the following command.
+
+- Running in ascend processor environment
+
+```bash
+  python eval.py  --device_id=${DEVICE_ID} --model_path=${MODEL_PATH}
+```
+
+  The results were as follows:
+
+```bash
+  SiamFC_159_50_6650.ckpt -prec_score:0.777 -succ_score:0.589 _succ_rate:0.754
+```
+
+# [Model description](#Contents)
+
+## performance
+
+### Evaluate performance
+
+|parameter   | Ascend        |
+| -------------------------- | ---------------------------------------------- |
+|resources     | Ascend 910；CPU 2.60GHz, 192core；memory：755G |
+|Upload date   |2021.5.20         |
+|mindspore version   |mindspore1.2.0     |
+|training parameter | epoch=50,step=6650,batch_size=8,lr_init=1e-2,lr_endl=1e-5   |
+|optimizer     |SGD optimizer，momentum=0.0,weight_decay=0.0    |
+|loss function     |BCEWithLogits   |
+|training speed    | epoch time：285693.557 ms per step time :42.961 ms |
+|total time        |about 5 hours    |
+|Script URL        |https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/SiamFC  |
+|Random number seed         |set_seed = 1234     |
diff --git a/model_zoo/research/cv/advanced_east/src/model.py b/model_zoo/research/cv/advanced_east/src/model.py
index 532ec8d8cba..29f78eb3cce 100644
--- a/model_zoo/research/cv/advanced_east/src/model.py
+++ b/model_zoo/research/cv/advanced_east/src/model.py
@@ -19,7 +19,6 @@ import mindspore
 import mindspore.nn as nn
 from mindspore.ops import operations as P
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import ResizeNearestNeighbor
 from mindspore import Tensor, ParameterTuple, Parameter
 from mindspore.common.initializer import initializer, TruncatedNormal
@@ -410,7 +409,8 @@ class TrainStepWrap(nn.Cell):
         loss = self.network(image, label)
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(image, label, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def get_AdvancedEast_net(args):
diff --git a/model_zoo/research/cv/arcface/README_CN.md b/model_zoo/research/cv/arcface/README_CN.md
index 25d07b67638..f08a44a5d21 100644
--- a/model_zoo/research/cv/arcface/README_CN.md
+++ b/model_zoo/research/cv/arcface/README_CN.md
@@ -55,13 +55,13 @@
 
 ```python
 # 分布式训练运行示例
-sh scripts/run_distribute_train.sh /path/dataset /path/rank_table
+bash scripts/run_distribute_train.sh /path/dataset /path/rank_table
 
 # 单机训练运行示例
-sh scripts/run_standalone_train.sh /path/dataset
+bash scripts/run_standalone_train.sh /path/dataset
 
 # 运行评估示例
-sh scripts/run_eval.sh /path/evalset /path/ckpt
+bash scripts/run_eval.sh /path/evalset /path/ckpt
 ```
 
 ## 脚本说明
@@ -108,7 +108,7 @@ train.py和val.py中主要参数如下：
 ### 分布式训练
 
 ```shell
-sh scripts/run_distribute_train.sh /path/dataset /path/rank_table
+bash scripts/run_distribute_train.sh /path/dataset /path/rank_table
 ```
 
 上述shell脚本将在后台运行分布训练。可以通过`device[X]/train.log`文件查看结果。
@@ -134,7 +134,7 @@ epoch time: 1104929.793 ms, per step time: 97.162 ms
   在运行以下命令之前，请检查用于评估的检查点路径。请将检查点路径设置为绝对全路径，例如“username/arcface/arcface-11372-1.ckpt”。
 
   ```bash
-  sh scripts/run_eval.sh /path/evalset /path/ckpt
+  bash scripts/run_eval.sh /path/evalset /path/ckpt
   ```
 
   上述python命令将在后台运行，您可以通过eval.log文件查看结果。测试数据集的准确性如下：
diff --git a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh
index 6c953ab1097..35989366537 100644
--- a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh
@@ -27,13 +27,13 @@ get_real_path(){
     echo "$(realpath -m $PWD/$1)"
   fi
 }
-RANK_SIZE=8
 DATA_PATH=$(get_real_path $1)
 RANK_TABLE=$(get_real_path $2)
 
 EXEC_PATH=$(pwd)
 echo "$EXEC_PATH"
 export RANK_TABLE_FILE=$RANK_TABLE
+export RANK_SIZE=8
 
 for((i=0;i<RANK_SIZE;i++))
 do
diff --git a/model_zoo/research/cv/arcface/train.py b/model_zoo/research/cv/arcface/train.py
index 5729e9bd493..8930bd35ce1 100644
--- a/model_zoo/research/cv/arcface/train.py
+++ b/model_zoo/research/cv/arcface/train.py
@@ -26,6 +26,7 @@ from mindspore.train.model import Model, ParallelMode
 from mindspore import dtype as mstype
 from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor, TimeMonitor
 from mindspore.communication.management import init
+from mindspore.communication import management as MutiDev
 from mindspore.parallel import _cost_model_context as cost_model_context
 from mindspore.parallel import set_algo_parameters
 
@@ -140,22 +141,26 @@ if __name__ == "__main__":
 
     model = Model(train_net, optimizer=optimizer)
 
+    time_cb = TimeMonitor(data_size=train_dataset.get_dataset_size())
+    loss_cb = LossMonitor()
+    cb = [time_cb, loss_cb]
     config_ck = CheckpointConfig(
-        save_checkpoint_steps=60, keep_checkpoint_max=20)
+        save_checkpoint_steps=60, keep_checkpoint_max=5)
     if args.modelarts:
         ckpt_cb = ModelCheckpoint(prefix="ArcFace-", config=config_ck,
                                   directory='/cache/train_output/')
+        cb.append(ckpt_cb)
     else:
-        ckpt_cb = ModelCheckpoint(prefix="ArcFace-", config=config_ck,
-                                  directory=args.train_url)
-    time_cb = TimeMonitor(data_size=train_dataset.get_dataset_size())
-    loss_cb = LossMonitor()
-    cb = [ckpt_cb, time_cb, loss_cb]
-    if args.device_id == 0 or args.device_num == 1:
-        model.train(train_epoch, train_dataset,
-                    callbacks=cb, dataset_sink_mode=True)
-    else:
-        model.train(train_epoch, train_dataset, dataset_sink_mode=True)
+        if args.device_num == 8 and MutiDev.get_rank() % 8 == 0:
+            ckpt_cb = ModelCheckpoint(prefix="ArcFace-", config=config_ck,
+                                      directory=args.train_url)
+            cb.append(ckpt_cb)
+        if args.device_num == 1:
+            ckpt_cb = ModelCheckpoint(prefix="ArcFace-", config=config_ck,
+                                      directory=args.train_url)
+            cb.append(ckpt_cb)
+
+    model.train(train_epoch, train_dataset, callbacks=cb, dataset_sink_mode=True)
     if args.modelarts:
         mox.file.copy_parallel(
             src_url='/cache/train_output', dst_url=args.train_url)
diff --git a/model_zoo/research/cv/centernet/src/centernet_pose.py b/model_zoo/research/cv/centernet/src/centernet_pose.py
index 929f658e481..a9a0322ee52 100644
--- a/model_zoo/research/cv/centernet/src/centernet_pose.py
+++ b/model_zoo/research/cv/centernet/src/centernet_pose.py
@@ -232,9 +232,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
         grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, kps,
                                                  kps_mask, reg, hm_hp, hp_offset,
                                                  hp_ind, hp_mask)
-        succ = self.optimizer(grads)
-        ret = loss
-        return ops.depend(ret, succ)
+        self.optimizer(grads)
+        return loss
 
 
 class CenterNetWithLossScaleCell(nn.Cell):
@@ -309,9 +308,8 @@ class CenterNetWithLossScaleCell(nn.Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
 
-        succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return ops.depend(ret, succ)
+        self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 class CenterNetMultiPoseEval(nn.Cell):
     """
diff --git a/model_zoo/research/cv/centernet_det/src/centernet_det.py b/model_zoo/research/cv/centernet_det/src/centernet_det.py
index 9ade7aa7418..c8bc5eaade7 100644
--- a/model_zoo/research/cv/centernet_det/src/centernet_det.py
+++ b/model_zoo/research/cv/centernet_det/src/centernet_det.py
@@ -250,9 +250,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
         weights = self.weights
         loss = self.network(image, hm, reg_mask, ind, wh, reg)
         grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg)
-        succ = self.optimizer(grads)
-        ret = loss
-        return ops.depend(ret, succ)
+        self.optimizer(grads)
+        return loss
 
 
 class CenterNetWithLossScaleCell(nn.Cell):
@@ -320,12 +319,9 @@ class CenterNetWithLossScaleCell(nn.Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
         overflow = cond
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return ops.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class CenterNetDetEval(nn.Cell):
diff --git a/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py b/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py
index cf762a10b2c..8425faeeb74 100644
--- a/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py
+++ b/model_zoo/research/cv/centernet_resnet50_v1/src/centernet_det.py
@@ -208,9 +208,8 @@ class CenterNetWithoutLossScaleCell(nn.Cell):
         weights = self.weights
         loss = self.network(image, hm, reg_mask, ind, wh, reg)
         grads = self.grad(self.network, weights)(image, hm, reg_mask, ind, wh, reg)
-        succ = self.optimizer(grads)
-        ret = loss
-        return ops.depend(ret, succ)
+        self.optimizer(grads)
+        return loss
 
 
 class CenterNetWithLossScaleCell(nn.Cell):
@@ -279,12 +278,9 @@ class CenterNetWithLossScaleCell(nn.Cell):
         else:
             cond = self.less_equal(self.base, flag_sum)
         overflow = cond
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return ops.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class CenterNetDetEval(nn.Cell):
diff --git a/model_zoo/research/cv/dem/src/demnet.py b/model_zoo/research/cv/dem/src/demnet.py
index 3ea6da1b37c..84c3d4ead06 100644
--- a/model_zoo/research/cv/dem/src/demnet.py
+++ b/model_zoo/research/cv/dem/src/demnet.py
@@ -125,4 +125,5 @@ class MyTrainOneStepCell(nn.Cell):
         grads = self.grad(self.network, weights)(*inputs, sens)
         grads = self.grad_reducer(grads)
         grads = ops.clip_by_global_norm(grads, 0.2)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/research/cv/glore_res200/README_CN.md b/model_zoo/research/cv/glore_res200/README_CN.md
index 7a840691dbb..d8a55b640cf 100644
--- a/model_zoo/research/cv/glore_res200/README_CN.md
+++ b/model_zoo/research/cv/glore_res200/README_CN.md
@@ -93,7 +93,7 @@
 
 ```python
 # 分布式训练
-用法:bash run_distribute_train.sh [DATASET_PATH] [RANK_SIZE]
+用法:bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE]
 
 # 单机训练
 用法:bash run_standalone_train.sh [DATASET_PATH] [DEVICE_ID]
@@ -155,7 +155,7 @@
 
 ```text
 "class_num":1000,                # 数据集类数
-"batch_size":128,                # 输入张量的批次大小
+"batch_size":80,                 # 输入张量的批次大小
 "loss_scale":1024,               # 损失等级
 "momentum":0.08,                 # 动量优化器
 "weight_decay":0.0002,           # 权重衰减
@@ -203,7 +203,7 @@
 
 ```text
 # 分布式训练
-用法:bash run_distribute_train.sh [DATASET_PATH] [RANK_SIZE]
+用法:bash run_distribute_train.sh [DATASET_PATH] [RANK_TABLE]
 
 # 单机训练
 用法:bash run_standalone_train.sh [DATASET_PATH] [DEVICE_ID]
@@ -292,15 +292,15 @@ result:{'top_1 acc':0.802303685897436}
 | 模型版本              | Glore_resnet200                             |Glore_resnet200                     |
 | 资源                   | Ascend 910；CPU：2.60GHz，192核；内存：2048G |GPU-V100(SXM2)                     |
 | 上传日期              | 2021-03-34                                   |2021-05-25                         |
-| MindSpore版本          | 1.1.1                                   |1.2.0                          |
+| MindSpore版本          | 1.3.0                                   |1.2.0                          |
 | 数据集                    | ImageNet2012                             | ImageNet2012                      |
-| 训练参数        | epoch=150, steps per epoch=1251, batch_size = 128  |epoch=150, steps per epoch=2502, batch_size = 64 |
+| 训练参数        | epoch=150, steps per epoch=2001, batch_size = 80  |epoch=150, steps per epoch=2502, batch_size = 64 |
 | 优化器                  | NAG                                        | NAG                                           |
 | 损失函数              | SoftmaxCrossEntropyExpand                    |SoftmaxCrossEntropyExpand          |
 | 输出                    | 概率                                       |概率                               |
-| 损失                       |0.7068262                                |0.55614954                        |
-| 速度                      | 630.343毫秒/步（8卡）                     |912.211 毫秒/步（8卡）             |
-| 总时长                 | 33时45分钟                                   |94时08分                          |
+| 损失                       |0.8068262                                |0.55614954                        |
+| 速度                      | 400.343毫秒/步（8卡）                     |912.211 毫秒/步（8卡）             |
+| 总时长                 | 33时35分钟                                   |94时08分                          |
 | 参数(M)             | 70.6                                           |70.6
 | 微调检查点| 807.57M（.ckpt文件）                                      |808.28(.ckpt)
 | 脚本                    | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/glore_res200) |
@@ -314,9 +314,9 @@ result:{'top_1 acc':0.802303685897436}
 | 模型版本       | Glore_resnet200              |  Glore_resnet200           |
 | 资源            | Ascend 910                |   GPU                       |
 | 上传日期       | 2021-3-24                  |2021-05-25                    |
-| MindSpore版本   | 1.1.1                 |1.2.0                    |
-| 数据集             | 12万张图像              |12万张图像                   |
-| batch_size          | 128                   |64                          |
+| MindSpore版本   | 1.3.0                 |1.2.0                    |
+| 数据集             | 120万张图像              |120万张图像                   |
+| batch_size          | 80                   |64                          |
 | 输出             | 概率                     |概率                         |
 | 准确性            | 8卡: 80.23%             |8卡：80.603%                 |
 
diff --git a/model_zoo/research/cv/glore_res200/scripts/run_distribute_train.sh b/model_zoo/research/cv/glore_res200/scripts/run_distribute_train.sh
index d920cd1820c..8468b34efc6 100644
--- a/model_zoo/research/cv/glore_res200/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/glore_res200/scripts/run_distribute_train.sh
@@ -17,35 +17,28 @@
 echo "=============================================================================================================="
 echo "Please run the script as: "
 echo "bash run_distribute_train.sh DATA_PATH RANK_SIZE"
-echo "For example: bash run_distribute_train.sh /path/dataset 8"
+echo "For example: bash run_distribute_train.sh /path/dataset /path/rank_table"
 echo "It is better to use the absolute path."
 echo "=============================================================================================================="
 set -e
-DATA_PATH=$1
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+DATA_PATH=$(get_real_path $1)
 export DATA_PATH=${DATA_PATH}
-RANK_SIZE=$2
-
-EXEC_PATH=$(pwd)
+RANK_TABLE=$(get_real_path $2)
+export RANK_TABLE_FILE=${RANK_TABLE}
+export RANK_SIZE=8
 
 echo "$EXEC_PATH"
 
-test_dist_8pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json
-    export RANK_SIZE=8
-}
-
-test_dist_2pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json
-    export RANK_SIZE=2
-}
-
-test_dist_${RANK_SIZE}pcs
-
 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
-for((i=1;i<${RANK_SIZE};i++))
+for((i=1;i<8;i++))
 do
     rm -rf device$i
     mkdir device$i
@@ -75,7 +68,7 @@ export DEVICE_ID=0
 export RANK_ID=0
 echo "start training for device 0"
 env > env0.log
-python3 train.py --data_url $1 --isModelArts False --run_distribute True > train0.log 2>&1
+python3 train.py --data_url $1 --isModelArts False --run_distribute True > train0.log 2>&1 &
 
 if [ $? -eq 0 ];then
     echo "training success"
diff --git a/model_zoo/research/cv/glore_res200/src/config.py b/model_zoo/research/cv/glore_res200/src/config.py
index ce2fe8bc249..88def1bdaa5 100644
--- a/model_zoo/research/cv/glore_res200/src/config.py
+++ b/model_zoo/research/cv/glore_res200/src/config.py
@@ -18,7 +18,7 @@ network config setting, will be used in train.py
 from easydict import EasyDict
 config1 = EasyDict({
     "class_num": 1000,
-    "batch_size": 128,
+    "batch_size": 80,
     "loss_scale": 1024,
     "momentum": 0.08,
     "weight_decay": 0.0002,
diff --git a/model_zoo/research/cv/glore_res200/train.py b/model_zoo/research/cv/glore_res200/train.py
index 728b61231f5..513c63274e4 100644
--- a/model_zoo/research/cv/glore_res200/train.py
+++ b/model_zoo/research/cv/glore_res200/train.py
@@ -30,6 +30,7 @@ from mindspore.train.loss_scale_manager import FixedLossScaleManager
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.communication.management import init, get_group_size, get_rank
 import mindspore.nn as nn
+from mindspore.common import set_seed
 import mindspore.common.initializer as weight_init
 from src.lr_generator import get_lr
 from src.config import config1, config2
@@ -64,6 +65,7 @@ elif args_opt.device_target == "GPU":
 random.seed(1)
 np.random.seed(1)
 de.config.set_seed(1)
+set_seed(1)
 
 if __name__ == '__main__':
 
diff --git a/model_zoo/research/cv/hardnet/README_CN.md b/model_zoo/research/cv/hardnet/README_CN.md
index b9eb10bdd63..fe2409488c2 100644
--- a/model_zoo/research/cv/hardnet/README_CN.md
+++ b/model_zoo/research/cv/hardnet/README_CN.md
@@ -89,7 +89,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network，其突出的
   # 运行分布式训练示例
   python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False
   OR
-  bash run_distribute_train.sh /path/dataset /path/pretrain_path 8
+  bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table
 
   # 运行评估示例
   python3 eval.py > eval.log 2>&1 & --dataset_path /path/dataset --ckpt_path /path/ckpt
@@ -242,7 +242,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network，其突出的
   ```bash
   python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False
   OR
-  bash run_distribute_train.sh /path/dataset /path/pretrain_path 8
+  bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table
   ```
 
   上述shell脚本将在后台运行分布训练。您可以通过train_parallel[X]/log文件查看结果。采用以下方式达到损失值：
diff --git a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh
index a5476ca1787..994d50a457b 100644
--- a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh
@@ -16,40 +16,28 @@
 
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_SIZE"
-echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path 8"
+echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_TABLE"
+echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table"
 echo "It is better to use the absolute path."
 echo "=============================================================================================================="
 set -e
-DATA_PATH=$1
-PRETRAINED_PATH=$2
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+DATA_PATH=$(get_real_path $1)
+PRETRAINED_PATH=$(get_real_path $2)
+RANK_TABLE=$(get_real_path $3)
 export DATA_PATH=${DATA_PATH}
-RANK_SIZE=$3
-
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$RANK_TABLE
 EXEC_PATH=$(pwd)
 
 echo "$EXEC_PATH"
 
-test_dist_8pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json
-    export RANK_SIZE=8
-}
-
-test_dist_4pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_4pcs.json
-    export RANK_SIZE=4
-}
-
-test_dist_2pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json
-    export RANK_SIZE=2
-}
-
-test_dist_${RANK_SIZE}pcs
-
 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
 for((i=1;i<${RANK_SIZE};i++))
@@ -82,7 +70,7 @@ export DEVICE_ID=0
 export RANK_ID=0
 echo "start training for device 0"
 env > env0.log
-nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False  --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1
+nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False  --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1 &
 
 if [ $? -eq 0 ];then
     echo "training success"
diff --git a/model_zoo/research/cv/midas/src/midas_net.py b/model_zoo/research/cv/midas/src/midas_net.py
index fe2afed0a08..8df3c229e50 100644
--- a/model_zoo/research/cv/midas/src/midas_net.py
+++ b/model_zoo/research/cv/midas/src/midas_net.py
@@ -22,7 +22,6 @@ from mindspore.ops import operations as P
 from mindspore.ops import composite as C
 from mindspore.ops.operations import Add, Split, Concat
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
-from mindspore.ops import functional as F
 from src.custom_op import SEBlock, GroupConv
 from src.blocks_ms import Interpolate, FeatureFusionBlock
 from src.loss import ScaleAndShiftInvariantLoss
@@ -390,4 +389,5 @@ class TrainOneStepCell(nn.Cell):
         if self.reduce_flag:
             grads = self.grad_reducer(grads)
 
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/model_zoo/research/cv/resnext152_64x4d/README.md b/model_zoo/research/cv/resnext152_64x4d/README.md
index f06051c8ba4..1e099732d3f 100644
--- a/model_zoo/research/cv/resnext152_64x4d/README.md
+++ b/model_zoo/research/cv/resnext152_64x4d/README.md
@@ -37,8 +37,8 @@ The overall network architecture of ResNeXt is show below:
 Dataset used: [imagenet](http://www.image-net.org/)
 
 - Dataset size: ~125G, 1.2W colorful images in 1000 classes
-    - Train: 120G, 1.2W images
-    - Test: 5G, 50000 images
+- Train: 120G, 1.2W images
+- Test: 5G, 50000 images
 - Data format: RGB images
 - Note: Data will be processed in src/dataset.py
 
@@ -46,19 +46,19 @@ Dataset used: [imagenet](http://www.image-net.org/)
 
 ## [Mixed Precision](#contents)
 
-The [mixed precision](https://www.mindspore.cn/docs/programming_guide/en/master/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware.
+The [mixed precision](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware.
 
 For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching ‘reduce precision’.
 
 # [Environment Requirements](#contents)
 
 - Hardware（Ascend）
-    - Prepare hardware environment with Ascend  processor.
+- Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources.
 - Framework
-    - [MindSpore](https://www.mindspore.cn/install/en)
+- [MindSpore](https://www.mindspore.cn/install/en)
 - For more information, please check the resources below：
-    - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html)
-    - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html)
+- [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html)
+- [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html)
 
 # [Script description](#contents)
 
@@ -145,18 +145,18 @@ or shell script:
 ```script
 Ascend:
     # distribute training example(8p)
-    bash run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
+    sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
     # standalone training
-    bash run_standalone_train.sh DEVICE_ID DATA_PATH
+    sh run_standalone_train.sh DEVICE_ID DATA_PATH
 ```
 
 #### Launch
 
 ```bash
 # distributed training example(8p) for Ascend
-bash scripts/run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
+sh scripts/run_distribute_train.sh RANK_TABLE_FILE /dataset/train
 # standalone training example for Ascend
-bash scripts/run_standalone_train.sh DEVICE_ID DATA_PATH
+sh scripts/run_standalone_train.sh 0 /dataset/train
 ```
 
 You can find checkpoint file together with result in log.
@@ -175,7 +175,7 @@ or shell script:
 
 ```script
 # Evaluation
-bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
+sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
 ```
 
 PLATFORM is Ascend, default is Ascend.
@@ -184,10 +184,10 @@ PLATFORM is Ascend, default is Ascend.
 
 ```bash
 # Evaluation with checkpoint
-bash scripts/run_eval.sh DEVICE_ID PRETRAINED_CKPT_PATH PLATFORM
+sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext152_100.ckpt Ascend
 
-# Directly use the script to run
-python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained ~/best_acc_4.ckpt
+#Directly use the script to run
+python eval.py --data_dir /opt/npu/pvc/dataset/storage/imagenet/val/ --platform Ascend --pretrained /root/test/resnext152_64x4d/outputs_demo/best_acc_4.ckpt
 ```
 
 #### Result
@@ -213,31 +213,31 @@ python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_forma
 
 ### Training Performance
 
-| Parameters                 | ResNeXt152                                    |
-| -------------------------- | --------------------------------------------- |
-| Resource                   | Ascend 910, cpu:2.60GHz 192cores, memory:755G |
-| uploaded Date              | 06/30/2021                                    |
-| MindSpore Version          | 1.2                                           |
-| Dataset                    | ImageNet                                      |
-| Training Parameters        | src/config.py                                 |
-| Optimizer                  | Momentum                                      |
-| Loss Function              | SoftmaxCrossEntropy                           |
-| Loss                       | 1.28923                                       |
-| Accuracy                   | 80.08%(TOP1)                                  |
-| Total time                 | 7.8 h 8ps                                     |
-| Checkpoint for Fine tuning | 192 M(.ckpt file)                             |
+| Parameters                 | ResNeXt152                                    |      |
+| -------------------------- | --------------------------------------------- | ---- |
+| Resource                   | Ascend 910, cpu:2.60GHz 192cores, memory:755G |      |
+| uploaded Date              | 06/30/2021                                    |      |
+| MindSpore Version          | 1.2                                           |      |
+| Dataset                    | ImageNet                                      |      |
+| Training Parameters        | src/config.py                                 |      |
+| Optimizer                  | Momentum                                      |      |
+| Loss Function              | SoftmaxCrossEntropy                           |      |
+| Loss                       | 1.28923                                       |      |
+| Accuracy                   | 80.08%(TOP1)                                  |      |
+| Total time                 | 7.8 h 8ps                                     |      |
+| Checkpoint for Fine tuning | 192 M(.ckpt file)                             |      |
 
 #### Inference Performance
 
-| Parameters        |                  |
-| ----------------- | ---------------- |
-| Resource          | Ascend 910       |
-| uploaded Date     | 06/20/2021       |
-| MindSpore Version | 1.2              |
-| Dataset           | ImageNet, 1.2W   |
-| batch_size        | 1                |
-| outputs           | probability      |
-| Accuracy          | acc=80.08%(TOP1) |
+| Parameters        |      |      |                  |
+| ----------------- | ---- | ---- | ---------------- |
+| Resource          |      |      | Ascend 910       |
+| uploaded Date     |      |      | 06/20/2021       |
+| MindSpore Version |      |      | 1.2              |
+| Dataset           |      |      | ImageNet, 1.2W   |
+| batch_size        |      |      | 1                |
+| outputs           |      |      | probability      |
+| Accuracy          |      |      | acc=80.08%(TOP1) |
 
 # [Description of Random Situation](#contents)
 
diff --git a/model_zoo/research/cv/resnext152_64x4d/README_CN.md b/model_zoo/research/cv/resnext152_64x4d/README_CN.md
index b0ee1604e6e..28fe5d76433 100644
--- a/model_zoo/research/cv/resnext152_64x4d/README_CN.md
+++ b/model_zoo/research/cv/resnext152_64x4d/README_CN.md
@@ -51,19 +51,19 @@ ResNeXt整体网络架构如下：
 
 ## 混合精度
 
-采用[混合精度](https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_mixed_precision.html)的训练方法使用支持单精度和半精度数据来提高深度学习神经网络的训练速度，同时保持单精度训练所能达到的网络精度。混合精度训练提高计算速度、减少内存使用的同时，支持在特定硬件上训练更大的模型或实现更大批次的训练。
+采用[混合精度](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_mixed_precision.html)的训练方法使用支持单精度和半精度数据来提高深度学习神经网络的训练速度，同时保持单精度训练所能达到的网络精度。混合精度训练提高计算速度、减少内存使用的同时，支持在特定硬件上训练更大的模型或实现更大批次的训练。
 
 以FP16算子为例，如果输入数据类型为FP32，MindSpore后台会自动降低精度来处理数据。用户可打开INFO日志，搜索“reduce precision”查看精度降低的算子。
 
 # 环境要求
 
 - 硬件（Ascend）
-    - 使用Ascend处理器来搭建硬件环境。
+    - 准备Ascend处理器搭建硬件环境。如需试用昇腾处理器，请发送[申请表](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx)至ascend@huawei.com，审核通过即可获得资源。
 - 框架
     - [MindSpore](https://www.mindspore.cn/install)
 - 如需查看详情，请参见如下资源：
-    - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
-    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+    - [MindSpore教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html)
 
 # 脚本说明
 
@@ -149,18 +149,18 @@ python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed
 ```shell
 Ascend:
     # 分布式训练示例（8卡）
-    bash run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
+    sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
     # 单机训练
-    bash run_standalone_train.sh DEVICE_ID DATA_PATH
+    sh run_standalone_train.sh DEVICE_ID DATA_PATH
 ```
 
 ### 样例
 
 ```shell
 # Ascend分布式训练示例（8卡）
-bash scripts/run_distribute_train.sh RANK_TABLE_FILE DATA_PATH
+sh scripts/run_distribute_train.sh RANK_TABLE_FILE /dataset/train
 # Ascend单机训练示例
-bash scripts/run_standalone_train.sh DEVICE_ID DATA_PATH
+sh scripts/run_standalone_train.sh 0 /dataset/train
 ```
 
 您可以在日志中找到检查点文件和结果。
@@ -179,7 +179,7 @@ python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext
 
 ```shell
 # 评估
-bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
+sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM
 ```
 
 PLATFORM is Ascend, default is Ascend.
@@ -188,10 +188,10 @@ PLATFORM is Ascend, default is Ascend.
 
 ```shell
 # 检查点评估
-bash scripts/run_eval.sh DEVICE_ID PRETRAINED_CKPT_PATH PLATFORM
+sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext152_100.ckpt Ascend
 
 #或者直接使用脚本运行
-python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained ~/best_acc_0.ckpt
+python eval.py --data_dir /opt/npu/pvc/dataset/storage/imagenet/val/ --platform Ascend --pretrained /root/test/resnext152_64x4d/outputs_demo/best_acc_0.ckpt
 ```
 
 #### 结果
@@ -217,31 +217,31 @@ python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_forma
 
 ### 训练性能
 
-| 参数 | ResNeXt152 |
-| -------------------------- | ---------------------------------------------------------- |
-| 资源                   | Ascend 910；CPU：2.60GHz，192核；内存：755GB              |
-| 上传日期              | 2021-6-30                                          |
-| MindSpore版本          | 1.2                                                    |
-| 数据集 | ImageNet |
-| 训练参数        | src/config.py                                           |
-| 优化器                  | Momentum                                                        |
-| 损失函数             | Softmax交叉熵 |
-| 损失                       | 1.2892 |
-| 准确率 | 80.08%(TOP1)                                          |
-| 总时长                 | 7.8小时 （8卡） |
-| 调优检查点 | 192 M（.ckpt文件） |
+| 参数 | ResNeXt152 | |
+| -------------------------- | ---------------------------------------------------------- | ------------------------- |
+| 资源                   | Ascend 910；CPU：2.60GHz，192核；内存：755GB              |           |
+| 上传日期              | 2021-6-30                                          |       |
+| MindSpore版本          | 1.2                                                    |                      |
+| 数据集 | ImageNet |  |
+| 训练参数        | src/config.py                                           |           |
+| 优化器                  | Momentum                                                        |                  |
+| 损失函数             | Softmax交叉熵 |  |
+| 损失                       | 1.2892 |  |
+| 准确率 | 80.08%(TOP1)                                          |      |
+| 总时长                 | 7.8小时 （8卡） |  |
+| 调优检查点 | 192 M（.ckpt文件） |      |
 
 #### 推理性能
 
-| 参数                 |                      |
-| -------------------------- | -------------------- |
-| 资源                   | Ascend 910          |
-| 上传日期              | 2021-6-20 |
-| MindSpore版本         | 1.2             |
-| 数据集 | ImageNet， 1.2万 |
-| batch_size                 | 1                    |
-| 输出 | 概率 |
-| 准确率 | acc=80.08%(TOP1) |
+| 参数                 |                               |                           |                      |
+| -------------------------- | ----------------------------- | ------------------------- | -------------------- |
+| 资源                   |                     |  | Ascend 910          |
+| 上传日期              |                                            |    | 2021-6-20 |
+| MindSpore版本         |      |                      | 1.2             |
+| 数据集 |      |      | ImageNet， 1.2万 |
+| batch_size                 |      |      | 1                    |
+| 输出 |      |      | 概率 |
+| 准确率 |               |           | acc=80.08%(TOP1) |
 
 # 随机情况说明
 
diff --git a/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh b/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh
index e0b10e8f0b1..2cfc0045d1e 100644
--- a/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh
@@ -52,7 +52,6 @@ do
     --is_distribute=1 \
     --device_id=$DEVICE_ID \
     --pretrained=$PATH_CHECKPOINT \
-    --data_dir=$DATA_DIR \
-    --run_eval=False > log_less.txt 2>&1 &
+    --data_dir=$DATA_DIR > log_less.txt 2>&1 &
     cd ../
 done
diff --git a/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh b/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh
index 07cb60cfe6d..6f96801064f 100644
--- a/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh
+++ b/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh
@@ -26,6 +26,5 @@ python train.py  \
     --is_distribute=0 \
     --device_id=$DEVICE_ID \
     --pretrained=$PATH_CHECKPOINT \
-    --data_dir=$DATA_DIR \
-    --run_eval=False > log.txt 2>&1 &
+    --data_dir=$DATA_DIR > log.txt 2>&1 &
 
diff --git a/model_zoo/research/cv/resnext152_64x4d/train.py b/model_zoo/research/cv/resnext152_64x4d/train.py
index 90586184fd6..6e8436e7aef 100644
--- a/model_zoo/research/cv/resnext152_64x4d/train.py
+++ b/model_zoo/research/cv/resnext152_64x4d/train.py
@@ -146,7 +146,7 @@ def parse_args(cloud_args=None):
     #dataset of eval dataset
     parser.add_argument('--eval_data_dir',
                         type=str,
-                        default='',
+                        default='/opt/npu/pvc/dataset/storage/imagenet/val',
                         help='eval data dir')
     parser.add_argument('--eval_per_batch_size',
                         default=32,
@@ -289,6 +289,9 @@ def train(cloud_args=None):
     # checkpoint save
     progress_cb = ProgressMonitor(args)
     callbacks = [progress_cb,]
+    #eval dataset
+    if args.eval_data_dir is None or (not os.path.isdir(args.eval_data_dir)):
+        raise ValueError("{} is not a existing path.".format(args.eval_data_dir))
     #code like eval.py
     #if run eval
     if args.run_eval:
diff --git a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py
index b62bc8a6ac1..6b4dff20463 100644
--- a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py
+++ b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py
@@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class retinanetInferWithDecoder(nn.Cell):
diff --git a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py
index b62bc8a6ac1..6b4dff20463 100644
--- a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py
+++ b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py
@@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class retinanetInferWithDecoder(nn.Cell):
diff --git a/model_zoo/research/cv/simple_baselines/README.md b/model_zoo/research/cv/simple_baselines/README.md
index 23f562e2b9d..fe453b8027c 100644
--- a/model_zoo/research/cv/simple_baselines/README.md
+++ b/model_zoo/research/cv/simple_baselines/README.md
@@ -82,13 +82,13 @@ simple_baselines的总体网络架构如下：
 
 ```text
 # 分布式训练
-用法：sh run_distribute_train.sh --is_model_arts False --run_distribute True
+用法：bash run_distribute_train.sh RANK_TABLE
 
 # 单机训练
-用法：sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False
+用法：bash run_standalone_train.sh DEVICE_ID
 
 # 运行评估示例
-用法：sh run_eval.sh
+用法：bash run_eval.sh
 ```
 
 # 脚本说明
@@ -183,13 +183,13 @@ config.TEST.NMS_THRE = 1.0                                       # nms阈值
 
 ```text
 # 分布式训练
-用法：sh run_distribute_train.sh --is_model_arts False --run_distribute True
+用法：bash run_distribute_train.sh RANK_TABLE
 
 # 单机训练
-用法：sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False
+用法：bash run_standalone_train.sh DEVICE_ID
 
 # 运行评估示例
-用法：sh run_eval.sh
+用法：bash run_eval.sh
 ```
 
 ### 结果
@@ -219,7 +219,7 @@ epoch:140 step:2340, loss is 0.0003393
 
 ```bash
 # 评估
-sh eval.sh
+bash eval.sh
 ```
 
 ### 结果
diff --git a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
index b568b3d400b..a91edd71221 100644
--- a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
+++ b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh
@@ -16,31 +16,24 @@
 
 echo "========================================================================"
 echo "Please run the script as: "
-echo "bash run.sh RANK_SIZE"
-echo "For example: bash run_distribute.sh 8"
+echo "bash run.sh RANK_TABLE"
+echo "For example: bash run_distribute.sh RANK_TABLE"
 echo "It is better to use the absolute path."
 echo "========================================================================"
 set -e
-
-RANK_SIZE=$1
-export RANK_SIZE
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+RANK_TABLE=$(get_real_path $1)
 
 EXEC_PATH=$(pwd)
 echo "$EXEC_PATH"
-
-test_dist_8pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json
-    export RANK_SIZE=8
-}
-
-test_dist_2pcs()
-{
-    export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json
-    export RANK_SIZE=2
-}
-
-test_dist_${RANK_SIZE}pcs
+export RANK_TABLE_FILE=$RANK_TABLE
+export RANK_SIZE=8
 
 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python
 
diff --git a/model_zoo/research/cv/squeezenet1_1/README.md b/model_zoo/research/cv/squeezenet1_1/README.md
index 74e614f7847..beba897264d 100644
--- a/model_zoo/research/cv/squeezenet1_1/README.md
+++ b/model_zoo/research/cv/squeezenet1_1/README.md
@@ -149,6 +149,13 @@ For more configuration details, please refer the script `config.py`.
   Usage: sh scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional)
   ```
 
+```shell
+# standalone training example
+sh scripts/run_standalone_train.sh 0 /data/imagenet/train
+```
+
+checkpoint can be produced in training process and be saved in the folder ./train/ckpt_squeezenet.
+
 For distributed training, a hccl configuration file with JSON format needs to be created in advance.
 
 Please follow the instructions in the link [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools).
@@ -182,11 +189,9 @@ Usage: sh scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH]
 
 ```shell
 # evaluation example
-sh scripts/run_eval.sh 0 ~/data/imagenet/train ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt
+sh scripts/run_eval.sh 0 /data/imagenet/val ./train/ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt
 ```
 
-checkpoint can be produced in training process.
-
 ### Result
 
 Evaluation result will be stored in the example path, whose folder name is "eval". Under this, you can find result like the followings in log.
diff --git a/model_zoo/research/cv/squeezenet1_1/eval.py b/model_zoo/research/cv/squeezenet1_1/eval.py
index 755f0dbe284..2ff0adcdb0f 100644
--- a/model_zoo/research/cv/squeezenet1_1/eval.py
+++ b/model_zoo/research/cv/squeezenet1_1/eval.py
@@ -25,7 +25,6 @@ from src.CrossEntropySmooth import CrossEntropySmooth
 from src.squeezenet import SqueezeNet as squeezenet
 from src.dataset import create_dataset_imagenet as create_dataset
 from src.config import config
-import moxing as mox
 
 local_data_url = '/cache/data'
 local_ckpt_url = '/cache/ckpt.ckpt'
@@ -33,7 +32,7 @@ local_ckpt_url = '/cache/ckpt.ckpt'
 parser = argparse.ArgumentParser(description='Image classification')
 parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.')
 parser.add_argument('--net', type=str, default='squeezenet', help='Model.')
-parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True,
+parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=False,
                     help='Whether it is running on CloudBrain platform.')
 parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
 parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
@@ -60,6 +59,7 @@ if __name__ == '__main__':
 
     # create dataset
     if args_opt.run_cloudbrain:
+        import moxing as mox
         mox.file.copy_parallel(args_opt.checkpoint_path, local_ckpt_url)
         mox.file.copy_parallel(args_opt.data_url, local_data_url)
         dataset = create_dataset(dataset_path=local_data_url,
@@ -81,7 +81,10 @@ if __name__ == '__main__':
     net = squeezenet(num_classes=config.class_num)
 
     # load checkpoint
-    param_dict = load_checkpoint(local_ckpt_url)
+    if args_opt.run_cloudbrain:
+        param_dict = load_checkpoint(local_ckpt_url)
+    else:
+        param_dict = load_checkpoint(args_opt.checkpoint_path)
     load_param_into_net(net, param_dict)
     net.set_train(False)
 
diff --git a/model_zoo/research/cv/squeezenet1_1/train.py b/model_zoo/research/cv/squeezenet1_1/train.py
index fd01d4441d0..bf4a3f29b3f 100644
--- a/model_zoo/research/cv/squeezenet1_1/train.py
+++ b/model_zoo/research/cv/squeezenet1_1/train.py
@@ -37,9 +37,9 @@ from src.dataset import create_dataset_imagenet as create_dataset
 parser = argparse.ArgumentParser(description='SqueezeNet1_1')
 parser.add_argument('--net', type=str, default='squeezenet', help='Model.')
 parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.')
-parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True,
+parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=False,
                     help='Whether it is running on CloudBrain platform.')
-parser.add_argument('--run_distribute', type=bool, default=True, help='Run distribute')
+parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute')
 parser.add_argument('--device_num', type=int, default=1, help='Device num.')
 parser.add_argument('--dataset_path', type=str, default='', help='Dataset path')
 parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
diff --git a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py
index c4c04105dd1..a57fcafb2d6 100644
--- a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py
+++ b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py
@@ -591,7 +591,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class SSDWithGhostNet(nn.Cell):
diff --git a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py
index 7671660cbf3..ff5dfdfd9ef 100644
--- a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py
+++ b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py
@@ -388,7 +388,8 @@ class TrainingWrapper(nn.Cell):
         if self.use_global_norm:
             grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
             grads = C.clip_by_global_norm(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class SSDWithMobileNetV2(nn.Cell):
diff --git a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py
index 15191e29c11..c9df5eb3c54 100644
--- a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py
+++ b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py
@@ -296,7 +296,8 @@ class TrainingWrapper(nn.Cell):
         if self.use_global_norm:
             grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
             grads = C.clip_by_global_norm(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 
diff --git a/model_zoo/research/cv/ssd_resnet50/src/ssd.py b/model_zoo/research/cv/ssd_resnet50/src/ssd.py
index 7edccbaf659..7ec90034385 100644
--- a/model_zoo/research/cv/ssd_resnet50/src/ssd.py
+++ b/model_zoo/research/cv/ssd_resnet50/src/ssd.py
@@ -457,7 +457,8 @@ class TrainingWrapper(nn.Cell):
         if self.use_global_norm:
             grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads)
             grads = C.clip_by_global_norm(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 class SsdInferWithDecoder(nn.Cell):
     """
diff --git a/model_zoo/research/cv/wideresnet/README_CN.md b/model_zoo/research/cv/wideresnet/README_CN.md
index 22d00098ec9..5bdbdcb888f 100644
--- a/model_zoo/research/cv/wideresnet/README_CN.md
+++ b/model_zoo/research/cv/wideresnet/README_CN.md
@@ -55,13 +55,15 @@ WideResNet的总体网络架构如下：[链接](https://arxiv.org/abs/1605.0714
 - 下载数据集，目录结构如下：
 
 ```text
-└─cifar-10-batches-bin
+└─train
     ├─data_batch_1.bin                  # 训练数据集
     ├─data_batch_2.bin                  # 训练数据集
     ├─data_batch_3.bin                  # 训练数据集
     ├─data_batch_4.bin                  # 训练数据集
     ├─data_batch_5.bin                  # 训练数据集
     └─test_batch.bin                    # 评估数据集
+└─eval
+    └─test_batch.bin                    # 评估数据集
 ```
 
 # 环境要求
@@ -82,15 +84,23 @@ WideResNet的总体网络架构如下：[链接](https://arxiv.org/abs/1605.0714
 
 ```Shell
 # 分布式训练
-用法：sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH]（可选）
+用法：
+cd scripts
+bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART]
 
 # 单机训练
-用法：sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH]（可选）
+用法：
+cd scripts
+bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART]
 
 # 运行评估示例
-用法：sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]
+用法：
+cd scripts
+bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART]
 ```
 
+若没有[PRETRAINED_CKPT_PATH]，使用 “” 作为参数运行脚本。
+
 # 脚本说明
 
 ## 脚本及样例代码
@@ -149,13 +159,19 @@ WideResNet的总体网络架构如下：[链接](https://arxiv.org/abs/1605.0714
 
 ```Shell
 # 分布式训练
-用法：sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH]（可选）
+用法：
+cd scripts
+bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART]
 
 # 单机训练
-用法：sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH]（可选）
+用法：
+cd scripts
+bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART]
 
 ```
 
+若没有[PRETRAINED_CKPT_PATH]，使用 “” 作为参数运行脚本。
+
 分布式训练需要提前创建JSON格式的HCCL配置文件。
 
 具体操作，参见[hccn_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)中的说明。
@@ -203,12 +219,16 @@ epoch: 4 step: 195, loss is 1.221174
 
 ```Shell
 # 评估
-Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]
+用法：
+cd scripts
+bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART]
 ```
 
 ```Shell
 # 评估示例
-sh  run_eval.sh  /cifar10  WideResNet_best.ckpt
+用法：
+cd scripts
+bash run_eval.sh  /cifar10  WideResNet_best.ckpt
 ```
 
 训练过程中可以生成检查点。
diff --git a/model_zoo/research/hpc/sponge/main.py b/model_zoo/research/hpc/sponge/main.py
index 9f37635f6c8..503946d8370 100644
--- a/model_zoo/research/hpc/sponge/main.py
+++ b/model_zoo/research/hpc/sponge/main.py
@@ -16,14 +16,14 @@
 import argparse
 import time
 
-from src.simulation import Simulation
-from src.mdnn import Mdnn, TransCrdToCV
 import mindspore.context as context
 from mindspore import Tensor
 from mindspore import load_checkpoint
+from src.mdnn import Mdnn, TransCrdToCV
+from src.simulation import Simulation
 
 parser = argparse.ArgumentParser(description='SPONGE Controller')
-parser.add_argument('--i', type=str, default=None, help='Input file')
+parser.add_argument('--i', type=str, default=None, help='Input .in file')
 parser.add_argument('--amber_parm', type=str, default=None, help='Paramter file in AMBER type')
 parser.add_argument('--c', type=str, default=None, help='Initial coordinates file')
 parser.add_argument('--r', type=str, default="restrt", help='')
@@ -36,6 +36,7 @@ parser.add_argument('--checkpoint', type=str, default="", help='Checkpoint file'
 args_opt = parser.parse_args()
 
 context.set_context(mode=context.GRAPH_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False)
+# context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False)
 
 if __name__ == "__main__":
     simulation = Simulation(args_opt)
@@ -53,7 +54,8 @@ if __name__ == "__main__":
         if steps == simulation.md_info.step_limit - 1:
             print_step = 0
         temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, sigma_of_dihedral_ene, \
-        nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _ = simulation(Tensor(steps), Tensor(print_step))
+        nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _, _, _, _ = simulation(Tensor(steps),
+                                                                                               Tensor(print_step))
 
         if steps == 0:
             compiler_time = time.time()
diff --git a/model_zoo/research/hpc/sponge/src/angle.py b/model_zoo/research/hpc/sponge/src/angle.py
index 38a1e4f3a79..a8e90dd4aae 100644
--- a/model_zoo/research/hpc/sponge/src/angle.py
+++ b/model_zoo/research/hpc/sponge/src/angle.py
@@ -13,12 +13,46 @@
 # limitations under the License.
 # ============================================================================
 '''Angle'''
+
+
 class Angle:
     '''Angle'''
+
     def __init__(self, controller):
+        self.module_name = "angle"
+        self.h_atom_a = []
+        self.h_atom_b = []
+        self.h_atom_c = []
+        self.h_angle_k = []
+        self.h_angle_theta0 = []
+        self.angle_numbers = 0
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
+            self.is_initialized = 1
+        else:
+            self.read_in_file(controller)
+
+    def read_in_file(self, controller):
+        """read_in_file"""
+        print("START INITIALIZING ANGLE:")
+        name = self.module_name + "_in_file"
+        if name in controller.Command_Set:
+            path = controller.Command_Set[name]
+            file = open(path, 'r')
+            context = file.readlines()
+            self.angle_numbers = int(context[0].strip())
+            print("    angle_numbers is ", self.angle_numbers)
+            for i in range(self.angle_numbers):
+                val = list(map(float, context[i + 1].strip().split()))
+                self.h_atom_a.append(int(val[0]))
+                self.h_atom_b.append(int(val[1]))
+                self.h_atom_c.append(int(val[2]))
+                self.h_angle_k.append(val[3])
+                self.h_angle_theta0.append(val[4])
+            self.is_initialized = 1
+            file.close()
+        print("END INITIALIZING ANGLE")
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
@@ -64,9 +98,9 @@ class Angle:
                         information.extend(value)
                         count += len(value)
                 for _ in range(self.angle_with_H_numbers):
-                    self.h_atom_a[angle_count] = information[angle_count * 4 + 0] / 3
-                    self.h_atom_b[angle_count] = information[angle_count * 4 + 1] / 3
-                    self.h_atom_c[angle_count] = information[angle_count * 4 + 2] / 3
+                    self.h_atom_a[angle_count] = int(information[angle_count * 4 + 0] / 3)
+                    self.h_atom_b[angle_count] = int(information[angle_count * 4 + 1] / 3)
+                    self.h_atom_c[angle_count] = int(information[angle_count * 4 + 2] / 3)
                     self.h_type[angle_count] = information[angle_count * 4 + 3] - 1
                     angle_count += 1
 
@@ -86,9 +120,9 @@ class Angle:
                         information.extend(value)
                         count += len(value)
                 for _ in range(self.angle_without_H_numbers):
-                    self.h_atom_a[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3
-                    self.h_atom_b[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3
-                    self.h_atom_c[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3
+                    self.h_atom_a[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3)
+                    self.h_atom_b[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3)
+                    self.h_atom_c[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3)
                     self.h_type[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 3] - 1
                     angle_count += 1
                 break
diff --git a/model_zoo/research/hpc/sponge/src/bond.py b/model_zoo/research/hpc/sponge/src/bond.py
index 4cc5b659bd4..e0287f115e5 100644
--- a/model_zoo/research/hpc/sponge/src/bond.py
+++ b/model_zoo/research/hpc/sponge/src/bond.py
@@ -13,15 +13,45 @@
 # limitations under the License.
 # ============================================================================
 '''Bond'''
+
+
 class Bond:
     '''Bond'''
-    def __init__(self, controller, md_info):
-
-        self.atom_numbers = md_info.atom_numbers
 
+    def __init__(self, controller):
+        self.module_name = "bond"
+        self.h_atom_a = []
+        self.h_atom_b = []
+        self.h_k = []
+        self.h_r0 = []
+        self.bond_numbers = 0
+        self.is_initialized = 0
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
+            self.is_initialized = 1
+        else:
+            self.read_in_file(controller)
+
+    def read_in_file(self, controller):
+        """read_in_file"""
+        print("START INITIALIZING BOND:")
+        name = self.module_name + "_in_file"
+        if name in controller.Command_Set:
+            path = controller.Command_Set[name]
+            file = open(path, 'r')
+            context = file.readlines()
+            self.bond_numbers = int(context[0].strip())
+            print("    bond_numbers is ", self.bond_numbers)
+            for i in range(self.bond_numbers):
+                val = list(map(float, context[i + 1].strip().split()))
+                self.h_atom_a.append(int(val[0]))
+                self.h_atom_b.append(int(val[1]))
+                self.h_k.append(val[2])
+                self.h_r0.append(val[3])
+            self.is_initialized = 1
+            file.close()
+        print("END INITIALIZING BOND")
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
@@ -103,8 +133,8 @@ class Bond:
                         count += len(value)
 
                 for i in range(self.bond_with_hydrogen):
-                    self.h_atom_a[i] = information[3 * i + 0] / 3
-                    self.h_atom_b[i] = information[3 * i + 1] / 3
+                    self.h_atom_a[i] = int(information[3 * i + 0] / 3)
+                    self.h_atom_b[i] = int(information[3 * i + 1] / 3)
                     tmpi = information[3 * i + 2] - 1
                     self.h_k[i] = self.bond_type_k[tmpi]
                     self.h_r0[i] = self.bond_type_r[tmpi]
@@ -126,8 +156,8 @@ class Bond:
                         count += len(value)
 
                 for i in range(self.bond_with_hydrogen, self.bond_numbers):
-                    self.h_atom_a[i] = information[3 * (i - self.bond_with_hydrogen) + 0] / 3
-                    self.h_atom_b[i] = information[3 * (i - self.bond_with_hydrogen) + 1] / 3
+                    self.h_atom_a[i] = int(information[3 * (i - self.bond_with_hydrogen) + 0] / 3)
+                    self.h_atom_b[i] = int(information[3 * (i - self.bond_with_hydrogen) + 1] / 3)
                     tmpi = information[3 * (i - self.bond_with_hydrogen) + 2] - 1
                     self.h_k[i] = self.bond_type_k[tmpi]
                     self.h_r0[i] = self.bond_type_r[tmpi]
diff --git a/model_zoo/research/hpc/sponge/src/dihedral.py b/model_zoo/research/hpc/sponge/src/dihedral.py
index 2d06c0e3b13..0eed5f9a8a0 100644
--- a/model_zoo/research/hpc/sponge/src/dihedral.py
+++ b/model_zoo/research/hpc/sponge/src/dihedral.py
@@ -18,11 +18,52 @@ import math
 
 class Dihedral:
     '''Dihedral'''
+
     def __init__(self, controller):
         self.CONSTANT_Pi = 3.1415926535897932
+        self.module_name = "dihedral"
+        self.h_atom_a = []
+        self.h_atom_b = []
+        self.h_atom_c = []
+        self.h_atom_d = []
+        self.h_ipn = []
+        self.h_pn = []
+        self.h_pk = []
+        self.h_gamc = []
+        self.h_gams = []
+        self.dihedral_numbers = 0
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
+            self.is_initialized = 1
+        else:
+            self.read_in_file(controller)
+
+    def read_in_file(self, controller):
+        """read_in_file"""
+        print("START INITIALIZING DIHEDRAL:")
+        name = self.module_name + "_in_file"
+        if name in controller.Command_Set:
+            path = controller.Command_Set[name]
+            file = open(path, 'r')
+            context = file.readlines()
+            self.dihedral_numbers = int(context[0].strip())
+            print("    dihedral_numbers is ", self.dihedral_numbers)
+            for i in range(self.dihedral_numbers):
+                val = list(map(float, context[i + 1].strip().split()))
+                self.h_atom_a.append(int(val[0]))
+                self.h_atom_b.append(int(val[1]))
+                self.h_atom_c.append(int(val[2]))
+                self.h_atom_d.append(int(val[3]))
+                self.h_ipn.append(val[4])
+                self.h_pn.append(val[4])
+                self.h_pk.append(val[5])
+                self.h_gamc.append(math.cos(val[6]) * val[5])
+                self.h_gams.append(math.sin(val[6]) * val[5])
+
+            self.is_initialized = 1
+            file.close()
+        print("END INITIALIZING DIHEDRAL")
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
@@ -108,11 +149,11 @@ class Dihedral:
         self.h_atom_b = [0] * self.dihedral_numbers
         self.h_atom_c = [0] * self.dihedral_numbers
         self.h_atom_d = [0] * self.dihedral_numbers
-        self.pk = []
-        self.gamc = []
-        self.gams = []
-        self.pn = []
-        self.ipn = []
+        self.h_pk = []
+        self.h_gamc = []
+        self.h_gams = []
+        self.h_pn = []
+        self.h_ipn = []
         for idx, val in enumerate(context):
             if "%FLAG DIHEDRALS_INC_HYDROGEN" in val:
                 count = 0
@@ -132,20 +173,20 @@ class Dihedral:
                     self.h_atom_c[i] = information[i * 5 + 2] / 3
                     self.h_atom_d[i] = abs(information[i * 5 + 3] / 3)
                     tmpi = information[i * 5 + 4] - 1
-                    self.pk.append(self.pk_type[tmpi])
+                    self.h_pk.append(self.pk_type[tmpi])
                     tmpf = self.phase_type[tmpi]
                     if abs(tmpf - self.CONSTANT_Pi) <= 0.001:
                         tmpf = self.CONSTANT_Pi
                     tmpf2 = math.cos(tmpf)
                     if abs(tmpf2) < 1e-6:
                         tmpf2 = 0
-                    self.gamc.append(tmpf2 * self.pk[i])
+                    self.h_gamc.append(tmpf2 * self.h_pk[i])
                     tmpf2 = math.sin(tmpf)
                     if abs(tmpf2) < 1e-6:
                         tmpf2 = 0
-                    self.gams.append(tmpf2 * self.pk[i])
-                    self.pn.append(abs(self.pn_type[tmpi]))
-                    self.ipn.append(int(self.pn[i] + 0.001))
+                    self.h_gams.append(tmpf2 * self.h_pk[i])
+                    self.h_pn.append(abs(self.pn_type[tmpi]))
+                    self.h_ipn.append(int(self.h_pn[i] + 0.001))
                 break
         for idx, val in enumerate(context):
             if "%FLAG DIHEDRALS_WITHOUT_HYDROGEN" in val:
@@ -166,20 +207,20 @@ class Dihedral:
                     self.h_atom_c[i] = information[(i - self.dihedral_with_hydrogen) * 5 + 2] / 3
                     self.h_atom_d[i] = abs(information[(i - self.dihedral_with_hydrogen) * 5 + 3] / 3)
                     tmpi = information[(i - self.dihedral_with_hydrogen) * 5 + 4] - 1
-                    self.pk.append(self.pk_type[tmpi])
+                    self.h_pk.append(self.pk_type[tmpi])
                     tmpf = self.phase_type[tmpi]
                     if abs(tmpf - self.CONSTANT_Pi) <= 0.001:
                         tmpf = self.CONSTANT_Pi
                     tmpf2 = math.cos(tmpf)
                     if abs(tmpf2) < 1e-6:
                         tmpf2 = 0
-                    self.gamc.append(tmpf2 * self.pk[i])
+                    self.h_gamc.append(tmpf2 * self.h_pk[i])
                     tmpf2 = math.sin(tmpf)
                     if abs(tmpf2) < 1e-6:
                         tmpf2 = 0
-                    self.gams.append(tmpf2 * self.pk[i])
-                    self.pn.append(abs(self.pn_type[tmpi]))
-                    self.ipn.append(int(self.pn[i] + 0.001))
+                    self.h_gams.append(tmpf2 * self.h_pk[i])
+                    self.h_pn.append(abs(self.pn_type[tmpi]))
+                    self.h_ipn.append(int(self.h_pn[i] + 0.001))
                 break
         for i in range(self.dihedral_numbers):
             if self.h_atom_c[i] < 0:
diff --git a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
index 0f25929f9d5..6552f2b23df 100644
--- a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
+++ b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py
@@ -20,37 +20,72 @@ import numpy as np
 
 class Langevin_Liujian:
     '''LagevinLiuJian'''
+
     def __init__(self, controller, atom_numbers):
+        self.module_name = "langevin_liu"
         self.atom_numbers = atom_numbers
+        self.h_mass = []
+        print("START INITIALIZING LANGEVIN_LIU DYNAMICS:")
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
-
+        else:
+            self.read_mass_file(controller)
         self.CONSTANT_TIME_CONVERTION = 20.455
         self.CONSTANT_kB = 0.00198716
 
         self.target_temperature = 300.0 if "target_temperature" not in controller.Command_Set else float(
             controller.Command_Set["target_temperature"])
-        self.gamma_ln = 1.0 if "langevin_gamma" not in controller.Command_Set else float(
-            controller.Command_Set["langevin_gamma"])
-        self.rand_seed = 1 if "langevin_seed" not in controller.Command_Set else float(
-            controller.Command_Set["langevin_seed"])
-        self.max_velocity = 10000.0 if "velocity_max" not in controller.Command_Set else float(
-            controller.Command_Set["velocity_max"])
-        assert self.max_velocity > 0
-        print("target temperature is ", self.target_temperature)
-        print("friction coefficient is ", self.gamma_ln, "ps^-1")
-        print("random seed is ", self.rand_seed)
-        self.dt = float(controller.Command_Set["dt"])
-        self.dt *= self.CONSTANT_TIME_CONVERTION
+        self.gamma_ln = 1.0
+        if "gamma" in controller.Command_Set:
+            self.gamma_ln = float(controller.Command_Set["gamma"])
+        if "langevin_liu_gamma" in controller.Command_Set:
+            self.gamma_ln = float(controller.Command_Set["langevin_liu_gamma"])
+        print("    langevin_liu_gamma is ", self.gamma_ln)
+
+        self.random_seed = 1 if "seed" not in controller.Command_Set else int(
+            controller.Command_Set["seed"])
+
+        print("    target temperature is {} K".format(self.target_temperature))
+        print("    friction coefficient is {} ps^-1".format(self.gamma_ln))
+        print("    random seed is ", self.random_seed)
+        self.dt = 0.001 if "dt" not in controller.Command_Set else float(
+            controller.Command_Set["dt"]) * self.CONSTANT_TIME_CONVERTION
         self.half_dt = 0.5 * self.dt
-        self.rand_state = np.float32(np.zeros([math.ceil(3 * self.atom_numbers / 4.0) * 16,]))
+
+        self.float4_numbers = math.ceil(3 * self.atom_numbers / 4.0)
+        self.rand_state = np.float32(np.zeros([self.float4_numbers * 16,]))
         self.gamma_ln = self.gamma_ln / self.CONSTANT_TIME_CONVERTION
         self.exp_gamma = math.exp(-1 * self.gamma_ln * self.dt)
         self.sqrt_gamma = math.sqrt((1. - self.exp_gamma * self.exp_gamma) * self.target_temperature * self.CONSTANT_kB)
         self.h_sqrt_mass = [0] * self.atom_numbers
         for i in range(self.atom_numbers):
-            self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i])
+            self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i]) if self.h_mass[i] != 0 else 0
+
+        self.max_velocity = 0
+        if "velocity_max" in controller.Command_Set:
+            self.max_velocity = float(controller.Command_Set["velocity_max"])
+        if "langevin_liu_velocity_max" in controller.Command_Set:
+            self.max_velocity = float(controller.Command_Set["langevin_liu_velocity_max"])
+        print("    max velocity is ", self.max_velocity)
+
+        self.h_mass_inverse = [0] * self.atom_numbers
+        for i in range(self.atom_numbers):
+            self.h_mass_inverse[i] = 1. / self.h_mass[i] if self.h_mass[i] != 0 else 0
+
+        self.is_initialized = 1
+
+        print("END INITIALIZING LANGEVIN_LIU DYNAMICS")
+
+    def read_mass_file(self, controller):
+        if "mass_in_file" in controller.Command_Set:
+            path = controller.Command_Set["mass_in_file"]
+            file = open(path, 'r')
+            context = file.readlines()
+            for idx, val in enumerate(context):
+                if idx > 0:
+                    self.h_mass.append(float(val.strip()))
+            file.close()
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
diff --git a/model_zoo/research/hpc/sponge/src/lennard_jones.py b/model_zoo/research/hpc/sponge/src/lennard_jones.py
index b7617c11d66..4b92affb7c7 100644
--- a/model_zoo/research/hpc/sponge/src/lennard_jones.py
+++ b/model_zoo/research/hpc/sponge/src/lennard_jones.py
@@ -13,12 +13,95 @@
 # limitations under the License.
 # ============================================================================
 '''Lennard Jones'''
+import mindspore.common.dtype as mstype
+from mindspore import Tensor
+from mindspore.ops import operations as P
+
+
 class Lennard_Jones_Information:
     '''Lennard Jones'''
-    def __init__(self, controller):
+
+    def __init__(self, controller, cutoff, box_length):
+        self.module_name = "LJ"
+        self.is_initialized = 0
+        self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0
+        self.CONSTANT_Pi = 3.1415926535897932
+        self.cutoff = cutoff
+        self.box_length = box_length
+
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
+            self.is_initialized = 1
+        else:
+            self.read_in_file(controller)
+
+        if self.is_initialized:
+            self.totalc6get = P.totalc6get(self.atom_numbers)
+            self.read_information()
+
+    def read_in_file(self, controller):
+        """read_in_file"""
+        print("START INITIALIZING LENNADR JONES INFORMATION:")
+        name = self.module_name + "_in_file"
+        # print("read_in_file " + name)
+        if name in controller.Command_Set:
+            path = controller.Command_Set[name]
+            file = open(path, 'r')
+            context = file.readlines()
+            self.atom_numbers, self.atom_type_numbers = map(int, context[0].strip().split())
+            print("    atom_numbers is ", self.atom_numbers)
+            print("    atom_LJ_type_number is ", self.atom_type_numbers)
+            self.pair_type_numbers = self.atom_type_numbers * (self.atom_type_numbers + 1) / 2
+            self.h_LJ_A = []
+            self.h_LJ_B = []
+            self.h_atom_LJ_type = []
+            startidx = 1
+            count = 0
+            print(startidx)
+            while count < self.atom_type_numbers:
+                if context[startidx].strip():
+                    val = list(map(float, context[startidx].strip().split()))
+                    # print(val)
+                    count += 1
+                    self.h_LJ_A.extend(val)
+                startidx += 1
+            assert len(self.h_LJ_A) == self.pair_type_numbers
+            self.h_LJ_A = [x * 12.0 for x in self.h_LJ_A]
+
+            count = 0
+            print(startidx)
+            while count < self.atom_type_numbers:
+                if context[startidx].strip():
+                    val = list(map(float, context[startidx].strip().split()))
+                    # print(val)
+                    count += 1
+                    self.h_LJ_B.extend(val)
+                startidx += 1
+            assert len(self.h_LJ_B) == self.pair_type_numbers
+            self.h_LJ_B = [x * 6.0 for x in self.h_LJ_B]
+            for idx, val in enumerate(context):
+                if idx > startidx:
+                    self.h_atom_LJ_type.append(int(val.strip()))
+            file.close()
+            self.is_initialized = 1
+        print("END INITIALIZING LENNADR JONES INFORMATION")
+
+    def read_information(self):
+        """read_information"""
+        self.uint_dr_to_dr_cof = [1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[0],
+                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[1],
+                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[2]]
+        print("copy lj type to new crd")
+        self.atom_LJ_type = Tensor(self.h_atom_LJ_type, mstype.int32)
+        self.LJ_B = Tensor(self.h_LJ_B, mstype.float32)
+        self.factor = self.totalc6get(self.atom_LJ_type, self.LJ_B)
+        print("        factor is: ", self.factor)
+        self.long_range_factor = float(self.factor.asnumpy())
+        self.long_range_factor *= -2.0 / 3.0 * self.CONSTANT_Pi / self.cutoff / self.cutoff / self.cutoff / 6.0
+        self.volume = self.box_length[0] * self.box_length[1] * self.box_length[1]
+        print("        long range correction factor is: ", self.long_range_factor)
+        print("    End initializing long range LJ correction")
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
@@ -35,9 +118,9 @@ class Lennard_Jones_Information:
                     self.atom_numbers = value[0]
                     self.atom_type_numbers = value[1]
                     self.pair_type_numbers = int(
-                        self.atom_type_numbers * (self.atom_type_numbers + 1) / 2)  # TODO 这个地方有问题啊
+                        self.atom_type_numbers * (self.atom_type_numbers + 1) / 2)  # TODO
                     break
-        self.atom_LJ_type = [0] * self.atom_numbers
+        self.h_atom_LJ_type = [0] * self.atom_numbers
         for idx, val in enumerate(context):
             if "%FLAG ATOM_TYPE_INDEX" in val:
                 count = 0
@@ -52,9 +135,9 @@ class Lennard_Jones_Information:
                         information.extend(value)
                         count += len(value)
                 for i in range(self.atom_numbers):
-                    self.atom_LJ_type[i] = information[i] - 1
+                    self.h_atom_LJ_type[i] = information[i] - 1
                 break
-        self.LJ_A = [0] * self.pair_type_numbers
+        self.h_LJ_A = [0] * self.pair_type_numbers
         for idx, val in enumerate(context):
             if "%FLAG LENNARD_JONES_ACOEF" in val:
                 count = 0
@@ -69,9 +152,9 @@ class Lennard_Jones_Information:
                         information.extend(value)
                         count += len(value)
                 for i in range(self.pair_type_numbers):
-                    self.LJ_A[i] = 12.0 * information[i]
+                    self.h_LJ_A[i] = 12.0 * information[i]
                 break
-        self.LJ_B = [0] * self.pair_type_numbers
+        self.h_LJ_B = [0] * self.pair_type_numbers
         for idx, val in enumerate(context):
             if "%FLAG LENNARD_JONES_BCOEF" in val:
                 count = 0
@@ -86,5 +169,5 @@ class Lennard_Jones_Information:
                         information.extend(value)
                         count += len(value)
                 for i in range(self.pair_type_numbers):
-                    self.LJ_B[i] = 6.0 * information[i]
+                    self.h_LJ_B[i] = 6.0 * information[i]
                 break
diff --git a/model_zoo/research/hpc/sponge/src/md_information.py b/model_zoo/research/hpc/sponge/src/md_information.py
index f4dc2e26f17..263608b8e99 100644
--- a/model_zoo/research/hpc/sponge/src/md_information.py
+++ b/model_zoo/research/hpc/sponge/src/md_information.py
@@ -14,53 +14,206 @@
 # ============================================================================
 '''MD Information'''
 import numpy as np
+from src.system_information import (periodic_box_condition_information, system_information,
+                                    non_bond_information, NVE_iteration, residue_information, trajectory_output)
 
 
 class md_information:
     '''MD Information'''
+
     def __init__(self, controller):
         CONSTANT_TIME_CONVERTION = 20.455
-        CONSTANT_UINT_MAX_FLOAT = 4294967296.0
+
         self.md_task = controller.md_task
-        self.mode = 0 if "mode" not in controller.Command_Set else int(controller.Command_Set["mode"])
-        self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float(
-            controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION
-        self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"])
-        self.trans_vec = [self.skin, self.skin, self.skin]
-        self.trans_vec_minus = -1 * self.trans_vec
-        self.step_limit = 1000 if "step_limit" not in controller.Command_Set else int(
-            controller.Command_Set["step_limit"])
+
         self.netfrc = 0 if "net_force" not in controller.Command_Set else int(controller.Command_Set["net_force"])
         self.ntwx = 1000 if "write_information_interval" not in controller.Command_Set else int(
             controller.Command_Set["write_information_interval"])
-        self.ntce = self.step_limit + 1 if "calculate_energy_interval" not in controller.Command_Set else int(
-            controller.Command_Set["calculate_energy_interval"])
         self.atom_numbers = 0
         self.residue_numbers = 0
         self.density = 0.0
         self.lin_serial = []
         self.h_res_start = []
         self.h_res_end = []
+
+        self.h_charge = []
         self.h_mass = []
         self.h_mass_inverse = []
         self.h_charge = []
+        self.coordinate = []
+        self.box_length = []
+        self.vel = []
+        self.crd = []
+        self.velocity = []
+
+        self.mode = self.read_mode(controller)
+        # read dt
+        self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float(
+            controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION
+        self.dt_in_ps = 0.001 if "dt" not in controller.Command_Set else float(controller.Command_Set["dt"])
 
         if controller.amber_parm is not None:
             self.read_basic_system_information_from_amber_file(controller.amber_parm)
-
-        if "amber_irest" in controller.Command_Set:
-            amber_irest = int(controller.Command_Set["amber_irest"])
             if controller.initial_coordinates_file is not None:
-                self.read_basic_system_information_from_rst7(controller.initial_coordinates_file, amber_irest)
+                self.read_basic_system_information_from_rst7(controller.initial_coordinates_file)
+        else:
+            self.read_coordinate_and_velocity(controller)
+            self.read_mass(controller)
+            self.read_charge(controller)
+        self.crd = self.coordinate
 
-        self.crd_to_uint_crd_cof = [CONSTANT_UINT_MAX_FLOAT / self.box_length[0],
-                                    CONSTANT_UINT_MAX_FLOAT / self.box_length[1],
-                                    CONSTANT_UINT_MAX_FLOAT / self.box_length[2]]
-        self.uint_dr_to_dr_cof = [1.0 / self.crd_to_uint_crd_cof[0], 1.0 / self.crd_to_uint_crd_cof[1],
-                                  1.0 / self.crd_to_uint_crd_cof[2]]
-        self.density *= 1e24 / 6.023e23 / (self.box_length[0] * self.box_length[1] * self.box_length[2])
+        self.sys = system_information(controller, self)
+        self.nb = non_bond_information(controller, self)
+        self.output = trajectory_output(controller, self)
+        self.nve = NVE_iteration(controller, self)
+        self.res = residue_information(controller, self)
+        self.pbc = periodic_box_condition_information(controller, self.box_length)
+
+        if not self.h_res_start:
+            self.h_res_start = self.res.h_res_start
+            self.h_res_end = self.res.h_res_end
+            self.residue_numbers = self.res.residue_numbers
+
+        # Atom_Information_Initial
+        self.acc = np.zeros([self.atom_numbers, 3])
+        self.frc = np.zeros([self.atom_numbers, 3])
+        self.sys.freedom = 3 * self.atom_numbers
+        self.is_initialized = 1
 
         self.velocity = np.reshape(np.asarray(self.velocity, np.float32), [self.atom_numbers, 3])
+        self.step_limit = self.sys.step_limit
+
+    def read_mode(self, controller):
+        """read_mode"""
+        if "mode" in controller.Command_Set:
+            if controller.Command_Set["mode"] in ["NVT", "nvt", "1"]:
+                print("    Mode set to NVT\n")
+                mode = 1
+            elif controller.Command_Set["mode"] in ["NPT", "npt", "2"]:
+                print("    Mode set to NPT\n")
+                mode = 2
+            elif controller.Command_Set["mode"] in ["Minimization", "minimization", "-1"]:
+                print("    Mode set to Energy Minimization\n")
+                mode = -1
+            elif controller.Command_Set["mode"] in ["NVE", "nve", "0"]:
+                print("    Mode set to NVE\n")
+                mode = 0
+            else:
+                print(
+                    "    Warning: Mode {} is not match. Set to NVE as default\n".format(controller.Command_Set["mode"]))
+                mode = 0
+        else:
+            print("    Mode set to NVE as default\n")
+            mode = 0
+        return mode
+
+    def read_coordinate_in_file(self, path):
+        '''read coordinates file'''
+        file = open(path, 'r')
+        print("    Start reading coordinate_in_file:\n")
+        context = file.readlines()
+        atom_numbers = int(context[0].strip())
+        if self.atom_numbers != 0:
+            if self.atom_numbers is not atom_numbers:
+                print("        Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers)
+                exit(1)
+        else:
+            self.atom_numbers = atom_numbers
+            print("        atom_numbers is ", self.atom_numbers)
+
+        for idx in range(self.atom_numbers):
+            coord = list(map(float, context[idx + 1].strip().split()))
+            self.coordinate.append(coord)
+
+        self.box_length = list(map(float, context[-1].strip().split()))[:3]
+        print(" box_length is: x: {}, y: {}, z: {}".format(
+            self.box_length[0], self.box_length[1], self.box_length[2]))
+        self.crd = self.coordinate
+        file.close()
+
+    def read_velocity_in_file(self, path):
+        '''read velocity file'''
+        file = open(path, 'r')
+        print("    Start reading velocity_in_file:\n")
+        context = file.readlines()
+        for idx, val in enumerate(context):
+            if idx == 0:
+                atom_numbers = int(val.strip())
+                if self.atom_numbers > 0 and atom_numbers != self.atom_numbers:
+                    print("        Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers)
+                    exit(1)
+                else:
+                    self.atom_numbers = atom_numbers
+            else:
+                vel = list(map(float, val.strip().split()))
+                self.velocity.append(vel)
+        self.vel = self.velocity
+        file.close()
+
+    def read_coordinate_and_velocity(self, controller):
+        """read_coordinate_and_velocity"""
+        if "coordinate_in_file" in controller.Command_Set:
+            self.read_coordinate_in_file(controller.Command_Set["coordinate_in_file"])
+            if "velocity_in_file" in controller.Command_Set:
+                self.read_velocity_in_file(controller.Command_Set["velocity_in_file"])
+            else:
+                print("    Velocity is set to zero as default\n")
+                self.velocity = [0] * 3 * self.atom_numbers
+
+    def read_mass(self, controller):
+        """read_mass"""
+        print("    Start reading mass:")
+        if "mass_in_file" in controller.Command_Set:
+            path = controller.Command_Set["mass_in_file"]
+            file = open(path, 'r')
+            self.total_mass = 0
+            context = file.readlines()
+            for idx, val in enumerate(context):
+                if idx == 0:
+                    atom_numbers = int(val.strip())
+                    if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers):
+                        print("        Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers)
+                        exit(1)
+                    else:
+                        self.atom_numbers = atom_numbers
+                else:
+                    mass = float(val.strip())
+                    self.h_mass.append(mass)
+                    self.total_mass += mass
+                    if mass == 0:
+                        self.h_mass_inverse.append(0.0)
+                    else:
+                        self.h_mass_inverse.append(1 / mass)
+            file.close()
+        else:
+            print("    mass is set to 20 as default")
+            self.total_mass = 20 * self.atom_numbers
+            self.h_mass = [20] * self.atom_numbers
+            self.h_mass_inverse = [1 / 20] * self.atom_numbers
+
+        print("    End reading mass")
+
+    def read_charge(self, controller):
+        """read_charge"""
+        if "charge_in_file" in controller.Command_Set:
+            print("    Start reading charge:")
+            path = controller.Command_Set["charge_in_file"]
+            file = open(path, 'r')
+            context = file.readlines()
+            for idx, val in enumerate(context):
+                if idx == 0:
+                    atom_numbers = int(val.strip())
+                    if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers):
+                        print("        Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers)
+                        exit(1)
+                    else:
+                        self.atom_numbers = atom_numbers
+                else:
+                    self.h_charge.append(float(val.strip()))
+            file.close()
+        else:
+            self.h_charge = [0.0] * self.atom_numbers
+        print("    End reading charge")
 
     def read_basic_system_information_from_amber_file(self, path):
         '''read amber file'''
@@ -137,11 +290,13 @@ class md_information:
                         count += len(value)
                 break
 
-    def read_basic_system_information_from_rst7(self, path, irest):
+    def read_basic_system_information_from_rst7(self, path):
         '''read rst7 file'''
         file = open(path, 'r')
         context = file.readlines()
         file.close()
+        x = context[1].strip().split()
+        irest = 1 if len(x) > 1 else 0
         atom_numbers = int(context[1].strip().split()[0])
         if atom_numbers != self.atom_numbers:
             print("ERROR")
@@ -151,7 +306,7 @@ class md_information:
         count = 0
         start_idx = 1
         if irest == 1:
-            self.simulation_start_time = float(context[1].strip().split()[1])
+            self.simulation_start_time = float(x[1])
             while count <= 6 * self.atom_numbers + 3:
                 start_idx += 1
                 value = list(map(float, context[start_idx].strip().split()))
@@ -169,4 +324,6 @@ class md_information:
             self.coordinate = information[: 3 * self.atom_numbers]
             self.velocity = [0.0] * (3 * self.atom_numbers)
             self.box_length = information[3 * self.atom_numbers:3 * self.atom_numbers + 3]
+        self.coordinate = np.array(self.coordinate).reshape([-1, 3])
+        self.velocity = np.array(self.velocity).reshape([-1, 3])
         print("system size is ", self.box_length[0], self.box_length[1], self.box_length[2])
diff --git a/model_zoo/research/hpc/sponge/src/nb14.py b/model_zoo/research/hpc/sponge/src/nb14.py
index 9c37ec79e02..b28f13645d8 100644
--- a/model_zoo/research/hpc/sponge/src/nb14.py
+++ b/model_zoo/research/hpc/sponge/src/nb14.py
@@ -13,21 +13,51 @@
 # limitations under the License.
 # ============================================================================
 '''NON BOND'''
+
+
 class NON_BOND_14:
     '''NON BOND'''
-    def __init__(self, controller, dihedral, atom_numbers):
-        self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen
-        self.dihedral_numbers = dihedral.dihedral_numbers
-        self.dihedral_type_numbers = dihedral.dihedral_type_numbers
-        self.atom_numbers = atom_numbers
 
+    def __init__(self, controller, dihedral, atom_numbers):
+        self.module_name = "nb14"
+        self.atom_numbers = atom_numbers
+        self.h_atom_a = []
+        self.h_atom_b = []
+        self.h_lj_scale_factor = []
+        self.h_cf_scale_factor = []
+        self.nb14_numbers = 0
+        self.is_initialized = 0
         if controller.amber_parm is not None:
+            self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen
+            self.dihedral_numbers = dihedral.dihedral_numbers
+            self.dihedral_type_numbers = dihedral.dihedral_type_numbers
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
-        self.h_atom_a = self.h_atom_a[:self.nb14_numbers]
-        self.h_atom_b = self.h_atom_b[:self.nb14_numbers]
-        self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers]
-        self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers]
+            self.h_atom_a = self.h_atom_a[:self.nb14_numbers]
+            self.h_atom_b = self.h_atom_b[:self.nb14_numbers]
+            self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers]
+            self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers]
+            self.is_initialized = 1
+        else:
+            self.read_in_file(controller)
+
+    def read_in_file(self, controller):
+        """read_in_file"""
+        name = self.module_name + "_in_file"
+        if name in controller.Command_Set:
+            path = controller.Command_Set[name]
+            file = open(path, 'r')
+            context = file.readlines()
+            self.nb14_numbers = int(context[0].strip())
+            print("    non-bond 14 numbers is", self.nb14_numbers)
+            for i in range(self.nb14_numbers):
+                val = list(map(float, context[i + 1].strip().split()))
+                self.h_atom_a.append(int(val[0]))
+                self.h_atom_b.append(int(val[1]))
+                self.h_lj_scale_factor.append(val[2])
+                self.h_cf_scale_factor.append(val[3])
+            self.is_initialized = 1
+            file.close()
 
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
diff --git a/model_zoo/research/hpc/sponge/src/neighbor_list.py b/model_zoo/research/hpc/sponge/src/neighbor_list.py
index 607f6d258c2..81c5868bd56 100644
--- a/model_zoo/research/hpc/sponge/src/neighbor_list.py
+++ b/model_zoo/research/hpc/sponge/src/neighbor_list.py
@@ -13,17 +13,24 @@
 # limitations under the License.
 # ============================================================================
 '''Neighbor List'''
+
+
 class neighbor_list:
     '''Neighbor List'''
+
     def __init__(self, controller, atom_numbers, box_length):
-        self.refresh_interval = 20 if "neighbor_list_refresh_interval" not in controller.Command_Set else int(
-            controller.Command_Set["neighbor_list_refresh_interval"])
+        self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0
+        print("START INITIALIZING NEIGHBOR LIST:")
+        self.module_name = "neighbor_list"
+        self.refresh_interval = 20 if "refresh_interval" not in controller.Command_Set else int(
+            controller.Command_Set["refresh_interval"])
         self.max_atom_in_grid_numbers = 64 if "max_atom_in_grid_numbers" not in controller.Command_Set else int(
             controller.Command_Set["max_atom_in_grid_numbers"])
         self.max_neighbor_numbers = 800 if "max_neighbor_numbers" not in controller.Command_Set else int(
             controller.Command_Set["max_neighbor_numbers"])
+
         self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"])
-        self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"])
+        self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"])
         self.cutoff_square = self.cutoff * self.cutoff
         self.cutoff_with_skin = self.cutoff + self.skin
         self.half_cutoff_with_skin = 0.5 * self.cutoff_with_skin
@@ -31,15 +38,17 @@ class neighbor_list:
         self.half_skin_square = 0.25 * self.skin * self.skin
         self.atom_numbers = atom_numbers
         self.box_length = box_length
+        self.update_volume()
+
+        self.initial_neighbor_grid()
+        self.not_first_time = 0
+        self.is_initialized = 1
+        self.refresh_count = [0]
 
         if controller.amber_parm is not None:
             file_path = controller.amber_parm
             self.read_information_from_amberfile(file_path)
 
-        self.Initial_Neighbor_Grid()
-        self.not_first_time = 0
-        self.refresh_count = [0]
-
     def read_information_from_amberfile(self, file_path):
         '''read amber file'''
         file = open(file_path, 'r')
@@ -117,20 +126,23 @@ class neighbor_list:
                     self.excluded_list.extend(tmp_list)
                 break
 
-    def Initial_Neighbor_Grid(self):
+    def initial_neighbor_grid(self):
         '''init neighbor grid'''
         half_cutoff = self.half_cutoff_with_skin
         self.Nx = int(self.box_length[0] / half_cutoff)
         self.Ny = int(self.box_length[1] / half_cutoff)
         self.Nz = int(self.box_length[2] / half_cutoff)
         self.grid_N = [self.Nx, self.Ny, self.Nz]
-        self.grid_length = [self.box_length[0] / self.Nx, self.box_length[1] / self.Ny, self.box_length[2] / self.Nz]
+        self.grid_length = [self.box_length[0] / self.Nx,
+                            self.box_length[1] / self.Ny,
+                            self.box_length[2] / self.Nz]
         self.grid_length_inverse = [1.0 / self.grid_length[0], 1.0 / self.grid_length[1], 1.0 / self.grid_length[2]]
+
         self.Nxy = self.Nx * self.Ny
         self.grid_numbers = self.Nz * self.Nxy
-
         self.atom_numbers_in_grid_bucket = [0] * self.grid_numbers
         self.bucket = [-1] * (self.grid_numbers * self.max_atom_in_grid_numbers)
+
         self.pointer = []
         temp_grid_serial = [0] * 125
         for i in range(self.grid_numbers):
@@ -160,3 +172,11 @@ class neighbor_list:
                         count += 1
             temp_grid_serial = sorted(temp_grid_serial)
             self.pointer.extend(temp_grid_serial)
+
+    def update_volume(self):
+        self.quarter_crd_to_uint_crd_cof = [0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[0],
+                                            0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[1],
+                                            0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[2]]
+        self.uint_dr_to_dr_cof = [1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[0],
+                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[1],
+                                  1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[2]]
diff --git a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
index fd7f20f0104..4b22137d045 100644
--- a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
+++ b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py
@@ -19,23 +19,40 @@ import math
 class Particle_Mesh_Ewald():
     '''PME'''
     def __init__(self, controller, md_info):
-        self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"])
-        self.tolerance = 0.00001 if "PME_Direct_Tolerance" not in controller.Command_Set else float(
-            controller.Command_Set["PME_Direct_Tolerance"])
+        self.module_name = "PME"
+        self.CONSTANT_Pi = 3.1415926535897932
+        self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"])
+        self.tolerance = 0.00001 if "Direct_Tolerance" not in controller.Command_Set else float(
+            controller.Command_Set["Direct_Tolerance"])
         self.fftx = -1 if "fftx" not in controller.Command_Set else int(controller.Command_Set["fftx"])
         self.ffty = -1 if "ffty" not in controller.Command_Set else int(controller.Command_Set["ffty"])
         self.fftz = -1 if "fftz" not in controller.Command_Set else int(controller.Command_Set["fftz"])
         self.atom_numbers = md_info.atom_numbers
         self.box_length = md_info.box_length
 
+        self.volume = self.box_length[0] * self.box_length[1] * self.box_length[1]
+
         if self.fftx < 0:
             self.fftx = self.Get_Fft_Patameter(self.box_length[0])
         if self.ffty < 0:
             self.ffty = self.Get_Fft_Patameter(self.box_length[1])
         if self.fftz < 0:
             self.fftz = self.Get_Fft_Patameter(self.box_length[2])
+        print("    fftx: ", self.fftx)
+        print("    ffty: ", self.ffty)
+        print("    fftz: ", self.fftz)
+        print("pme cutoff", self.cutoff)
+        print("pme tolerance", self.tolerance)
+        self.PME_Nall = self.fftx * self.ffty * self.fftz
+        self.PME_Nin = self.ffty * self.fftz
+        self.PME_Nfft = self.fftx * self.ffty * (int(self.fftz / 2) + 1)
+        self.PME_inverse_box_vector = [self.fftx / self.box_length[0],
+                                       self.ffty / self.box_length[1],
+                                       self.fftz / self.box_length[2]]
 
         self.beta = self.Get_Beta(self.cutoff, self.tolerance)
+        self.neutralizing_factor = -0.5 * self.CONSTANT_Pi / (self.beta * self.beta * self.volume)
+        self.is_initialized = 1
 
     def Get_Beta(self, cutoff, tolerance):
         '''GET BETA'''
diff --git a/model_zoo/research/hpc/sponge/src/simulation.py b/model_zoo/research/hpc/sponge/src/simulation.py
index e5474806c61..e02c844c476 100644
--- a/model_zoo/research/hpc/sponge/src/simulation.py
+++ b/model_zoo/research/hpc/sponge/src/simulation.py
@@ -13,23 +13,29 @@
 # limitations under the License.
 # ============================================================================
 '''Simulation'''
-import numpy as np
 
-import mindspore.common.dtype as mstype
-from mindspore import Tensor
-from mindspore import nn
-from mindspore.common.parameter import Parameter
-from mindspore.ops import functional as F
-from mindspore.ops import operations as P
+import numpy as np
 from src.angle import Angle
+from src.bd_baro import BD_BARO
 from src.bond import Bond
+from src.crd_molecular_map import CoordinateMolecularMap
 from src.dihedral import Dihedral
 from src.langevin_liujian_md import Langevin_Liujian
 from src.lennard_jones import Lennard_Jones_Information
+from src.mc_baro import MC_BARO
 from src.md_information import md_information
 from src.nb14 import NON_BOND_14
 from src.neighbor_list import neighbor_list
 from src.particle_mesh_ewald import Particle_Mesh_Ewald
+from src.restrain import Restrain_Information
+from src.simple_constrain import Simple_Constarin
+from src.vatom import Virtual_Information
+
+import mindspore.common.dtype as mstype
+from mindspore import Tensor, nn
+from mindspore.common.parameter import Parameter
+from mindspore.ops import functional as F
+from mindspore.ops import operations as P
 
 
 class controller:
@@ -47,6 +53,7 @@ class controller:
         self.Command_Set = {}
         self.md_task = None
         self.commands_from_in_file()
+        self.punctuation = ","
 
     def commands_from_in_file(self):
         '''command from in file'''
@@ -55,10 +62,12 @@ class controller:
         file.close()
         self.md_task = context[0].strip()
         for val in context:
-            if "=" in val:
+            val = val.strip()
+            if val and val[0] != '#' and ("=" in val):
+                val = val[:val.index(",")] if ',' in val else val
                 assert len(val.strip().split("=")) == 2
                 flag, value = val.strip().split("=")
-                value = value.replace(",", '')
+                value = value.replace(" ", "")
                 flag = flag.replace(" ", "")
                 if flag not in self.Command_Set:
                     self.Command_Set[flag] = value
@@ -73,14 +82,99 @@ class Simulation(nn.Cell):
         super(Simulation, self).__init__()
         self.control = controller(args_opt)
         self.md_info = md_information(self.control)
-        self.bond = Bond(self.control, self.md_info)
+        self.mode = self.md_info.mode
+        self.bond = Bond(self.control)
+        self.bond_is_initialized = self.bond.is_initialized
         self.angle = Angle(self.control)
+        self.angle_is_initialized = self.angle.is_initialized
         self.dihedral = Dihedral(self.control)
+        self.dihedral_is_initialized = self.dihedral.is_initialized
         self.nb14 = NON_BOND_14(self.control, self.dihedral, self.md_info.atom_numbers)
+        self.nb14_is_initialized = self.nb14.is_initialized
         self.nb_info = neighbor_list(self.control, self.md_info.atom_numbers, self.md_info.box_length)
-        self.LJ_info = Lennard_Jones_Information(self.control)
+        self.LJ_info = Lennard_Jones_Information(self.control, self.md_info.nb.cutoff, self.md_info.sys.box_length)
+        self.LJ_info_is_initialized = self.LJ_info.is_initialized
+
         self.liujian_info = Langevin_Liujian(self.control, self.md_info.atom_numbers)
+        self.liujian_info_is_initialized = self.liujian_info.is_initialized
         self.pme_method = Particle_Mesh_Ewald(self.control, self.md_info)
+        self.pme_is_initialized = self.pme_method.is_initialized
+        self.restrain = Restrain_Information(self.control, self.md_info.atom_numbers, self.md_info.crd)
+        self.restrain_is_initialized = self.restrain.is_initialized
+        self.simple_constrain_is_initialized = 0
+
+        self.simple_constrain = Simple_Constarin(self.control, self.md_info, self.bond, self.angle, self.liujian_info)
+        self.simple_constrain_is_initialized = self.simple_constrain.is_initialized
+        self.freedom = self.simple_constrain.system_freedom
+
+        self.vatom = Virtual_Information(self.control, self.md_info, self.md_info.sys.freedom)
+        self.vatom_is_initialized = 1
+
+        self.random = P.UniformReal(seed=1)
+        self.pow = P.Pow()
+
+        self.mol_map = CoordinateMolecularMap(self.md_info.atom_numbers, self.md_info.sys.box_length, self.md_info.crd,
+                                              self.md_info.nb.excluded_atom_numbers, self.md_info.nb.h_excluded_numbers,
+                                              self.md_info.nb.h_excluded_list_start, self.md_info.nb.h_excluded_list)
+        self.mol_map_is_initialized = 1
+        self.init_params()
+        self.init_Tensor()
+        self.op_define()
+        self.op_define_2()
+        self.depend = P.Depend()
+        self.print = P.Print()
+        self.total_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
+        self.accept_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
+        self.is_molecule_map_output = self.md_info.output.is_molecule_map_output
+        self.target_pressure = self.md_info.sys.target_pressure
+        self.Nx = self.nb_info.Nx
+        self.Ny = self.nb_info.Ny
+        self.Nz = self.nb_info.Nz
+        self.PME_inverse_box_vector = Parameter(Tensor(self.pme_method.PME_inverse_box_vector, mstype.float32),
+                                                requires_grad=False)
+        self.mc_baro_is_initialized = 0
+        self.bd_baro_is_initialized = 0
+
+        if self.mode == 2 and self.control.Command_Set["barostat"] == "monte_carlo":
+            self.mc_baro = MC_BARO(self.control, self.md_info.atom_numbers, self.md_info.sys.target_pressure,
+                                   self.md_info.sys.box_length, self.md_info.res.is_initialized, self.md_info.mode)
+            self.mc_baro_is_initialized = self.mc_baro.is_initialized
+            self.update_interval = self.mc_baro.update_interval
+            self.mc_baro_energy_old = Parameter(Tensor(0, mstype.float32), requires_grad=False)
+            self.potential = Parameter(Tensor(0, mstype.float32), requires_grad=False)
+            self.frc_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
+            self.crd_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
+            self.crd_scale_factor = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
+            self.system_reinitializing_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
+            self.mc_baro_energy_new = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
+            self.scale_coordinate_by_residue = Parameter(Tensor(0, mstype.float32), requires_grad=False)
+            self.extra_term = Parameter(Tensor(0, mstype.float32), requires_grad=False)
+            self.DeltaV = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
+            self.target_temperature = self.md_info.sys.target_temperature
+            self.VDevided = Parameter(Tensor(0.0, mstype.float32), requires_grad=False)
+            self.log = P.Log()
+            self.mc_baro_accept_possibility = Parameter(Tensor(0, mstype.float32), requires_grad=False)
+            self.exp = P.Exp()
+            self.mc_baro_newV = self.mc_baro.newV
+            self.mc_baro_V0 = Parameter(Tensor(self.mc_baro.V0, mstype.float32), requires_grad=False)
+            self.mc_baro_newV = self.mc_baro.newV
+            self.check_interval = self.mc_baro.check_interval
+
+        if self.mode == 2 and self.control.Command_Set["barostat"] == "berendsen":
+            self.bd_baro = BD_BARO(self.control, self.md_info.sys.target_pressure, self.md_info.sys.box_length,
+                                   self.md_info.mode)
+            self.bd_baro_is_initialized = self.bd_baro.is_initialized
+            self.update_interval = self.bd_baro.update_interval
+            self.pressure = Parameter(Tensor(self.md_info.sys.d_pressure, mstype.float32), requires_grad=False)
+            self.compressibility = self.bd_baro.compressibility
+            self.bd_baro_dt = self.bd_baro.dt
+            self.bd_baro_taup = self.bd_baro.taup
+            self.system_reinitializing_count = Parameter(Tensor(0, mstype.int32), requires_grad=False)
+            self.bd_baro_newV = Parameter(Tensor(self.bd_baro.newV, mstype.float32), requires_grad=False)
+            self.bd_baro_V0 = Parameter(Tensor(self.bd_baro.V0, mstype.float32), requires_grad=False)
+
+    def init_params(self):
+        """init_params"""
         self.bond_energy_sum = Tensor(0, mstype.int32)
         self.angle_energy_sum = Tensor(0, mstype.int32)
         self.dihedral_energy_sum = Tensor(0, mstype.int32)
@@ -101,7 +195,8 @@ class Simulation(nn.Cell):
         self.grid_numbers = self.nb_info.grid_numbers
         self.max_atom_in_grid_numbers = self.nb_info.max_atom_in_grid_numbers
         self.max_neighbor_numbers = self.nb_info.max_neighbor_numbers
-        self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers
+        # self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers
+        self.excluded_atom_numbers = self.md_info.nb.excluded_atom_numbers
         self.refresh_count = Parameter(Tensor(self.nb_info.refresh_count, mstype.int32), requires_grad=False)
         self.refresh_interval = self.nb_info.refresh_interval
         self.skin = self.nb_info.skin
@@ -115,24 +210,39 @@ class Simulation(nn.Cell):
         self.fftx = self.pme_method.fftx
         self.ffty = self.pme_method.ffty
         self.fftz = self.pme_method.fftz
-        self.random_seed = self.liujian_info.rand_seed
+        self.random_seed = self.liujian_info.random_seed
         self.dt = self.liujian_info.dt
         self.half_dt = self.liujian_info.half_dt
         self.exp_gamma = self.liujian_info.exp_gamma
-        self.init_Tensor()
-        self.op_define()
         self.update = False
         self.file = None
         self.datfile = None
+        self.max_velocity = self.liujian_info.max_velocity
+
+        # bingshui
+        self.CONSTANT_kB = 0.00198716
 
     def init_Tensor(self):
         '''init tensor'''
+        # MD_Reset_Atom_Energy_And_Virial
+        self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32),
+                                  requires_grad=False)
+        self.need_potential = Tensor(0, mstype.int32)
+        self.need_pressure = Tensor(0, mstype.int32)
+        # self.potential = Tensor(0, mstype.float32)
+        self.atom_energy = Parameter(Tensor([0] * self.atom_numbers, mstype.float32), requires_grad=False)
+        self.atom_virial = Parameter(Tensor([0] * self.atom_numbers, mstype.float32), requires_grad=False)
+        self.frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False)
+
         self.crd = Parameter(
-            Tensor(np.float32(np.asarray(self.md_info.coordinate).reshape([self.atom_numbers, 3])), mstype.float32),
+            Tensor(np.array(self.md_info.coordinate).reshape([self.atom_numbers, 3]), mstype.float32),
             requires_grad=False)
-        self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.crd_to_uint_crd_cof, np.float32), mstype.float32)
-        self.uint_dr_to_dr_cof = Parameter(
-            Tensor(np.asarray(self.md_info.uint_dr_to_dr_cof, np.float32), mstype.float32), requires_grad=False)
+        self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.crd_to_uint_crd_cof, np.float32), mstype.float32)
+        self.quarter_crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.quarter_crd_to_uint_crd_cof, np.float32),
+                                                  mstype.float32)
+
+        self.uint_dr_to_dr_cof = Parameter(Tensor(self.md_info.pbc.uint_dr_to_dr_cof, mstype.float32),
+                                           requires_grad=False)
         self.box_length = Tensor(self.md_info.box_length, mstype.float32)
         self.charge = Parameter(Tensor(np.asarray(self.md_info.h_charge, dtype=np.float32), mstype.float32),
                                 requires_grad=False)
@@ -140,12 +250,13 @@ class Simulation(nn.Cell):
                                  requires_grad=False)
         self.last_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32),
                                   requires_grad=False)
-        self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32),
-                                  requires_grad=False)
+        self.mass = Tensor(self.md_info.h_mass, mstype.float32)
         self.mass_inverse = Tensor(self.md_info.h_mass_inverse, mstype.float32)
+        self.res_mass = Tensor(self.md_info.res.h_mass, mstype.float32)
+        self.res_mass_inverse = Tensor(self.md_info.res.h_mass_inverse, mstype.float32)
+
         self.res_start = Tensor(self.md_info.h_res_start, mstype.int32)
         self.res_end = Tensor(self.md_info.h_res_end, mstype.int32)
-        self.mass = Tensor(self.md_info.h_mass, mstype.float32)
         self.velocity = Parameter(Tensor(self.md_info.velocity, mstype.float32), requires_grad=False)
         self.acc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32), requires_grad=False)
         self.bond_atom_a = Tensor(np.asarray(self.bond.h_atom_a, np.int32), mstype.int32)
@@ -161,17 +272,19 @@ class Simulation(nn.Cell):
         self.dihedral_atom_b = Tensor(np.asarray(self.dihedral.h_atom_b, np.int32), mstype.int32)
         self.dihedral_atom_c = Tensor(np.asarray(self.dihedral.h_atom_c, np.int32), mstype.int32)
         self.dihedral_atom_d = Tensor(np.asarray(self.dihedral.h_atom_d, np.int32), mstype.int32)
-        self.pk = Tensor(np.asarray(self.dihedral.pk, np.float32), mstype.float32)
-        self.gamc = Tensor(np.asarray(self.dihedral.gamc, np.float32), mstype.float32)
-        self.gams = Tensor(np.asarray(self.dihedral.gams, np.float32), mstype.float32)
-        self.pn = Tensor(np.asarray(self.dihedral.pn, np.float32), mstype.float32)
-        self.ipn = Tensor(np.asarray(self.dihedral.ipn, np.int32), mstype.int32)
+        self.pk = Tensor(np.asarray(self.dihedral.h_pk, np.float32), mstype.float32)
+        self.gamc = Tensor(np.asarray(self.dihedral.h_gamc, np.float32), mstype.float32)
+        self.gams = Tensor(np.asarray(self.dihedral.h_gams, np.float32), mstype.float32)
+        self.pn = Tensor(np.asarray(self.dihedral.h_pn, np.float32), mstype.float32)
+        self.ipn = Tensor(np.asarray(self.dihedral.h_ipn, np.int32), mstype.int32)
         self.nb14_atom_a = Tensor(np.asarray(self.nb14.h_atom_a, np.int32), mstype.int32)
         self.nb14_atom_b = Tensor(np.asarray(self.nb14.h_atom_b, np.int32), mstype.int32)
         self.lj_scale_factor = Tensor(np.asarray(self.nb14.h_lj_scale_factor, np.float32), mstype.float32)
         self.cf_scale_factor = Tensor(np.asarray(self.nb14.h_cf_scale_factor, np.float32), mstype.float32)
         self.grid_N = Tensor(self.nb_info.grid_N, mstype.int32)
-        self.grid_length_inverse = Tensor(self.nb_info.grid_length_inverse, mstype.float32)
+        self.grid_length = Parameter(Tensor(self.nb_info.grid_length, mstype.float32), requires_grad=False)
+        self.grid_length_inverse = Parameter(Tensor(self.nb_info.grid_length_inverse, mstype.float32),
+                                             requires_grad=False)
         self.bucket = Parameter(Tensor(
             np.asarray(self.nb_info.bucket, np.int32).reshape([self.grid_numbers, self.max_atom_in_grid_numbers]),
             mstype.int32), requires_grad=False)
@@ -187,24 +300,29 @@ class Simulation(nn.Cell):
         self.nl_atom_serial = Parameter(
             Tensor(np.zeros([self.atom_numbers, self.max_neighbor_numbers], np.int32), mstype.int32),
             requires_grad=False)
-        self.excluded_list_start = Tensor(np.asarray(self.nb_info.excluded_list_start, np.int32), mstype.int32)
-        self.excluded_list = Tensor(np.asarray(self.nb_info.excluded_list, np.int32), mstype.int32)
-        self.excluded_numbers = Tensor(np.asarray(self.nb_info.excluded_numbers, np.int32), mstype.int32)
+        self.excluded_list_start = Tensor(np.asarray(self.md_info.nb.h_excluded_list_start, np.int32), mstype.int32)
+        self.excluded_list = Tensor(np.asarray(self.md_info.nb.h_excluded_list, np.int32), mstype.int32)
+        self.excluded_numbers = Tensor(np.asarray(self.md_info.nb.h_excluded_numbers, np.int32), mstype.int32)
+
         self.need_refresh_flag = Tensor(np.asarray([0], np.int32), mstype.int32)
-        self.atom_LJ_type = Tensor(np.asarray(self.LJ_info.atom_LJ_type, dtype=np.int32), mstype.int32)
-        self.LJ_A = Tensor(np.asarray(self.LJ_info.LJ_A, dtype=np.float32), mstype.float32)
-        self.LJ_B = Tensor(np.asarray(self.LJ_info.LJ_B, dtype=np.float32), mstype.float32)
+        self.atom_LJ_type = Tensor(self.LJ_info.atom_LJ_type, mstype.int32)
+        self.LJ_A = Tensor(self.LJ_info.h_LJ_A, mstype.float32)
+        self.LJ_B = Tensor(self.LJ_info.h_LJ_B, mstype.float32)
         self.sqrt_mass = Tensor(self.liujian_info.h_sqrt_mass, mstype.float32)
         self.rand_state = Parameter(Tensor(self.liujian_info.rand_state, mstype.float32))
         self.zero_fp_tensor = Tensor(np.asarray([0,], np.float32))
+        self.zero_frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32),
+                                  requires_grad=False)
 
     def op_define(self):
         '''op define'''
         self.crd_to_uint_crd = P.CrdToUintCrd(self.atom_numbers)
+        self.crd_to_uint_crd_quarter = P.CrdToUintCrdQuarter(self.atom_numbers)
         self.mdtemp = P.MDTemperature(self.residue_numbers, self.atom_numbers)
         self.setup_random_state = P.MDIterationSetupRandState(self.atom_numbers, self.random_seed)
-        self.bond_force_with_atom_energy = P.BondForceWithAtomEnergy(bond_numbers=self.bond_numbers,
-                                                                     atom_numbers=self.atom_numbers)
+
+        self.bond_force_with_atom_energy_virial = P.BondForceWithAtomEnergyAndVirial(bond_numbers=self.bond_numbers,
+                                                                                     atom_numbers=self.atom_numbers)
         self.angle_force_with_atom_energy = P.AngleForceWithAtomEnergy(angle_numbers=self.angle_numbers)
         self.dihedral_force_with_atom_energy = P.DihedralForceWithAtomEnergy(dihedral_numbers=self.dihedral_numbers)
         self.nb14_force_with_atom_energy = P.Dihedral14LJCFForceWithAtomEnergy(nb14_numbers=self.nb14_numbers,
@@ -215,7 +333,6 @@ class Simulation(nn.Cell):
         self.pme_reciprocal_force = P.PMEReciprocalForce(self.atom_numbers, self.beta, self.fftx, self.ffty, self.fftz,
                                                          self.md_info.box_length[0], self.md_info.box_length[1],
                                                          self.md_info.box_length[2])
-
         self.bond_energy = P.BondEnergy(self.bond_numbers, self.atom_numbers)
         self.angle_energy = P.AngleEnergy(self.angle_numbers)
         self.dihedral_energy = P.DihedralEnergy(self.dihedral_numbers)
@@ -225,77 +342,204 @@ class Simulation(nn.Cell):
         self.pme_energy = P.PMEEnergy(self.atom_numbers, self.excluded_atom_numbers, self.beta, self.fftx, self.ffty,
                                       self.fftz, self.md_info.box_length[0], self.md_info.box_length[1],
                                       self.md_info.box_length[2])
-
         self.md_iteration_leap_frog_liujian = P.MDIterationLeapFrogLiujian(self.atom_numbers, self.half_dt, self.dt,
                                                                            self.exp_gamma)
 
-        self.neighbor_list_update_init = P.NeighborListUpdate(grid_numbers=self.grid_numbers,
-                                                              atom_numbers=self.atom_numbers, not_first_time=0,
-                                                              nxy=self.nxy,
-                                                              excluded_atom_numbers=self.excluded_atom_numbers,
-                                                              cutoff_square=self.cutoff_square,
-                                                              half_skin_square=self.half_skin_square,
-                                                              cutoff_with_skin=self.cutoff_with_skin,
-                                                              half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                                              cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                                              refresh_interval=self.refresh_interval,
-                                                              cutoff=self.cutoff, skin=self.skin,
-                                                              max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                                              max_neighbor_numbers=self.max_neighbor_numbers)
+        self.md_iteration_leap_frog_liujian_with_max_vel = P.MDIterationLeapFrogLiujianWithMaxVel(self.atom_numbers,
+                                                                                                  self.half_dt, self.dt,
+                                                                                                  self.exp_gamma,
+                                                                                                  self.max_velocity)
+        self.neighbor_list_update = \
+            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
+                                 atom_numbers=self.atom_numbers,
+                                 not_first_time=1, nxy=self.nxy,
+                                 excluded_atom_numbers=self.excluded_atom_numbers,
+                                 cutoff_square=self.cutoff_square,
+                                 half_skin_square=self.half_skin_square,
+                                 cutoff_with_skin=self.cutoff_with_skin,
+                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                 refresh_interval=self.refresh_interval, cutoff=self.cutoff,
+                                 skin=self.skin,
+                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                 max_neighbor_numbers=self.max_neighbor_numbers)
+
+        self.neighbor_list_update_forced_update = \
+            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
+                                 atom_numbers=self.atom_numbers,
+                                 not_first_time=1, nxy=self.nxy,
+                                 excluded_atom_numbers=self.excluded_atom_numbers,
+                                 cutoff_square=self.cutoff_square,
+                                 half_skin_square=self.half_skin_square,
+                                 cutoff_with_skin=self.cutoff_with_skin,
+                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                 refresh_interval=self.refresh_interval,
+                                 cutoff=self.cutoff,
+                                 skin=self.skin,
+                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                 max_neighbor_numbers=self.max_neighbor_numbers,
+                                 forced_update=1)
+
+        self.neighbor_list_update_nb = \
+            P.NeighborListUpdate(grid_numbers=self.grid_numbers,
+                                 atom_numbers=self.atom_numbers,
+                                 not_first_time=1, nxy=self.nxy,
+                                 excluded_atom_numbers=self.excluded_atom_numbers,
+                                 cutoff_square=self.cutoff_square,
+                                 half_skin_square=self.half_skin_square,
+                                 cutoff_with_skin=self.cutoff_with_skin,
+                                 half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                 cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                 refresh_interval=self.refresh_interval,
+                                 cutoff=self.cutoff,
+                                 skin=self.skin,
+                                 max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                 max_neighbor_numbers=self.max_neighbor_numbers,
+                                 forced_update=1, forced_check=1)
+
+    def op_define_2(self):
+        """op_define_2"""
+        self.neighbor_list_update_mc = P.NeighborListUpdate(grid_numbers=self.grid_numbers,
+                                                            atom_numbers=self.atom_numbers,
+                                                            not_first_time=1, nxy=self.nxy,
+                                                            excluded_atom_numbers=self.excluded_atom_numbers,
+                                                            cutoff_square=self.cutoff_square,
+                                                            half_skin_square=self.half_skin_square,
+                                                            cutoff_with_skin=self.cutoff_with_skin,
+                                                            half_cutoff_with_skin=self.half_cutoff_with_skin,
+                                                            cutoff_with_skin_square=self.cutoff_with_skin_square,
+                                                            refresh_interval=self.refresh_interval,
+                                                            cutoff=self.cutoff,
+                                                            skin=self.skin,
+                                                            max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
+                                                            max_neighbor_numbers=self.max_neighbor_numbers,
+                                                            forced_update=0, forced_check=1)
 
-        self.neighbor_list_update = P.NeighborListUpdate(grid_numbers=self.grid_numbers, atom_numbers=self.atom_numbers,
-                                                         not_first_time=1, nxy=self.nxy,
-                                                         excluded_atom_numbers=self.excluded_atom_numbers,
-                                                         cutoff_square=self.cutoff_square,
-                                                         half_skin_square=self.half_skin_square,
-                                                         cutoff_with_skin=self.cutoff_with_skin,
-                                                         half_cutoff_with_skin=self.half_cutoff_with_skin,
-                                                         cutoff_with_skin_square=self.cutoff_with_skin_square,
-                                                         refresh_interval=self.refresh_interval, cutoff=self.cutoff,
-                                                         skin=self.skin,
-                                                         max_atom_in_grid_numbers=self.max_atom_in_grid_numbers,
-                                                         max_neighbor_numbers=self.max_neighbor_numbers)
         self.random_force = Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32)
 
+        # simple_constrain
+        self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers
+        self.last_pair_dr = Parameter(Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32),
+                                      requires_grad=False)
+        if self.simple_constrain_is_initialized:
+            self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers
+            self.last_crd_to_dr = P.lastcrdtodr(self.atom_numbers, self.constrain_pair_numbers)
+            self.constrain_pair = np.array(self.simple_constrain.h_constrain_pair)
+            self.atom_i_serials = Tensor(self.constrain_pair[:, 0], mstype.int32)
+            self.atom_j_serials = Tensor(self.constrain_pair[:, 1], mstype.int32)
+            self.constant_rs = Tensor(self.constrain_pair[:, 2], mstype.float32)
+            self.constrain_ks = Tensor(self.constrain_pair[:, 3], mstype.float32)
+            self.last_pair_dr = Parameter(
+                Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32), requires_grad=False)
+            self.constrain_frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32),
+                                           requires_grad=False)
+            self.iteration_numbers = self.simple_constrain.info.iteration_numbers
+            self.half_exp_gamma_plus_half = self.simple_constrain.half_exp_gamma_plus_half
+            self.refresh_uint_crd = P.refreshuintcrd(self.atom_numbers, self.half_exp_gamma_plus_half)
+            self.need_pressure = 0
+            self.constrain_force_cycle_with_virial = P.constrainforcecyclewithvirial(self.atom_numbers,
+                                                                                     self.constrain_pair_numbers)
+            self.constrain_force_cycle = P.ConstrainForceCycle(self.atom_numbers, self.constrain_pair_numbers)
+            self.dt_inverse = self.simple_constrain.dt_inverse
+            self.refresh_crd_vel = P.refreshcrdvel(self.atom_numbers, self.dt_inverse, self.dt, self.exp_gamma,
+                                                   self.half_exp_gamma_plus_half)
+
+        if self.mol_map_is_initialized:
+            self.refresh_boxmaptimes = P.refreshboxmaptimes(self.atom_numbers)
+            self.box_map_times = Parameter(Tensor(self.mol_map.h_box_map_times, mstype.int32), requires_grad=False)
+        self.residue_numbers = self.md_info.residue_numbers
+        self.getcenterofmass = P.GetCenterOfMass(self.residue_numbers)
+        self.mapcenterofmass = P.MapCenterOfMass(self.residue_numbers, scaler=1.0)
+
+        self.md_iteration_leap_frog = P.MDIterationLeapFrog(self.atom_numbers, self.dt)
+        self.md_iteration_leap_frog_with_max_vel = P.MDIterationLeapFrogWithMaxVel(self.atom_numbers, self.dt,
+                                                                                   self.max_velocity)
+        self.md_information_gradient_descent = P.MDIterationGradientDescent(self.atom_numbers, self.dt * self.dt)
+
     def Simulation_Beforce_Caculate_Force(self):
         '''simulation before calculate force'''
-        crd_to_uint_crd_cof = 0.5 * self.crd_to_uint_crd_cof
-        uint_crd = self.crd_to_uint_crd(crd_to_uint_crd_cof, self.crd)
-        return uint_crd
+        self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd)
+        return self.uint_crd
 
     def Simulation_Caculate_Force(self, uint_crd, scaler, nl_atom_numbers, nl_atom_serial):
         '''simulation calculate force'''
-        bond_force, _ = self.bond_force_with_atom_energy(uint_crd, scaler, self.bond_atom_a,
-                                                         self.bond_atom_b, self.bond_k, self.bond_r0)
+        uint_crd = self.Simulation_Beforce_Caculate_Force()
+        force = self.zero_frc
+        if self.LJ_info_is_initialized:
+            lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers,
+                                                      nl_atom_serial, self.LJ_A, self.LJ_B)
+            force = force + lj_force
 
-        angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a,
-                                                           self.angle_atom_b, self.angle_atom_c,
-                                                           self.angle_k, self.angle_theta0)
+        if self.pme_is_initialized:
+            pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start,
+                                                         self.excluded_list, self.excluded_numbers)
 
-        dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler,
-                                                                 self.dihedral_atom_a,
-                                                                 self.dihedral_atom_b,
-                                                                 self.dihedral_atom_c,
-                                                                 self.dihedral_atom_d, self.ipn,
-                                                                 self.pk, self.gamc, self.gams,
-                                                                 self.pn)
+            pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge)
+            force = force + pme_excluded_force + pme_reciprocal_force
+        if self.nb14_is_initialized:
+            nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge,
+                                                             scaler, self.nb14_atom_a, self.nb14_atom_b,
+                                                             self.lj_scale_factor, self.cf_scale_factor,
+                                                             self.LJ_A, self.LJ_B)
+            force = force + nb14_force
 
-        nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge,
-                                                         scaler, self.nb14_atom_a, self.nb14_atom_b,
-                                                         self.lj_scale_factor, self.cf_scale_factor,
-                                                         self.LJ_A, self.LJ_B)
+        if self.bond_is_initialized:
+            bond_force, _, _ = self.bond_force_with_atom_energy_virial(uint_crd, scaler, self.bond_atom_a,
+                                                                       self.bond_atom_b, self.bond_k, self.bond_r0)
+            force = force + bond_force
+        if self.angle_is_initialized:
+            angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a,
+                                                               self.angle_atom_b, self.angle_atom_c,
+                                                               self.angle_k, self.angle_theta0)
+            force = force + angle_force
+        if self.dihedral_is_initialized:
+            dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler,
+                                                                     self.dihedral_atom_a,
+                                                                     self.dihedral_atom_b,
+                                                                     self.dihedral_atom_c,
+                                                                     self.dihedral_atom_d, self.ipn,
+                                                                     self.pk, self.gamc, self.gams,
+                                                                     self.pn)
+            force = force + dihedral_force
+
+        if self.restrain_is_initialized:
+            _, _, restrain_frc = self.restrain_force_with_atom_energy_and_virial(self.restrain_list,
+                                                                                 self.crd,
+                                                                                 self.crd_ref,
+                                                                                 self.box_length)
+            force = force + restrain_frc
 
-        lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers,
-                                                  nl_atom_serial, self.LJ_A, self.LJ_B)
-        pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start,
-                                                     self.excluded_list, self.excluded_numbers)
-        pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge)
-        force = P.AddN()(
-            [bond_force, angle_force, dihedral_force, nb14_force, lj_force, pme_excluded_force, pme_reciprocal_force])
         return force
 
     def Simulation_Caculate_Energy(self, uint_crd, uint_dr_to_dr_cof):
         '''simulation calculate energy'''
+
+        lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers,
+                                   self.nl_atom_serial, self.LJ_A, self.LJ_B)
+
+        lj_energy_sum = P.ReduceSum(True)(lj_energy)
+        # lj_energy_sum = self.zero_fp_tensor
+
+        reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge,
+                                                                                           self.nl_atom_numbers,
+                                                                                           self.nl_atom_serial,
+                                                                                           uint_dr_to_dr_cof,
+                                                                                           self.excluded_list_start,
+                                                                                           self.excluded_list,
+                                                                                           self.excluded_numbers)
+        ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy
+        # ee_ene = self.zero_fp_tensor
+
+        nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof,
+                                             self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A,
+                                             self.LJ_B)
+        nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof,
+                                             self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor)
+        nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy)
+        nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy)
+        # nb14_lj_energy_sum = self.zero_fp_tensor
+        # nb14_cf_energy_sum = self.zero_fp_tensor
         bond_energy = self.bond_energy(uint_crd, uint_dr_to_dr_cof, self.bond_atom_a, self.bond_atom_b, self.bond_k,
                                        self.bond_r0)
         bond_energy_sum = P.ReduceSum(True)(bond_energy)
@@ -309,26 +553,6 @@ class Simulation(nn.Cell):
                                                self.gams, self.pn)
         dihedral_energy_sum = P.ReduceSum(True)(dihedral_energy)
 
-        nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof,
-                                             self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A,
-                                             self.LJ_B)
-        nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof,
-                                             self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor)
-        nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy)
-        nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy)
-
-        lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers,
-                                   self.nl_atom_serial, self.LJ_A, self.LJ_B)
-        lj_energy_sum = P.ReduceSum(True)(lj_energy)
-
-        reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge,
-                                                                                           self.nl_atom_numbers,
-                                                                                           self.nl_atom_serial,
-                                                                                           uint_dr_to_dr_cof,
-                                                                                           self.excluded_list_start,
-                                                                                           self.excluded_list,
-                                                                                           self.excluded_numbers)
-        ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy
         total_energy = P.AddN()(
             [bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum,
              lj_energy_sum, ee_ene])
@@ -336,19 +560,43 @@ class Simulation(nn.Cell):
                lj_energy_sum, ee_ene, total_energy
 
     def Simulation_Temperature(self):
-        '''caculate temperature'''
+        """calculate temperature"""
         res_ek_energy = self.mdtemp(self.res_start, self.res_end, self.velocity, self.mass)
         temperature = P.ReduceSum()(res_ek_energy)
         return temperature
 
     def Simulation_MDIterationLeapFrog_Liujian(self, inverse_mass, sqrt_mass_inverse, crd, frc, rand_state, random_frc):
         '''simulation leap frog iteration liujian'''
-        crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc, self.acc,
-                                                  rand_state, random_frc)
+        if self.max_velocity <= 0:
+            crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc,
+                                                      self.acc,
+                                                      rand_state, random_frc)
+        else:
+            crd = self.md_iteration_leap_frog_liujian_with_max_vel(inverse_mass, sqrt_mass_inverse, self.velocity, crd,
+                                                                   frc, self.acc,
+                                                                   rand_state, random_frc)
         vel = F.depend(self.velocity, crd)
         acc = F.depend(self.acc, crd)
         return vel, crd, acc
 
+    def Simulation_MDIterationLeapFrog(self, force):
+        '''simulation leap frog'''
+        if self.max_velocity <= 0:
+            res = self.md_iteration_leap_frog(self.velocity, self.crd, force, self.acc, self.mass_inverse)
+        else:
+            res = self.md_iteration_leap_frog_with_max_vel(self.velocity, self.crd, force, self.acc, self.mass_inverse)
+        vel = F.depend(self.velocity, res)
+        crd = F.depend(self.crd, res)
+        return vel, crd, res
+
+    def Simulation_MDInformationGradientDescent(self, force):
+        # print("Simulation_MDInformationGradientDescent")
+        res = self.md_information_gradient_descent(self.crd, force)
+        self.velocity = self.zero_frc
+        vel = F.depend(self.velocity, res)
+        crd = F.depend(self.crd, res)
+        return vel, crd, res
+
     def Main_Print(self, *args):
         """compute the temperature"""
         steps, temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, sigma_of_dihedral_ene, \
@@ -359,7 +607,7 @@ class Simulation(nn.Cell):
 
         temperature = temperature.asnumpy()
         total_potential_energy = total_potential_energy.asnumpy()
-        print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps, float(temperature), float(total_potential_energy)),
+        print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps + 1, float(temperature), float(total_potential_energy)),
               end=" ")
         if self.bond.bond_numbers > 0:
             sigma_of_bond_ene = sigma_of_bond_ene.asnumpy()
@@ -405,34 +653,304 @@ class Simulation(nn.Cell):
             self.datfile.close()
             print("Save .dat file successfully!")
 
+    # 控压部分代码
+    def Volume_Change_Attempt(self, boxlength, DeltaV_max):
+        """Volume_Change_Attempt"""
+        nrand = self.random((1, 1))
+        DeltaV = nrand * DeltaV_max
+        V = boxlength[0] * boxlength[1] * boxlength[2]
+        # crd_scale_factor = Tensor(np.crbt((V + DeltaV) / V), mstype.float32)
+        crd_scale_factor = self.pow((V + DeltaV) / V, -3)
+        return crd_scale_factor
+
+    def Update_Volume(self, factor):
+        """Update_Volume"""
+        self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0
+        # f_inv = 1.0 / factor
+        self.box_length = factor * self.box_length
+        self.crd_to_uint_crd_cof = self.CONSTANT_UINT_MAX_FLOAT / self.box_length
+        self.quarter_crd_to_uint_crd_cof = 0.25 * self.crd_to_uint_crd_cof
+        self.uint_dr_to_dr_cof = 1.0 / self.crd_to_uint_crd_cof
+        self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd)
+
+    def Neighbor_List_Update_Volume(self, box_length):
+        """Neighbor_List_Update_Volume"""
+        self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / box_length
+        self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * box_length
+        self.grid_length[0] = box_length[0] / self.Nx
+        self.grid_length[1] = box_length[1] / self.Ny
+        self.grid_length[2] = box_length[1] / self.Nz
+        self.grid_length_inverse = 1.0 / self.grid_length
+
+    def LJ_Update_Volume(self):
+        """main destroy"""
+        if self.LJ_info_is_initialized:
+            # self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length
+            self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2]
+
+    def PME_Update_Volume(self, factor):
+        """PME_Update_Volume"""
+        factor_inverse = 1.0 / factor
+        self.PME_inverse_box_vector[0] = self.fftx / self.box_length[0]
+        self.PME_inverse_box_vector[1] = self.ffty / self.box_length[1]
+        self.PME_inverse_box_vector[2] = self.fftz / self.box_length[2]
+        self.PME_inverse_box_vector = factor_inverse * self.PME_inverse_box_vector
+        self.beta = self.beta * factor
+        # self.PME_BC = self.PME_BC * factor_inverse #scale list
+        self.neutralizing_factor = self.pow(factor, 5.0)
+
+    def Simple_Constrain_Update_Volume(self):
+        """Simple_Constrain_Update_Volume"""
+        if self.simple_constrain_is_initialized:
+            self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length
+            self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length
+            self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2]
+
+    def Main_Volume_Change(self, factor):
+        """Main_Volume_Change"""
+        self.Update_Volume(factor)
+        self.Neighbor_List_Update_Volume(self.box_length)
+        _ = self.neighbor_list_update_nb(self.atom_numbers_in_grid_bucket, self.bucket,
+                                         self.crd, self.box_length, self.grid_N,
+                                         self.grid_length_inverse, self.atom_in_grid_serial,
+                                         self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd,
+                                         self.pointer, self.nl_atom_numbers, self.nl_atom_serial,
+                                         self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list,
+                                         self.excluded_numbers, self.need_refresh_flag, self.refresh_count)  # Done
+        self.LJ_Update_Volume()
+        self.PME_Update_Volume(factor)
+        self.Simple_Constrain_Update_Volume()
+        # self.mol_map.Update_Volume(self.md_info.sys.box_length)
+
+    def Main_Volume_Change_Largely(self):
+        """Main_Volume_Change_Largely"""
+        # re-initialize neighbor_list and pme
+        _ = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket, self.bucket,
+                                                    self.crd, self.box_length, self.grid_N,
+                                                    self.grid_length_inverse, self.atom_in_grid_serial,
+                                                    self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd,
+                                                    self.pointer, self.nl_atom_numbers, self.nl_atom_serial,
+                                                    self.uint_dr_to_dr_cof, self.excluded_list_start,
+                                                    self.excluded_list,
+                                                    self.excluded_numbers, self.need_refresh_flag,
+                                                    self.refresh_count)
+
+    def Check_MC_Barostat_Accept(self):
+        """Check_MC_Barostat_Accept"""
+        self.total_count = self.total_count + 1
+        rand_num = self.random((1, 1))
+        if rand_num[0] < self.mc_baro_accept_possibility:
+            self.reject = 0
+            self.accept_count += 1
+        else:
+            self.reject = 1
+        return self.reject
+
+    def Delta_V_Max_Update(self):
+        """Delta_V_Max_Update"""
+        if self.total_count % self.check_interval == 0:
+            self.accept_rate = 100.0 * self.accept_count / self.total_count
+            if self.accept_rate < self.accept_rate_low:
+                self.total_count = 0
+                self.accept_count = 0
+                self.DeltaV_max = self.DeltaV_max * 0.9
+            if self.accept_rate > self.accept_rate_high:
+                self.total_count = 0
+                self.accept_count = 0
+                self.DeltaV_max = self.DeltaV_max * 1.1
+
+    def Main_iteration_presssure(self, steps, force):
+        """Main_iteration_presssure"""
+        if self.mc_baro_is_initialized and steps % self.mc_baro.update_interval == 0:
+            # old energy
+            self.mc_baro_energy_old = self.potential
+            self.frc_backup = self.frc
+            self.crd_backup = self.crd
+            self.Volume_Change_Attempt(self.box_length, 200)
+
+            # change coordinates
+            if self.is_molecule_map_output:
+                nowrap_crd = self.Calculate_No_Wrap_Crd()
+                self.crd, _ = self.Residue_Crd_Map(nowrap_crd)
+                _ = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times)
+            else:
+                self.crd = self.crd * self.crd_scale_factor  # scale list
+
+            # change volume
+            self.Main_Volume_Change(self.crd_scale_factor)
+            self.system_reinitializing_count += 1
+
+            # new energy
+            _ = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers,
+                                               self.nl_atom_serial)
+
+            self.energy_new = self.potential
+
+            # calculate accepted rate
+            if self.scale_coordinate_by_residue:
+                self.extra_term = self.target_pressure * self.DeltaV - \
+                                  self.residue_numbers * self.CONSTANT_kB * \
+                                  self.target_temperature * self.log(self.VDevided)
+            else:
+                self.extra_term = self.target_pressure * self.DeltaV - \
+                                  self.atom_numbers * self.CONSTANT_kB * \
+                                  self.target_temperature * self.log(self.VDevided)
+
+            self.mc_baro_accept_possibility = self.mc_baro_energy_new - self.mc_baro_energy_old + self.extra_term
+            self.mc_baro.mc_baro_accept_possibility = self.exp(
+                -self.mc_baro_accept_possibility / (self.CONSTANT_kB * self.target_temperature))
+
+            # check if accepted
+            if self.Check_MC_Barostat_Accept():
+                # if accept, refresh
+                self.crd_scale_factor = 1.0 / self.crd_scale_factor
+                self.crd = self.crd_backup
+                self.Main_Volume_Change(self.crd_scale_factor)
+                self.system_reinitializing_count += 1
+                _ = self.neighbor_list_update_mc(self.atom_numbers_in_grid_bucket, self.bucket,
+                                                 self.crd, self.box_length, self.grid_N,
+                                                 self.grid_length_inverse, self.atom_in_grid_serial,
+                                                 self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd,
+                                                 self.pointer, self.nl_atom_numbers, self.nl_atom_serial,
+                                                 self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list,
+                                                 self.excluded_numbers, self.need_refresh_flag,
+                                                 self.refresh_count)
+                self.frc = force
+                self.frc = self.frc_backup
+
+            # reinitialized
+            if self.system_reinitializing_count >= 20000 or (not self.reject and (
+                    self.mc_baro_newV > 1.331 * self.mc_baro_V0 or self.mc_baro_newV < 0.729 * self.mc_baro.V0)):
+                self.Main_Volume_Change_Largely()
+                self.mc_baro_V0 = self.mc_baro_newV
+                self.system_reinitializing_count = self.zero_fp_tensor
+            self.Delta_V_Max_Update()
+
+    def Constrain(self):
+        """Constrain"""
+        constrain_frc = self.zero_frc
+        for _ in range(self.iteration_numbers):
+            test_uint_crd = self.refresh_uint_crd(self.crd, self.quarter_crd_to_uint_crd_cof, constrain_frc,
+                                                  self.mass_inverse)
+            if self.need_pressure:
+                force, _ = self.constrain_force_cycle_with_virial(test_uint_crd, self.uint_dr_to_dr_cof,
+                                                                  self.last_pair_dr, self.atom_i_serials,
+                                                                  self.atom_j_serials, self.constant_rs,
+                                                                  self.constrain_ks)
+            else:
+                force = self.constrain_force_cycle(test_uint_crd, self.uint_dr_to_dr_cof, self.last_pair_dr,
+                                                   self.atom_i_serials,
+                                                   self.atom_j_serials, self.constant_rs, self.constrain_ks)
+            constrain_frc = constrain_frc + force
+
+        res = self.refresh_crd_vel(self.crd, self.velocity, constrain_frc, self.mass_inverse)
+        crd = self.depend(self.crd, res)
+        vel = self.depend(self.velocity, res)
+
+        return crd, vel, res
+
+    def Main_Iteration(self, steps, force):
+        '''Main_Iteration'''
+        # self.Main_iteration_presssure(steps, force)
+        # Remember_Last_Coordinates
+        # pressure control 1
+        if self.simple_constrain_is_initialized:
+            self.last_pair_dr = self.last_crd_to_dr(self.crd, self.quarter_crd_to_uint_crd_cof, self.uint_dr_to_dr_cof,
+                                                    self.atom_i_serials,
+                                                    self.atom_j_serials, self.constant_rs, self.constrain_ks)
+
+        if self.mode == 0:  # NVE
+            self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog(force)
+        elif self.mode == -1:  # Minimization
+            _ = self.Simulation_MDInformationGradientDescent(force)
+        else:
+            if self.liujian_info_is_initialized:
+                self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse,
+                                                                                         self.sqrt_mass, self.crd,
+                                                                                         force,
+                                                                                         self.rand_state,
+                                                                                         self.random_force)
+
+        if self.simple_constrain_is_initialized:
+            self.crd, self.velocity, res1 = self.Constrain()
+        else:
+            res1 = self.zero_fp_tensor
+
+        # MD_Information_Crd_To_Uint_Crd
+        self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd)
+        res2 = self.neighbor_list_update(self.atom_numbers_in_grid_bucket,
+                                         self.bucket,
+                                         self.crd,
+                                         self.box_length,
+                                         self.grid_N,
+                                         self.grid_length_inverse,
+                                         self.atom_in_grid_serial,
+                                         self.old_crd,
+                                         self.crd_to_uint_crd_cof,
+                                         self.uint_crd,
+                                         self.pointer,
+                                         self.nl_atom_numbers,
+                                         self.nl_atom_serial,
+                                         self.uint_dr_to_dr_cof,
+                                         self.excluded_list_start,
+                                         self.excluded_list,
+                                         self.excluded_numbers,
+                                         self.need_refresh_flag,
+                                         self.refresh_count)
+
+        res3 = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times)
+
+        return self.velocity, self.crd, res1, res2, res3
+
+    def Calculate_No_Wrap_Crd(self):
+        """Calculate_No_Wrap_Crd"""
+        nowrap_crd = self.box_map_times * self.box_length + self.crd
+        return nowrap_crd
+
+    def Residue_Crd_Map(self, nowrap_crd):
+        """Residue_Crd_Map"""
+        center_of_mass = self.getcenterofmass(self.res_start, self.res_end, nowrap_crd, self.mass,
+                                              self.res_mass_inverse)
+
+        res = self.mapcenterofmass(self.res_start, self.res_end, center_of_mass, self.box_length, nowrap_crd, self.crd)
+
+        return self.crd, res
+
     def construct(self, step, print_step):
         '''construct'''
-        self.last_crd = self.crd
-        res = self.neighbor_list_update(self.atom_numbers_in_grid_bucket,
-                                        self.bucket,
-                                        self.crd,
-                                        self.box_length,
-                                        self.grid_N,
-                                        self.grid_length_inverse,
-                                        self.atom_in_grid_serial,
-                                        self.old_crd,
-                                        self.crd_to_uint_crd_cof,
-                                        self.uint_crd,
-                                        self.pointer,
-                                        self.nl_atom_numbers,
-                                        self.nl_atom_serial,
-                                        self.uint_dr_to_dr_cof,
-                                        self.excluded_list_start,
-                                        self.excluded_list,
-                                        self.excluded_numbers,
-                                        self.need_refresh_flag,
-                                        self.refresh_count)
-        uint_crd = self.Simulation_Beforce_Caculate_Force()
-        force = self.Simulation_Caculate_Force(uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers,
+        # self.last_crd = self.crd
+        if step == 0:
+            res = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket,
+                                                          self.bucket,
+                                                          self.crd,
+                                                          self.box_length,
+                                                          self.grid_N,
+                                                          self.grid_length_inverse,
+                                                          self.atom_in_grid_serial,
+                                                          self.old_crd,
+                                                          self.crd_to_uint_crd_cof,
+                                                          self.uint_crd,
+                                                          self.pointer,
+                                                          self.nl_atom_numbers,
+                                                          self.nl_atom_serial,
+                                                          self.uint_dr_to_dr_cof,
+                                                          self.excluded_list_start,
+                                                          self.excluded_list,
+                                                          self.excluded_numbers,
+                                                          self.need_refresh_flag,
+                                                          self.refresh_count)
+        else:
+            res = self.zero_fp_tensor
+        force = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers,
                                                self.nl_atom_serial)
+        if step == 0:
+            self.rand_state = self.setup_random_state()
+
+        self.velocity, self.crd, res1, res2, res3 = self.Main_Iteration(step + 1, force)
+        temperature = self.Simulation_Temperature()
         if print_step == 0:
             bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum, \
-            lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(uint_crd, self.uint_dr_to_dr_cof)
+            lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(self.uint_crd, self.uint_dr_to_dr_cof)
         else:
             bond_energy_sum = self.zero_fp_tensor
             angle_energy_sum = self.zero_fp_tensor
@@ -442,12 +960,5 @@ class Simulation(nn.Cell):
             lj_energy_sum = self.zero_fp_tensor
             ee_ene = self.zero_fp_tensor
             total_energy = self.zero_fp_tensor
-        temperature = self.Simulation_Temperature()
-        if step == 0:
-            self.rand_state = self.setup_random_state()
-        self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse,
-                                                                                 self.sqrt_mass, self.crd, force,
-                                                                                 self.rand_state,
-                                                                                 self.random_force)
         return temperature, total_energy, bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, \
-               nb14_cf_energy_sum, lj_energy_sum, ee_ene, res
+               nb14_cf_energy_sum, lj_energy_sum, ee_ene, res, res1, res2, res3
diff --git a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py
index 63ac1af76df..60073bb1320 100644
--- a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py
+++ b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py
@@ -160,12 +160,9 @@ class GPT2FinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 
 class GPT2LM(nn.Cell):
diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py
index 8d153ea3c67..c1edff1ada2 100644
--- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py
+++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py
@@ -296,7 +296,6 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell):
                                                dtype=mstype.float32), name="loss_scale")
         self.add_flags(has_effect=True)
 
-        self.loss_scalar = P.ScalarSummary()
 
     def construct(self,
                   source_eos_ids,
@@ -365,12 +364,7 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
+        if not overflow:
+            self.optimizer(grads)
 
-        self.loss_scalar("loss", loss)
-
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        return (loss, cond, scaling_sens)
diff --git a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py
index 996ac637001..92651e2e600 100644
--- a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py
+++ b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py
@@ -229,7 +229,6 @@ class Adam(Optimizer):
         self.one = Tensor(np.array([1.0]).astype(np.float32))
         self.realdiv = P.RealDiv()
 
-        self.lr_scalar = P.ScalarSummary()
 
     def construct(self, gradients):
         """Adam optimizer."""
@@ -240,8 +239,6 @@ class Adam(Optimizer):
         gradients = self.scale_grad(gradients)
         lr = self.get_lr()
 
-        self.lr_scalar("learning_rate", lr)
-
         beta1_power = self.beta1_power * self.beta1
         self.beta1_power = beta1_power
         beta2_power = self.beta2_power * self.beta2
diff --git a/model_zoo/research/nlp/seq2seq/train.py b/model_zoo/research/nlp/seq2seq/train.py
index 529a01e5e19..23c9e0fb9d8 100644
--- a/model_zoo/research/nlp/seq2seq/train.py
+++ b/model_zoo/research/nlp/seq2seq/train.py
@@ -44,7 +44,7 @@ parser = argparse.ArgumentParser(description='Seq2seq train entry point.')
 
 parser.add_argument("--is_modelarts", type=ast.literal_eval, default=False, help="model config json file path.")
 parser.add_argument("--data_url", type=str, default=None, help="pre-train dataset address.")
-parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.')
+parser.add_argument('--train_url', type=str, default=None, help='Location of training outputs.')
 parser.add_argument("--config", type=str, required=True, help="model config json file path.")
 parser.add_argument("--pre_train_dataset", type=str, required=True, help="pre-train dataset address.")
 args = parser.parse_args()
@@ -217,7 +217,7 @@ def _build_training_pipeline(config: Seq2seqConfig,
         scale_update_cell=scale_manager.get_update_cell()
     )
     net_with_grads.set_train(True)
-    model = Model(net_with_grads, amp_level="O2")
+    model = Model(net_with_grads)
     loss_monitor = LossCallBack(config)
     dataset_size = dataset.get_dataset_size()
     time_cb = TimeMonitor(data_size=dataset_size)
diff --git a/model_zoo/research/nlp/skipgram/src/dataset.py b/model_zoo/research/nlp/skipgram/src/dataset.py
index b16d0de4fe4..bba2b2014f7 100644
--- a/model_zoo/research/nlp/skipgram/src/dataset.py
+++ b/model_zoo/research/nlp/skipgram/src/dataset.py
@@ -177,6 +177,8 @@ def load_eval_data(data_dir):
         if not os.path.isfile(data_path):
             continue
         with open(data_path, 'r') as f:
+            k = "capital-common-countries"
+            samples[k] = list()
             for line in f:
                 if ':' in line:
                     strs = line.strip().split(' ')
diff --git a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py
index 3de30f1a3b3..715c02ff1bf 100644
--- a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py
+++ b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py
@@ -21,7 +21,6 @@ from mindspore.common.initializer import initializer
 
 import mindspore.ops as P
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 
 from mindspore import Parameter, ParameterTuple
 from mindspore import Tensor
@@ -351,7 +350,8 @@ class TrainStepWrap(nn.Cell):
         grads = self.grad(self.network, weights)(cats_vals, num_vals, label, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class ModelBuilder:
diff --git a/model_zoo/research/recommend/autodis/src/autodis.py b/model_zoo/research/recommend/autodis/src/autodis.py
index a0fcd3a2799..57c775d8f57 100644
--- a/model_zoo/research/recommend/autodis/src/autodis.py
+++ b/model_zoo/research/recommend/autodis/src/autodis.py
@@ -18,7 +18,6 @@ import os
 import numpy as np
 from sklearn.metrics import roc_auc_score
 import mindspore.common.dtype as mstype
-from mindspore.ops import functional as F
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
 from mindspore.nn import Dropout
@@ -333,7 +332,8 @@ class TrainStepWrap(nn.Cell):
         loss = self.network(batch_ids, batch_wts, label)
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) #
         grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class PredictWithSigmoid(nn.Cell):
@@ -346,7 +346,7 @@ class PredictWithSigmoid(nn.Cell):
         self.sigmoid = P.Sigmoid()
 
     def construct(self, batch_ids, batch_wts, labels):
-        logits, _, _, = self.network(batch_ids, batch_wts)
+        logits, _, _, _, _, = self.network(batch_ids, batch_wts)
         pred_probs = self.sigmoid(logits)
 
         return logits, pred_probs, labels
diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py
index f019f179bd8..2df333b5efc 100644
--- a/model_zoo/utils/hccl_tools/hccl_tools.py
+++ b/model_zoo/utils/hccl_tools/hccl_tools.py
@@ -110,13 +110,9 @@ def main():
 
     # construct hccn_table
     device_ips: Dict[Any, Any] = {}
-    with open('/etc/hccn.conf', 'r') as fin:
-        for hccn_item in fin.readlines():
-            if hccn_item.strip().startswith('address_'):
-                device_id, device_ip = hccn_item.split('=')
-                device_id = device_id.split('_')[1]
-                device_ips[device_id] = device_ip.strip()
-
+    for device_id in device_num_list:
+        ret = os.popen("hccn_tool -i %d -ip -g" % device_id).readlines()
+        device_ips[str(device_id)] = ret[0].split(":")[1].replace('\n', '')
     hccn_table = {'version': '1.0',
                   'server_count': '1',
                   'server_list': []}
diff --git a/tests/st/auto_monad/test_auto_monad_mindtester.py b/tests/st/auto_monad/test_auto_monad_mindtester.py
index 796ad620c40..8dc7af94920 100644
--- a/tests/st/auto_monad/test_auto_monad_mindtester.py
+++ b/tests/st/auto_monad/test_auto_monad_mindtester.py
@@ -675,10 +675,9 @@ class SideEffectControlFlowAssignDependWhileNet(Cell):
         return grad_out
 
 
-# Now the case can't pass because the GPU RT problem, so only run on Ascend current time.
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_side_effect_grad_control_flow_assign_depend_while_net():
     context.set_context(mode=context.GRAPH_MODE)
diff --git a/tests/st/control/inner/test_002_single_for.py b/tests/st/control/inner/test_002_single_for.py
index 1da99eed429..2f8a49e92a1 100644
--- a/tests/st/control/inner/test_002_single_for.py
+++ b/tests/st/control/inner/test_002_single_for.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
+import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -165,7 +166,7 @@ def test_single_for_03():
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
-
+@pytest.mark.skip(reason="not supported side effect")
 def test_single_for_04():
     class SingleForNet(nn.Cell):
         def __init__(self):
diff --git a/tests/st/control/inner/test_010_if_in_if.py b/tests/st/control/inner/test_010_if_in_if.py
index 2d83bd15b65..a4fc529581b 100644
--- a/tests/st/control/inner/test_010_if_in_if.py
+++ b/tests/st/control/inner/test_010_if_in_if.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -143,11 +144,13 @@ def test_if_in_if():
     control_flow_if_in_if(IfInIfNet, x)
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_if_in_if_01():
     x = Tensor(2, mstype.int32)
     control_flow_if_in_if(IfInIfNet1, x)
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_if_in_if_02():
     x = Tensor(2, mstype.int32)
     control_flow_if_in_if(IfInIfNet2, x)
diff --git a/tests/st/control/inner/test_012_if_in_for.py b/tests/st/control/inner/test_012_if_in_for.py
index c4c8ec057ae..aca6bb0e4eb 100644
--- a/tests/st/control/inner/test_012_if_in_for.py
+++ b/tests/st/control/inner/test_012_if_in_for.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
+import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -52,6 +53,7 @@ class BackwardNet(nn.Cell):
         return grads
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_forward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -66,6 +68,7 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_032_for_in_for.py b/tests/st/control/inner/test_032_for_in_for.py
index d57a5807660..dd7094e54aa 100644
--- a/tests/st/control/inner/test_032_for_in_for.py
+++ b/tests/st/control/inner/test_032_for_in_for.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
+import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.common.parameter import Parameter
@@ -21,7 +22,8 @@ from mindspore.ops import operations as P
 from mindspore.common import dtype as mstype
 
 grad_all = C.GradOperation(get_all=True)
-context.set_context(device_target="Ascend")
+context.set_context(device_target="GPU")
+
 
 def test_for_in_for_01():
     class ForInForNet(nn.Cell):
@@ -75,7 +77,9 @@ def test_for_in_for_01():
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
-
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
 def test_for_in_for_02():
     class ForInForNet(nn.Cell):
         def __init__(self):
@@ -87,10 +91,10 @@ def test_for_in_for_02():
             self.param_b = Parameter(Tensor(11, mstype.int32), name='b')
 
         def construct(self, x):
-            for _ in range(0, 10):
+            for _ in range(0, 3):
                 x = x * 2
                 self.assign(self.param_a, x + self.param_a)
-                for _ in range(0, 5):
+                for _ in range(0, 2):
                     x = self.add(x, x)
                     self.param_b += 1
             y = self.sub(x, self.param_b + self.param_a)
diff --git a/tests/st/control/inner/test_101_if_after_while.py b/tests/st/control/inner/test_101_if_after_while.py
index fdddfbef036..3b322db3d1e 100644
--- a/tests/st/control/inner/test_101_if_after_while.py
+++ b/tests/st/control/inner/test_101_if_after_while.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 # ============================================================================
 import numpy as np
+import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -73,6 +74,7 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_110_if_after_if_in_if.py b/tests/st/control/inner/test_110_if_after_if_in_if.py
index 12e269f0a6f..e0ce1edab70 100644
--- a/tests/st/control/inner/test_110_if_after_if_in_if.py
+++ b/tests/st/control/inner/test_110_if_after_if_in_if.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -19,7 +20,7 @@ from mindspore.common import dtype as mstype
 from mindspore.common.parameter import Parameter
 
 grad_all = C.GradOperation(get_all=True)
-context.set_context(device_target="Ascend")
+context.set_context(device_target="GPU")
 
 
 class IfAfterIfInIfNet(nn.Cell):
@@ -145,22 +146,27 @@ def control_flow_if_after_if_in_if(input_net, x):
     assert graph_forward_res == pynative_forward_res
     assert graph_backward_res == pynative_backward_res
 
-
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
 def test_if_after_if_in_if():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet, x)
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_if_after_if_in_if_01():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet1, x)
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_if_after_if_in_if_02():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet2, x)
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_if_after_if_in_if_03():
     x = Tensor(2, mstype.int32)
     control_flow_if_after_if_in_if(IfAfterIfInIfNet3, x)
diff --git a/tests/st/control/inner/test_121_if_after_while_in_while.py b/tests/st/control/inner/test_121_if_after_while_in_while.py
index 32f41a8fb19..9f3feb6a16c 100644
--- a/tests/st/control/inner/test_121_if_after_while_in_while.py
+++ b/tests/st/control/inner/test_121_if_after_while_in_while.py
@@ -14,6 +14,7 @@
 # ============================================================================
 
 import numpy as np
+import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +22,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU")
 
 
 class ForwardNet(nn.Cell):
@@ -73,6 +74,7 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -122,6 +124,9 @@ class BackwardNetNoAssign(nn.Cell):
 
 
 # This test case has a problem of evaluator endless loop.
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
 def test_backward_no_assign():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
diff --git a/tests/st/control/inner/test_122_if_after_while_in_for.py b/tests/st/control/inner/test_122_if_after_while_in_for.py
index 4ecee12be36..5c572faeb85 100644
--- a/tests/st/control/inner/test_122_if_after_while_in_for.py
+++ b/tests/st/control/inner/test_122_if_after_while_in_for.py
@@ -14,6 +14,7 @@
 # ============================================================================
 
 import numpy as np
+import pytest
 from mindspore.common import dtype as mstype
 from mindspore import nn
 from mindspore import Tensor
@@ -21,7 +22,7 @@ from mindspore.ops import composite as C
 from mindspore import context
 from mindspore.common.parameter import Parameter
 
-context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend")
+context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU")
 
 
 class ForwardNet(nn.Cell):
@@ -69,6 +70,7 @@ def test_forward():
     assert graph_mode_out == pynative_mode_out
 
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_backward():
     x = Tensor(np.array(1), mstype.int32)
     y = Tensor(np.array(3), mstype.int32)
@@ -83,3 +85,52 @@ def test_backward():
     pynative_backward_net = BackwardNet(pynative_forward_net)
     pynative_mode_grads = pynative_backward_net(x, y)
     assert graph_mode_grads == pynative_mode_grads
+
+
+class ForwardNetNoAssign(nn.Cell):
+    def __init__(self, max_cycles=10):
+        super(ForwardNetNoAssign, self).__init__()
+        self.max_cycles = max_cycles
+        self.zero = Tensor(np.array(0), mstype.int32)
+        self.weight = Parameter(Tensor(np.array(0), mstype.int32))
+
+    def construct(self, x, y):
+        out = self.zero
+        for _ in range(0, self.max_cycles):
+            while x < y:
+                out = x * y + out
+                x = x + 1
+                #self.weight = x
+        if out > 20:
+            self.weight = out
+            out = out - 20
+        return out, self.weight
+
+class BackwardNetNoAssign(nn.Cell):
+    def __init__(self, net):
+        super(BackwardNetNoAssign, self).__init__(auto_prefix=False)
+        self.forward_net = net
+        self.grad = C.GradOperation(get_all=True)
+
+    def construct(self, *inputs):
+        grads = self.grad(self.forward_net)(*inputs)
+        return grads
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_backward_no_assign():
+    x = Tensor(np.array(1), mstype.int32)
+    y = Tensor(np.array(3), mstype.int32)
+    # Graph Mode
+    context.set_context(mode=context.GRAPH_MODE)
+    graph_forward_net = ForwardNetNoAssign(max_cycles=3)
+    graph_backward_net = BackwardNetNoAssign(graph_forward_net)
+    graph_mode_grads = graph_backward_net(x, y)
+    # Pynative Mode
+    context.set_context(mode=context.PYNATIVE_MODE)
+    pynative_forward_net = ForwardNetNoAssign(max_cycles=3)
+    pynative_backward_net = BackwardNetNoAssign(pynative_forward_net)
+    pynative_mode_grads = pynative_backward_net(x, y)
+    assert graph_mode_grads == pynative_mode_grads
diff --git a/tests/st/control/inner/test_330_for_after_for_in_if.py b/tests/st/control/inner/test_330_for_after_for_in_if.py
index d3246758f25..c05d387fc34 100644
--- a/tests/st/control/inner/test_330_for_after_for_in_if.py
+++ b/tests/st/control/inner/test_330_for_after_for_in_if.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+import pytest
 from mindspore import context
 from mindspore import Tensor, nn
 from mindspore.ops import composite as C
@@ -21,6 +22,7 @@ from mindspore.common.parameter import Parameter
 grad_all = C.GradOperation(get_all=True)
 context.set_context(device_target="Ascend")
 
+@pytest.mark.skip(reason="not supported side effect")
 def test_for_after_for_in_if():
     class ForAfterForInIfNet(nn.Cell):
         def __init__(self):
diff --git a/tests/st/control/test_cont_grad.py b/tests/st/control/test_cont_grad.py
index 9b598ea4b8b..45ccc095f67 100644
--- a/tests/st/control/test_cont_grad.py
+++ b/tests/st/control/test_cont_grad.py
@@ -23,6 +23,7 @@ from mindspore import nn
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.ops import composite as C
 from mindspore.ops import operations as P
+
 # from tests.vm_impl.math_ops_vm_impl import *
 # from tests.vm_impl.vm_interface import *
 # from tests.vm_impl import *
@@ -54,8 +55,9 @@ def test_while_grad():
 
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -63,15 +65,16 @@ def test_while_grad():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_const_param_grad():
     class MyWhileNet(nn.Cell):
@@ -93,7 +96,8 @@ def test_while_with_const_param_grad():
 
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor([1.1], dtype=ms.float32)
@@ -104,9 +108,10 @@ def test_while_with_const_param_grad():
     assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_variable_grad():
     class MyWhileNet(nn.Cell):
@@ -128,7 +133,8 @@ def test_while_with_variable_grad():
 
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor([1.1], dtype=ms.float32)
@@ -139,9 +145,10 @@ def test_while_with_variable_grad():
     assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_forward():
     class MyWhileNet(nn.Cell):
@@ -160,8 +167,9 @@ def test_while_with_param_forward():
                 out = out + x + self.param
                 idx = idx + 1
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     net = MyWhileNet()
     idx = Tensor(np.array(0), dtype=ms.int32)
     end = Tensor(np.array(2), dtype=ms.int32)
@@ -170,12 +178,14 @@ def test_while_with_param_forward():
     expect = np.array([[[6, 8], [10, 12]], [[19, 22], [25, 28]]], dtype=np.int32)
     assert np.allclose(graph_output.asnumpy(), expect, 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_endless_case():
     """endless case when optimization"""
+
     class MyWhileNet(nn.Cell):
         def __init__(self):
             super().__init__()
@@ -190,21 +200,23 @@ def test_while_endless_case():
                 out = out + part
                 idx = idx + 1
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     net = MyWhileNet()
     idx = Tensor(np.array(0), dtype=ms.int32)
     end = Tensor(np.array(2), dtype=ms.int32)
     x = Tensor(np.arange(8).reshape(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_grad():
     class MyWhileNet(nn.Cell):
@@ -232,7 +244,8 @@ def test_while_with_param_grad():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -242,9 +255,10 @@ def test_while_with_param_grad():
     expect = np.array([[[2, 2], [2, 2]], [[2, 2], [2, 2]]], dtype=np.int32)
     assert np.allclose(graph_output[0].asnumpy(), expect, 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_forward_with_const_branch():
     class MyWhileNet(nn.Cell):
@@ -264,8 +278,9 @@ def test_while_with_param_forward_with_const_branch():
                     out = out + idx + self.param
                 idx = idx + 1
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = while_net
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -273,16 +288,18 @@ def test_while_with_param_forward_with_const_branch():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_opt_endless():
     """endless during optimization case"""
+
     class MyWhileNet(nn.Cell):
         def __init__(self):
             super().__init__()
@@ -308,8 +325,9 @@ def test_while_opt_endless():
 
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -317,7 +335,7 @@ def test_while_opt_endless():
     x = Tensor(np.ones([2, 2, 2]).astype(np.float32) * 3, dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
@@ -343,8 +361,9 @@ def test_no_while_call():
             else:
                 out = out + idx + self.param
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = while_net
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -352,13 +371,14 @@ def test_no_while_call():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_grad_with_const_branch():
     class MyWhileNet(nn.Cell):
@@ -387,8 +407,9 @@ def test_while_with_param_grad_with_const_branch():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -396,10 +417,11 @@ def test_while_with_param_grad_with_const_branch():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.skip(reason="not supported yet")
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
@@ -435,8 +457,9 @@ def test_for_while_with_param_grad_with_const_branch():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -444,13 +467,14 @@ def test_for_while_with_param_grad_with_const_branch():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_for_while_with_param_grad_basic():
     class MyWhileNet(nn.Cell):
@@ -479,8 +503,9 @@ def test_for_while_with_param_grad_basic():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -488,13 +513,14 @@ def test_for_while_with_param_grad_basic():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_for_while_with_param_grad_normal():
     class MyWhileNet(nn.Cell):
@@ -523,8 +549,9 @@ def test_for_while_with_param_grad_normal():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -532,13 +559,14 @@ def test_for_while_with_param_grad_normal():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_basic_grad():
     class MyWhileNet(nn.Cell):
@@ -564,8 +592,9 @@ def test_while_with_param_basic_grad():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -573,13 +602,14 @@ def test_while_with_param_basic_grad():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_basic_grad_mul():
     class MyWhileNet(nn.Cell):
@@ -605,8 +635,9 @@ def test_while_with_param_basic_grad_mul():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -614,13 +645,14 @@ def test_while_with_param_basic_grad_mul():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_basic_grad_two():
     class MyWhileNet(nn.Cell):
@@ -647,8 +679,9 @@ def test_while_with_param_basic_grad_two():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -656,14 +689,15 @@ def test_while_with_param_basic_grad_two():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_with_param_basic_grad_three():
     class MyWhileNet(nn.Cell):
@@ -691,8 +725,9 @@ def test_while_with_param_basic_grad_three():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -700,15 +735,16 @@ def test_while_with_param_basic_grad_three():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_if_with_param_grad():
     class MyWhileNet(nn.Cell):
@@ -737,8 +773,9 @@ def test_while_if_with_param_grad():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -746,10 +783,11 @@ def test_while_if_with_param_grad():
     x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.skip(reason="not supported yet")
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
@@ -778,8 +816,9 @@ def test_while_with_param_grad_not_enter_while():
 
         def construct(self, a, b, c):
             return grad_by_list(self.net, self.weights)(a, b, c)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     while_net = MyWhileNet()
     net = GradNet(while_net)
     idx = Tensor(np.array(3), dtype=ms.int32)
@@ -787,13 +826,14 @@ def test_while_with_param_grad_not_enter_while():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_with_param_if_by_if_forward():
     class MyIfByIfNet(nn.Cell):
@@ -810,12 +850,13 @@ def test_with_param_if_by_if_forward():
             else:
                 out = out + x
             if a == b:
-                out = out + x*3 + self.param
+                out = out + x * 3 + self.param
             else:
-                out = out + x*2
+                out = out + x * 2
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -823,13 +864,14 @@ def test_with_param_if_by_if_forward():
     x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_with_param_if_by_if_grad_inputs():
     class MyIfByIfNet(nn.Cell):
@@ -844,7 +886,7 @@ def test_with_param_if_by_if_grad_inputs():
             if a < b:
                 out = out + x + self.param * 4
             if a == b:
-                out = out + x*3 + self.param * 3
+                out = out + x * 3 + self.param * 3
             return out
 
     class GradNet(nn.Cell):
@@ -854,8 +896,9 @@ def test_with_param_if_by_if_grad_inputs():
 
         def construct(self, *inputs):
             return grad_all(self.net)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = GradNet(if_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -863,15 +906,16 @@ def test_with_param_if_by_if_grad_inputs():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001)
     assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_with_param_if_by_if_grad_parameter():
     class MyIfByIfNet(nn.Cell):
@@ -886,7 +930,7 @@ def test_with_param_if_by_if_grad_parameter():
             if a < b:
                 out = out + x + self.param * 2
             if a == b:
-                out = out + x*3 + self.param
+                out = out + x * 3 + self.param
             return out
 
     class GradNet(nn.Cell):
@@ -897,8 +941,9 @@ def test_with_param_if_by_if_grad_parameter():
 
         def construct(self, *inputs):
             return grad_by_list(self.net, self.weights)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = GradNet(if_net)
     idx = Tensor(np.array(0), dtype=ms.int32)
@@ -906,13 +951,14 @@ def test_with_param_if_by_if_grad_parameter():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_with_param_if_by_if_grad_param_excute_null():
     class MyIfByIfNet(nn.Cell):
@@ -936,8 +982,9 @@ def test_with_param_if_by_if_grad_param_excute_null():
 
         def construct(self, *inputs):
             return grad_by_list(self.net, self.weights)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = GradNet(if_net)
     idx = Tensor(np.array(4), dtype=ms.int32)
@@ -945,13 +992,14 @@ def test_with_param_if_by_if_grad_param_excute_null():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_if_return_inside_grad():
     class MyIfByIfNet(nn.Cell):
@@ -977,8 +1025,9 @@ def test_if_by_if_return_inside_grad():
 
         def construct(self, *inputs):
             return grad_by_list(self.net, self.weights)(*inputs)
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = GradNet(if_net)
     idx = Tensor(np.array(1), dtype=ms.int32)
@@ -986,13 +1035,14 @@ def test_if_by_if_return_inside_grad():
     x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_if_forward():
     class MyIfByIfNet(nn.Cell):
@@ -1019,8 +1069,9 @@ def test_if_by_if_forward():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1028,16 +1079,18 @@ def test_if_by_if_forward():
     x = Tensor(np.array(4), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_if_forward_control_tuple_switch():
     """tuple_get from  switch op will generate new switch inside to eliminate tuple_get"""
+
     class Branch3Net(nn.Cell):
         def __init__(self):
             super().__init__()
@@ -1052,6 +1105,7 @@ def test_if_by_if_forward_control_tuple_switch():
             else:
                 b = self.add(a, x)
             return a, b, x
+
     class Branch2Net(nn.Cell):
         def __init__(self):
             super().__init__()
@@ -1086,8 +1140,9 @@ def test_if_by_if_forward_control_tuple_switch():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1095,13 +1150,14 @@ def test_if_by_if_forward_control_tuple_switch():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level0
 @pytest.mark.platform_arm_ascend_training
-@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_if_forward_control_inside_net():
     class Branch3Net(nn.Cell):
@@ -1120,6 +1176,7 @@ def test_if_by_if_forward_control_inside_net():
             a = a * b
             out = a + b + x
             return out
+
     class Branch2Net(nn.Cell):
         def __init__(self):
             super().__init__()
@@ -1152,8 +1209,9 @@ def test_if_by_if_forward_control_inside_net():
                 a = self.sub(a, b)
             out = self.net(a, b, x)
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1161,10 +1219,11 @@ def test_if_by_if_forward_control_inside_net():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1194,8 +1253,9 @@ def test_if_by_if_forward_use_namespace():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1203,10 +1263,11 @@ def test_if_by_if_forward_use_namespace():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1240,8 +1301,9 @@ def test_if_by_if_forward_use_global_op():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1249,10 +1311,11 @@ def test_if_by_if_forward_use_global_op():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1273,8 +1336,9 @@ def test_for_with_if_by_if_forward():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1282,10 +1346,11 @@ def test_for_with_if_by_if_forward():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1308,8 +1373,9 @@ def test_for_with_if_by_if_forward_namespace():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1317,7 +1383,7 @@ def test_for_with_if_by_if_forward_namespace():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
@@ -1355,8 +1421,9 @@ def test_if_by_if_forward_const_branch_inner():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1364,10 +1431,11 @@ def test_if_by_if_forward_const_branch_inner():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
+
 @pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
@@ -1401,8 +1469,9 @@ def test_if_by_if_forward_all_const_branch():
             a = a * b
             out = a + b + x
             return out
+
     # graph mode
-    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
+    context.set_context(mode=context.GRAPH_MODE)
     if_net = MyIfByIfNet()
     net = if_net
     idx = Tensor(np.array(2), dtype=ms.float32)
@@ -1410,13 +1479,14 @@ def test_if_by_if_forward_all_const_branch():
     x = Tensor(np.array(0), dtype=ms.float32)
     graph_output = net(idx, end, x)
     # pynative mode
-    context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
+    context.set_context(mode=context.PYNATIVE_MODE)
     pynative_output = net(idx, end, x)
     assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001)
 
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_const_grad():
     class MyNet(nn.Cell):
@@ -1452,6 +1522,7 @@ def test_if_const_grad():
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_if_const_grad():
     class MyNet(nn.Cell):
@@ -1491,6 +1562,7 @@ def test_if_by_if_const_grad():
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_while_const_grad():
     class MyNet(nn.Cell):
@@ -1524,6 +1596,7 @@ def test_while_const_grad():
 
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
+@pytest.mark.platform_x86_gpu_training
 @pytest.mark.env_onecard
 def test_if_by_while_const_grad():
     class MyNet(nn.Cell):
diff --git a/tests/st/dump/test_data_dump.py b/tests/st/dump/test_data_dump.py
index 29056acf1ae..f1b637084d7 100644
--- a/tests/st/dump/test_data_dump.py
+++ b/tests/st/dump/test_data_dump.py
@@ -55,7 +55,7 @@ def change_current_dump_json(file_name, dump_path):
         json.dump(data, f)
 
 
-@pytest.mark.level0
+@pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
 @pytest.mark.env_onecard
diff --git a/tests/st/fl/albert/src/cell_wrapper.py b/tests/st/fl/albert/src/cell_wrapper.py
index 477ddba3eba..187792c0543 100644
--- a/tests/st/fl/albert/src/cell_wrapper.py
+++ b/tests/st/fl/albert/src/cell_wrapper.py
@@ -295,5 +295,5 @@ class NetworkNoClientTrainCell(nn.Cell):
                                                  self.cast(F.tuple_to_array((self.sens,)),
                                                            mstype.float32))
         grads = self.hyper_map(F.partial(clip_grad, self.clip_type, self.clip_value), grads)
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
diff --git a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py
index f3527397549..fcc3be0fd08 100644
--- a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py
+++ b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py
@@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend():
     new_list = ["total_epochs=10", "10"]
     utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
     dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
-    config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
+    config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
     exec_network_shell = "cd resnet/scripts; bash run_distribute_train.sh {} {} {}"\
         .format(utils.rank_table_path, dataset_path, config_path)
     os.system(exec_network_shell)
@@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu():
     new_list = ["total_epochs=10", "10"]
     utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py"))
     dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin")
-    config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml")
+    config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml")
     os.system("nvidia-smi")
     exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \
         .format(dataset_path, config_path)
diff --git a/tests/st/model_zoo_tests/transformer/test_transformer.py b/tests/st/model_zoo_tests/transformer/test_transformer.py
index 8ace3c49c2d..cfcb049398a 100644
--- a/tests/st/model_zoo_tests/transformer/test_transformer.py
+++ b/tests/st/model_zoo_tests/transformer/test_transformer.py
@@ -145,7 +145,7 @@ class TimeMonitor(Callback):
         self.per_step_mseconds_list.append(epoch_mseconds / self.data_size)
 
 
-@pytest.mark.level0
+@pytest.mark.level1
 @pytest.mark.platform_arm_ascend_training
 @pytest.mark.platform_x86_ascend_training
 @pytest.mark.env_onecard
diff --git a/tests/st/model_zoo_tests/yolov3/src/yolov3.py b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
index 7ddf3ae695f..643fe0be1d7 100644
--- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py
+++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py
@@ -671,7 +671,8 @@ class TrainingWrapper(nn.Cell):
         if self.reducer_flag:
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 class YoloBoxScores(nn.Cell):
diff --git a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py
index 273a45302e9..8a073f0fb40 100644
--- a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py
+++ b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py
@@ -59,7 +59,7 @@ class YoloBlock(nn.Cell):
 
     Args:
         in_channels: Integer. Input channel.
-        out_chls: Interger. Middle channel.
+        out_chls: Integer. Middle channel.
         out_channels: Integer. Output channel.
 
     Returns:
@@ -108,7 +108,7 @@ class YOLOv3(nn.Cell):
      Args:
          backbone_shape: List. Darknet output channels shape.
          backbone: Cell. Backbone Network.
-         out_channel: Interger. Output channel.
+         out_channel: Integer. Output channel.
 
      Returns:
          Tensor, output tensor.
@@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell):
         grads = self.grad(self.network, weights)(*args, sens)
         if self.reducer_flag:
             grads = self.grad_reducer(grads)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py
index 0125875fd4f..a76ae7808f3 100644
--- a/tests/st/networks/models/bert/src/bert_for_pre_training.py
+++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py
@@ -321,8 +321,8 @@ class BertTrainOneStepCell(nn.Cell):
             # apply grad reducer on grads
             grads = self.grad_reducer(grads)
 
-        succ = self.optimizer(grads)
-        return F.depend(loss, succ)
+        self.optimizer(grads)
+        return loss
 
 
 grad_scale = C.MultitypeFuncGraph("grad_scale")
@@ -431,9 +431,6 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py
index f76604ecfcf..2114dd12896 100644
--- a/tests/st/networks/models/bert/src/utils.py
+++ b/tests/st/networks/models/bert/src/utils.py
@@ -122,12 +122,9 @@ class BertFinetuneCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond)
 
 class BertCLSModel(nn.Cell):
     """
diff --git a/tests/st/ops/cpu/test_softplus_grad_op.py b/tests/st/ops/cpu/test_softplus_grad_op.py
index 5dc8cc5a3e9..76879689960 100644
--- a/tests/st/ops/cpu/test_softplus_grad_op.py
+++ b/tests/st/ops/cpu/test_softplus_grad_op.py
@@ -48,7 +48,7 @@ class Grad(nn.Cell):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_softplus_grad_1d_fp32():
+def test_softplus_grad():
     x = np.array([0.58401114, 0.68800163, 0.9760397, 0.14702141, 0.46563736, 0.9607501,
                   0.14567593, 0.12261796, 0.37054458, 0.46421242]).astype(np.float32)
     dy = np.array([0.5559598, 0.96994054, 0.24770357, 0.34646875, 0.2984393, 0.03287048,
@@ -67,7 +67,7 @@ def test_softplus_grad_1d_fp32():
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_softplus_grad_3d_fp16():
+def test_softplus_grad_fp16():
     np.random.seed(42)
     x_np = np.random.randn(5, 3, 6).astype(np.float16)
     dy_np = np.random.randn(5, 3, 6).astype(np.float16)
@@ -76,17 +76,3 @@ def test_softplus_grad_3d_fp16():
     output = grad(Tensor(x_np), Tensor(dy_np))
     expect = dy_np * np.exp(x_np) / (1 + np.exp(x_np))
     assert np.allclose(output[0].asnumpy(), expect, rtol=1e-2)
-
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_cpu
-@pytest.mark.env_onecard
-def test_softplus_grad_7d_fp32():
-    np.random.seed(20)
-    x_np = np.random.randn(5, 3, 6, 3, 4, 5, 6).astype(np.float32)
-    dy_np = np.random.randn(5, 3, 6, 3, 4, 5, 6).astype(np.float32)
-    net = SoftplusNet()
-    grad = Grad(net)
-    output = grad(Tensor(x_np), Tensor(dy_np))
-    expect = dy_np * np.exp(x_np) / (1 + np.exp(x_np))
-    assert np.allclose(output[0].asnumpy(), expect, rtol=1e-2)
diff --git a/tests/st/ops/cpu/test_softplus_op.py b/tests/st/ops/cpu/test_softplus_op.py
index 87aada0feb8..19af2a20762 100644
--- a/tests/st/ops/cpu/test_softplus_op.py
+++ b/tests/st/ops/cpu/test_softplus_op.py
@@ -40,21 +40,7 @@ def SoftplusCompute(x):
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_softplus_0d_fp32():
-    x_np = np.array(1.2, np.float32)
-    y_np = SoftplusCompute(x_np)
-
-    x_ms = Tensor(x_np)
-    net = SoftplusNet()
-    y_ms = net(x_ms)
-
-    assert np.allclose(y_np, y_ms.asnumpy())
-
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_cpu
-@pytest.mark.env_onecard
-def test_softplus_1d_fp32():
+def test_softplus_1d():
     x_np = np.random.random((50,)).astype(np.float32)
     y_np = SoftplusCompute(x_np)
 
@@ -68,7 +54,7 @@ def test_softplus_1d_fp32():
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_softplus_2d_fp32():
+def test_softplus_2d():
     x_np = np.random.random((50, 40)).astype(np.float32)
     y_np = SoftplusCompute(x_np)
 
@@ -82,7 +68,7 @@ def test_softplus_2d_fp32():
 @pytest.mark.level0
 @pytest.mark.platform_x86_cpu
 @pytest.mark.env_onecard
-def test_softplus_4d_fp32():
+def test_softplus_4d():
     x_np = np.random.random((32, 3, 224, 224)).astype(np.float32)
     y_np = SoftplusCompute(x_np)
 
@@ -119,17 +105,3 @@ def test_softplus_4d_fp16():
     y_ms = net(x_ms)
 
     assert np.allclose(y_np, y_ms.asnumpy(), rtol=5e-3)
-
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_cpu
-@pytest.mark.env_onecard
-def test_softplus_7d_fp32():
-    x_np = np.random.random((32, 3, 20, 20, 20, 10, 10)).astype(np.float32)
-    y_np = SoftplusCompute(x_np)
-
-    x_ms = Tensor(x_np)
-    net = SoftplusNet()
-    y_ms = net(x_ms)
-
-    assert np.allclose(y_np, y_ms.asnumpy(), rtol=5e-3)
diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt
index 58288960327..86d21eef618 100644
--- a/tests/ut/cpp/CMakeLists.txt
+++ b/tests/ut/cpp/CMakeLists.txt
@@ -68,6 +68,7 @@ if(ENABLE_MINDDATA)
             ./ps/*.cc
             ./fl/*.cc
             ./cxx_api/*.cc
+            ./tbe/*.cc
             )
 
     if(NOT ENABLE_PYTHON)
diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt
index 3fba58918d0..1f33a1b4e3f 100644
--- a/tests/ut/cpp/dataset/CMakeLists.txt
+++ b/tests/ut/cpp/dataset/CMakeLists.txt
@@ -12,6 +12,8 @@ SET(DE_UT_SRCS
         btree_test.cc
         buddy_test.cc
         build_vocab_test.cc
+        c_api_audio_a_to_q_test.cc
+        c_api_audio_r_to_z_test.cc
         c_api_cache_test.cc
         c_api_dataset_album_test.cc
         c_api_audio_a_to_q_test.cc
@@ -20,6 +22,7 @@ SET(DE_UT_SRCS
         c_api_dataset_coco_test.cc
         c_api_dataset_config_test.cc
         c_api_dataset_csv_test.cc
+        c_api_dataset_flickr_test.cc
         c_api_dataset_iterator_test.cc
         c_api_dataset_manifest_test.cc
         c_api_dataset_minddata_test.cc
diff --git a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
index 7dc03926a17..089029ffd13 100644
--- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
+++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc
@@ -30,6 +30,65 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
  protected:
 };
 
+TEST_F(MindDataTestPipeline, TestAmplitudeToDBPipeline) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto amplitude_to_db_op = audio::AmplitudeToDB();
+
+  ds = ds->Map({amplitude_to_db_op});
+  EXPECT_NE(ds, nullptr);
+
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2, 200};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["inputData"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 2);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestAmplitudeToDBWrongArgs) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto amplitude_to_db_op = audio::AmplitudeToDB(ScaleType::kPower, 1.0, -1e-10, 80.0);
+
+  ds = ds->Map({amplitude_to_db_op});
+  EXPECT_NE(ds, nullptr);
+
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  // Expect failure
+  EXPECT_EQ(iter, nullptr);
+}
+
 TEST_F(MindDataTestPipeline, Level0_TestBandBiquad001) {
   MS_LOG(INFO) << "Basic Function Test";
   // Original waveform
@@ -96,4 +155,335 @@ TEST_F(MindDataTestPipeline, Level0_TestBandBiquad002) {
 
   std::shared_ptr<Iterator> iter02 = ds02->CreateIterator();
   EXPECT_EQ(iter02, nullptr);
-}
\ No newline at end of file
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestAllpassBiquad001) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto AllpassBiquadOp = audio::AllpassBiquad(44100, 200.0);
+
+  ds = ds->Map({AllpassBiquadOp});
+  EXPECT_NE(ds, nullptr);
+
+  // Filtered waveform by allpassbiquad
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2, 200};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["inputData"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 2);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestAllpassBiquad002) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::shared_ptr<SchemaObj> schema = Schema();
+  // Original waveform
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  std::shared_ptr<Dataset> ds01;
+  std::shared_ptr<Dataset> ds02;
+  EXPECT_NE(ds, nullptr);
+
+  // Check sample_rate
+  MS_LOG(INFO) << "Sample_rate_ is zero.";
+  auto allpass_biquad_op_01 = audio::AllpassBiquad(0, 200.0, 0.707);
+  ds01 = ds->Map({allpass_biquad_op_01});
+  EXPECT_NE(ds01, nullptr);
+
+  std::shared_ptr<Iterator> iter01 = ds01->CreateIterator();
+  EXPECT_EQ(iter01, nullptr);
+
+  // Check Q_
+  MS_LOG(INFO) << "Q_ is zero.";
+  auto allpass_biquad_op_02 = audio::AllpassBiquad(44100, 200, 0);
+  ds02 = ds->Map({allpass_biquad_op_02});
+  EXPECT_NE(ds02, nullptr);
+
+  std::shared_ptr<Iterator> iter02 = ds02->CreateIterator();
+  EXPECT_EQ(iter02, nullptr);
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBandpassBiquad001) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto BandpassBiquadOp = audio::BandpassBiquad(44100, 200.0);
+
+  ds = ds->Map({BandpassBiquadOp});
+  EXPECT_NE(ds, nullptr);
+
+  // Filtered waveform by bandpassbiquad
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2, 200};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["inputData"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 2);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBandpassBiquad002) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::shared_ptr<SchemaObj> schema = Schema();
+  // Original waveform
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  std::shared_ptr<Dataset> ds01;
+  std::shared_ptr<Dataset> ds02;
+  EXPECT_NE(ds, nullptr);
+
+  // Check sample_rate
+  MS_LOG(INFO) << "sample_rate is zero.";
+  auto bandpass_biquad_op_01 = audio::BandpassBiquad(0, 200);
+  ds01 = ds->Map({bandpass_biquad_op_01});
+  EXPECT_NE(ds01, nullptr);
+
+  std::shared_ptr<Iterator> iter01 = ds01->CreateIterator();
+  EXPECT_EQ(iter01, nullptr);
+
+  // Check Q_
+  MS_LOG(INFO) << "Q_ is zero.";
+  auto bandpass_biquad_op_02 = audio::BandpassBiquad(44100, 200, 0);
+  ds02 = ds->Map({bandpass_biquad_op_02});
+  EXPECT_NE(ds02, nullptr);
+
+  std::shared_ptr<Iterator> iter02 = ds02->CreateIterator();
+  EXPECT_EQ(iter02, nullptr);
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBandrejectBiquad001) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto BandrejectBiquadOp = audio::BandrejectBiquad(44100, 200.0);
+
+  ds = ds->Map({BandrejectBiquadOp});
+  EXPECT_NE(ds, nullptr);
+
+  // Filtered waveform by bandrejectbiquad
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2, 200};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["inputData"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 2);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBandrejectBiquad002) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::shared_ptr<SchemaObj> schema = Schema();
+  // Original waveform
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  std::shared_ptr<Dataset> ds01;
+  std::shared_ptr<Dataset> ds02;
+  EXPECT_NE(ds, nullptr);
+
+  // Check sample_rate
+  MS_LOG(INFO) << "sample_rate is zero.";
+  auto bandreject_biquad_op_01 = audio::BandrejectBiquad(0, 200);
+  ds01 = ds->Map({bandreject_biquad_op_01});
+  EXPECT_NE(ds01, nullptr);
+
+  std::shared_ptr<Iterator> iter01 = ds01->CreateIterator();
+  EXPECT_EQ(iter01, nullptr);
+
+  // Check Q_
+  MS_LOG(INFO) << "Q_ is zero.";
+  auto bandreject_biquad_op_02 = audio::BandrejectBiquad(44100, 200, 0);
+  ds02 = ds->Map({bandreject_biquad_op_02});
+  EXPECT_NE(ds02, nullptr);
+
+  std::shared_ptr<Iterator> iter02 = ds02->CreateIterator();
+  EXPECT_EQ(iter02, nullptr);
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBassBiquad001) {
+  MS_LOG(INFO) << "Basic Function Test";
+  // Original waveform
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto BassBiquadOp = audio::BassBiquad(44100, 50, 200.0);
+
+  ds = ds->Map({BassBiquadOp});
+  EXPECT_NE(ds, nullptr);
+
+  // Filtered waveform by bassbiquad
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2, 200};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["inputData"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 2);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, Level0_TestBassBiquad002) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::shared_ptr<SchemaObj> schema = Schema();
+  // Original waveform
+  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  std::shared_ptr<Dataset> ds01;
+  std::shared_ptr<Dataset> ds02;
+  EXPECT_NE(ds, nullptr);
+
+  // Check sample_rate
+  MS_LOG(INFO) << "sample_rate is zero.";
+  auto bass_biquad_op_01 = audio::BassBiquad(0, 50, 200.0);
+  ds01 = ds->Map({bass_biquad_op_01});
+  EXPECT_NE(ds01, nullptr);
+
+  std::shared_ptr<Iterator> iter01 = ds01->CreateIterator();
+  EXPECT_EQ(iter01, nullptr);
+
+  // Check Q_
+  MS_LOG(INFO) << "Q_ is zero.";
+  auto bass_biquad_op_02 = audio::BassBiquad(44100, 50, 200.0, 0);
+  ds02 = ds->Map({bass_biquad_op_02});
+  EXPECT_NE(ds02, nullptr);
+
+  std::shared_ptr<Iterator> iter02 = ds02->CreateIterator();
+  EXPECT_EQ(iter02, nullptr);
+}
+
+TEST_F(MindDataTestPipeline, TestAnglePipeline) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipeline";
+
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {2, 2}));
+  std::shared_ptr<Dataset> ds = RandomData(50, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto angle_op = audio::Angle();
+
+  ds = ds->Map({angle_op});
+  EXPECT_NE(ds, nullptr);
+
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(ds, nullptr);
+
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::vector<int64_t> expected = {2};
+
+  int i = 0;
+  while (row.size() != 0) {
+    auto col = row["complex"];
+    ASSERT_EQ(col.Shape(), expected);
+    ASSERT_EQ(col.Shape().size(), 1);
+    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+  EXPECT_EQ(i, 50);
+
+  iter->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestAnglePipelineError) {
+  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipelineError";
+
+  std::shared_ptr<SchemaObj> schema = Schema();
+  ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {3, 2, 1}));
+  std::shared_ptr<Dataset> ds = RandomData(4, schema);
+  EXPECT_NE(ds, nullptr);
+
+  ds = ds->SetNumWorkers(4);
+  EXPECT_NE(ds, nullptr);
+
+  auto angle_op = audio::Angle();
+
+  ds = ds->Map({angle_op});
+  EXPECT_NE(ds, nullptr);
+
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  EXPECT_ERROR(iter->GetNextRow(&row));
+}
diff --git a/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc b/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc
index cc833a53654..902f906a5c2 100644
--- a/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc
+++ b/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include "common/common.h"
 #include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/include/dataset/datasets.h"
@@ -24,73 +23,12 @@ using mindspore::LogStream;
 using mindspore::ExceptionType::NoExceptionType;
 using mindspore::MsLogLevel::INFO;
 
-class MindDataTestPipeline : public UT::DatasetOpTesting {
- protected:
+class MindDataTestPipeline : public UT::Common {
+ public:
 };
 
-TEST_F(MindDataTestPipeline, TestTimeMaskingPipeline) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeMaskingPipeline.";
-  // Original waveform
-  std::shared_ptr<SchemaObj> schema = Schema();
-  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200}));
-  std::shared_ptr<Dataset> ds = RandomData(50, schema);
-  EXPECT_NE(ds, nullptr);
-
-  ds = ds->SetNumWorkers(4);
-  EXPECT_NE(ds, nullptr);
-
-  auto timemasking = audio::TimeMasking(true, 6);
-
-  ds = ds->Map({timemasking});
-  EXPECT_NE(ds, nullptr);
-
-  // Filtered waveform by bandbiquad
-  std::shared_ptr<Iterator> iter = ds->CreateIterator();
-  EXPECT_NE(ds, nullptr);
-
-  std::unordered_map<std::string, mindspore::MSTensor> row;
-  ASSERT_OK(iter->GetNextRow(&row));
-
-  std::vector<int64_t> expected = {2, 200};
-
-  int i = 0;
-  while (row.size() != 0) {
-    auto col = row["inputData"];
-    ASSERT_EQ(col.Shape(), expected);
-    ASSERT_EQ(col.Shape().size(), 2);
-    ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32);
-    ASSERT_OK(iter->GetNextRow(&row));
-    i++;
-  }
-  EXPECT_EQ(i, 50);
-
-  iter->Stop();
-}
-
-TEST_F(MindDataTestPipeline, TestTimeMaskingWrongArgs) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeMaskingWrongArgs.";
-  // Original waveform
-  std::shared_ptr<SchemaObj> schema = Schema();
-  ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 20}));
-  std::shared_ptr<Dataset> ds = RandomData(50, schema);
-  EXPECT_NE(ds, nullptr);
-
-  ds = ds->SetNumWorkers(4);
-  EXPECT_NE(ds, nullptr);
-
-  auto timemasking = audio::TimeMasking(true, -100);
-
-  ds = ds->Map({timemasking});
-  EXPECT_NE(ds, nullptr);
-
-  // Filtered waveform by bandbiquad
-  std::shared_ptr<Iterator> iter = ds->CreateIterator();
-  // Expect failure
-  EXPECT_EQ(iter, nullptr);
-}
-
 TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeStretchPipeline.";
+  MS_LOG(INFO) << "Doing test TimeStretchOp with custom param value. Pipeline.";
   // op param
   int freq = 1025;
   int hop_length = 512;
@@ -116,7 +54,7 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) {
   std::unordered_map<std::string, mindspore::MSTensor> row;
   ASSERT_OK(iter->GetNextRow(&row));
 
-  std::vector<int64_t> expected = {2, freq, static_cast<int64_t>(std::ceil(400 / rate)), 2};
+  std::vector<int64_t> expected = {2, freq, int(std::ceil(400 / rate)), 2};
 
   int i = 0;
   while (row.size() != 0) {
@@ -132,7 +70,7 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) {
 }
 
 TEST_F(MindDataTestPipeline, TestTimeStretchPipelineWrongArgs) {
-  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeStretchPipelineWrongArgs.";
+  MS_LOG(INFO) << "Doing test TimeStretchOp with wrong param value. Pipeline.";
   // op param
   int freq = 1025;
   int hop_length = 512;
@@ -155,4 +93,4 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipelineWrongArgs) {
   std::shared_ptr<Iterator> iter = ds->CreateIterator();
   // Expect failure
   EXPECT_EQ(iter, nullptr);
-}
+}
\ No newline at end of file
diff --git a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
index 33149fd8520..0647ae7bdd0 100644
--- a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
+++ b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc
@@ -27,6 +27,102 @@ class MindDataTestPipeline : public UT::DatasetOpTesting {
 
 // Tests for vision C++ API A to Q TensorTransform Operations (in alphabetical order)
 
+TEST_F(MindDataTestPipeline, TestAdjustGammaSuccess1) {
+  // pipeline 3-channel
+  MS_LOG(INFO) << "Pipeline Test.";
+  std::string MindDataPath = "data/dataset";
+  std::string folder_path = MindDataPath + "/testImageNetData/train/";
+  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds1, nullptr);
+  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds2, nullptr);
+
+  auto adjustgamma_op = vision::AdjustGamma(10.0);
+
+  ds1 = ds1->Map({adjustgamma_op});
+  EXPECT_NE(ds1, nullptr);
+
+  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
+  EXPECT_NE(iter1, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row1;
+  iter1->GetNextRow(&row1);
+
+  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
+  EXPECT_NE(iter2, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row2;
+  iter2->GetNextRow(&row2);
+
+  uint64_t i = 0;
+  while (row1.size() != 0) {
+    i++;
+    auto image = row1["image"];
+    iter1->GetNextRow(&row1);
+    iter2->GetNextRow(&row2);
+  }
+  EXPECT_EQ(i, 2);
+
+  iter1->Stop();
+  iter2->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestAdjustGammaSuccess2) {
+  // pipeline 1-channel
+  MS_LOG(INFO) << "Pipeline Test.";
+  std::string MindDataPath = "data/dataset";
+  std::string folder_path = MindDataPath + "/testImageNetData/train/";
+  std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds1, nullptr);
+  std::shared_ptr<Dataset> ds2 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds2, nullptr);
+
+  auto adjustgamma_op = vision::AdjustGamma(10.0);
+  auto rgb2gray_op = vision::RGB2GRAY();
+
+  ds1 = ds1->Map({rgb2gray_op, adjustgamma_op});
+  EXPECT_NE(ds1, nullptr);
+
+  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
+  EXPECT_NE(iter1, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row1;
+  iter1->GetNextRow(&row1);
+
+  std::shared_ptr<Iterator> iter2 = ds2->CreateIterator();
+  EXPECT_NE(iter2, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row2;
+  iter2->GetNextRow(&row2);
+
+  uint64_t i = 0;
+  while (row1.size() != 0) {
+    i++;
+    auto image = row1["image"];
+    iter1->GetNextRow(&row1);
+    iter2->GetNextRow(&row2);
+  }
+  EXPECT_EQ(i, 2);
+
+  iter1->Stop();
+  iter2->Stop();
+}
+
+TEST_F(MindDataTestPipeline, TestAdjustGammaParamCheck) {
+  // pipeline 3-channel
+  MS_LOG(INFO) << "Pipeline Test.";
+  std::string MindDataPath = "data/dataset";
+  std::string folder_path = MindDataPath + "/testImageNetData/train/";
+  std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 2));
+  EXPECT_NE(ds, nullptr);
+
+  // Case 1: Negative gamma
+  // Create objects for the tensor ops
+  std::shared_ptr<TensorTransform> adjust_gamma(new vision::AdjustGamma(-1, 1.0));
+  auto ds1 = ds->Map({adjust_gamma});
+  EXPECT_NE(ds1, nullptr);
+  // Create an iterator over the result of the above dataset
+  std::shared_ptr<Iterator> iter1 = ds1->CreateIterator();
+  // Expect failure: invalid value of AdjustGamma
+  EXPECT_EQ(iter1, nullptr);
+}
+
 TEST_F(MindDataTestPipeline, TestAutoContrastSuccess1) {
   MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAutoContrastSuccess1.";
 
diff --git a/tests/ut/cpp/dataset/cmu_arctic_test.cc b/tests/ut/cpp/dataset/cmu_arctic_test.cc
new file mode 100644
index 00000000000..f799ebc897e
--- /dev/null
+++ b/tests/ut/cpp/dataset/cmu_arctic_test.cc
@@ -0,0 +1,145 @@
+/**
+ * Copyright 2019-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> 
+
+#include "utils/ms_utils.h"
+#include "common/common.h"
+#include "minddata/dataset/core/client.h"
+#include "minddata/dataset/core/global_context.h"
+#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h"
+#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h"
+#include "minddata/dataset/include/dataset/datasets.h"
+#include "minddata/dataset/util/path.h"
+#include "minddata/dataset/util/status.h"
+#include "gtest/gtest.h"
+#include "utils/log_adapter.h"
+#include "securec.h"
+
+namespace common = mindspore::common;
+using namespace mindspore::dataset;
+using mindspore::LogStream;
+using mindspore::ExceptionType::NoExceptionType;
+using mindspore::MsLogLevel::ERROR;
+
+std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
+
+std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
+
+class MindDataTestCmuArcticSampler : public UT::DatasetOpTesting {
+ protected:
+};
+
+TEST_F(MindDataTestCmuArcticSampler, TestSequentialCmuArcticWithRepeat) {
+  std::string folder_path = datasets_root_path_ + "/testCmuArcticData/";
+  int64_t num_samples = 10;
+  int64_t start_index = 0;
+  std::shared_ptr<Dataset> ds =
+    CmuArctic(folder_path, "aew", std::make_shared<SequentialSampler>(start_index, num_samples));
+  EXPECT_NE(ds, nullptr);
+  ds = ds->Repeat(2);
+  EXPECT_NE(ds, nullptr);
+  std::shared_ptr<Iterator> iter = ds->CreateIterator();
+  EXPECT_NE(iter, nullptr);
+  std::unordered_map<std::string, mindspore::MSTensor> row;
+  ASSERT_OK(iter->GetNextRow(&row));
+
+  std::string_view utterance;
+  std::string_view utterance_id;
+  uint32_t rate;
+  
+  uint64_t i = 0;
+  while (row.size() != 0) {
+
+    auto waveform = row["waveform"];
+    auto sample_rate = row["sample_rate"];
+    auto utterance_ = row["utterance"];
+    auto utterance_id_ = row["utterance_id"];
+
+    MS_LOG(ERROR) << "Tensor image shape: " << waveform.Shape();
+
+    std::shared_ptr<Tensor> t_rate;
+    ASSERT_OK(Tensor::CreateFromMSTensor(sample_rate, &t_rate));
+    ASSERT_OK(t_rate->GetItemAt<uint32_t>(&rate, {}));
+    MS_LOG(ERROR) << "Tensor rate: " << rate;
+
+    std::shared_ptr<Tensor> t_utterance;
+    ASSERT_OK(Tensor::CreateFromMSTensor(utterance_, &t_utterance));
+    ASSERT_OK(t_utterance->GetItemAt(&utterance, {}));
+    MS_LOG(ERROR) << "Tensor utterance value: " << utterance;
+
+    std::shared_ptr<Tensor> t_utterance_id;
+    ASSERT_OK(Tensor::CreateFromMSTensor(utterance_id_, &t_utterance_id));
+    ASSERT_OK(t_utterance_id->GetItemAt(&utterance_id, {}));
+    MS_LOG(ERROR) << "Tensor utterance_id value: " << utterance_id;
+
+
+    ASSERT_OK(iter->GetNextRow(&row));
+    i++;
+  }
+
+  EXPECT_EQ(i, 20);
+  iter->Stop();
+}
+
+// TEST_F(MindDataTestMnistSampler, TestSequentialImageFolderWithRepeatBatch) {
+//   std::string folder_path = datasets_root_path_ + "/testMnistData/";
+//   int64_t num_samples = 10;
+//   int64_t start_index = 0;
+//   std::shared_ptr<Dataset> ds =
+//     Mnist(folder_path, "all", std::make_shared<SequentialSampler>(start_index, num_samples));
+//   EXPECT_NE(ds, nullptr);
+//   ds = ds->Repeat(2);
+//   EXPECT_NE(ds, nullptr);
+//   ds = ds->Batch(5);
+//   EXPECT_NE(ds, nullptr);
+//   std::shared_ptr<Iterator> iter = ds->CreateIterator();
+//   EXPECT_NE(iter, nullptr);
+//   std::vector<std::vector<uint32_t>> expected = {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}};
+//   std::unordered_map<std::string, mindspore::MSTensor> row;
+//   ASSERT_OK(iter->GetNextRow(&row));
+//   uint64_t i = 0;
+//   while (row.size() != 0) {
+//     auto image = row["image"];
+//     auto label = row["label"];
+//     MS_LOG(INFO) << "Tensor image shape: " << image.Shape();
+//     TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label);
+//     std::shared_ptr<Tensor> de_expected_label;
+//     ASSERT_OK(Tensor::CreateFromVector(expected[i % 4], &de_expected_label));
+//     mindspore::MSTensor expected_label =
+//       mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_expected_label));
+//     EXPECT_MSTENSOR_EQ(label, expected_label);
+//     ASSERT_OK(iter->GetNextRow(&row));
+//     i++;
+//   }
+//   EXPECT_EQ(i, 4);
+//   iter->Stop();
+// }
+
+
diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc
index 0b612a653e7..70c794856c0 100644
--- a/tests/ut/cpp/dataset/common/bboxop_common.cc
+++ b/tests/ut/cpp/dataset/common/bboxop_common.cc
@@ -164,8 +164,8 @@ void BBoxOpCommon::CompareActualAndExpected(const std::string &op_name) {
     EXPECT_TRUE(remove(actual_path.c_str()) == 0);
     // compare using ==operator by Tensor
     std::shared_ptr<CVTensor> expect_img_t, actual_img_t;
-    CVTensor::CreateFromMat(expect_img, &expect_img_t);
-    CVTensor::CreateFromMat(actual_img, &actual_img_t);
+    CVTensor::CreateFromMat(expect_img, 3, &expect_img_t);
+    CVTensor::CreateFromMat(actual_img, 3, &actual_img_t);
     if (actual_img.data) {
       EXPECT_EQ(*expect_img_t == *actual_img_t, true);
     } else {
diff --git a/tests/ut/cpp/dataset/common/cvop_common.cc b/tests/ut/cpp/dataset/common/cvop_common.cc
index adddb1ad41d..ec2016bf543 100644
--- a/tests/ut/cpp/dataset/common/cvop_common.cc
+++ b/tests/ut/cpp/dataset/common/cvop_common.cc
@@ -55,7 +55,7 @@ void CVOpCommon::GetInputImage(std::string filename) {
     Tensor::CreateFromFile(filename, &raw_input_tensor_);
     raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
     std::shared_ptr<CVTensor> input_cv_tensor;
-    CVTensor::CreateFromMat(raw_cv_image_, &input_cv_tensor);
+    CVTensor::CreateFromMat(raw_cv_image_, 3, &input_cv_tensor);
     input_tensor_ = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
     SwapRedAndBlue(input_tensor_, &input_tensor_);
     if (raw_cv_image_.data) {
@@ -134,6 +134,10 @@ void CVOpCommon::CheckImageShapeAndData(const std::shared_ptr<Tensor> &output_te
       expect_image_path = dir_path + "imagefolder/apple_expect_randomaffine.jpg";
       actual_image_path = dir_path + "imagefolder/apple_actual_randomaffine.jpg";
       break;
+    case kAdjustGamma:
+      expect_image_path = dir_path + "imagefolder/apple_expect_adjustgamma.png";
+      actual_image_path = dir_path + "imagefolder/apple_actual_adjustgamma.png";
+      break;
     case kAutoContrast:
       expect_image_path = dir_path + "imagefolder/apple_expect_autocontrast.jpg";
       actual_image_path = dir_path + "imagefolder/apple_actual_autocontrast.jpg";
diff --git a/tests/ut/cpp/dataset/common/cvop_common.h b/tests/ut/cpp/dataset/common/cvop_common.h
index 5dbb5ea98cd..1effc6360af 100644
--- a/tests/ut/cpp/dataset/common/cvop_common.h
+++ b/tests/ut/cpp/dataset/common/cvop_common.h
@@ -44,6 +44,7 @@ class CVOpCommon : public Common {
     kRandomAffine,
     kRandomPosterize,
     kAutoContrast,
+    kAdjustGamma,
     kEqualize
   };
 
diff --git a/tests/ut/cpp/dataset/deserialize_test.cc b/tests/ut/cpp/dataset/deserialize_test.cc
index b333660171c..61b8ada1371 100644
--- a/tests/ut/cpp/dataset/deserialize_test.cc
+++ b/tests/ut/cpp/dataset/deserialize_test.cc
@@ -13,13 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <nlohmann/json.hpp>
 #include "common/common.h"
 #include "minddata/dataset/core/global_context.h"
 #include "minddata/dataset/engine/serdes.h"
 #include "minddata/dataset/include/dataset/datasets.h"
 #include "minddata/dataset/include/dataset/vision.h"
 #include "minddata/dataset/include/dataset/transforms.h"
+#include "minddata/dataset/kernels/ir/data/transforms_ir.h"
 
 using namespace mindspore::dataset;
 using mindspore::dataset::DatasetNode;
@@ -33,14 +33,15 @@ class MindDataTestDeserialize : public UT::DatasetOpTesting {
 
 void compare_dataset(std::shared_ptr<DatasetNode> ds) {
   nlohmann::json out_json;
-  std::make_shared<Serdes>()->SaveToJSON(ds, "dataset_pipeline.json", &out_json);
+  ASSERT_OK(Serdes::SaveToJSON(ds, "dataset_pipeline.json", &out_json));
   // output the deserialized out_json to ds1 and then out_json1
   std::shared_ptr<DatasetNode> ds1;
   ASSERT_OK(Serdes::Deserialize("dataset_pipeline.json", &ds1));
   EXPECT_NE(ds1, nullptr);
+
   // check original and deserialized dataset are the same
   nlohmann::json out_json1;
-  std::make_shared<Serdes>()->SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1);
+  ASSERT_OK(Serdes::SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1));
   std::stringstream json_ss;
   json_ss << out_json;
   std::stringstream json_ss1;
@@ -305,6 +306,21 @@ TEST_F(MindDataTestDeserialize, TestDeserializeManifest) {
   std::shared_ptr<DatasetCache> cache = nullptr;
   std::shared_ptr<DatasetNode> ds =
     std::make_shared<ManifestNode>(data_file, "train", sampler, class_indexing, false, cache);
+  std::vector<int32_t> coordinates = {50, 50};
+  std::vector<int32_t> size = {224, 224};
+  std::shared_ptr<TensorOperation> operation1 = std::make_shared<vision::CropOperation>(coordinates, size);
+  std::shared_ptr<TensorOperation> operation2 = std::make_shared<vision::RgbToBgrOperation>();
+  std::shared_ptr<TensorOperation> operation3 = std::make_shared<vision::RgbToGrayOperation>();
+  std::shared_ptr<TensorOperation> operation4 =
+    std::make_shared<vision::SlicePatchesOperation>(5, 5, SliceMode::kDrop, 1);
+  std::shared_ptr<TensorOperation> operation5 = std::make_shared<vision::VerticalFlipOperation>();
+  std::vector<std::shared_ptr<TensorOperation>> operations;
+  operations.push_back(operation1);
+  operations.push_back(operation2);
+  operations.push_back(operation3);
+  operations.push_back(operation4);
+  operations.push_back(operation5);
+  ds = std::make_shared<MapNode>(ds, operations);
   ds = std::make_shared<BatchNode>(ds, 2, false);
   compare_dataset(ds);
 }
@@ -433,4 +449,36 @@ TEST_F(MindDataTestDeserialize, TestDeserializeInvalidJson) {
   // check the invalid json object would return error
   ASSERT_ERROR(Serdes::Deserialize("./data/dataset/testDataset1/datasetTestInvalidJson.json", &ds));
   EXPECT_EQ(ds, nullptr);
-}
\ No newline at end of file
+}
+
+TEST_F(MindDataTestDeserialize, TestDeserializeFill) {
+  MS_LOG(INFO) << "Doing MindDataTestDeserialize-Fill.";
+  std::vector<std::string> dataset_files = {"./data/dataset/testTextFileDataset/1.txt"};
+  std::shared_ptr<DatasetCache> cache = nullptr;
+  std::shared_ptr<DatasetNode> ds = std::make_shared<TextFileNode>(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache);
+  std::shared_ptr<Tensor> fill_value;
+  ASSERT_OK(Tensor::CreateScalar(true, &fill_value));
+  std::shared_ptr<TensorOperation> operation1 = std::make_shared<transforms::FillOperation>(fill_value);
+  std::shared_ptr<TensorOperation> operation2 = std::make_shared<text::ToNumberOperation>("int32_t");
+  std::vector<std::shared_ptr<TensorOperation>> ops = {operation1, operation2};
+  ds = std::make_shared<MapNode>(ds, ops);
+  compare_dataset(ds);
+}
+
+TEST_F(MindDataTestDeserialize, TestDeserializeTensor) {
+  MS_LOG(INFO) << "Doing MindDataTestDeserialize-Tensor.";
+  std::shared_ptr<Tensor> test_tensor;
+  std::vector<float> input = {1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.2, 0.7, 0.8, 0.9, 1.0, 2.0, 1.3, 3.0, 4.0};
+  ASSERT_OK(Tensor::CreateFromVector(input, TensorShape{3, 5}, &test_tensor));
+  nlohmann::json json_obj;
+  ASSERT_OK(test_tensor->to_json(&json_obj));
+  std::shared_ptr<Tensor> test_tensor1;
+  ASSERT_OK(Tensor::from_json(json_obj, &test_tensor1));
+  nlohmann::json json_obj1;
+  ASSERT_OK(test_tensor1->to_json(&json_obj1));
+  std::stringstream json_ss;
+  json_ss << json_obj;
+  std::stringstream json_ss1;
+  json_ss1 << json_obj1;
+  EXPECT_EQ(json_ss.str(), json_ss1.str());
+}
diff --git a/tests/ut/cpp/dataset/execute_test.cc b/tests/ut/cpp/dataset/execute_test.cc
index 19654c3c816..c7069a5b2f2 100644
--- a/tests/ut/cpp/dataset/execute_test.cc
+++ b/tests/ut/cpp/dataset/execute_test.cc
@@ -19,7 +19,9 @@
 #include "minddata/dataset/include/dataset/audio.h"
 #include "minddata/dataset/include/dataset/execute.h"
 #include "minddata/dataset/include/dataset/transforms.h"
+#include "minddata/dataset/include/dataset/audio.h"
 #include "minddata/dataset/include/dataset/vision.h"
+#include "minddata/dataset/include/dataset/audio.h"
 #include "minddata/dataset/include/dataset/text.h"
 #include "utils/log_adapter.h"
 
@@ -32,6 +34,132 @@ class MindDataTestExecute : public UT::DatasetOpTesting {
  protected:
 };
 
+TEST_F(MindDataTestExecute, TestAllpassBiquadWithEager) {
+  MS_LOG(INFO) << "Basic Function Test With Eager.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> allpass_biquad_01 = std::make_shared<audio::AllpassBiquad>(44100, 200);
+  mindspore::dataset::Execute Transform01({allpass_biquad_01});
+  // Filtered waveform by allpassbiquad
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_TRUE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestAllpassBiquadWithWrongArg) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::vector<double> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  // Check Q
+  MS_LOG(INFO) << "Q is zero.";
+  std::shared_ptr<TensorTransform> allpass_biquad_op = std::make_shared<audio::AllpassBiquad>(44100, 200, 0);
+  mindspore::dataset::Execute Transform01({allpass_biquad_op});
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_FALSE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestAdjustGammaEager1) {
+  // 3-channel eager
+  MS_LOG(INFO) << "3-channel image test";
+  // Read images
+  auto image = ReadFileToTensor("data/dataset/apple.jpg");
+
+  // Transform params
+  auto decode = vision::Decode();
+  auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0);
+
+  auto transform = Execute({decode, adjust_gamma_op});
+  Status rc = transform(image, &image);
+  EXPECT_EQ(rc, Status::OK());
+}
+
+TEST_F(MindDataTestExecute, TestAdjustGammaEager2) {
+  // 1-channel eager
+  MS_LOG(INFO) << "1-channel image test";
+  auto m1 = ReadFileToTensor("data/dataset/apple.jpg");
+  // Transform params
+  auto decode = vision::Decode();
+  auto rgb2gray = vision::RGB2GRAY();
+  auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0);
+
+  auto transform = Execute({decode, rgb2gray, adjust_gamma_op});
+  Status rc = transform(m1, &m1);
+  EXPECT_EQ(rc, Status::OK());
+}
+
+TEST_F(MindDataTestExecute, TestAmplitudeToDB) {
+  MS_LOG(INFO) << "Basic Function Test With Eager.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 2, 2, 3}), &input));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> amplitude_to_db_op = std::make_shared<audio::AmplitudeToDB>();
+  // apply amplitude_to_db
+  mindspore::dataset::Execute trans({amplitude_to_db_op});
+  Status status = trans(input_ms, &input_ms);
+  EXPECT_TRUE(status.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongArgs) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> amplitude_to_db_op =
+    std::make_shared<audio::AmplitudeToDB>(ScaleType::kPower, 1.0, -1e-10, 80.0);
+  // apply amplitude_to_db
+  mindspore::dataset::Execute trans({amplitude_to_db_op});
+  Status status = trans(input_ms, &input_ms);
+  EXPECT_FALSE(status.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongInput) {
+  MS_LOG(INFO) << "Wrong Input.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({20}), &input));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> amplitude_to_db_op = std::make_shared<audio::AmplitudeToDB>();
+  // apply amplitude_to_db
+  mindspore::dataset::Execute trans({amplitude_to_db_op});
+  Status status = trans(input_ms, &input_ms);
+  EXPECT_FALSE(status.IsOk());
+}
+
 TEST_F(MindDataTestExecute, TestComposeTransforms) {
   MS_LOG(INFO) << "Doing TestComposeTransforms.";
 
@@ -69,6 +197,65 @@ TEST_F(MindDataTestExecute, TestCrop) {
   EXPECT_EQ(image.Shape()[1], 15);
 }
 
+TEST_F(MindDataTestExecute, TestTimeStretchEager) {
+  MS_LOG(INFO) << "Doing test TimeStretchOp with custom param value. Eager.";
+  std::shared_ptr<Tensor> input_tensor_;
+  // op param
+  int freq = 4;
+  int hop_length = 20;
+  float rate = 1.3;
+  int frame_num = 10;
+  // create tensor
+  TensorShape s = TensorShape({2, freq, frame_num, 2});
+  // init input vec
+  std::vector<float> input_vec(2 * freq * frame_num * 2);
+  for (int ind = 0; ind < input_vec.size(); ind++) {
+    input_vec[ind] = std::rand() % (1000) / (1000.0f);
+  }
+  ASSERT_OK(Tensor::CreateFromVector(input_vec, s, &input_tensor_));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor_));
+  std::shared_ptr<TensorTransform> time_stretch_op = std::make_shared<audio::TimeStretch>(hop_length, freq, rate);
+
+  // apply timestretch
+  mindspore::dataset::Execute Transform({time_stretch_op});
+  Status status = Transform(input_ms, &input_ms);
+  EXPECT_TRUE(status.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestTimeStretchParamCheck1) {
+  MS_LOG(INFO) << "Doing MindDataTestTimeStretch-TestTimeStretchParamCheck with invalid parameters.";
+  // Create an input
+  std::shared_ptr<Tensor> input_tensor_;
+  std::shared_ptr<Tensor> output_tensor;
+  TensorShape s = TensorShape({1, 4, 3, 2});
+  ASSERT_OK(Tensor::CreateFromVector(
+    std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f,
+                        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}),
+    s, &input_tensor_));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor_));
+  std::shared_ptr<TensorTransform> timestretch = std::make_shared<audio::TimeStretch>(4, 512, -2);
+  mindspore::dataset::Execute Transform({timestretch});
+  Status status = Transform(input_ms, &input_ms);
+  EXPECT_FALSE(status.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestTimeStretchParamCheck2) {
+  MS_LOG(INFO) << "Doing MindDataTestTimeStretch-TestTimeStretchParamCheck with invalid parameters.";
+  // Create an input
+  std::shared_ptr<Tensor> input_tensor_;
+  std::shared_ptr<Tensor> output_tensor;
+  TensorShape s = TensorShape({1, 4, 3, 2});
+  ASSERT_OK(Tensor::CreateFromVector(
+    std::vector<float>({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f,
+                        1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}),
+    s, &input_tensor_));
+  auto input_ms = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input_tensor_));
+  std::shared_ptr<TensorTransform> timestretch = std::make_shared<audio::TimeStretch>(4, -512, 2);
+  mindspore::dataset::Execute Transform({timestretch});
+  Status status = Transform(input_ms, &input_ms);
+  EXPECT_FALSE(status.IsOk());
+}
+
 TEST_F(MindDataTestExecute, TestTransformInput1) {
   MS_LOG(INFO) << "Doing MindDataTestExecute-TestTransformInput1.";
   // Test Execute with transform op input using API constructors, with std::shared_ptr<TensorTransform pointers,
@@ -334,3 +521,94 @@ TEST_F(MindDataTestExecute, TestBandBiquadWithWrongArg) {
   Status s01 = Transform01(input_02, &input_02);
   EXPECT_FALSE(s01.IsOk());
 }
+
+TEST_F(MindDataTestExecute, TestBandpassBiquadWithEager) {
+  MS_LOG(INFO) << "Basic Function Test With Eager.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> bandpass_biquad_01 = std::make_shared<audio::BandpassBiquad>(44100, 200);
+  mindspore::dataset::Execute Transform01({bandpass_biquad_01});
+  // Filtered waveform by bandpassbiquad
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_TRUE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestBandpassBiquadWithWrongArg) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::vector<double> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  // Check Q
+  MS_LOG(INFO) << "Q is zero.";
+  std::shared_ptr<TensorTransform> bandpass_biquad_op = std::make_shared<audio::BandpassBiquad>(44100, 200, 0);
+  mindspore::dataset::Execute Transform01({bandpass_biquad_op});
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_FALSE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestBandrejectBiquadWithEager) {
+  MS_LOG(INFO) << "Basic Function Test With Eager.";
+  // Original waveform
+  std::vector<float> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  std::shared_ptr<TensorTransform> bandreject_biquad_01 = std::make_shared<audio::BandrejectBiquad>(44100, 200);
+  mindspore::dataset::Execute Transform01({bandreject_biquad_01});
+  // Filtered waveform by bandrejectbiquad
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_TRUE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestBandrejectBiquadWithWrongArg) {
+  MS_LOG(INFO) << "Wrong Arg.";
+  std::vector<double> labels = {
+    2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02,
+    1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02,
+    1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02,
+    1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02,
+    1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03};
+  std::shared_ptr<Tensor> input;
+  ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input));
+  auto input_02 = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(input));
+  // Check Q
+  MS_LOG(INFO) << "Q is zero.";
+  std::shared_ptr<TensorTransform> bandreject_biquad_op = std::make_shared<audio::BandrejectBiquad>(44100, 200, 0);
+  mindspore::dataset::Execute Transform01({bandreject_biquad_op});
+  Status s01 = Transform01(input_02, &input_02);
+  EXPECT_FALSE(s01.IsOk());
+}
+
+TEST_F(MindDataTestExecute, TestAngleEager) {
+  MS_LOG(INFO) << "Doing MindDataTestExecute-TestAngleEager";
+  std::vector<double> origin = {1.143, 1.3123, 2.632, 2.554, -1.213, 1.3, 0.456, 3.563};
+  TensorShape input_shape({4, 2});
+  std::shared_ptr<Tensor> de_tensor;
+  Tensor::CreateFromVector(origin, input_shape, &de_tensor);
+
+  std::shared_ptr<TensorTransform> angle = std::make_shared<audio::Angle>();
+  auto input = mindspore::MSTensor(std::make_shared<mindspore::dataset::DETensor>(de_tensor));
+  mindspore::dataset::Execute Transform({angle});
+  Status s = Transform(input, &input);
+
+  ASSERT_TRUE(s.IsOk());
+}
diff --git a/tests/ut/cpp/dataset/random_color_op_test.cc b/tests/ut/cpp/dataset/random_color_op_test.cc
index 144174a49d8..0ad25711ca8 100644
--- a/tests/ut/cpp/dataset/random_color_op_test.cc
+++ b/tests/ut/cpp/dataset/random_color_op_test.cc
@@ -43,7 +43,7 @@ class MindDataTestRandomColorOp : public UT::CVOP::CVOpCommon {
     cv::Mat cv_out;
     cv::merge(temp, 3, cv_out);
     std::shared_ptr<CVTensor> cvt_out;
-    CVTensor::CreateFromMat(cv_out, &cvt_out);
+    CVTensor::CreateFromMat(cv_out, 3, &cvt_out);
     gray_tensor = std::static_pointer_cast<Tensor>(cvt_out);
   }
   TensorShape shape;
@@ -96,4 +96,4 @@ TEST_F(MindDataTestRandomColorOp, TestOp3) {
     auto s = op.Compute(input_tensor, &output_tensor);
     EXPECT_TRUE(s.IsOk());
   }
-}
\ No newline at end of file
+}
diff --git a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
index 470e18eaee7..2520c2f65d5 100644
--- a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
+++ b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc
@@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToBgrOp, TestOp1) {
   // create new tensor to test conversion
   std::shared_ptr<Tensor> rgba_input;
   std::shared_ptr<CVTensor> input_cv_tensor;
-  CVTensor::CreateFromMat(rgba_image, &input_cv_tensor);
+  CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor);
   rgba_input = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
 
   Status s = op->Compute(rgba_input, &output_tensor_);
diff --git a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
index a50c8047a0b..b9902302361 100644
--- a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
+++ b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc
@@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToRgbOp, TestOp1) {
   // create new tensor to test conversion
   std::shared_ptr<Tensor> rgba_input;
   std::shared_ptr<CVTensor> input_cv_tensor;
-  CVTensor::CreateFromMat(rgba_image, &input_cv_tensor);
+  CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor);
   rgba_input = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
 
   Status s = op->Compute(rgba_input, &output_tensor_);
diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc
index 1a872ecd85e..25f03ebccd8 100644
--- a/tests/ut/cpp/dataset/tensor_test.cc
+++ b/tests/ut/cpp/dataset/tensor_test.cc
@@ -303,7 +303,8 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
   m.at<uint8_t>(1, 0) = 30;
   m.at<uint8_t>(1, 1) = 40;
   std::shared_ptr<CVTensor> cvt;
-  CVTensor::CreateFromMat(m, &cvt);
+  TensorShape shape{2, 2};
+  CVTensor::CreateFromMat(m, 2, &cvt);
   std::shared_ptr<Tensor> t;
   Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
   t->SetItemAt<uint8_t>({0, 0}, 10);
@@ -318,7 +319,7 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
   m2.at<uint8_t>(2) = 30;
   m2.at<uint8_t>(3) = 40;
   std::shared_ptr<CVTensor> cvt2;
-  CVTensor::CreateFromMat(m2, &cvt2);
+  CVTensor::CreateFromMat(m2, 2, &cvt2);
   std::shared_ptr<Tensor> t2;
   Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &t2);
   t2->SetItemAt<uint8_t>({0}, 10);
@@ -360,7 +361,7 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {
   m.at<int32_t>(1, 1) = 50;
   m.at<int32_t>(1, 2) = 60;
   std::shared_ptr<CVTensor> cvt;
-  CVTensor::CreateFromMat(m, &cvt);
+  CVTensor::CreateFromMat(m, 2, &cvt);
   cv::Mat mat;
   cvt->MatAtIndex({1}, &mat);
   cv::Mat m2(3, 1, CV_32S);
@@ -368,17 +369,17 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {
   m2.at<int32_t>(1) = 50;
   m2.at<int32_t>(2) = 60;
   std::shared_ptr<CVTensor> cvt2;
-  CVTensor::CreateFromMat(mat, &cvt2);
+  CVTensor::CreateFromMat(mat, 2, &cvt2);
   std::shared_ptr<CVTensor> cvt3;
-  CVTensor::CreateFromMat(m2, &cvt3);
+  CVTensor::CreateFromMat(m2, 2, &cvt3);
 
   ASSERT_TRUE(*cvt2 == *cvt3);
   cvt->MatAtIndex({0}, &mat);
   m2.at<int32_t>(0) = 10;
   m2.at<int32_t>(1) = 20;
   m2.at<int32_t>(2) = 30;
-  CVTensor::CreateFromMat(mat, &cvt2);
-  CVTensor::CreateFromMat(m2, &cvt3);
+  CVTensor::CreateFromMat(mat, 2, &cvt2);
+  CVTensor::CreateFromMat(m2, 2, &cvt3);
   ASSERT_TRUE(*cvt2 == *cvt3);
 }
 
@@ -536,44 +537,3 @@ TEST_F(MindDataTestTensorDE, TensorEmpty) {
   t2->Invalidate();
   ASSERT_TRUE(!t2->HasData());
 }
-
-TEST_F(MindDataTestTensorDE, TestTensorJson) {
-  MS_LOG(INFO) << "Doing TestTensor.";
-  std::vector<uint64_t> labels = {1, 1, 2};
-  std::shared_ptr<Tensor> input;
-  Tensor::CreateFromVector(labels, &input);
-  nlohmann::json out_json;
-  input->to_json(&out_json);
-
-  std::shared_ptr<Tensor> check;
-  std::stringstream ss;
-  ss << out_json["shape"];
-  std::string shape = ss.str();
-  ss.str("");
-  ss << out_json["type"];
-  std::string type = ss.str();
-  ss.str("");
-  ss << out_json["data"];
-  std::string data = ss.str();
-  ss.str("");
-
-  ASSERT_TRUE('"' + input->shape().ToString() + '"' == shape);
-  ASSERT_TRUE('"' + input->type().ToString() + '"' == type);
-
-  std::string input_data;
-  input_data.push_back('"');
-  input_data.push_back('[');
-  for (int i = 0; i < labels.size(); i++) {
-    input_data += std::to_string(labels[i]);
-    if (i < labels.size() - 1) {
-      input_data.push_back(',');
-    }
-  }
-  input_data.push_back(']');
-  input_data.push_back('"');
-
-  std::cout << input_data << std::endl;
-  std::cout << data << std::endl;
-
-  ASSERT_TRUE(input_data == data);
-}
diff --git a/tests/ut/cpp/runtest.sh b/tests/ut/cpp/runtest.sh
index e4c5f6cdf2f..df1f81e9bd2 100755
--- a/tests/ut/cpp/runtest.sh
+++ b/tests/ut/cpp/runtest.sh
@@ -32,6 +32,8 @@ ${PROJECT_PATH}/graphengine/third_party/prebuild/aarch64:${LD_LIBRARY_PATH}
 export PYTHONPATH=${PROJECT_PATH}/tests/ut/cpp/python_input:$PYTHONPATH:${PROJECT_PATH}
 export GLOG_v=2
 export GC_COLLECT_IN_CELL=1
+## set op info config path
+export MINDSPORE_OP_INFO_PATH=${PROJECT_PATH}/config/op_info.config
 
 ## prepare data for dataset & mindrecord
 cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/
diff --git a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc
index aab00605814..83f6e95cc91 100644
--- a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc
+++ b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc
@@ -18,7 +18,6 @@
 #include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h"
 #include "runtime/device/ascend/executor/rts/profiling_rts_dynamic_kernel.h"
 #include "runtime/device/ascend/executor/ai_core_dynamic_kernel.h"
-#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h"
 #include "backend/kernel_compiler/host/host_kernel_metadata.h"
 #include "backend/kernel_compiler/host/host_kernel_build.h"
 
@@ -38,11 +37,6 @@ void AiCoreDynamicKernel::Execute() {}
 void AiCoreDynamicKernel::UpdateArgs() {}
 void AiCoreDynamicKernel::Initialize() {}
 void AiCoreDynamicKernel::PostExecute() {}
-
-void OpTilingCalculater::Init() {}
-void OpTilingCalculater::CalculateTiling(const NotNull<CNodePtr> &cnode, const optiling::OpCompileInfo &op_compile_info,
-                     const std::map<uint32_t, tensor::TensorPtr> &depend_tensor_map,
-                     NotNull<optiling::OpRunInfo *> op_run_info) {}
 }  // namespace ascend
 }  // namespace device
 }  // namespace mindspore
diff --git a/tests/ut/cpp/stub/ge/ge_mock.cc b/tests/ut/cpp/stub/ge/ge_mock.cc
index 2a405c20073..ed32606bb32 100644
--- a/tests/ut/cpp/stub/ge/ge_mock.cc
+++ b/tests/ut/cpp/stub/ge/ge_mock.cc
@@ -53,10 +53,8 @@ Status Graph::SaveToFile(const string& file_name) const { return ge::GRAPH_SUCCE
 }  // namespace ge
 
 namespace gelc {
-extern "C" {
 uint32_t GetOptInfo(uint32_t mode, const std::string &soc_ver, std::map<std::string, std::string> &opt_info_map) {
   return 0;
 }
-}  // extern C
 }  // namespace gelc
 #endif
diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
index 0e3477976c0..7be74ba8d73 100644
--- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
+++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc
@@ -22,6 +22,7 @@ HcclAdapter &HcclAdapter::GetInstance() {
   static HcclAdapter instance;
   return instance;
 }
+bool HcclAdapter::InitHccl() { return true; }
 bool HcclAdapter::InitHccl(uint32_t, std::string_view, std::string_view) { return true; }
 bool HcclAdapter::FinalizeHccl() { return true; }
 HcclResult HcclAdapter::HcclCreateGroup(const std::string &, uint32_t, uint32_t *) const { return HCCL_SUCCESS; }
@@ -35,7 +36,21 @@ std::string HcclAdapter::GetHcclType(const AnfNodePtr &) { return ""; }
 HcclResult HcclAdapter::HcclBroadcast(void *, uint64_t, HcclDataType, uint32_t, aclrtStream) const {
   return HCCL_SUCCESS;
 }
-HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream) const {
+HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream,
+                                      const std::string &) const {
+  return HCCL_SUCCESS;
+}
+HcclResult HcclAdapter::HcclAllGather(void *, void *, uint64_t, HcclDataType, aclrtStream, const std::string &) const {
+  return HCCL_SUCCESS;
+}
+HcclResult HcclAdapter::HcclReduceScatter(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream,
+                                          const std::string &) const {
+  return HCCL_SUCCESS;
+}
+HcclResult HcclAdapter::HcclSend(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const {
+  return HCCL_SUCCESS;
+}
+HcclResult HcclAdapter::HcclRecv(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const {
   return HCCL_SUCCESS;
 }
 HcclResult HcclAdapter::HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const {
diff --git a/tests/ut/python/dataset/test_adjustgamma.py b/tests/ut/python/dataset/test_adjustgamma.py
index 32363f76b84..61e91fdc5f5 100644
--- a/tests/ut/python/dataset/test_adjustgamma.py
+++ b/tests/ut/python/dataset/test_adjustgamma.py
@@ -31,6 +31,8 @@ MNIST_DATA_DIR = "../data/dataset/testMnistData"
 DATA_DIR_2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
 SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json"
 
+GENERATE_GOLDEN = False
+
 
 def generate_numpy_random_rgb(shape):
     """
@@ -88,20 +90,26 @@ def test_adjust_gamma_invalid_gamma_param_c():
     logger.info("Test AdjustGamma C Op with invalid ignore parameter")
     try:
         data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
-        data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
+        data_set = data_set.map(operations=[C.Decode(),
+                                            C.Resize((224, 224)),
+                                            lambda img: np.array(img[:, :, 0])],
                                 input_columns=["image"])
         # invalid gamma
-        data_set = data_set.map(operations=C.AdjustGamma(gamma=-10.0, gain=1.0),
+        data_set = data_set.map(operations=C.AdjustGamma(gamma=-10.0,
+                                                         gain=1.0),
                                 input_columns="image")
     except ValueError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
         assert "Input is not within the required interval of " in str(error)
     try:
         data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
-        data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
+        data_set = data_set.map(operations=[C.Decode(),
+                                            C.Resize((224, 224)),
+                                            lambda img: np.array(img[:, :, 0])],
                                 input_columns=["image"])
         # invalid gamma
-        data_set = data_set.map(operations=C.AdjustGamma(gamma=[1, 2], gain=1.0),
+        data_set = data_set.map(operations=C.AdjustGamma(gamma=[1, 2],
+                                                         gain=1.0),
                                 input_columns="image")
     except TypeError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
@@ -121,7 +129,8 @@ def test_adjust_gamma_invalid_gamma_param_py():
             F.AdjustGamma(gamma=-10.0),
             F.ToTensor()
         ])
-        data_set = data_set.map(operations=[trans], input_columns=["image"])
+        data_set = data_set.map(operations=[trans],
+                                input_columns=["image"])
     except ValueError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
         assert "Input is not within the required interval of " in str(error)
@@ -133,7 +142,8 @@ def test_adjust_gamma_invalid_gamma_param_py():
             F.AdjustGamma(gamma=[1, 2]),
             F.ToTensor()
         ])
-        data_set = data_set.map(operations=[trans], input_columns=["image"])
+        data_set = data_set.map(operations=[trans],
+                                input_columns=["image"])
     except TypeError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
         assert "is not of type [<class 'float'>, <class 'int'>], but got" in str(error)
@@ -146,10 +156,13 @@ def test_adjust_gamma_invalid_gain_param_c():
     logger.info("Test AdjustGamma C Op with invalid gain parameter")
     try:
         data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False)
-        data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])],
+        data_set = data_set.map(operations=[C.Decode(),
+                                            C.Resize((224, 224)),
+                                            lambda img: np.array(img[:, :, 0])],
                                 input_columns=["image"])
         # invalid gain
-        data_set = data_set.map(operations=C.AdjustGamma(gamma=10.0, gain=[1, 10]),
+        data_set = data_set.map(operations=C.AdjustGamma(gamma=10.0,
+                                                         gain=[1, 10]),
                                 input_columns="image")
     except TypeError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
@@ -169,7 +182,8 @@ def test_adjust_gamma_invalid_gain_param_py():
             F.AdjustGamma(gamma=10.0, gain=[1, 10]),
             F.ToTensor()
         ])
-        data_set = data_set.map(operations=[trans], input_columns=["image"])
+        data_set = data_set.map(operations=[trans],
+                                input_columns=["image"])
     except TypeError as error:
         logger.info("Got an exception in AdjustGamma: {}".format(str(error)))
         assert "is not of type [<class 'float'>, <class 'int'>], but got " in str(error)
diff --git a/tests/ut/python/dataset/test_allpass_biquad.py b/tests/ut/python/dataset/test_allpass_biquad.py
index 29805ab6df3..e3cadece4f5 100644
--- a/tests/ut/python/dataset/test_allpass_biquad.py
+++ b/tests/ut/python/dataset/test_allpass_biquad.py
@@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio
 from mindspore import log as logger
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
+
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def test_func_allpass_biquad_eager():
@@ -35,11 +37,12 @@ def test_func_allpass_biquad_eager():
     # Original waveform
     waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
     # Expect waveform
-    expect_waveform = np.array([[0.96049707, 1.0, 1.0], [1.0, 1.0, 1.0]], dtype=np.float64)
+    expect_waveform = np.array([[0.96049707, 1.0, 1.0],
+                                [1.0, 1.0, 1.0]], dtype=np.float64)
     allpass_biquad_op = audio.AllpassBiquad(44100, 200.0, 0.707)
     # Filtered waveform by allpassbiquad
     output = allpass_biquad_op(waveform)
-    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
+    _count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
 
 
 def test_func_allpass_biquad_pipeline():
@@ -48,57 +51,56 @@ def test_func_allpass_biquad_pipeline():
     # Original waveform
     waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64)
     # Expect waveform
-    expect_waveform = np.array([[0.96049707, 1.0, 1.0], [1.0, 1.0, 1.0]], dtype=np.float64)
+    expect_waveform = np.array([[0.96049707, 1.0, 1.0],
+                                [1.0, 1.0, 1.0]], dtype=np.float64)
     label = np.random.sample((2, 1))
     data = (waveform, label)
     dataset = ds.NumpySlicesDataset(data, ["channel", "sample"], shuffle=False)
     allpass_biquad_op = audio.AllpassBiquad(44100, 200.0)
     # Filtered waveform by allpassbiquad
-    dataset = dataset.map(input_columns=["channel"], operations=allpass_biquad_op, num_parallel_workers=8)
+    dataset = dataset.map(
+        input_columns=["channel"], operations=allpass_biquad_op, num_parallel_workers=8)
     i = 0
-    for item in dataset.create_dict_iterator(output_numpy=True):
-        count_unequal_element(expect_waveform[i, :], item['channel'], 0.0001, 0.0001)
+    for _ in dataset.create_dict_iterator(output_numpy=True):
+        _count_unequal_element(expect_waveform[i, :],
+                               _['channel'], 0.0001, 0.0001)
         i += 1
 
-
 def test_invalid_input_all():
     waveform = np.random.rand(2, 1000)
-
     def test_invalid_input(test_name, sample_rate, central_freq, Q, error, error_msg):
         logger.info("Test Allpassallpassiquad with bad input: {0}".format(test_name))
         with pytest.raises(error) as error_info:
             audio.AllpassBiquad(sample_rate, central_freq, Q)(waveform)
         assert error_msg in str(error_info.value)
-
     test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, TypeError,
                        "Argument sample_rate with value 44100.5 is not of type [<class 'int'>],"
-                       + " but got <class 'float'>.")
+                       +" but got <class 'float'>.")
     test_invalid_input("invalid sample_rate parameter type as a String", "44100", 200, 0.707, TypeError,
-                       "Argument sample_rate with value 44100 is not of type [<class 'int'>]," +
+                       "Argument sample_rate with value 44100 is not of type [<class 'int'>],"+
                        " but got <class 'str'>.")
     test_invalid_input("invalid contral_freq parameter type as a String", 44100, "200", 0.707, TypeError,
                        "Argument central_freq with value 200 is not of type [<class 'float'>, <class 'int'>],"
-                       + " but got <class 'str'>.")
+                       +" but got <class 'str'>.")
     test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", TypeError,
                        "Argument Q with value 0.707 is not of type [<class 'float'>, <class 'int'>],"
-                       + " but got <class 'str'>.")
+                       +" but got <class 'str'>.")
     test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate is not within the required interval of [-2147483648, 2147483647].")
     test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, ValueError,
                        "Input central_freq is not within the required interval of [-16777216, 16777216].")
     test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, TypeError,
                        "Argument sample_rate with value None is not of type [<class 'int'>],"
-                       + " but got <class 'NoneType'>.")
+                       +" but got <class 'NoneType'>.")
     test_invalid_input("invalid central_rate parameter value", 44100, None, 0.707, TypeError,
                        "Argument central_freq with value None is not of type [<class 'float'>, <class 'int'>],"
-                       + " but got <class 'NoneType'>.")
+                       +" but got <class 'NoneType'>.")
     test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate can not be 0.")
     test_invalid_input("invalid Q parameter value", 44100, 200, 1.707, ValueError,
                        "Input Q is not within the required interval of (0, 1].")
 
-
 if __name__ == '__main__':
-    test_func_allpass_biquad_eager()
-    test_func_allpass_biquad_pipeline()
+    test_eager_allpassbiquad_mindspore_001()
+    test_pipeline_allpass_biquad_001()
     test_invalid_input_all()
diff --git a/tests/ut/python/dataset/test_amplitude_to_db.py b/tests/ut/python/dataset/test_amplitude_to_db.py
index 9fba2ed07b9..448b8b09ef4 100644
--- a/tests/ut/python/dataset/test_amplitude_to_db.py
+++ b/tests/ut/python/dataset/test_amplitude_to_db.py
@@ -23,6 +23,7 @@ import mindspore.dataset.audio.transforms as c_audio
 from mindspore import log as logger
 from mindspore.dataset.audio.utils import ScaleType
 
+
 CHANNEL = 1
 FREQ = 20
 TIME = 15
@@ -31,18 +32,19 @@ TIME = 15
 def gen(shape):
     np.random.seed(0)
     data = np.random.random(shape)
-    yield (np.array(data, dtype=np.float32),)
+    yield(np.array(data, dtype=np.float32),)
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
     """ Precision calculation func """
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
@@ -50,7 +52,9 @@ def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
     if np.any(np.isnan(data_expected)):
         assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan)
     elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan):
-        count_unequal_element(data_expected, data_me, rtol, atol)
+        _count_unequal_element(data_expected, data_me, rtol, atol)
+    else:
+        assert True
 
 
 def test_func_amplitude_to_db_eager():
@@ -87,7 +91,9 @@ def test_func_amplitude_to_db_pipeline():
 
     data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"])
 
-    transforms = [c_audio.AmplitudeToDB()]
+    transforms = [
+        c_audio.AmplitudeToDB()
+    ]
     data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
 
     for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
@@ -96,6 +102,7 @@ def test_func_amplitude_to_db_pipeline():
 
 
 def test_amplitude_to_db_invalid_input():
+
     def test_invalid_input(test_name, stype, ref_value, amin, top_db, error, error_msg):
         logger.info("Test AmplitudeToDB with bad input: {0}".format(test_name))
         with pytest.raises(error) as error_info:
diff --git a/tests/ut/python/dataset/test_angle.py b/tests/ut/python/dataset/test_angle.py
index 1de8e2fd0a2..6c366b6a41e 100755
--- a/tests/ut/python/dataset/test_angle.py
+++ b/tests/ut/python/dataset/test_angle.py
@@ -19,28 +19,28 @@ import pytest
 import mindspore.dataset as ds
 import mindspore.dataset.audio.transforms as a_c_trans
 
+def _count_unequal_element(data_expected, data_me, rtol, atol):
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
-
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 def test_func_angle_001():
     """
     Eager Test
     """
-    arr = np.array([[73.04, -13.00], [57.49, 13.20], [-57.64, 6.51], [-52.25, 30.67], [-30.11, -18.34],
+    arr = np.array([[73.04, -13.00], [57.49, 13.20], [-57.64, 6.51], [-52.25, 30.67], [-30.11, -18.34], \
                     [-63.32, 99.33], [95.82, -24.76]], dtype=np.double)
-    expected = np.array([-0.17614017, 0.22569334, 3.02912684, 2.6107975, -2.59450886, 2.13831337, -0.25286988],
+    expected = np.array([-0.17614017, 0.22569334, 3.02912684, 2.6107975, -2.59450886, 2.13831337, -0.25286988], \
                         dtype=np.double)
     angle_op = a_c_trans.Angle()
     output = angle_op(arr)
-    count_unequal_element(expected, output, 0.0001, 0.0001)
+    _count_unequal_element(expected, output, 0.0001, 0.0001)
 
 
 def test_func_angle_002():
@@ -48,9 +48,9 @@ def test_func_angle_002():
     Pipeline Test
     """
     np.random.seed(6)
-    arr = np.array([[[84.25, -85.92], [-92.23, 23.06], [-7.33, -44.17], [-62.95, -14.73]],
+    arr = np.array([[[84.25, -85.92], [-92.23, 23.06], [-7.33, -44.17], [-62.95, -14.73]], \
                     [[93.09, 38.18], [-81.94, 71.34], [71.33, -39.00], [95.25, -32.94]]], dtype=np.double)
-    expected = np.array([[-0.79521156, 2.89658848, -1.73524737, -2.91173309],
+    expected = np.array([[-0.79521156, 2.89658848, -1.73524737, -2.91173309], \
                          [0.3892177, 2.42523905, -0.50034807, -0.33295219]], dtype=np.double)
     label = np.random.sample((2, 4, 1))
     data = (arr, label)
@@ -58,8 +58,7 @@ def test_func_angle_002():
     angle_op = a_c_trans.Angle()
     dataset = dataset.map(operations=angle_op, input_columns=["col1"])
     for item1, item2 in zip(dataset.create_dict_iterator(output_numpy=True), expected):
-        count_unequal_element(item2, item1['col1'], 0.0001, 0.0001)
-
+        _count_unequal_element(item2, item1['col1'], 0.0001, 0.0001)
 
 def test_func_angle_003():
     """
@@ -73,7 +72,7 @@ def test_func_angle_003():
     angle_op = a_c_trans.Angle()
     dataset = dataset.map(operations=angle_op, input_columns=["col1"])
     num_itr = 0
-    with pytest.raises(RuntimeError, match="input tensor type should be int, float or double"):
+    with pytest.raises(RuntimeError, match="The input type should be numbers"):
         for _ in dataset.create_dict_iterator(output_numpy=True):
             num_itr += 1
 
diff --git a/tests/ut/python/dataset/test_bandpass_biquad.py b/tests/ut/python/dataset/test_bandpass_biquad.py
index caa8277dc35..90a8ddc78b1 100644
--- a/tests/ut/python/dataset/test_bandpass_biquad.py
+++ b/tests/ut/python/dataset/test_bandpass_biquad.py
@@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio
 from mindspore import log as logger
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
+
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def test_func_bandpass_biquad_eager():
@@ -40,7 +42,7 @@ def test_func_bandpass_biquad_eager():
     bandpass_biquad_op = audio.BandpassBiquad(44000, 200.0, 0.707, False)
     # Filtered waveform by bandpassbiquad
     output = bandpass_biquad_op(waveform)
-    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
+    _count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
 
 
 def test_func_bandpass_biquad_pipeline():
@@ -56,10 +58,12 @@ def test_func_bandpass_biquad_pipeline():
     dataset = ds.NumpySlicesDataset(data, ["channel", "sample"], shuffle=False)
     bandpass_biquad_op = audio.BandpassBiquad(44000, 200.0)
     # Filtered waveform by bandpassbiquad
-    dataset = dataset.map(input_columns=["channel"], operations=bandpass_biquad_op, num_parallel_workers=8)
+    dataset = dataset.map(
+        input_columns=["channel"], operations=bandpass_biquad_op, num_parallel_workers=8)
     i = 0
-    for item in dataset.create_dict_iterator(output_numpy=True):
-        count_unequal_element(expect_waveform[i, :], item['channel'], 0.0001, 0.0001)
+    for _ in dataset.create_dict_iterator(output_numpy=True):
+        _count_unequal_element(expect_waveform[i, :],
+                               _['channel'], 0.0001, 0.0001)
         i += 1
 
 
@@ -68,7 +72,8 @@ def test_bandpass_biquad_invalid_input():
         logger.info(
             "Test BandpassBiquad with bad input: {0}".format(test_name))
         with pytest.raises(error) as error_info:
-            audio.BandpassBiquad(sample_rate, central_freq, Q, const_skirt_gain)
+            audio.BandpassBiquad(
+                sample_rate, central_freq, Q, const_skirt_gain)
         assert error_msg in str(error_info.value)
 
     test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, True, TypeError,
@@ -80,7 +85,7 @@ def test_bandpass_biquad_invalid_input():
                        "Argument central_freq with value 200 is not of type [<class 'float'>, <class 'int'>],"
                        " but got <class 'str'>.")
     test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, True, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate can not be 0.")
     test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, True, ValueError,
                        "Input central_freq is not within the required interval of [-16777216, 16777216].")
     test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", True, TypeError,
@@ -91,7 +96,7 @@ def test_bandpass_biquad_invalid_input():
     test_invalid_input("invalid Q parameter value", 44100, 200, 0, True, ValueError,
                        "Input Q is not within the required interval of (0, 1].")
     test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, True, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate is not within the required interval of [-2147483648, 2147483647].")
     test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, True, TypeError,
                        "Argument sample_rate with value None is not of type [<class 'int'>],"
                        " but got <class 'NoneType'>.")
diff --git a/tests/ut/python/dataset/test_bandreject_biquad.py b/tests/ut/python/dataset/test_bandreject_biquad.py
index af04d34de25..3c799c6f827 100644
--- a/tests/ut/python/dataset/test_bandreject_biquad.py
+++ b/tests/ut/python/dataset/test_bandreject_biquad.py
@@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio
 from mindspore import log as logger
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
+
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def test_func_bandreject_biquad_eager():
@@ -41,7 +43,7 @@ def test_func_bandreject_biquad_eager():
     bandreject_biquad_op = audio.BandrejectBiquad(44100, 200.0, 0.707)
     # Filtered waveform by bandrejectbiquad
     output = bandreject_biquad_op(waveform)
-    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
+    _count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
 
 
 def test_func_bandreject_biquad_pipeline():
@@ -61,9 +63,9 @@ def test_func_bandreject_biquad_pipeline():
     dataset = dataset.map(
         input_columns=["channel"], operations=bandreject_biquad_op, num_parallel_workers=8)
     i = 0
-    for item in dataset.create_dict_iterator(output_numpy=True):
-        count_unequal_element(expect_waveform[i, :],
-                              item['channel'], 0.0001, 0.0001)
+    for _ in dataset.create_dict_iterator(output_numpy=True):
+        _count_unequal_element(expect_waveform[i, :],
+                               _['channel'], 0.0001, 0.0001)
         i += 1
 
 
@@ -74,7 +76,6 @@ def test_bandreject_biquad_invalid_input():
         with pytest.raises(error) as error_info:
             audio.BandrejectBiquad(sample_rate, central_freq, Q)
         assert error_msg in str(error_info.value)
-
     test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, TypeError,
                        "Argument sample_rate with value 44100.5 is not of type [<class 'int'>],"
                        " but got <class 'float'>.")
@@ -84,7 +85,7 @@ def test_bandreject_biquad_invalid_input():
                        "Argument central_freq with value 200 is not of type [<class 'float'>, <class 'int'>],"
                        " but got <class 'str'>.")
     test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate can not be 0.")
     test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, ValueError,
                        "Input central_freq is not within the required interval of [-16777216, 16777216].")
     test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", TypeError,
@@ -95,7 +96,7 @@ def test_bandreject_biquad_invalid_input():
     test_invalid_input("invalid Q parameter value", 44100, 200, 0, ValueError,
                        "Input Q is not within the required interval of (0, 1].")
     test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate is not within the required interval of [-2147483648, 2147483647].")
     test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, TypeError,
                        "Argument sample_rate with value None is not of type [<class 'int'>],"
                        " but got <class 'NoneType'>.")
@@ -105,6 +106,6 @@ def test_bandreject_biquad_invalid_input():
 
 
 if __name__ == "__main__":
-    test_func_bandreject_biquad_eager()
-    test_func_bandreject_biquad_pipeline()
-    test_bandreject_biquad_invalid_input()
+    test_func_band_biquad_eager()
+    test_func_band_biquad_pipeline()
+    test_band_biquad_invalid_input()
diff --git a/tests/ut/python/dataset/test_bass_biquad.py b/tests/ut/python/dataset/test_bass_biquad.py
index 41f1e7c87cf..c06470db271 100644
--- a/tests/ut/python/dataset/test_bass_biquad.py
+++ b/tests/ut/python/dataset/test_bass_biquad.py
@@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio
 from mindspore import log as logger
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
+
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def test_func_bass_biquad_eager():
@@ -40,7 +42,7 @@ def test_func_bass_biquad_eager():
     bass_biquad_op = audio.BassBiquad(44100, 50.0, 100.0, 0.707)
     # Filtered waveform by bassbiquad
     output = bass_biquad_op(waveform)
-    count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
+    _count_unequal_element(expect_waveform, output, 0.0001, 0.0001)
 
 
 def test_func_bass_biquad_pipeline():
@@ -59,9 +61,9 @@ def test_func_bass_biquad_pipeline():
     dataset = dataset.map(
         input_columns=["channel"], operations=bass_biquad_op, num_parallel_workers=8)
     i = 0
-    for item in dataset.create_dict_iterator(output_numpy=True):
-        count_unequal_element(expect_waveform[i, :],
-                              item['channel'], 0.0001, 0.0001)
+    for _ in dataset.create_dict_iterator(output_numpy=True):
+        _count_unequal_element(expect_waveform[i, :],
+                               _['channel'], 0.0001, 0.0001)
         i += 1
 
 
@@ -71,7 +73,6 @@ def test_invalid_invalid_input():
         with pytest.raises(error) as error_info:
             audio.BassBiquad(sample_rate, gain, central_freq, Q)
         assert error_msg in str(error_info.value)
-
     test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 50.0, 200, 0.707, TypeError,
                        "Argument sample_rate with value 44100.5 is not of type [<class 'int'>],"
                        " but got <class 'float'>.")
@@ -89,7 +90,7 @@ def test_invalid_invalid_input():
                        " but got <class 'str'>.")
 
     test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 50.0, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate is not within the required interval of [-2147483648, 2147483647].")
     test_invalid_input("invalid gain parameter value", 44100, 32434324324234321, 200, 0.707, ValueError,
                        "Input gain is not within the required interval of [-16777216, 16777216].")
     test_invalid_input("invalid contral_freq parameter value", 44100, 50, 32434324324234321, 0.707, ValueError,
@@ -106,11 +107,10 @@ def test_invalid_invalid_input():
                        " but got <class 'NoneType'>.")
 
     test_invalid_input("invalid sample_rate parameter value", 0, 50.0, 200, 0.707, ValueError,
-                       "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].")
+                       "Input sample_rate can not be 0.")
     test_invalid_input("invalid Q parameter value", 44100, 50.0, 200, 1.707, ValueError,
                        "Input Q is not within the required interval of (0, 1].")
 
-
 if __name__ == '__main__':
     test_func_bass_biquad_eager()
     test_func_bass_biquad_pipeline()
diff --git a/tests/ut/python/dataset/test_datasets_cmuarctic.py b/tests/ut/python/dataset/test_datasets_cmuarctic.py
new file mode 100644
index 00000000000..8dc36bddd91
--- /dev/null
+++ b/tests/ut/python/dataset/test_datasets_cmuarctic.py
@@ -0,0 +1,203 @@
+"""
+Test CmuArctic dataset operators
+"""
+import os
+import pytest
+import numpy as np
+import matplotlib.pyplot as plt
+import mindspore.dataset as ds
+import mindspore.dataset.vision.c_transforms as vision
+from mindspore import log as logger
+
+DATA_DIR = "/home/user06/zjm/data/cmu_arctic/"
+
+def test_cmuarctic_basic():
+    """
+    Validate CmuarcticDataset
+    """
+    logger.info("Test CmuArcticDataset Op")
+
+    # case 1: test loading fault dataset
+    data1 = ds.CmuArcticDataset(DATA_DIR)
+    num_iter1 = 0
+    for _ in data1.create_dict_iterator( output_numpy=True,num_epochs=1):
+        num_iter1 += 1
+    assert num_iter1 == 1132
+
+    # case 2: test num_samples
+    data2 = ds.CmuArcticDataset(DATA_DIR, num_samples=500)
+    num_iter2 = 0
+    for _ in data2.create_dict_iterator( output_numpy=True,num_epochs=1):
+        num_iter2 += 1
+    assert num_iter2 == 500
+
+    # case 3: test repeat
+    data3 = ds.CmuArcticDataset(DATA_DIR, num_samples=200)
+    data3 = data3.repeat(5)
+    num_iter3 = 0
+    for _ in data3.create_dict_iterator( output_numpy=True,num_epochs=1):
+        num_iter3 += 1
+    assert num_iter3 == 1000
+
+    # case 4: test batch with drop_remainder=False
+    data4 = ds.CmuArcticDataset(DATA_DIR, num_samples=100)
+    assert data4.get_dataset_size() == 100
+    assert data4.get_batch_size() == 1
+    data4 = data4.batch(batch_size=7)  # drop_remainder is default to be False
+    assert data4.get_dataset_size() == 15
+    assert data4.get_batch_size() == 7
+    # num_iter4 = 0
+    # for _ in data4.create_dict_iterator( output_numpy=True,num_epochs=1):
+    #     num_iter4 += 1
+    # assert num_iter4 == 15
+
+    # case 5: test batch with drop_remainder=True
+    data5 = ds.CmuArcticDataset(DATA_DIR, num_samples=100)
+    assert data5.get_dataset_size() == 100
+    assert data5.get_batch_size() == 1
+    data5 = data5.batch(batch_size=7, drop_remainder=True)  # the rest of incomplete batch will be dropped
+    assert data5.get_dataset_size() == 14
+    assert data5.get_batch_size() == 7
+    # num_iter5 = 0
+    # for _ in data5.create_dict_iterator( output_numpy=True,num_epochs=1):
+    #     num_iter5 += 1
+    # assert num_iter5 == 14
+
+
+
+def test_cmu_arctic_sequential_sampler():
+    """
+    Test CmuArcticDataset with SequentialSampler
+    """
+    logger.info("Test CmuArcticDataset Op with SequentialSampler")
+    num_samples = 50
+    sampler = ds.SequentialSampler(num_samples=num_samples)
+    data1 = ds.CmuArcticDataset(DATA_DIR, sampler=sampler)
+    data2 = ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_samples=num_samples)
+    label_list1, label_list2 = [], []
+    num_iter = 0
+    for item1, item2 in zip(data1.create_dict_iterator( output_numpy=True,num_epochs=1), data2.create_dict_iterator( output_numpy=True,num_epochs=1)):
+        label_list1.append(item1["utterance"])
+        label_list2.append(item2["utterance"])
+        num_iter += 1
+    np.testing.assert_array_equal(label_list1, label_list2)
+    assert num_iter == num_samples
+
+
+def test_cmu_arctic_exception():
+    """
+    Test error cases for CmuArcticDataset
+    """
+    logger.info("Test error cases for CmuArcticDataset")
+    error_msg_1 = "sampler and shuffle cannot be specified at the same time"
+    with pytest.raises(RuntimeError, match=error_msg_1):
+        ds.CmuArcticDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3))
+
+    error_msg_2 = "sampler and sharding cannot be specified at the same time"
+    with pytest.raises(RuntimeError, match=error_msg_2):
+        ds.CmuArcticDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0)
+
+    error_msg_3 = "num_shards is specified and currently requires shard_id as well"
+    with pytest.raises(RuntimeError, match=error_msg_3):
+        ds.CmuArcticDataset(DATA_DIR, num_shards=10)
+
+    error_msg_4 = "shard_id is specified but num_shards is not"
+    with pytest.raises(RuntimeError, match=error_msg_4):
+        ds.CmuArcticDataset(DATA_DIR, shard_id=0)
+
+    error_msg_5 = "Input shard_id is not within the required interval"
+    with pytest.raises(ValueError, match=error_msg_5):
+        ds.CmuArcticDataset(DATA_DIR, num_shards=5, shard_id=-1)
+    with pytest.raises(ValueError, match=error_msg_5):
+        ds.CmuArcticDataset(DATA_DIR, num_shards=5, shard_id=5)
+    with pytest.raises(ValueError, match=error_msg_5):
+        ds.CmuArcticDataset(DATA_DIR, num_shards=2, shard_id=5)
+
+    error_msg_6 = "num_parallel_workers exceeds"
+    with pytest.raises(ValueError, match=error_msg_6):
+        ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=0)
+    with pytest.raises(ValueError, match=error_msg_6):
+        ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=256)
+    with pytest.raises(ValueError, match=error_msg_6):
+        ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2)
+
+    error_msg_7 = "Argument shard_id"
+    with pytest.raises(TypeError, match=error_msg_7):
+        ds.CmuArcticDataset(DATA_DIR, num_shards=2, shard_id="0")
+
+    def exception_func(item):
+        raise Exception("Error occur!")
+
+    error_msg_8 = "The corresponding data files"
+    with pytest.raises(RuntimeError, match=error_msg_8):
+        data = ds.CmuArcticDataset(DATA_DIR)
+        data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1)
+        for _ in data.__iter__():
+            pass
+    with pytest.raises(RuntimeError, match=error_msg_8):
+        data = ds.CmuArcticDataset(DATA_DIR)
+        data = data.map(operations=vision.Decode(), input_columns=["waveform"], num_parallel_workers=1)
+        data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1)
+        for _ in data.__iter__():
+            pass
+    with pytest.raises(RuntimeError, match=error_msg_8):
+        data = ds.CmuArcticDataset(DATA_DIR)
+        data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1)
+        for _ in data.__iter__():
+            pass
+
+
+def test_cmu_arctic_visualize(plot=False):
+    """
+    Visualize CmuArcticDataset results
+    """
+    logger.info("Test CmuArcticDataset visualization")
+
+    data1 = ds.CmuArcticDataset(DATA_DIR, num_samples=10, shuffle=False)
+    num_iter = 0
+    for item in data1.create_dict_iterator( num_epochs=1, output_numpy=True):
+        audio = item["waveform"]
+        sample_rate = item["sample_rate"]
+        assert isinstance(audio, np.ndarray)
+        assert audio.dtype == np.float64
+        assert sample_rate.dtype == np.uint32
+        num_iter += 1
+    assert num_iter == 10
+
+
+def test_cmu_arctic_usage():
+    """
+    Validate CmuArcticDataset audio readings
+    """
+    logger.info("Test CmuArcticDataset usage flag")
+
+    def test_config(usage, cmu_arctic_path=None):
+        cmu_arctic_path = DATA_DIR if cmu_arctic_path is None else cmu_arctic_path
+        try:
+            data = ds.CmuArcticDataset(cmu_arctic_path, usage=usage, shuffle=False)
+            num_rows = 0
+            for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True):
+                num_rows += 1
+        except (ValueError, TypeError, RuntimeError) as e:
+            return str(e)
+        return num_rows
+
+    assert test_config("aew") == 1132
+    assert test_config("ahw") == 593
+    assert "Input usage is not within the valid set of ['aew', 'ahw', 'aup', 'awb', 'axb', 'bdl', 'clb', 'eey', 'fem', 'gka', 'jmk', 'ksp', 'ljm', 'lnh', 'rms', 'rxr', 'slp', 'slt']." in test_config("invalid")
+    assert "Argument usage with value ['list'] is not of type [<class 'str'>]" in test_config(["list"])
+
+    all_files_path = None
+    if all_files_path is not None:
+        assert test_config("aew", all_files_path) == 1132
+        assert test_config("ahw", all_files_path) == 593
+        assert ds.cmu_arcticDataset(all_files_path, usage="aew").get_dataset_size() == 1132
+        assert ds.cmu_arcticDataset(all_files_path, usage="ahw").get_dataset_size() == 593
+
+
+if __name__ == '__main__':
+    test_cmuarctic_basic()
+    test_cmu_arctic_sequential_sampler()
+    test_cmu_arctic_exception()
+    test_cmu_arctic_visualize(plot=True)
+    test_cmu_arctic_usage()
diff --git a/tests/ut/python/dataset/test_datasets_sbd.py b/tests/ut/python/dataset/test_datasets_sbd.py
index db7c3b9fd05..3801cfa669b 100644
--- a/tests/ut/python/dataset/test_datasets_sbd.py
+++ b/tests/ut/python/dataset/test_datasets_sbd.py
@@ -22,6 +22,7 @@ import mindspore.dataset as ds
 from mindspore import log as logger
 import mindspore.dataset.vision.c_transforms as c_vision
 
+
 DATASET_DIR = "../data/dataset/testSBData/sbd"
 
 
@@ -192,7 +193,6 @@ def test_sbd_usage():
     """
     Validate SBDataset image readings
     """
-
     def test_config(usage):
         try:
             data = ds.SBDataset(DATASET_DIR, task='Segmentation', usage=usage)
diff --git a/tests/ut/python/dataset/test_schema.py b/tests/ut/python/dataset/test_schema.py
index f31400dffe5..84ff09f498e 100644
--- a/tests/ut/python/dataset/test_schema.py
+++ b/tests/ut/python/dataset/test_schema.py
@@ -48,7 +48,7 @@ def test_schema_exception():
 
     with pytest.raises(TypeError) as info:
         ds.Schema(1)
-    assert "Argument schema_file with value 1 is not of type [<class 'str'>]" in str(info.value)
+    assert "path: 1 is not string" in str(info.value)
 
     with pytest.raises(RuntimeError) as info:
         schema = ds.Schema(SCHEMA_FILE)
diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py
index ef69671d250..a6a1fcee4ea 100644
--- a/tests/ut/python/dataset/test_serdes_dataset.py
+++ b/tests/ut/python/dataset/test_serdes_dataset.py
@@ -59,7 +59,8 @@ def test_serdes_imagefolder_dataset(remove_json_files=True):
 
     resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR)
     data1 = data1.map(operations=[rescale_op, resize_op], input_columns=["image"])
-    data1 = data1.batch(2)
+    data1_1 = ds.TFRecordDataset(["../data/dataset/testTFTestAllTypes/test.data"], num_samples=6).batch(2).repeat(10)
+    data1 = data1.zip(data1_1)
 
     # Serialize the dataset pre-processing pipeline.
     # data1 should still work after saving.
@@ -78,6 +79,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True):
     ds.serialize(data2, "imagenet_dataset_pipeline_1.json")
     assert validate_jsonfile("imagenet_dataset_pipeline_1.json") is True
     assert filecmp.cmp('imagenet_dataset_pipeline.json', 'imagenet_dataset_pipeline_1.json')
+    assert data1.get_dataset_size() == data2.get_dataset_size()
 
     # Deserialize the latest json file again
     data3 = ds.deserialize(json_filepath="imagenet_dataset_pipeline_1.json")
@@ -97,7 +99,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True):
         num_samples += 1
 
     logger.info("Number of data in data1: {}".format(num_samples))
-    assert num_samples == 6
+    assert num_samples == 11
 
     # Remove the generated json file
     if remove_json_files:
@@ -169,8 +171,8 @@ def test_serdes_cifar10_dataset(remove_json_files=True):
     data1 = data1.map(operations=trans, input_columns="image")
     data1 = data1.batch(3, drop_remainder=True)
     data1 = data1.repeat(1)
-    data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", remove_json_files)
-
+    # json files are needed for create iterator, remove_json_files = False
+    data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", False)
     num_samples = 0
     # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
     for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True),
@@ -183,6 +185,8 @@ def test_serdes_cifar10_dataset(remove_json_files=True):
     # Restore configuration num_parallel_workers
     ds.config.set_seed(original_seed)
     ds.config.set_num_parallel_workers(original_num_parallel_workers)
+    if remove_json_files:
+        delete_json_files()
 
 
 def test_serdes_celeba_dataset(remove_json_files=True):
@@ -196,7 +200,8 @@ def test_serdes_celeba_dataset(remove_json_files=True):
     center_crop = vision.CenterCrop((80, 80))
     pad_op = vision.Pad(20, fill_value=(20, 20, 20))
     data1 = data1.map(operations=[center_crop, pad_op], input_columns=["image"], num_parallel_workers=8)
-    data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", remove_json_files)
+    # json files are needed for create iterator, remove_json_files = False
+    data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", False)
 
     num_samples = 0
     # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
@@ -206,6 +211,8 @@ def test_serdes_celeba_dataset(remove_json_files=True):
         num_samples += 1
 
     assert num_samples == 8
+    if remove_json_files:
+        delete_json_files()
 
 
 def test_serdes_csv_dataset(remove_json_files=True):
@@ -220,7 +227,8 @@ def test_serdes_csv_dataset(remove_json_files=True):
         shuffle=False)
     columns = ["col1", "col4", "col2"]
     data1 = data1.project(columns=columns)
-    data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", remove_json_files)
+    # json files are needed for create iterator, remove_json_files = False
+    data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", False)
 
     num_samples = 0
     # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
@@ -232,6 +240,8 @@ def test_serdes_csv_dataset(remove_json_files=True):
         num_samples += 1
 
     assert num_samples == 3
+    if remove_json_files:
+        delete_json_files()
 
 
 def test_serdes_voc_dataset(remove_json_files=True):
@@ -251,7 +261,8 @@ def test_serdes_voc_dataset(remove_json_files=True):
     data1 = data1.map(operations=random_color_adjust_op, input_columns=["image"])
     data1 = data1.map(operations=random_rotation_op, input_columns=["image"])
     data1 = data1.skip(2)
-    data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", remove_json_files)
+    # json files are needed for create iterator, remove_json_files = False
+    data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", False)
 
     num_samples = 0
     # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2)
@@ -265,6 +276,8 @@ def test_serdes_voc_dataset(remove_json_files=True):
     # Restore configuration num_parallel_workers
     ds.config.set_seed(original_seed)
     ds.config.set_num_parallel_workers(original_num_parallel_workers)
+    if remove_json_files:
+        delete_json_files()
 
 
 def test_serdes_zip_dataset(remove_json_files=True):
@@ -380,8 +393,8 @@ def test_serdes_pyvision(remove_json_files=True):
     try:
         util_check_serialize_deserialize_file(data1, "pyvision_dataset_pipeline", remove_json_files)
         assert False
-    except NotImplementedError as e:
-        assert "python function is not yet supported" in str(e)
+    except RuntimeError as e:
+        assert "python operation is not yet supported" in str(e)
 
 
 def test_serdes_uniform_augment(remove_json_files=True):
@@ -420,7 +433,6 @@ def skip_test_serdes_fill(remove_json_files=True):
     for data_row in data:
         np.testing.assert_array_equal(data_row[0].asnumpy(), expected)
 
-    # FIXME - need proper serdes support for Fill's fill_value parameter
     util_check_serialize_deserialize_file(data, "fill_pipeline", remove_json_files)
 
 
@@ -434,8 +446,10 @@ def test_serdes_exception():
     data1 = data1.filter(input_columns=["image", "label"], predicate=lambda data: data < 11, num_parallel_workers=4)
     data1_json = ds.serialize(data1)
     with pytest.raises(RuntimeError) as msg:
-        ds.deserialize(input_dict=data1_json)
-    assert "Filter is not yet supported by ds.engine.deserialize" in str(msg)
+        data2 = ds.deserialize(input_dict=data1_json)
+        ds.serialize(data2, "filter_dataset_fail.json")
+    assert "Filter operation is not supported" in str(msg)
+    delete_json_files()
 
 
 def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files):
@@ -456,7 +470,7 @@ def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files
     data_changed = ds.deserialize(json_filepath=file1)
     ds.serialize(data_changed, file2)
     assert validate_jsonfile(file2) is True
-    assert filecmp.cmp(file1, file2)
+    assert filecmp.cmp(file1, file2, shallow=False)
 
     # Remove the generated json file
     if remove_json_files:
diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py
index 187239895a1..a75e88e7bad 100644
--- a/tests/ut/python/dataset/test_skip.py
+++ b/tests/ut/python/dataset/test_skip.py
@@ -17,7 +17,6 @@ import pytest
 
 import mindspore.dataset as ds
 import mindspore.dataset.vision.c_transforms as vision
-from mindspore import log as logger
 
 
 DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
@@ -208,9 +207,8 @@ def test_skip_exception_1():
         for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
             num_iter += 1
 
-    except RuntimeError as e:
-        logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "skip_count should not be negative, skip_count: -1" in str(e)
+    except ValueError as e:
+        assert "Input count is not within the required interval" in str(e)
 
 
 def test_skip_exception_2():
diff --git a/tests/ut/python/dataset/test_slice_patches.py b/tests/ut/python/dataset/test_slice_patches.py
index 9a681a3be5d..159d994a812 100644
--- a/tests/ut/python/dataset/test_slice_patches.py
+++ b/tests/ut/python/dataset/test_slice_patches.py
@@ -140,6 +140,54 @@ def test_slice_patches_exception_01():
         logger.info("Got an exception in SlicePatches: {}".format(str(e)))
         assert "Input fill_value is not within" in str(e)
 
+def test_slice_patches_06():
+    image = np.random.randint(0, 255, (158, 126, 1)).astype(np.int32)
+    slice_patches_op = c_vision.SlicePatches(2, 8)
+    patches = slice_patches_op(image)
+    assert len(patches) == 16
+    assert patches[0].shape == (79, 16, 1)
+
+def test_slice_patches_07():
+    image = np.random.randint(0, 255, (158, 126)).astype(np.int32)
+    slice_patches_op = c_vision.SlicePatches(2, 8)
+    patches = slice_patches_op(image)
+    assert len(patches) == 16
+    assert patches[0].shape == (79, 16)
+
+def test_slice_patches_08():
+    np_data = np.random.randint(0, 255, (1, 56, 82, 256)).astype(np.uint8)
+    dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
+    slice_patches_op = c_vision.SlicePatches(2, 2)
+    dataset = dataset.map(input_columns=["image"], output_columns=["img0", "img1", "img2", "img3"],
+                          column_order=["img0", "img1", "img2", "img3"],
+                          operations=slice_patches_op)
+    for item in dataset.create_dict_iterator(output_numpy=True):
+        patch_shape = item['img0'].shape
+        assert patch_shape == (28, 41, 256)
+
+def test_slice_patches_09():
+    image = np.random.randint(0, 255, (56, 82, 256)).astype(np.uint8)
+    slice_patches_op = c_vision.SlicePatches(4, 3, mode.SliceMode.PAD)
+    patches = slice_patches_op(image)
+    assert len(patches) == 12
+    assert patches[0].shape == (14, 28, 256)
+
+def skip_test_slice_patches_10():
+    image = np.random.randint(0, 255, (7000, 7000, 255)).astype(np.uint8)
+    slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP)
+    patches = slice_patches_op(image)
+    assert patches[0].shape == (700, 538, 255)
+
+def skip_test_slice_patches_11():
+    np_data = np.random.randint(0, 255, (1, 7000, 7000, 256)).astype(np.uint8)
+    dataset = ds.NumpySlicesDataset(np_data, column_names=["image"])
+    slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP)
+    cols = ['img' + str(x) for x in range(10*13)]
+    dataset = dataset.map(input_columns=["image"], output_columns=cols,
+                          column_order=cols, operations=slice_patches_op)
+    for item in dataset.create_dict_iterator(output_numpy=True):
+        patch_shape = item['img0'].shape
+        assert patch_shape == (700, 538, 256)
 
 def slice_patches(image, num_h, num_w, pad_or_drop, fill_value):
     """ help function which slice patches with numpy """
@@ -174,4 +222,8 @@ if __name__ == "__main__":
     test_slice_patches_03(plot=True)
     test_slice_patches_04(plot=True)
     test_slice_patches_05(plot=True)
+    test_slice_patches_06()
+    test_slice_patches_07()
+    test_slice_patches_08()
+    test_slice_patches_09()
     test_slice_patches_exception_01()
diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py
index 3754aba0f87..96c79ef9c87 100644
--- a/tests/ut/python/dataset/test_take.py
+++ b/tests/ut/python/dataset/test_take.py
@@ -351,7 +351,7 @@ def test_take_19():
 
         data1 = data1.batch(2)
         data1 = data1.take(0)
-    assert "positive integer" in str(info.value)
+    assert "within the required interval" in str(info.value)
 
 if __name__ == '__main__':
     test_take_01()
diff --git a/tests/ut/python/dataset/test_time_stretch.py b/tests/ut/python/dataset/test_time_stretch.py
index 577c40ebdbf..52a796c7ad6 100644
--- a/tests/ut/python/dataset/test_time_stretch.py
+++ b/tests/ut/python/dataset/test_time_stretch.py
@@ -31,24 +31,27 @@ COMPLEX = 2
 def gen(shape):
     np.random.seed(0)
     data = np.random.random(shape)
-    yield (np.array(data, dtype=np.float32),)
+    yield(np.array(data, dtype=np.float32),)
 
 
-def count_unequal_element(data_expected, data_me, rtol, atol):
+def _count_unequal_element(data_expected, data_me, rtol, atol):
     assert data_expected.shape == data_me.shape
     total_count = len(data_expected.flatten())
     error = np.abs(data_expected - data_me)
     greater = np.greater(error, atol + np.abs(data_expected) * rtol)
     loss_count = np.count_nonzero(greater)
-    assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format(
-        data_expected[greater], data_me[greater], error[greater])
+    assert (loss_count / total_count) < rtol, \
+        "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \
+        format(data_expected[greater], data_me[greater], error[greater])
 
 
 def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True):
     if np.any(np.isnan(data_expected)):
         assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan)
     elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan):
-        count_unequal_element(data_expected, data_me, rtol, atol)
+        _count_unequal_element(data_expected, data_me, rtol, atol)
+    else:
+        assert True
 
 
 def test_time_stretch_pipeline():
@@ -57,14 +60,18 @@ def test_time_stretch_pipeline():
     """
     logger.info("test TimeStretch op")
     generator = gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX])
-    data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"])
+    data1 = ds.GeneratorDataset(source=generator, column_names=[
+        "multi_dimensional_data"])
 
-    transforms = [c_audio.TimeStretch(512, FREQ, 1.3)]
-    data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+    transforms = [
+        c_audio.TimeStretch(512, FREQ, 1.3)
+    ]
+    data1 = data1.map(operations=transforms, input_columns=[
+        "multi_dimensional_data"])
 
     for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
         out_put = item["multi_dimensional_data"]
-    assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX)
+    assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX)
 
 
 def test_time_stretch_pipeline_invalid_param():
@@ -73,15 +80,19 @@ def test_time_stretch_pipeline_invalid_param():
     """
     logger.info("test TimeStretch op with invalid values")
     generator = gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX])
-    data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"])
+    data1 = ds.GeneratorDataset(source=generator, column_names=[
+        "multi_dimensional_data"])
 
     with pytest.raises(ValueError, match=r"Input fixed_rate is not within the required interval of \(0, 16777216\]."):
-        transforms = [c_audio.TimeStretch(512, FREQ, -1.3)]
-        data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"])
+        transforms = [
+            c_audio.TimeStretch(512, FREQ, -1.3)
+        ]
+        data1 = data1.map(operations=transforms, input_columns=[
+            "multi_dimensional_data"])
 
         for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
             out_put = item["multi_dimensional_data"]
-        assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX)
+        assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX)
 
 
 def test_time_stretch_eager():
@@ -91,7 +102,7 @@ def test_time_stretch_eager():
     logger.info("test TimeStretch op with customized parameter values")
     spectrogram = next(gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX]))[0]
     out_put = c_audio.TimeStretch(512, FREQ, 1.3)(spectrogram)
-    assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX)
+    assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX)
 
 
 def test_percision_time_stretch_eager():
diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py
index 04087cb0f0a..beec5d21b90 100644
--- a/tests/ut/python/exec/test_train_with_lars.py
+++ b/tests/ut/python/exec/test_train_with_lars.py
@@ -20,7 +20,6 @@ from mindspore.common.parameter import ParameterTuple, Parameter
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 
 
@@ -67,10 +66,11 @@ class TrainOneStepWithLarsCell(nn.Cell):
         bias_grads = grads[self.slice_index: self.params_len]
         lars_grads = self.lars(non_bias_weights, non_bias_grads, self.weight_decay)
         new_grads = lars_grads + bias_grads
-        return F.depend(loss, self.optimizer(new_grads))
+        self.optimizer(new_grads)
+        return loss
 
 
-# fn is a funcation use i as input
+# fn is a function use i as input
 def lr_gen(fn, epoch_size):
     for i in range(epoch_size):
         yield fn(i)
diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py
index be3c5f16432..b21e85500bc 100755
--- a/tests/ut/python/ops/test_ops.py
+++ b/tests/ut/python/ops/test_ops.py
@@ -2119,6 +2119,11 @@ test_case_nn_ops = [
         'block': P.L2Loss(),
         'desc_inputs': [Tensor(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]), mstype.float16)],
         'desc_bprop': []}),
+    ('SoftMarginLoss', {
+        'block': P.SoftMarginLoss(reduction="none"),
+        'desc_inputs': [Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]).astype(np.float32)),
+                        Tensor(np.array([[-1, 1], [1, -1]]).astype(np.float32))],
+        'desc_bprop': [Tensor(np.array([[1, 1], [1, 1]]).astype(np.float32))]}),
     ('BCEWithLogitsLoss', {
         'block': P.BCEWithLogitsLoss(),
         'desc_inputs': [[3, 3], [3, 3], [3, 3], [3, 3]],
@@ -2204,6 +2209,16 @@ test_case_nn_ops = [
         'desc_inputs': [Tensor(np.array([[-4, 4, 1]]), mstype.float32)],
         'desc_bprop': [Tensor(np.array([[0, 1, 0.6666]]), mstype.float32)],
         'skip': ['backward']}),
+    ('HardShrink', {
+        'block': P.HShrink(),
+        'desc_inputs': [Tensor(np.array([[0.5, 1, 2.0], [0.0533, 0.0776, -2.1233]]), mstype.float32)],
+        'desc_bprop': [],
+        'skip': ['backward']}),
+    ('HShrinkGrad', {
+        'block': G.HShrinkGrad(),
+        'desc_inputs': [Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), mstype.float16),
+                        Tensor(np.array([[-4, -3, -2], [1, 2, 4]]), mstype.float16)],
+        'skip': ['backward']}),
 ]
 
 test_case_array_ops = [
diff --git a/tests/ut/python/optimizer/test_auto_grad.py b/tests/ut/python/optimizer/test_auto_grad.py
index 3314472176a..ca5e7a85f00 100644
--- a/tests/ut/python/optimizer/test_auto_grad.py
+++ b/tests/ut/python/optimizer/test_auto_grad.py
@@ -252,3 +252,112 @@ def test_limit_lift_fv_scope():
     grad_net = GradNet(net)
     grad_net.add_flags_recursive(defer_inline=True)
     grad_net(x, y)
+
+
+def test_same_primal_used_by_multi_j():
+    class Net(nn.Cell):
+        def __init__(self):
+            super(Net, self).__init__()
+
+        def construct(self, x):
+            return x
+
+    class GradNet(nn.Cell):
+        def __init__(self, net):
+            super(GradNet, self).__init__()
+            self.net = net
+            self.grad = ops.GradOperation()
+
+        def construct(self, x):
+            out = self.net(x)
+            gout = self.grad(self.net)(x)
+            gout1 = self.grad(self.net)(x)
+            return out, gout, gout1
+
+    x = Tensor(np.array([1.0], dtype=np.float32))
+    net = Net()
+    grad = GradNet(net)
+    grad(x)
+
+
+def test_same_primal_used_by_multi_j_with_monad1():
+    class AdamNet(nn.Cell):
+        def __init__(self, var, m, v):
+            super(AdamNet, self).__init__()
+            self.apply_adam = P.Adam()
+            self.var = Parameter(var, name="var")
+            self.m = Parameter(m, name="m")
+            self.v = Parameter(v, name="v")
+
+        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
+            self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
+            return self.var
+
+    class AdamGradNet(nn.Cell):
+        def __init__(self, network):
+            super(AdamGradNet, self).__init__()
+            self.grad_fn = ops.GradOperation(sens_param=True)
+            self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))]
+            self.network = network
+
+        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
+            out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
+            gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0])
+            gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1])
+            return out, gout1, gout2
+
+    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    beta1_power = Tensor(np.array([0.9], dtype=np.float32))
+    beta2_power = Tensor(np.array([0.999], dtype=np.float32))
+    lr = Tensor(np.array([0.001], dtype=np.float32))
+    beta1 = Tensor(np.array([0.9], dtype=np.float32))
+    beta2 = Tensor(np.array([0.999], dtype=np.float32))
+    epsilon = Tensor(np.array([1e-8], dtype=np.float32))
+    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
+    net = AdamNet(var, m, v)
+    grad_net = AdamGradNet(net)
+    grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
+
+
+def test_same_primal_used_by_multi_j_with_monad2():
+    class AdamNet(nn.Cell):
+        def __init__(self, var, m, v):
+            super(AdamNet, self).__init__()
+            self.apply_adam = P.Adam()
+            self.var = Parameter(var, name="var")
+            self.m = Parameter(m, name="m")
+            self.v = Parameter(v, name="v")
+
+        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
+            self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
+            return self.var
+
+    class AdamGradNet(nn.Cell):
+        def __init__(self, network):
+            super(AdamGradNet, self).__init__()
+            self.grad = ops.GradOperation(sens_param=True)
+            self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))]
+            self.network = network
+
+        def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad):
+            out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
+            grad_fn = self.grad(self.network)
+            gout1 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0])
+            gout2 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1])
+            return out, gout1, gout2
+
+    var = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    m = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    v = Tensor(np.ones([3, 3, 3]).astype(np.float32))
+    beta1_power = Tensor(np.array([0.9], dtype=np.float32))
+    beta2_power = Tensor(np.array([0.999], dtype=np.float32))
+    lr = Tensor(np.array([0.001], dtype=np.float32))
+    beta1 = Tensor(np.array([0.9], dtype=np.float32))
+    beta2 = Tensor(np.array([0.999], dtype=np.float32))
+    epsilon = Tensor(np.array([1e-8], dtype=np.float32))
+    grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32))
+    net = AdamNet(var, m, v)
+    grad_net = AdamGradNet(net)
+    grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad)
diff --git a/tests/ut/python/parallel/test_conv2d.py b/tests/ut/python/parallel/test_conv2d.py
index 4309b707513..1ef971a0587 100644
--- a/tests/ut/python/parallel/test_conv2d.py
+++ b/tests/ut/python/parallel/test_conv2d.py
@@ -39,6 +39,8 @@ class Net(Cell):
 
 _x = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
 _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32)
+_w2 = Tensor(np.ones([8, 16, 3, 3]), dtype=ms.float32)
+_w3 = Tensor(np.ones([8, 16, 5, 5]), dtype=ms.float32)
 _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
 
 
@@ -75,6 +77,31 @@ def test_conv2d_model_parallel2():
     compile_net(net)
 
 
+def test_conv2d_model_parallel3():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
+    strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1))
+    strategy2 = ((2, 1, 1, 4),)
+    net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2)
+    compile_net(net)
+
+
+def test_conv2d_model_parallel4():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=32, global_rank=0)
+    strategy1 = ((2, 2, 1, 4), (2, 2, 1, 1))
+    strategy2 = ((2, 2, 1, 4),)
+    net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2)
+    compile_net(net)
+
+
+def test_conv2d_left_and_right_no_need_to_send():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
+    strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1))
+    strategy2 = ((2, 1, 1, 4),)
+    net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=2, strategy1=strategy1, strategy2=strategy2)
+    with pytest.raises(RuntimeError):
+        compile_net(net)
+
+
 def test_conv2d_output_can_not_divisible_by_strategy():
     context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
     strategy1 = ((1, 1, 1, 8), (1, 1, 1, 1))
diff --git a/tests/ut/python/parallel/test_conv2d_transpose.py b/tests/ut/python/parallel/test_conv2d_transpose.py
index e5cc5d12027..46b65a2ea86 100644
--- a/tests/ut/python/parallel/test_conv2d_transpose.py
+++ b/tests/ut/python/parallel/test_conv2d_transpose.py
@@ -36,8 +36,24 @@ class Net(Cell):
         return out
 
 
+class Net2(Cell):
+    def __init__(self, conv2d_weight, out_channel, kernel_size, pad_mode, stride,
+                 strategy1=None, strategy2=None):
+        super().__init__()
+        self.conv2d_transpose = P.Conv2DTranspose(out_channel=out_channel, kernel_size=kernel_size,
+                                                  pad_mode=pad_mode, stride=stride).shard(strategy1)
+        self.neg = P.Neg().shard(strategy2)
+        self.weight = Parameter(conv2d_weight, "w1")
+
+    def construct(self, x, b):
+        out = self.conv2d_transpose(x, self.weight, (32, 16, 16, 16))
+        out = self.neg(out)
+        return out
+
+
 _x = Tensor(np.ones([32, 8, 8, 8]), dtype=ms.float32)
 _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32)
+_w2 = Tensor(np.ones([8, 16, 4, 4]), dtype=ms.float32)
 _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32)
 
 
@@ -64,3 +80,21 @@ def test_conv2d_transpose_model_parallel1():
     strategy2 = ((8, 1, 1, 1),)
     net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2)
     compile_net(net)
+
+
+def test_conv2d_transpose_model_parallel2():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0)
+    strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1))
+    strategy2 = ((2, 1, 1, 4),)
+    net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2,
+               strategy1=strategy1, strategy2=strategy2)
+    compile_net(net)
+
+
+def test_conv2d_transpose_model_parallel3():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = ((2, 2, 1, 4), (2, 1, 1, 1))
+    strategy2 = ((2, 2, 1, 4),)
+    net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2,
+               strategy1=strategy1, strategy2=strategy2)
+    compile_net(net)
diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py
index fbe8a7b0480..a662ff81567 100644
--- a/tests/ut/python/parallel/test_dataset_interface.py
+++ b/tests/ut/python/parallel/test_dataset_interface.py
@@ -21,7 +21,7 @@ from mindspore import context
 from mindspore.common.parameter import Parameter, ParameterTuple
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
-from mindspore.ops import composite as C, functional as F, operations as P
+from mindspore.ops import composite as C, operations as P
 from mindspore.train import Model
 from mindspore.context import ParallelMode
 from mindspore.train.loss_scale_manager import DynamicLossScaleManager
@@ -114,7 +114,8 @@ class TrainOneStepCell(nn.Cell):
         weights = self.weights
         loss = self.network(data)
         grads = self.grad(self.network, weights)(data, sens)
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def loss_scale_manager_sens(strategy1, sens):
diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py
index ab6a2a6283b..d307fb7a57e 100644
--- a/tests/ut/python/parallel/test_gather_v2_primitive.py
+++ b/tests/ut/python/parallel/test_gather_v2_primitive.py
@@ -25,7 +25,6 @@ from mindspore.nn import Dense, Cell
 from mindspore.nn.loss.loss import LossBase
 from mindspore.nn.optim import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.train import Model
 from mindspore.context import ParallelMode
@@ -121,7 +120,8 @@ class TrainOneStepCell(Cell):
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(data, sens)
 
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def net_trains(criterion, rank):
diff --git a/tests/ut/python/parallel/test_gatherd.py b/tests/ut/python/parallel/test_gatherd.py
index 2ee2a9c7964..abdcdd69391 100644
--- a/tests/ut/python/parallel/test_gatherd.py
+++ b/tests/ut/python/parallel/test_gatherd.py
@@ -65,6 +65,14 @@ def test_gathernd_dim2():
     compile_net(net)
 
 
+def test_gathernd_dim2_default_batch_parallel():
+    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0)
+    strategy1 = None
+    strategy2 = ((2, 8, 1),)
+    net = Net(2, _w1, strategy1, strategy2)
+    compile_net(net)
+
+
 def test_gathernd_auto_parallel():
     context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0)
     net = Net(1, _w1)
diff --git a/tests/ut/python/parallel/test_loss_scale.py b/tests/ut/python/parallel/test_loss_scale.py
index c707e1bedf4..ebf10b68141 100644
--- a/tests/ut/python/parallel/test_loss_scale.py
+++ b/tests/ut/python/parallel/test_loss_scale.py
@@ -105,12 +105,9 @@ class TrainOneStepWithLossScaleCell(nn.Cell):
         overflow = cond
         if sens is None:
             overflow = self.loss_scaling_manager(self.loss_scale, cond)
-        if overflow:
-            succ = False
-        else:
-            succ = self.optimizer(grads)
-        ret = (loss, cond, scaling_sens)
-        return F.depend(ret, succ)
+        if not overflow:
+            self.optimizer(grads)
+        return (loss, cond, scaling_sens)
 
 
 class DatasetLenet(MindData):
diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py
index 5db1eb409e2..9f1b81b057b 100644
--- a/tests/ut/python/parallel/test_reshape.py
+++ b/tests/ut/python/parallel/test_reshape.py
@@ -24,7 +24,6 @@ from mindspore.common.parameter import ParameterTuple
 from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits
 from mindspore.nn.optim.momentum import Momentum
 from mindspore.ops import composite as C
-from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell
 from mindspore.parallel import set_algo_parameters
@@ -419,7 +418,8 @@ class TrainOneStepCell(nn.Cell):
         sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens)
         grads = self.grad(self.network, weights)(data, sens)
 
-        return F.depend(loss, self.optimizer(grads))
+        self.optimizer(grads)
+        return loss
 
 
 def reshape_common2(parallel_mode, net):
diff --git a/third_party/patch/icu4c/icu4c.patch01 b/third_party/patch/icu4c/icu4c.patch01
index 4b002c024ae..19378ec36cc 100644
--- a/third_party/patch/icu4c/icu4c.patch01
+++ b/third_party/patch/icu4c/icu4c.patch01
@@ -5,8 +5,8 @@
          THE_OS="Linux"
          THE_COMP="the clang or else GNU C++"
 -        RELEASE_CFLAGS='-O3'
-+        RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now'
++        RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s'
 -        RELEASE_CXXFLAGS='-O3'
-+        RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now'
++        RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s'
          DEBUG_CFLAGS='-g'
          DEBUG_CXXFLAGS='-g'
diff --git a/third_party/patch/sqlite/sqlite.patch001 b/third_party/patch/sqlite/sqlite.patch001
index d40825a1488..bd3210dbaf7 100644
--- a/third_party/patch/sqlite/sqlite.patch001
+++ b/third_party/patch/sqlite/sqlite.patch001
@@ -1,6 +1,6 @@
-diff -Npur sqlite-version-3.32.2/src/expr.c sqlite-version-3.32.2-patched/src/expr.c
---- sqlite-version-3.32.2/src/expr.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/expr.c	2021-04-29 04:06:04.544208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/expr.c sqlite-version-3.32.2/src/expr.c
+--- sqlite-version-3.32.2-new/src/expr.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/expr.c	2021-08-04 11:57:45.029230992 -0400
 @@ -3813,6 +3813,7 @@ expr_code_doover:
        AggInfo *pAggInfo = pExpr->pAggInfo;
        struct AggInfo_col *pCol;
@@ -32,9 +32,9 @@ diff -Npur sqlite-version-3.32.2/src/expr.c sqlite-version-3.32.2-patched/src/ex
      int i;
      struct SrcCount *p = pWalker->u.pSrcCount;
      SrcList *pSrc = p->pSrc;
-diff -Npur sqlite-version-3.32.2/src/global.c sqlite-version-3.32.2-patched/src/global.c
---- sqlite-version-3.32.2/src/global.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/global.c	2021-04-29 04:06:04.544208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/global.c sqlite-version-3.32.2/src/global.c
+--- sqlite-version-3.32.2-new/src/global.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/global.c	2021-08-04 11:57:45.033230992 -0400
 @@ -300,6 +300,11 @@ sqlite3_uint64 sqlite3NProfileCnt = 0;
  int sqlite3PendingByte = 0x40000000;
  #endif
@@ -47,9 +47,9 @@ diff -Npur sqlite-version-3.32.2/src/global.c sqlite-version-3.32.2-patched/src/
  #include "opcodes.h"
  /*
  ** Properties of opcodes.  The OPFLG_INITIALIZER macro is
-diff -Npur sqlite-version-3.32.2/src/resolve.c sqlite-version-3.32.2-patched/src/resolve.c
---- sqlite-version-3.32.2/src/resolve.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/resolve.c	2021-04-29 04:06:04.545208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/resolve.c sqlite-version-3.32.2/src/resolve.c
+--- sqlite-version-3.32.2-new/src/resolve.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/resolve.c	2021-08-04 11:57:45.033230992 -0400
 @@ -1715,6 +1715,14 @@ static int resolveSelectStep(Walker *pWa
            return WRC_Abort;
          }
@@ -65,9 +65,9 @@ diff -Npur sqlite-version-3.32.2/src/resolve.c sqlite-version-3.32.2-patched/src
      }
  #endif
  
-diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/select.c
---- sqlite-version-3.32.2/src/select.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/select.c	2021-04-29 04:07:21.458212191 -0400
+diff -Npur sqlite-version-3.32.2-new/src/select.c sqlite-version-3.32.2/src/select.c
+--- sqlite-version-3.32.2-new/src/select.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/select.c	2021-08-04 12:27:34.737267443 -0400
 @@ -15,20 +15,6 @@
  #include "sqliteInt.h"
  
@@ -89,7 +89,27 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
  ** An instance of the following object is used to record information about
  ** how to process the DISTINCT keyword, to simplify passing that information
  ** into the selectInnerLoop() routine.
-@@ -4426,11 +4412,14 @@ static int pushDownWhereTerms(
+@@ -2717,9 +2703,7 @@ static int multiSelect(
+                           selectOpName(p->op)));
+         rc = sqlite3Select(pParse, p, &uniondest);
+         testcase( rc!=SQLITE_OK );
+-        /* Query flattening in sqlite3Select() might refill p->pOrderBy.
+-        ** Be sure to delete p->pOrderBy, therefore, to avoid a memory leak. */
+-        sqlite3ExprListDelete(db, p->pOrderBy);
++        assert( p->pOrderBy==0 );
+         pDelete = p->pPrior;
+         p->pPrior = pPrior;
+         p->pOrderBy = 0;
+@@ -4105,7 +4089,7 @@ static int flattenSubquery(
+     ** We look at every expression in the outer query and every place we see
+     ** "a" we substitute "x*3" and every place we see "b" we substitute "y+10".
+     */
+-    if( pSub->pOrderBy ){
++    if( pSub->pOrderBy && (pParent->selFlags & SF_NoopOrderBy)==0 ){
+       /* At this point, any non-zero iOrderByCol values indicate that the
+       ** ORDER BY column expression is identical to the iOrderByCol'th
+       ** expression returned by SELECT statement pSub. Since these values
+@@ -4426,11 +4410,14 @@ static int pushDownWhereTerms(
  ){
    Expr *pNew;
    int nChng = 0;
@@ -105,7 +125,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
  #endif
  
  #ifdef SQLITE_DEBUG
-@@ -5553,7 +5542,9 @@ static void explainSimpleCount(
+@@ -5553,7 +5540,9 @@ static void explainSimpleCount(
  static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){
    if( pExpr->op!=TK_AND ){
      Select *pS = pWalker->u.pSelect;
@@ -116,7 +136,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
        sqlite3 *db = pWalker->pParse->db;
        Expr *pNew = sqlite3Expr(db, TK_INTEGER, "1");
        if( pNew ){
-@@ -5766,6 +5757,9 @@ int sqlite3Select(
+@@ -5766,6 +5755,9 @@ int sqlite3Select(
    }
    if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1;
    memset(&sAggInfo, 0, sizeof(sAggInfo));
@@ -126,7 +146,15 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
  #if SELECTTRACE_ENABLED
    SELECTTRACE(1,pParse,p, ("begin processing:\n", pParse->addrExplain));
    if( sqlite3SelectTrace & 0x100 ){
-@@ -5804,19 +5798,6 @@ int sqlite3Select(
+@@ -5787,6 +5779,7 @@ int sqlite3Select(
+     sqlite3ExprListDelete(db, p->pOrderBy);
+     p->pOrderBy = 0;
+     p->selFlags &= ~SF_Distinct;
++    p->selFlags |= SF_NoopOrderBy;
+   }
+   sqlite3SelectPrep(pParse, p, 0);
+   if( pParse->nErr || db->mallocFailed ){
+@@ -5804,19 +5797,6 @@ int sqlite3Select(
      generateColumnNames(pParse, p);
    }
  
@@ -146,7 +174,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
    pTabList = p->pSrc;
    isAgg = (p->selFlags & SF_Aggregate)!=0;
    memset(&sSort, 0, sizeof(sSort));
-@@ -6144,7 +6125,7 @@ int sqlite3Select(
+@@ -6144,7 +6124,7 @@ int sqlite3Select(
    if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct 
     && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0
  #ifndef SQLITE_OMIT_WINDOWFUNC
@@ -155,7 +183,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
  #endif
    ){
      p->selFlags &= ~SF_Distinct;
-@@ -6791,6 +6772,14 @@ int sqlite3Select(
+@@ -6791,6 +6771,14 @@ int sqlite3Select(
  select_end:
    sqlite3ExprListDelete(db, pMinMaxOrderBy);
    sqlite3DbFree(db, sAggInfo.aCol);
@@ -170,9 +198,9 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/
    sqlite3DbFree(db, sAggInfo.aFunc);
  #if SELECTTRACE_ENABLED
    SELECTTRACE(0x1,pParse,p,("end processing\n"));
-diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/src/sqliteInt.h
---- sqlite-version-3.32.2/src/sqliteInt.h	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/sqliteInt.h	2021-04-29 04:06:04.547208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/sqliteInt.h sqlite-version-3.32.2/src/sqliteInt.h
+--- sqlite-version-3.32.2-new/src/sqliteInt.h	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/sqliteInt.h	2021-08-04 12:28:22.825268422 -0400
 @@ -976,7 +976,12 @@ typedef INT16_TYPE LogEst;
  */
  #if defined(SQLITE_ENABLE_SELECTTRACE)
@@ -211,7 +239,15 @@ diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/s
  ** The datatype ynVar is a signed integer, either 16-bit or 32-bit.
  ** Usually it is 16-bits.  But if SQLITE_MAX_VARIABLE_NUMBER is greater
  ** than 32767 we have to make it 32-bit.  16-bit is preferred because
-@@ -4546,10 +4566,11 @@ extern const unsigned char sqlite3UpperT
+@@ -3105,6 +3125,7 @@ struct Select {
+ #define SF_WhereBegin    0x0080000 /* Really a WhereBegin() call.  Debug Only */
+ #define SF_WinRewrite    0x0100000 /* Window function rewrite accomplished */
+ #define SF_View          0x0200000 /* SELECT statement is a view */
++#define SF_NoopOrderBy   0x0400000 /* ORDER BY is ignored for this query */
+ 
+ /*
+ ** The results of a SELECT can be distributed in several ways, as defined
+@@ -4546,10 +4567,11 @@ extern const unsigned char sqlite3UpperT
  extern const unsigned char sqlite3CtypeMap[];
  extern SQLITE_WSD struct Sqlite3Config sqlite3Config;
  extern FuncDefHash sqlite3BuiltinFunctions;
@@ -224,9 +260,9 @@ diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/s
  #ifdef VDBE_PROFILE
  extern sqlite3_uint64 sqlite3NProfileCnt;
  #endif
-diff -Npur sqlite-version-3.32.2/src/test1.c sqlite-version-3.32.2-patched/src/test1.c
---- sqlite-version-3.32.2/src/test1.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/test1.c	2021-04-29 04:06:04.548208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/test1.c sqlite-version-3.32.2/src/test1.c
+--- sqlite-version-3.32.2-new/src/test1.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/test1.c	2021-08-04 11:57:45.037230992 -0400
 @@ -8164,7 +8164,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp)
  #endif
  #endif
@@ -236,9 +272,9 @@ diff -Npur sqlite-version-3.32.2/src/test1.c sqlite-version-3.32.2-patched/src/t
  #endif
  
    for(i=0; i<sizeof(aCmd)/sizeof(aCmd[0]); i++){
-diff -Npur sqlite-version-3.32.2/src/window.c sqlite-version-3.32.2-patched/src/window.c
---- sqlite-version-3.32.2/src/window.c	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/src/window.c	2021-04-29 04:06:04.548208700 -0400
+diff -Npur sqlite-version-3.32.2-new/src/window.c sqlite-version-3.32.2/src/window.c
+--- sqlite-version-3.32.2-new/src/window.c	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/src/window.c	2021-08-04 11:57:45.041230992 -0400
 @@ -942,7 +942,7 @@ static int sqlite3WindowExtraAggFuncDept
  */
  int sqlite3WindowRewrite(Parse *pParse, Select *p){
@@ -248,13 +284,13 @@ diff -Npur sqlite-version-3.32.2/src/window.c sqlite-version-3.32.2-patched/src/
      Vdbe *v = sqlite3GetVdbe(pParse);
      sqlite3 *db = pParse->db;
      Select *pSub = 0;             /* The subquery */
-diff -Npur sqlite-version-3.32.2/test/having.test sqlite-version-3.32.2-patched/test/having.test
---- sqlite-version-3.32.2/test/having.test	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/test/having.test	2021-04-29 04:08:11.785214475 -0400
+diff -Npur sqlite-version-3.32.2-new/test/having.test sqlite-version-3.32.2/test/having.test
+--- sqlite-version-3.32.2-new/test/having.test	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/test/having.test	2021-08-04 11:57:45.041230992 -0400
 @@ -154,5 +154,24 @@ do_execsql_test 4.3 {
    SELECT a, sum(b) FROM t3 WHERE nondeter(a) GROUP BY a
  } {1 4 2 2}
-
+ 
 +#-------------------------------------------------------------------------
 +reset_db
 +do_execsql_test 5.0 {
@@ -274,11 +310,41 @@ diff -Npur sqlite-version-3.32.2/test/having.test sqlite-version-3.32.2-patched/
 +    SELECT x FROM t2 WHERE a=2 GROUP BY y HAVING 0
 +  ) FROM t1;
 +} {b {}}
-
+ 
  finish_test
-diff -Npur sqlite-version-3.32.2/test/window1.test sqlite-version-3.32.2-patched/test/window1.test
---- sqlite-version-3.32.2/test/window1.test	2020-06-04 08:58:43.000000000 -0400
-+++ sqlite-version-3.32.2-patched/test/window1.test	2021-04-29 04:06:04.549208700 -0400
+diff -Npur sqlite-version-3.32.2-new/test/selectA.test sqlite-version-3.32.2/test/selectA.test
+--- sqlite-version-3.32.2-new/test/selectA.test	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/test/selectA.test	2021-08-04 12:29:43.021270055 -0400
+@@ -1446,5 +1446,26 @@ do_execsql_test 6.1 {
+   SELECT * FROM (SELECT a FROM t1 UNION SELECT b FROM t2) WHERE a=a;
+ } {12345}
+ 
++# 2020-06-15 ticket 8f157e8010b22af0
++#
++reset_db
++do_execsql_test 7.1 {
++  CREATE TABLE t1(c1);     INSERT INTO t1 VALUES(12),(123),(1234),(NULL),('abc');
++  CREATE TABLE t2(c2);     INSERT INTO t2 VALUES(44),(55),(123);
++  CREATE TABLE t3(c3,c4);  INSERT INTO t3 VALUES(66,1),(123,2),(77,3);
++  CREATE VIEW t4 AS SELECT c3 FROM t3;
++  CREATE VIEW t5 AS SELECT c3 FROM t3 ORDER BY c4;
++}
++do_execsql_test 7.2 {
++  SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t4) AND c1=123;
++} {123 123}
++do_execsql_test 7.3 {
++  SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t5) AND c1=123;
++} {123 123}
++do_execsql_test 7.4 {
++  CREATE TABLE a(b);
++  CREATE VIEW c(d) AS SELECT b FROM a ORDER BY b;
++  SELECT sum(d) OVER( PARTITION BY(SELECT 0 FROM c JOIN a WHERE b =(SELECT b INTERSECT SELECT d FROM c) AND b = 123)) FROM c;
++} {}
+ 
+ finish_test
+diff -Npur sqlite-version-3.32.2-new/test/window1.test sqlite-version-3.32.2/test/window1.test
+--- sqlite-version-3.32.2-new/test/window1.test	2020-06-04 08:58:43.000000000 -0400
++++ sqlite-version-3.32.2/test/window1.test	2021-08-04 11:57:45.041230992 -0400
 @@ -1743,5 +1743,47 @@ do_execsql_test 53.0 {
                 WHERE a.c);
  } {4 4 4 4}
diff --git a/version.txt b/version.txt
index 589268e6fed..e21e727f96f 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-1.3.0
\ No newline at end of file
+1.4.0
\ No newline at end of file