From b077aa1cabbe778c5868eb7e5df59d37d6b574d3 Mon Sep 17 00:00:00 2001 From: djc <1462492739@qq.com> Date: Sun, 22 Aug 2021 16:26:45 +0800 Subject: [PATCH] [feat] [assistant] [I3T96T] add new Dataset operator CMUARCTICDataset --- CMakeLists.txt | 3 + build.sh | 12 +- cmake/external_libs/flatbuffers.cmake | 6 +- cmake/mind_expression.cmake | 1 - cmake/options.cmake | 1 + cmake/package.cmake | 13 +- cmake/package_script.cmake | 55 ++ cmake/package_tar.cmake | 12 - cmake/package_win.cmake | 13 - docker/OWNERS | 4 +- docker/mindspore-cpu/devel/Dockerfile | 3 + docker/mindspore-cpu/runtime/Dockerfile | 5 +- docker/mindspore-gpu/devel/Dockerfile | 7 +- docker/mindspore-gpu/runtime/Dockerfile | 7 +- include/api/context.h | 134 ++- include/api/model.h | 47 +- include/api/serialization.h | 32 +- include/api/types.h | 90 +- mindspore/_checkparam.py | 5 + .../graph_kernel/parallel_estimate.py | 4 +- mindspore/_extends/graph_kernel/splitter.py | 2 +- .../akg_compiler/akg_process.py | 5 - mindspore/_extends/parse/parser.py | 7 +- mindspore/_extends/parse/standard_method.py | 12 +- mindspore/ccsrc/CMakeLists.txt | 2 +- .../backend/kernel_compiler/CMakeLists.txt | 1 + .../kernel_compiler/akg/akg_kernel_build.cc | 417 ++++++++- .../kernel_compiler/akg/akg_kernel_build.h | 75 ++ .../akg/ascend/akg_ascend_kernel_build.cc | 2 + .../akg/ascend/akg_ascend_kernel_mod.cc | 13 +- .../akg/gpu/akg_gpu_kernel_build.cc | 2 + .../akg/gpu/akg_gpu_kernel_mod.cc | 8 +- .../akg/gpu/akg_gpu_kernel_mod.h | 1 + .../backend/kernel_compiler/common_utils.cc | 32 +- .../backend/kernel_compiler/common_utils.h | 3 +- .../cpu/adam_weight_decay_cpu_kernel.cc | 85 +- .../cpu/adam_weight_decay_cpu_kernel.h | 2 +- .../cpu/apply_adagrad_cpu_kernel.cc | 25 +- .../cpu/arithmetic_logic_cpu_kernel.cc | 32 +- .../backend/kernel_compiler/cpu/cpu_kernel.h | 2 +- .../kernel_compiler/cpu/cpu_kernel_factory.cc | 6 + .../cpu/dropout_grad_kernel.cc | 11 +- .../kernel_compiler/cpu/dropout_grad_kernel.h | 2 +- 
.../cpu/eltwise_grad_cpu_kernel.cc | 97 ++- .../cpu/eltwise_grad_cpu_kernel.h | 7 +- .../cpu/mkldnn/eltwise_cpu_kernel.cc | 52 +- .../cpu/mkldnn/eltwise_cpu_kernel.h | 4 +- .../kernel_compiler/cpu/nnacl/CMakeLists.txt | 18 + .../cpu/nnacl/assembly/opt/MatmulDpInt8.S | 3 +- .../cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S | 5 +- .../cpu/nnacl/base/batch_to_space_base.c | 39 +- .../cpu/nnacl/base/broadcast_to.c | 2 +- .../cpu/nnacl/base/concat_base.c | 9 +- .../cpu/nnacl/base/depth_to_space_base.c | 16 +- .../nnacl/base/minimal_filtering_generator.c | 4 +- .../cpu/nnacl/base/slice_base.c | 64 +- .../cpu/nnacl/base/split_base.c | 4 - .../cpu/nnacl/base/tile_base.c | 10 +- .../cpu/nnacl/base/tile_base.h | 14 +- .../cpu/nnacl/base/transpose_base.c | 4 +- .../kernel_compiler/cpu/nnacl/common_func.c | 10 +- .../kernel_compiler/cpu/nnacl/common_func.h | 7 +- .../kernel_compiler/cpu/nnacl/fp16/pad_fp16.c | 14 +- .../cpu/nnacl/fp32/adam_fp32.c | 20 +- .../cpu/nnacl/fp32/adam_fp32.h | 8 +- .../cpu/nnacl/fp32/arg_min_max_fp32.c | 50 +- .../cpu/nnacl/fp32/common_func_fp32.c | 4 +- .../cpu/nnacl/fp32/conv_depthwise_fp32.c | 7 +- .../cpu/nnacl/fp32/deconv_fp32.c | 2 +- .../cpu/nnacl/fp32/embedding_lookup_fp32.c | 4 +- .../cpu/nnacl/fp32/gatherNd_fp32.c | 2 +- .../cpu/nnacl/fp32/lstm_fp32.c | 2 +- .../cpu/nnacl/fp32/matmul_fp32.c | 85 +- .../kernel_compiler/cpu/nnacl/fp32/pad_fp32.c | 17 +- .../cpu/nnacl/fp32/resize_fp32.c | 4 +- .../cpu/nnacl/fp32/reverse_fp32.c | 4 +- .../cpu/nnacl/fp32/scatter_nd_fp32.c | 2 +- .../cpu/nnacl/fp32/splice_fp32.c | 2 +- .../cpu/nnacl/fp32/strided_slice_fp32.c | 4 +- .../cpu/nnacl/fp32/transpose_fp32.c | 8 +- .../cpu/nnacl/fp32/winograd_transform.c | 4 +- .../cpu/nnacl/fp32_grad/activation_grad.c | 27 +- .../cpu/nnacl/fp32_grad/activation_grad.h | 3 +- .../cpu/nnacl/fp32_grad/gemm.c | 15 +- .../cpu/nnacl/fp32_grad/reduce_grad.c | 4 +- .../cpu/nnacl/fp32_grad/softmax_grad.c | 4 +- .../cpu/nnacl/fp32_grad/strided_slice_grad.c | 4 +- 
.../cpu/nnacl/infer/addn_infer.c | 6 +- .../cpu/nnacl/infer/affine_infer.c | 4 +- .../cpu/nnacl/infer/argmin_max_infer.c | 4 +- .../cpu/nnacl/infer/arithmetic_grad_infer.c | 10 +- .../cpu/nnacl/infer/audio_spectrogram_infer.c | 2 +- .../cpu/nnacl/infer/bias_grad_infer.c | 4 +- .../cpu/nnacl/infer/broadcast_to_infer.c | 4 +- .../cpu/nnacl/infer/common_infer.c | 113 +-- .../cpu/nnacl/infer/common_infer.h | 11 +- .../cpu/nnacl/infer/concat_infer.c | 9 +- .../cpu/nnacl/infer/constant_of_shape_infer.c | 2 +- .../nnacl/infer/conv2d_grad_filter_infer.c | 5 +- .../cpu/nnacl/infer/conv2d_grad_input_infer.c | 8 +- .../cpu/nnacl/infer/conv2d_infer.c | 2 + .../cpu/nnacl/infer/deconv2d_infer.c | 6 +- .../nnacl/infer/dedepthwise_conv2d_infer.c | 2 + .../cpu/nnacl/infer/depthwise_conv2d_infer.c | 2 + .../cpu/nnacl/infer/embedding_lookup_infer.c | 3 + .../cpu/nnacl/infer/expand_dims_infer.c | 5 +- .../cpu/nnacl/infer/fill_infer.c | 4 +- .../cpu/nnacl/infer/full_connection_infer.c | 4 +- .../cpu/nnacl/infer/gather_infer.c | 10 +- .../cpu/nnacl/infer/gather_nd_infer.c | 4 +- .../infer/group_conv2d_grad_input_infer.c | 8 +- .../cpu/nnacl/infer/infer_register.c | 26 +- .../cpu/nnacl/infer/infer_register.h | 3 +- .../cpu/nnacl/infer/layer_norm_grad_infer.c | 5 +- .../cpu/nnacl/infer/layer_norm_infer.c | 6 +- .../cpu/nnacl/infer/lin_space_infer.c | 3 + .../cpu/nnacl/infer/matmul_infer.c | 5 +- .../cpu/nnacl/infer/max_min_grad_infer.c | 10 +- .../cpu/nnacl/infer/mean_infer.c | 5 +- .../cpu/nnacl/infer/pad_infer.c | 4 +- .../cpu/nnacl/infer/prior_box_infer.c | 11 +- .../cpu/nnacl/infer/range_infer.c | 4 +- .../cpu/nnacl/infer/reduce_infer.c | 9 +- .../cpu/nnacl/infer/reshape_infer.c | 56 +- .../cpu/nnacl/infer/resize_infer.c | 5 +- .../cpu/nnacl/infer/scatter_nd_infer.h | 1 - .../cpu/nnacl/infer/select_infer.c | 4 + .../cpu/nnacl/infer/slice_infer.c | 55 +- .../cpu/nnacl/infer/space_to_batch_infer.c | 2 +- .../cpu/nnacl/infer/space_to_batch_nd_infer.c | 2 +- 
.../cpu/nnacl/infer/split_infer.c | 11 +- .../cpu/nnacl/infer/squeeze_infer.c | 2 +- .../cpu/nnacl/infer/stack_infer.c | 4 +- .../nnacl/infer/strided_slice_grad_infer.c | 20 +- .../cpu/nnacl/infer/strided_slice_infer.c | 52 +- .../cpu/nnacl/infer/tile_infer.c | 20 +- .../cpu/nnacl/infer/transpose_infer.c | 8 +- .../cpu/nnacl/infer/uniform_real_infer.c | 2 +- .../nnacl/infer/unsorted_segment_sum_infer.c | 2 +- .../cpu/nnacl/infer/unsqueeze_infer.c | 11 +- .../cpu/nnacl/infer/unstack_infer.c | 6 +- .../cpu/nnacl/infer/where_infer.c | 11 +- .../cpu/nnacl/int8/arg_min_max_int8.c | 52 +- .../cpu/nnacl/int8/arithmetic_self_int8.c | 2 +- .../cpu/nnacl/int8/conv1x1_int8.c | 5 +- .../cpu/nnacl/int8/conv1x1_int8.h | 5 +- .../cpu/nnacl/int8/conv3x3_int8.c | 6 +- .../cpu/nnacl/int8/deconv_int8.c | 10 +- .../cpu/nnacl/int8/fixed_point.c | 4 +- .../cpu/nnacl/int8/hswish_int8.c | 2 +- .../cpu/nnacl/int8/matmul_int8.c | 22 +- .../cpu/nnacl/int8/matmul_int8.h | 12 +- .../kernel_compiler/cpu/nnacl/int8/mul_int8.c | 14 +- .../kernel_compiler/cpu/nnacl/int8/mul_int8.h | 7 +- .../cpu/nnacl/int8/pack_int8.c | 3 +- .../cpu/nnacl/int8/pack_int8.h | 2 +- .../kernel_compiler/cpu/nnacl/int8/pad_int8.c | 6 +- .../cpu/nnacl/int8/quant_dtype_cast_int8.c | 2 +- .../cpu/nnacl/int8/resize_int8.c | 8 +- .../cpu/nnacl/int8/scale_int8.c | 4 +- .../kernel_compiler/cpu/nnacl/int8/sub_int8.c | 8 +- .../cpu/nnacl/int8/transpose_int8.c | 8 +- .../cpu/nnacl/int8/unsqueeze_int8.c | 2 +- .../cpu/nnacl/matmul_parameter.h | 13 +- .../kernel_compiler/cpu/nnacl/op_base.h | 2 + .../kernel_compiler/cpu/nnacl/pad_parameter.h | 8 +- .../kernel_compiler/cpu/ps/pserver_kernel.cc | 5 + .../cpu/pyfunc/py_func_cpu_kernel.cc | 3 +- .../cpu/searchsorted_cpu_kernel.cc | 14 +- .../cpu/searchsorted_cpu_kernel.h | 4 +- .../kernel_compiler/cpu/sgd_cpu_kernel.cc | 18 +- .../kernel_compiler/cpu/sgd_cpu_kernel.h | 4 +- .../kernel_compiler/cpu/sort_cpu_kernel.cc | 6 +- .../kernel_compiler/cpu/split_cpu_kernel.cc | 3 +- 
.../cpu/transpose_cpu_kernel.cc | 14 +- .../kernel_compiler/cpu/unpack_cpu_kernel.cc | 12 +- .../gpu/arrays/array_reduce_gpu_kernel.h | 2 +- .../gpu/arrays/cast_gpu_kernel.h | 16 +- .../gpu/arrays/concatv2_gpu_kernel.h | 22 +- .../gpu/arrays/dynamic_range_gpu_kernel.h | 2 +- .../gpu/arrays/dynamic_shape_gpu_kernel.h | 4 +- .../gpu/arrays/one_hot_gpu_kernel.h | 6 +- .../gpu/arrays/slice_gpu_kernel.h | 2 +- .../gpu/arrays/slice_grad_gpu_kernel.h | 4 +- .../gpu/arrays/strided_slice_gpu_common.h | 18 +- .../gpu/arrays/strided_slice_gpu_kernel.h | 4 +- .../backend/kernel_compiler/gpu/gpu_kernel.h | 13 + .../gpu/nn/batch_norm_gpu_kernel.h | 10 +- .../gpu/nn/batch_norm_grad_gpu_kernel.h | 10 +- .../gpu/nn/bias_add_grad_gpu_kenel.h | 4 +- .../gpu/nn/conv2d_gpu_kernel.h | 5 +- .../gpu/nn/conv2d_grad_filter_gpu_kernel.h | 7 +- .../gpu/nn/conv2d_grad_input_gpu_kernel.h | 7 +- .../gpu/nn/conv3d_gpu_kernel.h | 7 +- .../gpu/nn/conv3d_grad_filter_gpu_kernel.h | 7 +- .../gpu/nn/conv3d_grad_input_gpu_kernel.h | 5 +- .../gpu/nn/conv3d_transpose_gpu_kernel.h | 7 +- .../gpu/nn/instance_norm_gpu_kernel.h | 5 +- .../gpu/nn/instance_norm_grad_gpu_kernel.h | 5 +- .../gpu/nn/l2normalize_gpu_kernel.h | 4 +- .../gpu/nn/l2normalize_grad_gpu_kernel.h | 8 +- .../kernel_compiler/gpu/nn/lstm_gpu_kernel.h | 2 +- .../gpu/nn/lstm_grad_data_gpu_kernel.h | 2 +- .../gpu/nn/nll_loss_gpu_kernel.h | 2 +- .../gpu/other/assign_gpu_kernel.cc | 3 + .../neighbor_list_update_new_kernel.cc | 2 +- .../kernel_compiler/hccl/hccl_kernel.cc | 40 +- .../kernel_compiler/hccl/hccl_kernel.h | 4 +- .../hccl/hccl_kernel_metadata.cc | 10 +- .../hccl/hcom_all_broadcast.cc | 1 + .../kernel_compiler/hccl/hcom_all_gather.cc | 22 +- .../kernel_compiler/hccl/hcom_all_gather.h | 1 - .../kernel_compiler/hccl/hcom_all_reduce.cc | 11 +- .../hccl/hcom_all_reduce_scatter.cc | 22 +- .../hccl/hcom_all_reduce_scatter.h | 1 - .../kernel_compiler/hccl/hcom_receive.cc | 20 +- .../backend/kernel_compiler/hccl/hcom_send.cc | 21 +- 
.../backend/kernel_compiler/hccl/hcom_util.cc | 53 +- .../backend/kernel_compiler/hccl/hcom_util.h | 6 +- .../kernel_compiler/kash/kernel_pack.cc | 10 + .../backend/kernel_compiler/kernel_query.cc | 6 + .../tbe/tbe_dynaminc_shape_util.cc | 4 +- .../tbe/tbe_json/tbe_json_creator.cc | 6 +- .../tbe/tbe_json/tbe_json_creator.h | 2 + .../enhancer/concat_outputs_for_all_gather.cc | 2 +- .../insert_tensor_move_for_hccl_op.cc | 30 +- .../enhancer/insert_tensor_move_for_hccl_op.h | 2 +- .../change_axis_of_reduce_kernel.cc | 15 + .../ascend/mindir/all_to_all_unify_mindir.cc | 2 +- .../ccsrc/backend/optimizer/common/helper.cc | 14 +- .../backend/optimizer/cpu/insert_cast_cpu.cc | 38 + .../gpu/concat_outputs_for_all_gather.cc | 2 +- .../gpu/insert_format_transform_op.cc | 10 +- .../graph_kernel/add_atomic_clean.cc | 2 +- .../add_stitch_atomic_clean_gpu.cc | 2 +- .../graph_kernel/arithmetic_simplify.cc | 48 +- .../graph_kernel/graph_kernel_cse.cc | 4 +- .../graph_kernel/graph_kernel_helper.cc | 2 +- .../graph_kernel/graph_kernel_optimization.cc | 20 +- .../graph_kernel/graph_kernel_splitter.cc | 7 +- .../graph_kernel/model/lite_graph.cc | 32 +- .../optimizer/graph_kernel/model/lite_graph.h | 22 - .../optimizer/graph_kernel/model/node.h | 12 + .../optimizer/graph_kernel/model/op_node.cc | 350 +++++++- .../optimizer/graph_kernel/model/op_node.h | 163 +++- .../mem_reuse/mem_dynamic_allocator.cc | 31 +- .../mem_reuse/mem_dynamic_allocator.h | 5 +- .../optimizer/pass/communication_op_fusion.cc | 2 +- .../pass/convert_const_input_to_attr.cc | 23 +- .../convert_tuple_input_to_dynamic_input.cc | 2 +- .../optimizer/pass/optimize_dependence.cc | 3 + .../backend/session/anf_runtime_algorithm.cc | 106 +-- .../ccsrc/backend/session/ascend_session.cc | 8 +- mindspore/ccsrc/backend/session/executor.cc | 4 +- .../ccsrc/backend/session/gpu_session.cc | 8 +- .../ccsrc/backend/session/kernel_graph.cc | 9 +- .../ccsrc/backend/session/kernel_graph.h | 9 +- .../ccsrc/backend/session/session_basic.cc 
| 45 +- .../ccsrc/backend/session/session_basic.h | 3 +- mindspore/ccsrc/common/trans.cc | 6 +- .../ccsrc/cxx_api/graph/acl/acl_graph_impl.cc | 9 +- .../ccsrc/cxx_api/graph/acl/model_process.cc | 17 +- .../ccsrc/cxx_api/graph/acl/model_process.h | 2 +- .../cxx_api/model/acl/model_converter.cc | 4 +- .../ccsrc/cxx_api/model/acl/model_converter.h | 5 +- mindspore/ccsrc/cxx_api/model/model.cc | 8 +- .../model_converter_utils/multi_process.cc | 39 +- .../model_converter_utils/multi_process.h | 8 +- .../model_converter_utils/shared_memory.h | 4 +- mindspore/ccsrc/cxx_api/types.cc | 14 +- mindspore/ccsrc/debug/anf_ir_dump.cc | 6 +- mindspore/ccsrc/debug/anf_ir_utils.cc | 3 +- mindspore/ccsrc/debug/common.cc | 48 +- .../ccsrc/debug/data_dump/dump_json_parser.cc | 35 +- mindspore/ccsrc/debug/debug_services.cc | 128 +-- .../ccsrc/debug/debugger/debug_grpc.proto | 6 + mindspore/ccsrc/debug/debugger/debugger.cc | 68 +- mindspore/ccsrc/debug/debugger/debugger.h | 7 + mindspore/ccsrc/debug/debugger/grpc_client.cc | 15 + mindspore/ccsrc/debug/debugger/grpc_client.h | 3 + .../debugger/offline_debug/dbg_services.cc | 2 +- .../debugger/offline_debug/offline_logger.h | 18 +- .../ccsrc/debug/debugger/proto_exporter.cc | 3 +- mindspore/ccsrc/debug/dump_proto.cc | 3 +- mindspore/ccsrc/debug/env_config_parser.cc | 3 +- mindspore/ccsrc/debug/tensor_data.h | 19 +- mindspore/ccsrc/debug/trace.cc | 20 +- .../ccsrc/fl/server/consistent_hash_ring.cc | 2 + .../fl/server/distributed_count_service.cc | 1 + mindspore/ccsrc/fl/server/executor.cc | 17 +- mindspore/ccsrc/fl/server/executor.h | 12 +- mindspore/ccsrc/fl/server/iteration_timer.cc | 4 +- .../server/kernel/dense_grad_accum_kernel.h | 2 + .../ccsrc/fl/server/kernel/fed_avg_kernel.h | 8 + .../ccsrc/fl/server/kernel/optimizer_kernel.h | 2 +- .../server/kernel/round/pull_weight_kernel.cc | 2 +- .../server/kernel/round/push_weight_kernel.cc | 2 +- .../round/reconstruct_secrets_kernel.cc | 10 +- mindspore/ccsrc/fl/server/model_store.cc | 4 - 
.../ccsrc/fl/server/parameter_aggregator.cc | 31 +- .../ccsrc/fl/server/parameter_aggregator.h | 12 +- mindspore/ccsrc/fl/server/round.cc | 6 +- mindspore/ccsrc/fl/server/server.h | 10 +- .../ccsrc/frontend/optimizer/ad/dfunctor.cc | 58 +- mindspore/ccsrc/frontend/optimizer/irpass.cc | 25 +- mindspore/ccsrc/frontend/optimizer/irpass.h | 11 +- .../optimizer/irpass/symbol_resolver.h | 118 +-- .../optimizer/irpass/updatestate_eliminate.cc | 174 ++-- .../optimizer/irpass/updatestate_eliminate.h | 39 +- mindspore/ccsrc/frontend/optimizer/opt.cc | 62 +- mindspore/ccsrc/frontend/optimizer/opt.h | 28 +- .../rec_core/rec_generate_strategy.cc | 49 +- .../rec_core/rec_generate_strategy.h | 3 + .../parallel/graph_util/generate_graph.cc | 2 +- .../parallel/graph_util/generate_graph.h | 2 +- .../frontend/parallel/ops_info/conv2d_info.cc | 195 +++-- .../frontend/parallel/ops_info/conv2d_info.h | 9 +- .../parallel/ops_info/gatherd_info.cc | 16 + .../frontend/parallel/ops_info/gatherd_info.h | 1 + .../frontend/parallel/ops_info/ops_utils.h | 3 + .../parallel/ops_info/virtual_output_info.cc | 10 +- .../ccsrc/frontend/parallel/step_parallel.cc | 48 +- .../ccsrc/frontend/parallel/step_parallel.h | 9 + .../ccsrc/minddata/dataset/CMakeLists.txt | 2 + mindspore/ccsrc/minddata/dataset/api/audio.cc | 112 +++ .../ccsrc/minddata/dataset/api/datasets.cc | 43 +- .../dataset/audio/kernels/ir/bindings.cc | 90 ++ .../dataset/engine/ir/datasetops/bindings.cc | 22 +- .../engine/ir/datasetops/source/bindings.cc | 14 +- .../dataset/kernels/ir/image/bindings.cc | 12 + .../ccsrc/minddata/dataset/api/vision.cc | 14 + .../minddata/dataset/audio/ir/CMakeLists.txt | 2 + .../dataset/audio/ir/kernels/CMakeLists.txt | 7 + .../audio/ir/kernels/allpass_biquad_ir.cc | 9 +- .../audio/ir/kernels/allpass_biquad_ir.h | 5 +- .../audio/ir/kernels/amplitude_to_db_ir.cc | 10 +- .../dataset/audio/ir/kernels/angle_ir.cc | 3 +- .../dataset/audio/ir/kernels/angle_ir.h | 3 +- .../audio/ir/kernels/bandpass_biquad_ir.cc | 7 +- 
.../audio/ir/kernels/bandpass_biquad_ir.h | 4 +- .../audio/ir/kernels/bandreject_biquad_ir.cc | 6 +- .../audio/ir/kernels/bandreject_biquad_ir.h | 4 +- .../audio/ir/kernels/bass_biquad_ir.cc | 6 +- .../dataset/audio/ir/kernels/bass_biquad_ir.h | 1 + .../audio/ir/kernels/time_stretch_ir.cc | 13 +- .../minddata/dataset/audio/ir/validators.cc | 72 +- .../minddata/dataset/audio/ir/validators.h | 28 + .../dataset/audio/kernels/CMakeLists.txt | 9 +- .../audio/kernels/allpass_biquad_op.cc | 18 +- .../dataset/audio/kernels/allpass_biquad_op.h | 1 - .../audio/kernels/amplitude_to_db_op.cc | 13 +- .../audio/kernels/amplitude_to_db_op.h | 1 - .../dataset/audio/kernels/angle_op.cc | 6 +- .../minddata/dataset/audio/kernels/angle_op.h | 1 - .../dataset/audio/kernels/audio_utils.cc | 254 ++---- .../dataset/audio/kernels/audio_utils.h | 48 ++ .../audio/kernels/bandpass_biquad_op.cc | 17 +- .../audio/kernels/bandpass_biquad_op.h | 3 +- .../audio/kernels/bandreject_biquad_op.cc | 19 +- .../audio/kernels/bandreject_biquad_op.h | 1 - .../dataset/audio/kernels/bass_biquad_op.cc | 11 +- .../dataset/audio/kernels/bass_biquad_op.h | 3 +- .../dataset/audio/kernels/time_stretch_op.cc | 13 +- .../dataset/audio/kernels/time_stretch_op.h | 4 +- .../dataset/callback/py_ds_callback.cc | 12 +- .../dataset/callback/py_ds_callback.h | 12 +- .../ccsrc/minddata/dataset/core/cv_tensor.cc | 20 +- .../ccsrc/minddata/dataset/core/cv_tensor.h | 3 +- .../ccsrc/minddata/dataset/core/data_type.cc | 2 +- .../ccsrc/minddata/dataset/core/de_tensor.cc | 2 +- .../ccsrc/minddata/dataset/core/tensor.cc | 98 ++- .../ccsrc/minddata/dataset/core/tensor.h | 17 +- .../dataset/engine/cache/cache_grpc_client.cc | 2 +- .../dataset/engine/consumers/tree_consumer.cc | 2 +- .../minddata/dataset/engine/data_schema.cc | 2 +- .../dataset/engine/datasetops/rename_op.cc | 19 +- .../engine/datasetops/source/CMakeLists.txt | 1 + .../engine/datasetops/source/album_op.cc | 18 +- .../engine/datasetops/source/album_op.h | 18 +- 
.../engine/datasetops/source/cifar_op.cc | 2 +- .../engine/datasetops/source/cmu_arctic_op.cc | 254 ++++++ .../engine/datasetops/source/cmu_arctic_op.h | 126 +++ .../engine/datasetops/source/flickr_op.cc | 26 +- .../engine/datasetops/source/mindrecord_op.cc | 2 +- .../engine/ir/cache/dataset_cache_impl.cc | 16 +- .../engine/ir/datasetops/dataset_node.h | 3 +- .../engine/ir/datasetops/epoch_ctrl_node.cc | 4 +- .../engine/ir/datasetops/epoch_ctrl_node.h | 4 +- .../ir/datasetops/source/CMakeLists.txt | 1 + .../engine/ir/datasetops/source/album_node.cc | 2 +- .../engine/ir/datasetops/source/album_node.h | 2 +- .../ir/datasetops/source/cmu_arctic_node.cc | 107 +++ .../ir/datasetops/source/cmu_arctic_node.h | 76 ++ .../engine/ir/datasetops/source/mnist_node.cc | 2 +- .../engine/ir/datasetops/source/mnist_node.h | 2 +- .../ir/datasetops/source/random_node.cc | 2 +- .../engine/ir/datasetops/source/random_node.h | 2 +- .../ir/datasetops/source/tf_record_node.cc | 4 +- .../ir/datasetops/source/tf_record_node.h | 4 +- .../engine/ir/datasetops/transfer_node.cc | 3 + .../dataset/engine/opt/post/repeat_pass.cc | 2 +- .../dataset/engine/opt/post/repeat_pass.h | 2 +- .../ccsrc/minddata/dataset/engine/serdes.cc | 246 +++--- .../ccsrc/minddata/dataset/engine/serdes.h | 21 +- .../minddata/dataset/include/dataset/audio.h | 154 ++++ .../dataset/include/dataset/constants.h | 6 + .../dataset/include/dataset/datasets.h | 166 ++-- .../dataset/include/dataset/samplers.h | 3 +- .../minddata/dataset/include/dataset/vision.h | 26 +- .../dataset/kernels/image/CMakeLists.txt | 1 + .../dataset/kernels/image/adjust_gamma_op.cc | 6 +- .../minddata/dataset/kernels/image/crop_op.cc | 12 +- .../dataset/kernels/image/hwc_to_chw_op.cc | 8 +- .../dataset/kernels/image/image_utils.cc | 77 +- .../dataset/kernels/image/image_utils.h | 10 + .../kernels/image/lite_cv/image_process.cc | 6 +- .../kernels/image/lite_cv/warp_affine.cc | 4 +- .../dataset/kernels/image/posterize_op.cc | 3 +- 
.../dataset/kernels/image/random_color_op.cc | 2 +- .../image/random_crop_and_resize_op.cc | 12 +- .../dataset/kernels/image/random_crop_op.cc | 12 +- .../dataset/kernels/image/resize_op.cc | 12 +- .../dataset/kernels/image/sharpness_op.cc | 2 +- ..._dvpp_decode_random_crop_resize_jpeg_op.cc | 3 +- .../soft_dvpp_decode_resize_jpeg_op.cc | 3 +- .../dataset/kernels/image/solarize_op.cc | 2 +- .../dataset/kernels/image/uniform_aug_op.h | 2 +- .../dataset/kernels/ir/data/transforms_ir.cc | 21 + .../dataset/kernels/ir/data/transforms_ir.h | 13 +- .../minddata/dataset/kernels/ir/validators.cc | 5 + .../minddata/dataset/kernels/ir/validators.h | 3 + .../dataset/kernels/ir/vision/CMakeLists.txt | 1 + .../kernels/ir/vision/adjust_gamma_ir.cc | 2 + .../dataset/kernels/ir/vision/affine_ir.cc | 12 +- .../kernels/ir/vision/auto_contrast_ir.cc | 4 +- .../kernels/ir/vision/center_crop_ir.cc | 2 +- .../dataset/kernels/ir/vision/crop_ir.cc | 15 + .../dataset/kernels/ir/vision/crop_ir.h | 4 + .../kernels/ir/vision/cutmix_batch_ir.cc | 6 +- .../dataset/kernels/ir/vision/cutout_ir.cc | 4 +- .../dataset/kernels/ir/vision/decode_ir.cc | 2 +- .../kernels/ir/vision/gaussian_blur_ir.cc | 4 +- .../kernels/ir/vision/mixup_batch_ir.cc | 2 +- .../kernels/ir/vision/normalize_pad_ir.cc | 6 +- .../dataset/kernels/ir/vision/pad_ir.cc | 6 +- .../kernels/ir/vision/random_affine_ir.cc | 13 +- .../ir/vision/random_color_adjust_ir.cc | 9 +- .../kernels/ir/vision/random_color_ir.cc | 2 +- .../ir/vision/random_crop_decode_resize_ir.cc | 10 +- .../kernels/ir/vision/random_crop_ir.cc | 10 +- .../ir/vision/random_crop_with_bbox_ir.cc | 10 +- .../ir/vision/random_horizontal_flip_ir.cc | 2 +- .../random_horizontal_flip_with_bbox_ir.cc | 2 +- .../kernels/ir/vision/random_posterize_ir.cc | 2 +- .../kernels/ir/vision/random_resize_ir.cc | 2 +- .../ir/vision/random_resize_with_bbox_ir.cc | 2 +- .../ir/vision/random_resized_crop_ir.cc | 10 +- .../random_resized_crop_with_bbox_ir.cc | 10 +- 
.../kernels/ir/vision/random_rotation_ir.cc | 10 +- .../kernels/ir/vision/random_sharpness_ir.cc | 2 +- .../kernels/ir/vision/random_solarize_ir.cc | 4 +- .../ir/vision/random_vertical_flip_ir.cc | 2 +- .../random_vertical_flip_with_bbox_ir.cc | 2 +- .../dataset/kernels/ir/vision/rescale_ir.cc | 4 +- .../dataset/kernels/ir/vision/resize_ir.cc | 4 +- .../ir/vision/resize_preserve_ar_ir.cc | 6 +- .../kernels/ir/vision/resize_with_bbox_ir.cc | 4 +- .../kernels/ir/vision/rgb_to_bgr_ir.cc | 5 + .../dataset/kernels/ir/vision/rgb_to_bgr_ir.h | 2 + .../kernels/ir/vision/rgb_to_gray_ir.cc | 6 + .../kernels/ir/vision/rgb_to_gray_ir.h | 2 + .../kernels/ir/vision/rgba_to_bgr_ir.cc | 1 - .../dataset/kernels/ir/vision/rotate_ir.cc | 12 +- .../kernels/ir/vision/slice_patches_ir.cc | 12 + .../kernels/ir/vision/slice_patches_ir.h | 2 + ...tdvpp_decode_random_crop_resize_jpeg_ir.cc | 10 +- .../vision/softdvpp_decode_resize_jpeg_ir.cc | 4 +- .../kernels/ir/vision/vertical_flip_ir.cc | 6 + .../kernels/ir/vision/vertical_flip_ir.h | 2 + .../minddata/dataset/kernels/tensor_op.h | 8 + .../dataset/text/ir/kernels/text_ir.cc | 7 + .../dataset/text/ir/kernels/text_ir.h | 2 + .../mindrecord/io/shard_index_generator.cc | 2 +- .../minddata/mindrecord/io/shard_reader.cc | 4 +- mindspore/ccsrc/pipeline/jit/action.cc | 47 +- .../pipeline/jit/parse/function_block.cc | 15 +- .../ccsrc/pipeline/jit/parse/function_block.h | 2 +- mindspore/ccsrc/pipeline/jit/parse/parse.cc | 267 ++++-- mindspore/ccsrc/pipeline/jit/parse/parse.h | 148 ++-- mindspore/ccsrc/pipeline/jit/parse/resolve.cc | 16 +- mindspore/ccsrc/pipeline/jit/parse/resolve.h | 2 +- mindspore/ccsrc/pipeline/jit/pass.cc | 59 +- mindspore/ccsrc/pipeline/jit/pipeline.cc | 5 +- mindspore/ccsrc/pipeline/jit/resource.h | 4 + .../jit/static_analysis/order_enforce.cc | 12 +- .../pipeline/pynative/pynative_execute.cc | 1 + mindspore/ccsrc/profiler/device/data_saver.cc | 13 +- mindspore/ccsrc/profiler/device/data_saver.h | 9 + 
.../profiler/device/gpu/gpu_data_saver.cc | 4 + .../profiler/device/gpu/gpu_profiling.cc | 4 + .../ps/core/communicator/http_communicator.cc | 8 +- .../ps/core/communicator/tcp_communicator.cc | 16 +- .../ps/core/communicator/tcp_communicator.h | 2 +- mindspore/ccsrc/ps/optimizer_info.cc | 47 +- mindspore/ccsrc/ps/optimizer_info_builder.cc | 3 + .../ccsrc/ps/ps_cache/ps_cache_manager.cc | 15 +- .../ps/ps_cache/ps_data/ps_data_prefetch.cc | 1 + mindspore/ccsrc/ps/ps_context.cc | 3 +- mindspore/ccsrc/ps/ps_context.h | 7 +- .../ccsrc/pybind_api/ir/param_info_py.cc | 1 + mindspore/ccsrc/runtime/device/CMakeLists.txt | 2 +- .../runtime/device/ascend/ascend_bucket.cc | 4 +- .../device/ascend/ascend_device_address.cc | 3 + .../runtime/device/ascend/ascend_event.cc | 4 + .../device/ascend/ascend_kernel_runtime.cc | 59 +- .../device/ascend/ascend_stream_assign.cc | 36 +- .../device/ascend/ascend_stream_assign.h | 3 +- .../ascend/executor/ai_core_dynamic_kernel.cc | 19 +- .../ascend/executor/ai_cpu_dynamic_kernel.cc | 2 +- .../executor/tiling/op_tiling_adapter.cc | 14 +- .../executor/tiling/op_tiling_adapter.h | 4 +- .../device/ascend/kernel_select_ascend.cc | 2 +- .../profiling/profiling_callback_register.h | 2 +- .../ascend/profiling/profiling_manager.cc | 1 - .../ascend/profiling/profiling_manager.h | 2 +- .../runtime/device/cpu/kernel_select_cpu.cc | 74 +- .../runtime/device/cpu/kernel_select_cpu.h | 2 + .../ccsrc/runtime/device/gpu/blocking_queue.h | 2 +- .../runtime/device/gpu/cuda_env_checker.cc | 4 + .../device/gpu/distribution/mpi_wrapper.cc | 6 +- .../runtime/device/gpu/gpu_buffer_mgr.cc | 12 +- .../runtime/device/gpu/gpu_device_address.cc | 1 + .../runtime/device/gpu/gpu_kernel_build.cc | 4 +- .../runtime/device/gpu/gpu_kernel_runtime.cc | 54 +- .../runtime/device/gpu/gpu_stream_assign.cc | 5 + .../runtime/device/gpu/kernel_info_setter.cc | 5 + .../ccsrc/runtime/device/kernel_runtime.cc | 10 +- .../runtime/framework/actor/debug_actor.cc | 1 - 
.../runtime/framework/actor/gather_actor.cc | 10 +- .../runtime/framework/actor/gather_actor.h | 10 +- .../runtime/framework/actor/kernel_actor.cc | 2 +- .../runtime/framework/actor/switch_actor.cc | 10 +- .../runtime/framework/actor/switch_actor.h | 12 +- .../runtime/framework/control_node_parser.cc | 2 +- .../ccsrc/runtime/framework/graph_compiler.cc | 13 +- .../runtime/framework/graph_scheduler.cc | 65 +- .../ccsrc/runtime/framework/graph_scheduler.h | 30 +- .../hardware/cpu/cpu_device_context.cc | 8 + .../runtime/hardware/cpu/cpu_device_context.h | 2 + .../runtime/hardware/cpu/cpu_memory_pool.cc | 4 +- .../hardware/gpu/gpu_device_context.cc | 9 - .../runtime/hccl_adapter/hccl_adapter.cc | 95 ++- .../ccsrc/runtime/hccl_adapter/hccl_adapter.h | 17 +- .../runtime/hccl_adapter/plugin/hccl_plugin.h | 6 + .../transform/express_ir/mindir_exporter.cc | 71 +- .../transform/express_ir/onnx_exporter.cc | 660 +++++++++++++-- mindspore/ccsrc/transform/graph_ir/convert.h | 9 +- .../ccsrc/utils/context/graph_kernel_flags.cc | 2 + .../ccsrc/utils/context/graph_kernel_flags.h | 7 + mindspore/ccsrc/utils/utils.h | 8 + mindspore/ccsrc/vm/transform.cc | 7 + mindspore/common/parameter.py | 14 +- mindspore/common/seed.py | 4 +- mindspore/common/tensor.py | 41 +- mindspore/context.py | 40 +- mindspore/core/abstract/abstract_value.cc | 8 +- mindspore/core/abstract/analysis_context.cc | 27 +- mindspore/core/abstract/analysis_context.h | 10 +- mindspore/core/abstract/prim_arrays.cc | 2 +- mindspore/core/abstract/prim_structures.cc | 7 +- .../core/abstract/primitive_infer_map.cc | 12 +- mindspore/core/base/core_ops.h | 10 + mindspore/core/ir/param_info.h | 5 + .../core/load_mindir/anf_model_parser.cc | 162 +++- mindspore/core/load_mindir/anf_model_parser.h | 2 + mindspore/core/load_mindir/load_model.cc | 4 +- mindspore/core/mindrt/src/actor/actormgr.cc | 30 +- mindspore/core/mindrt/src/actor/actormgr.h | 24 +- .../mindrt/src/thread/actor_threadpool.cc | 57 +- 
.../core/mindrt/src/thread/actor_threadpool.h | 8 +- .../core/mindrt/src/thread/core_affinity.cc | 20 +- .../core/mindrt/src/thread/core_affinity.h | 2 + mindspore/core/mindrt/src/thread/threadlog.h | 9 + .../core/mindrt/src/thread/threadpool.cc | 42 +- mindspore/core/mindrt/src/thread/threadpool.h | 9 +- mindspore/core/ops/apply_momentum.cc | 3 + mindspore/core/ops/arg_min.cc | 1 + mindspore/core/ops/asin.cc | 1 + mindspore/core/ops/assert.cc | 3 + mindspore/core/ops/batch_to_space_nd.cc | 2 +- mindspore/core/ops/batch_to_space_nd.h | 2 +- mindspore/core/ops/conv2d.cc | 3 + mindspore/core/ops/cos.cc | 2 +- .../core/ops/fake_quant_with_min_max_vars.cc | 2 +- mindspore/core/ops/grad/hshrink_grad.h | 2 +- .../core/ops/grad/soft_margin_loss_grad.h | 2 +- mindspore/core/ops/hshrink.h | 2 +- mindspore/core/ops/logical_not.cc | 1 + mindspore/core/ops/lrn.cc | 3 +- mindspore/core/ops/max_pool.cc | 3 +- mindspore/core/ops/ones_like.cc | 2 + mindspore/core/ops/pack.cc | 1 + mindspore/core/ops/rank.cc | 1 + mindspore/core/ops/reduce_sum.cc | 168 +++- mindspore/core/ops/reduce_sum.h | 4 +- mindspore/core/ops/round.cc | 1 + mindspore/core/ops/scatter_nd_update.h | 2 +- mindspore/core/ops/soft_margin_loss.h | 2 +- mindspore/core/ops/space_to_batch_nd.cc | 2 +- mindspore/core/ops/space_to_batch_nd.h | 2 +- mindspore/core/ops/squeeze.cc | 2 +- mindspore/core/ops/stack.cc | 3 + mindspore/core/ops/strided_slice.cc | 63 +- mindspore/core/ops/topk.cc | 3 + mindspore/core/ops/unpack.cc | 1 + mindspore/core/ops/unsorted_segment_sum.cc | 3 + mindspore/core/ops/unstack.cc | 1 + mindspore/core/proto/mind_ir.proto | 6 + mindspore/core/utils/check_convert_utils.cc | 15 + mindspore/core/utils/check_convert_utils.h | 1 + mindspore/core/utils/log_adapter.cc | 4 +- mindspore/core/utils/parallel_node_check.cc | 2 +- mindspore/core/utils/trace_info.h | 8 + mindspore/dataset/audio/transforms.py | 215 ++++- mindspore/dataset/audio/utils.py | 2 - mindspore/dataset/audio/validators.py | 130 ++- 
mindspore/dataset/core/validator_helpers.py | 13 +- mindspore/dataset/engine/__init__.py | 2 +- mindspore/dataset/engine/datasets.py | 10 +- .../dataset/engine/serializer_deserializer.py | 350 +------- mindspore/dataset/engine/validators.py | 6 +- mindspore/dataset/vision/c_transforms.py | 33 +- mindspore/dataset/vision/py_transforms.py | 42 +- .../dataset/vision/py_transforms_util.py | 28 +- mindspore/dataset/vision/validators.py | 24 +- mindspore/lite/CMakeLists.txt | 44 +- mindspore/lite/OWNERS | 16 +- mindspore/lite/build_lite.sh | 2 +- .../models/densenet_train_export.py | 3 +- .../lite/examples/quick_start_cpp/build.sh | 4 +- .../lite/examples/quick_start_cpp/main.cc | 160 ++-- mindspore/lite/examples/runtime_cpp/build.sh | 8 +- mindspore/lite/examples/runtime_cpp/main.cc | 674 +++++++-------- .../train_lenet_java/prepare_and_run.sh | 4 + .../lite/include/registry/kernel_interface.h | 18 - .../lite/include/registry/register_kernel.h | 28 - mindspore/lite/micro/cmake/file_list.cmake | 1 + .../generator/component/weight_component.cc | 1 - mindspore/lite/micro/coder/graph.cc | 7 +- .../cmsis-nn/int8/conv2d_int8_coder.cc | 5 +- .../opcoders/nnacl/fp32/addn_fp32_coder.cc | 6 +- .../nnacl/fp32/batchnorm_fp32_coder.cc | 2 + .../opcoders/nnacl/fp32/biasadd_fp32_coder.cc | 1 + .../fp32/convolution_depthwise_fp32_coder.cc | 3 +- .../fp32/convolution_winograd_fp32_coder.cc | 4 +- .../nnacl/fp32/full_connection_fp32_coder.cc | 1 + .../opcoders/nnacl/fp32/gather_fp32_coder.cc | 6 +- .../nnacl/fp32/matmul_fp32_base_coder.cc | 2 + .../opcoders/nnacl/fp32/softmax_fp32_coder.cc | 3 +- .../opcoders/nnacl/int8/conv2d_int8_coder.cc | 2 +- .../int8/convolution_depthwise_int8_coder.cc | 2 +- .../opcoders/nnacl/int8/reduce_int8_coder.cc | 2 +- .../opcoders/nnacl/int8/reduce_int8_coder.h | 16 +- .../opcoders/nnacl/int8/softmax_int8_coder.cc | 5 +- mindspore/lite/micro/coder/train.cc | 4 + .../wrapper/base/optimize_handler_wrapper.c | 22 +- .../wrapper/base/optimize_handler_wrapper.h 
| 13 +- .../wrapper/int8/conv1x1_init_int8_wrapper.c | 5 +- .../lite/minddata/example/CMakeLists.txt | 4 +- .../lite/minddata/example/testlitecv.cpp | 20 +- mindspore/lite/minddata/wrapper/MDToDApi.cc | 4 - .../lite/minddata/wrapper/album_op_android.cc | 18 +- .../lite/minddata/wrapper/album_op_android.h | 18 +- mindspore/lite/schema/ops.fbs | 4 + mindspore/lite/src/CMakeLists.txt | 23 +- .../lite/src/common/dynamic_library_loader.cc | 4 +- .../lite/src/common/dynamic_library_loader.h | 4 +- mindspore/lite/src/common/log_adapter.h | 14 + mindspore/lite/src/common/string_util.cc | 79 +- mindspore/lite/src/common/string_util.h | 5 +- mindspore/lite/src/common/tensor_util.cc | 49 +- mindspore/lite/src/common/tensor_util.h | 6 +- mindspore/lite/src/common/utils.cc | 33 +- mindspore/lite/src/common/utils.h | 4 +- .../lite/src/cxx_api/model/model_impl.cc | 6 +- .../lite/src/cxx_api/tensor/tensor_impl.cc | 10 + .../lite/src/cxx_api/tensor/tensor_impl.h | 12 +- .../lite/src/delegate/npu/npu_delegate.cc | 8 + mindspore/lite/src/delegate/npu/npu_graph.cc | 2 +- .../lite/src/delegate/npu/npu_manager.cc | 6 +- .../lite/src/delegate/npu/op/resize_npu.cc | 1 + .../tensorrt/op/activation_tensorrt.cc | 5 + .../delegate/tensorrt/op/concate_tensorrt.cc | 6 +- .../tensorrt/op/convolution_tensorrt.cc | 15 +- .../tensorrt/op/deconvolution_tensorrt.cc | 15 +- .../tensorrt/op/elementwise_tensorrt.cc | 41 +- .../tensorrt/op/elementwise_tensorrt.h | 6 +- .../delegate/tensorrt/op/gather_tensorrt.cc | 5 + .../delegate/tensorrt/op/matmul_tensorrt.cc | 14 +- .../src/delegate/tensorrt/op/pad_tensorrt.cc | 5 + .../src/delegate/tensorrt/op/pool_tensorrt.cc | 5 + .../delegate/tensorrt/op/reduce_tensorrt.cc | 4 + .../delegate/tensorrt/op/scale_tensorrt.cc | 4 + .../delegate/tensorrt/op/shape_tensorrt.cc | 5 + .../delegate/tensorrt/op/shuffle_tensorrt.cc | 101 ++- .../delegate/tensorrt/op/shuffle_tensorrt.h | 1 + .../delegate/tensorrt/op/slice_tensorrt.cc | 15 +- 
.../src/delegate/tensorrt/op/slice_tensorrt.h | 2 + .../delegate/tensorrt/op/softmax_tensorrt.cc | 4 + .../src/delegate/tensorrt/op/tensorrt_op.cc | 11 + .../src/delegate/tensorrt/op/tensorrt_op.h | 2 + .../delegate/tensorrt/op/unary_tensorrt.cc | 4 + .../delegate/tensorrt/tensorrt_delegate.cc | 19 +- .../delegate/tensorrt/tensorrt_subgraph.cc | 2 + .../src/delegate/tensorrt/tensorrt_subgraph.h | 6 +- .../src/delegate/tensorrt/tensorrt_utils.cc | 2 +- .../src/delegate/tensorrt/tensorrt_utils.h | 2 +- mindspore/lite/src/huffman_decode.h | 1 - mindspore/lite/src/inner_context.cc | 24 +- mindspore/lite/src/inner_kernel.cc | 35 + mindspore/lite/src/inner_kernel.h | 47 +- mindspore/lite/src/kernel_registry.cc | 41 +- mindspore/lite/src/kernel_registry.h | 5 +- mindspore/lite/src/lite_kernel.cc | 13 +- mindspore/lite/src/lite_kernel.h | 10 +- mindspore/lite/src/lite_kernel_util.cc | 4 +- mindspore/lite/src/lite_kernel_util.h | 2 + mindspore/lite/src/lite_mindrt.cc | 341 ++++---- mindspore/lite/src/lite_mindrt.h | 6 +- mindspore/lite/src/lite_model.cc | 1 - mindspore/lite/src/lite_session.cc | 20 +- mindspore/lite/src/ops/CMakeLists.txt | 36 + .../ops/compat/v0/expand_dims_compat_v0.cc | 2 +- .../lite/src/ops/compat/v0/slice_compat_v0.cc | 2 +- .../ops/compat/v0/strided_slice_compat_v0.cc | 2 +- .../lite/src/ops/compat/v0/topk_compat_v0.cc | 2 +- mindspore/lite/src/ops/ops_def.cc | 4 + mindspore/lite/src/ops/ops_func_declare.h | 2 + mindspore/lite/src/ops/ops_utils.cc | 6 + .../lite/src/ops/populate/adder_populate.cc | 4 +- .../lite/src/ops/populate/conv2d_populate.cc | 10 +- .../src/ops/populate/deconv2d_populate.cc | 10 +- .../detection_post_process_populate.cc | 7 +- .../lite/src/ops/populate/pooling_populate.cc | 16 +- .../lite/src/ops/populate/populate_register.h | 4 + .../src/ops/populate/prior_box_populate.cc | 6 +- .../lite/src/ops/populate/split_populate.cc | 8 +- .../src/ops/populate/v0/split_populate_v0.cc | 9 +- .../lite/src/registry/register_kernel.cc | 4 - 
.../lite/src/registry/register_kernel_impl.h | 1 + mindspore/lite/src/registry/register_utils.cc | 25 + mindspore/lite/src/registry/register_utils.h | 59 ++ .../runtime/gpu/opencl/opencl_allocator.cc | 5 + .../src/runtime/gpu/opencl/opencl_runtime.cc | 35 +- .../src/runtime/gpu/opencl/opencl_runtime.h | 2 +- mindspore/lite/src/runtime/infer_manager.cc | 10 + .../src/runtime/kernel/arm/CMakeLists.txt | 19 +- .../src/runtime/kernel/arm/base/carry_data.cc | 9 +- .../src/runtime/kernel/arm/base/carry_data.h | 2 + .../kernel/arm/base/convolution_base.cc | 50 +- .../kernel/arm/base/convolution_base.h | 18 +- .../kernel/arm/base/group_convolution_base.cc | 1 + .../kernel/arm/base/group_convolution_base.h | 2 +- .../kernel/arm/base/quant_dtype_cast.cc | 5 +- .../runtime/kernel/arm/base/reshape_base.cc | 58 +- .../runtime/kernel/arm/base/reshape_base.h | 13 +- .../src/runtime/kernel/arm/base/slice_base.cc | 4 +- .../runtime/kernel/arm/base/softmax_base.cc | 2 + .../src/runtime/kernel/arm/base/stack_base.cc | 6 +- .../kernel/arm/control/tensorlist_reserve.cc | 6 +- .../kernel/arm/fp16/activation_fp16.cc | 2 + .../arm/fp16/arithmetic_compare_fp16.cc | 4 +- .../kernel/arm/fp16/arithmetic_fp16.cc | 6 +- .../kernel/arm/fp16/arithmetic_self_fp16.cc | 16 +- .../runtime/kernel/arm/fp16/batchnorm_fp16.cc | 6 +- .../runtime/kernel/arm/fp16/biasadd_fp16.cc | 12 +- .../src/runtime/kernel/arm/fp16/cast_fp16.cc | 26 +- .../runtime/kernel/arm/fp16/concat_fp16.cc | 5 + .../kernel/arm/fp16/convolution_1x1_fp16.cc | 91 +- .../kernel/arm/fp16/convolution_1x1_fp16.h | 10 +- .../arm/fp16/convolution_delegate_fp16.cc | 6 +- .../arm/fp16/convolution_delegate_fp16.h | 4 +- .../fp16/convolution_depthwise_3x3_fp16.cc | 50 +- .../arm/fp16/convolution_depthwise_3x3_fp16.h | 9 +- .../arm/fp16/convolution_depthwise_fp16.cc | 47 +- .../arm/fp16/convolution_depthwise_fp16.h | 9 +- .../convolution_depthwise_slidewindow_fp16.cc | 56 +- .../convolution_depthwise_slidewindow_fp16.h | 7 +- 
.../kernel/arm/fp16/convolution_fp16.cc | 41 +- .../kernel/arm/fp16/convolution_fp16.h | 17 +- .../arm/fp16/convolution_winograd_fp16.cc | 60 +- .../arm/fp16/convolution_winograd_fp16.h | 20 +- .../src/runtime/kernel/arm/fp16/crop_fp16.cc | 5 +- .../arm/fp16/deconvolution_depthwise_fp16.cc | 43 +- .../arm/fp16/deconvolution_depthwise_fp16.h | 7 +- .../kernel/arm/fp16/deconvolution_fp16.cc | 67 +- .../kernel/arm/fp16/deconvolution_fp16.h | 7 +- .../arm/fp16/deconvolution_winograd_fp16.cc | 60 +- .../arm/fp16/deconvolution_winograd_fp16.h | 4 +- .../kernel/arm/fp16/fullconnection_fp16.cc | 3 + .../kernel/arm/fp16/fused_batchnorm_fp16.cc | 8 +- .../runtime/kernel/arm/fp16/gather_fp16.cc | 45 +- .../src/runtime/kernel/arm/fp16/gather_fp16.h | 1 - .../kernel/arm/fp16/group_convolution_fp16.cc | 2 + .../src/runtime/kernel/arm/fp16/gru_fp16.cc | 13 +- .../kernel/arm/fp16/instance_norm_fp16.cc | 8 + .../src/runtime/kernel/arm/fp16/lstm_fp16.cc | 15 +- .../kernel/arm/fp16/matmul_base_fp16.cc | 8 + .../runtime/kernel/arm/fp16/matmul_fp16.cc | 3 + .../src/runtime/kernel/arm/fp16/pad_fp16.cc | 21 +- .../runtime/kernel/arm/fp16/pooling_fp16.cc | 3 +- .../src/runtime/kernel/arm/fp16/power_fp16.cc | 13 +- .../kernel/arm/fp16/quant_dtype_cast_fp16.cc | 16 +- .../runtime/kernel/arm/fp16/reduce_fp16.cc | 9 +- .../src/runtime/kernel/arm/fp16/scale_fp16.cc | 10 +- .../runtime/kernel/arm/fp16/softmax_fp16.cc | 14 +- .../src/runtime/kernel/arm/fp16/stack_fp16.cc | 6 +- .../kernel/arm/fp16_grad/bn_fp16_grad.cc | 28 +- .../kernel/arm/fp16_grad/dropout_fp16_grad.cc | 1 - .../arm/fp16_grad/layernorm_fp16_grad.cc | 26 +- .../kernel/arm/fp16_grad/pooling_fp16_grad.cc | 18 +- .../kernel/arm/fp16_grad/resize_fp16_grad.cc | 2 - .../kernel/arm/fp32/activation_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/adder_fp32.cc | 10 +- .../src/runtime/kernel/arm/fp32/adder_fp32.h | 2 +- .../src/runtime/kernel/arm/fp32/addn_fp32.cc | 6 +- .../kernel/arm/fp32/arithmetic_fp32.cc | 2 + 
.../kernel/arm/fp32/arithmetic_self_fp32.cc | 2 + .../kernel/arm/fp32/batch_to_space_fp32.cc | 2 + .../runtime/kernel/arm/fp32/batchnorm_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/bias_fp32.cc | 6 +- .../kernel/arm/fp32/broadcast_to_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/cast_fp32.cc | 2 + .../runtime/kernel/arm/fp32/concat_fp32.cc | 1 + .../kernel/arm/fp32/convolution_1x1_fp32.cc | 105 +-- .../kernel/arm/fp32/convolution_1x1_fp32.h | 11 +- .../arm/fp32/convolution_delegate_fp32.cc | 5 + .../arm/fp32/convolution_delegate_fp32.h | 4 +- .../fp32/convolution_depthwise_3x3_fp32.cc | 96 +-- .../arm/fp32/convolution_depthwise_3x3_fp32.h | 9 +- .../arm/fp32/convolution_depthwise_fp32.cc | 84 +- .../arm/fp32/convolution_depthwise_fp32.h | 10 +- .../convolution_depthwise_indirect_fp32.cc | 114 +-- .../convolution_depthwise_indirect_fp32.h | 8 +- .../convolution_depthwise_slidewindow_fp32.cc | 88 +- .../convolution_depthwise_slidewindow_fp32.h | 8 +- ...volution_depthwise_slidewindow_x86_fp32.cc | 81 +- ...nvolution_depthwise_slidewindow_x86_fp32.h | 10 +- .../kernel/arm/fp32/convolution_fp32.cc | 108 +-- .../kernel/arm/fp32/convolution_fp32.h | 18 +- .../arm/fp32/convolution_slidewindow_fp32.cc | 89 +- .../arm/fp32/convolution_slidewindow_fp32.h | 19 +- .../arm/fp32/convolution_winograd_fp32.cc | 156 ++-- .../arm/fp32/convolution_winograd_fp32.h | 21 +- .../kernel/arm/fp32/crop_and_resize_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/crop_fp32.cc | 2 + .../arm/fp32/deconvolution_depthwise_fp32.cc | 77 +- .../arm/fp32/deconvolution_depthwise_fp32.h | 9 +- .../kernel/arm/fp32/deconvolution_fp32.cc | 104 ++- .../kernel/arm/fp32/deconvolution_fp32.h | 8 +- .../arm/fp32/deconvolution_winograd_fp32.cc | 66 +- .../arm/fp32/deconvolution_winograd_fp32.h | 4 +- .../kernel/arm/fp32/depth_to_space_fp32.cc | 2 + .../arm/fp32/detection_post_process_fp32.cc | 1 + .../src/runtime/kernel/arm/fp32/elu_fp32.cc | 2 + .../kernel/arm/fp32/embedding_lookup_fp32.cc | 2 + 
.../src/runtime/kernel/arm/fp32/exp_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/fill_fp32.cc | 2 + .../kernel/arm/fp32/fullconnection_fp32.cc | 3 + .../kernel/arm/fp32/fused_batchnorm_fp32.cc | 7 +- .../runtime/kernel/arm/fp32/gatherNd_fp32.cc | 2 + .../runtime/kernel/arm/fp32/gather_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/glu_fp32.cc | 5 +- .../kernel/arm/fp32/group_convolution_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/gru_fp32.cc | 2 + .../kernel/arm/fp32/instance_norm_fp32.cc | 2 + .../arm/fp32/invert_permutation_fp32.cc | 2 + .../runtime/kernel/arm/fp32/l2_norm_fp32.cc | 2 + .../kernel/arm/fp32/layer_norm_fp32.cc | 2 + .../arm/fp32/local_response_norm_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/lstm_fp32.cc | 2 + .../runtime/kernel/arm/fp32/matmul_fp32.cc | 2 + .../kernel/arm/fp32/matmul_fp32_base.cc | 32 +- .../arm/fp32/non_max_suppression_fp32.cc | 13 +- .../runtime/kernel/arm/fp32/nonzero_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/pad_fp32.cc | 92 +- .../src/runtime/kernel/arm/fp32/pad_fp32.h | 6 +- .../runtime/kernel/arm/fp32/pooling_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/power_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/prelu_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/range_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/rank_fp32.cc | 6 +- .../runtime/kernel/arm/fp32/reduce_fp32.cc | 2 + .../fp32/relative_position_attention_fp32.cc | 2 + .../runtime/kernel/arm/fp32/resize_fp32.cc | 19 +- .../runtime/kernel/arm/fp32/reverse_fp32.cc | 10 +- .../kernel/arm/fp32/reverse_sequence_fp32.cc | 2 + .../kernel/arm/fp32/roi_pooling_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/scale_fp32.cc | 6 +- .../kernel/arm/fp32/scatter_nd_fp32.cc | 2 + .../src/runtime/kernel/arm/fp32/shape_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/size_fp32.cc | 6 +- .../runtime/kernel/arm/fp32/softmax_fp32.cc | 2 + .../kernel/arm/fp32/space_to_batch_fp32.cc | 2 + .../kernel/arm/fp32/space_to_depth_fp32.cc | 2 + .../kernel/arm/fp32/sparse_to_dense_fp32.cc | 9 
+- .../runtime/kernel/arm/fp32/splice_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/topk_fp32.cc | 5 +- .../runtime/kernel/arm/fp32/transpose_fp32.cc | 4 +- .../runtime/kernel/arm/fp32/transpose_fp32.h | 2 +- .../kernel/arm/fp32/uniform_real_fp32.cc | 21 +- .../runtime/kernel/arm/fp32/unique_fp32.cc | 6 +- .../runtime/kernel/arm/fp32/unstack_fp32.cc | 6 +- .../src/runtime/kernel/arm/fp32/where_fp32.cc | 12 +- .../runtime/kernel/arm/fp32/zeroslike_fp32.cc | 6 +- .../runtime/kernel/arm/int8/arithmetic_int8.h | 2 +- .../runtime/kernel/arm/int8/batchnorm_int8.cc | 8 +- .../runtime/kernel/arm/int8/concat_int8.cc | 21 +- .../src/runtime/kernel/arm/int8/concat_int8.h | 2 +- .../kernel/arm/int8/convolution_1x1_int8.h | 2 +- .../kernel/arm/int8/convolution_3x3_int8.cc | 4 +- .../kernel/arm/int8/convolution_3x3_int8.h | 4 +- .../int8/convolution_depthwise_3x3_int8.cc | 13 +- .../arm/int8/convolution_depthwise_3x3_int8.h | 2 +- .../arm/int8/convolution_depthwise_int8.cc | 2 +- .../arm/int8/convolution_depthwise_int8.h | 2 +- .../convolution_depthwise_slidewindow_int8.cc | 37 +- .../convolution_depthwise_slidewindow_int8.h | 2 +- .../kernel/arm/int8/convolution_int8.cc | 8 +- .../kernel/arm/int8/convolution_int8.h | 2 +- .../src/runtime/kernel/arm/int8/crop_int8.cc | 9 +- .../src/runtime/kernel/arm/int8/crop_int8.h | 2 +- .../arm/int8/deconvolution_depthwise_int8.h | 2 +- .../kernel/arm/int8/deconvolution_int8.h | 2 +- .../runtime/kernel/arm/int8/gatherNd_int8.cc | 10 +- .../runtime/kernel/arm/int8/gatherNd_int8.h | 2 +- .../kernel/arm/int8/group_convolution_int8.cc | 4 +- .../src/runtime/kernel/arm/int8/hswish_int8.h | 2 +- .../runtime/kernel/arm/int8/leaky_relu_int8.h | 2 +- .../src/runtime/kernel/arm/int8/mul_int8.cc | 23 +- .../src/runtime/kernel/arm/int8/mul_int8.h | 4 +- .../runtime/kernel/arm/int8/opt_op_handler.cc | 13 +- .../runtime/kernel/arm/int8/opt_op_handler.h | 21 +- .../src/runtime/kernel/arm/int8/pad_int8.cc | 5 +- 
.../src/runtime/kernel/arm/int8/reduce_int8.h | 2 +- .../src/runtime/kernel/arm/int8/relux_int8.h | 2 +- .../runtime/kernel/arm/int8/reshape_int8.cc | 12 +- .../runtime/kernel/arm/int8/reshape_int8.h | 2 +- .../runtime/kernel/arm/int8/resize_int8.cc | 12 + .../src/runtime/kernel/arm/int8/resize_int8.h | 4 +- .../src/runtime/kernel/arm/int8/split_int8.cc | 4 +- .../runtime/kernel/arm/int8/squeeze_int8.cc | 11 +- .../runtime/kernel/arm/int8/squeeze_int8.h | 2 +- .../src/runtime/kernel/arm/int8/tanh_int8.h | 2 +- .../runtime/kernel/arm/int8/transpose_int8.cc | 2 +- .../runtime/kernel/arm/int8/transpose_int8.h | 3 +- .../src/runtime/kernel/opencl/cl/pooling2d.cl | 97 +-- .../kernel/opencl/kernel/activation.cc | 42 +- .../runtime/kernel/opencl/kernel/activation.h | 2 +- .../runtime/kernel/opencl/kernel/argminmax.cc | 71 +- .../runtime/kernel/opencl/kernel/argminmax.h | 2 +- .../kernel/opencl/kernel/arithmetic.cc | 69 +- .../runtime/kernel/opencl/kernel/arithmetic.h | 2 +- .../kernel/opencl/kernel/arithmetic_self.cc | 22 +- .../kernel/opencl/kernel/arithmetic_self.h | 8 +- .../kernel/opencl/kernel/batch_to_space_nd.cc | 47 +- .../kernel/opencl/kernel/batch_to_space_nd.h | 2 +- .../runtime/kernel/opencl/kernel/batchnorm.cc | 130 ++- .../runtime/kernel/opencl/kernel/batchnorm.h | 4 +- .../src/runtime/kernel/opencl/kernel/cast.cc | 32 +- .../src/runtime/kernel/opencl/kernel/cast.h | 2 +- .../runtime/kernel/opencl/kernel/concat.cc | 60 +- .../src/runtime/kernel/opencl/kernel/concat.h | 2 +- .../runtime/kernel/opencl/kernel/conv2d.cc | 114 ++- .../src/runtime/kernel/opencl/kernel/conv2d.h | 6 +- .../kernel/opencl/kernel/conv2d_transpose.cc | 93 +- .../kernel/opencl/kernel/conv2d_transpose.h | 2 +- .../kernel/opencl/kernel/depthwise_conv2d.cc | 79 +- .../kernel/opencl/kernel/depthwise_conv2d.h | 2 +- .../src/runtime/kernel/opencl/kernel/fill.cc | 7 +- .../src/runtime/kernel/opencl/kernel/fill.h | 2 +- .../kernel/opencl/kernel/fullconnection.cc | 93 +- 
.../kernel/opencl/kernel/fullconnection.h | 2 +- .../kernel/opencl/kernel/fusion_eltwise.cc | 62 +- .../kernel/opencl/kernel/fusion_eltwise.h | 2 +- .../runtime/kernel/opencl/kernel/gather.cc | 81 +- .../src/runtime/kernel/opencl/kernel/gather.h | 2 +- .../opencl/kernel/int8/arithmetic_int8.cc | 79 +- .../opencl/kernel/int8/arithmetic_int8.h | 2 +- .../kernel/opencl/kernel/layer_norm.cc | 120 ++- .../runtime/kernel/opencl/kernel/layer_norm.h | 2 +- .../runtime/kernel/opencl/kernel/matmul.cc | 81 +- .../src/runtime/kernel/opencl/kernel/matmul.h | 4 +- .../runtime/kernel/opencl/kernel/one_hot.cc | 60 +- .../runtime/kernel/opencl/kernel/one_hot.h | 2 +- .../src/runtime/kernel/opencl/kernel/pad.cc | 48 +- .../src/runtime/kernel/opencl/kernel/pad.h | 2 +- .../runtime/kernel/opencl/kernel/pooling2d.cc | 120 ++- .../runtime/kernel/opencl/kernel/pooling2d.h | 11 +- .../src/runtime/kernel/opencl/kernel/power.cc | 55 +- .../src/runtime/kernel/opencl/kernel/power.h | 2 +- .../src/runtime/kernel/opencl/kernel/prelu.cc | 56 +- .../src/runtime/kernel/opencl/kernel/prelu.h | 2 +- .../runtime/kernel/opencl/kernel/reduce.cc | 38 +- .../src/runtime/kernel/opencl/kernel/reduce.h | 2 +- .../runtime/kernel/opencl/kernel/reshape.cc | 42 +- .../runtime/kernel/opencl/kernel/reshape.h | 2 +- .../runtime/kernel/opencl/kernel/resize.cc | 45 +- .../src/runtime/kernel/opencl/kernel/resize.h | 2 +- .../src/runtime/kernel/opencl/kernel/scale.cc | 100 ++- .../src/runtime/kernel/opencl/kernel/scale.h | 2 +- .../runtime/kernel/opencl/kernel/softmax.cc | 35 +- .../runtime/kernel/opencl/kernel/softmax.h | 2 +- .../kernel/opencl/kernel/space_to_batch_nd.cc | 47 +- .../kernel/opencl/kernel/space_to_batch_nd.h | 2 +- .../kernel/opencl/kernel/space_to_depth.cc | 50 +- .../kernel/opencl/kernel/space_to_depth.h | 2 +- .../kernel/opencl/kernel/sparse_to_dense.cc | 86 +- .../kernel/opencl/kernel/sparse_to_dense.h | 2 +- .../src/runtime/kernel/opencl/kernel/split.cc | 81 +- 
.../src/runtime/kernel/opencl/kernel/split.h | 4 +- .../src/runtime/kernel/opencl/kernel/stack.cc | 57 +- .../src/runtime/kernel/opencl/kernel/stack.h | 2 +- .../runtime/kernel/opencl/kernel/strassen.cc | 255 ++++-- .../runtime/kernel/opencl/kernel/strassen.h | 24 +- .../kernel/opencl/kernel/strided_slice.cc | 55 +- .../kernel/opencl/kernel/strided_slice.h | 2 +- .../runtime/kernel/opencl/kernel/to_format.cc | 35 +- .../runtime/kernel/opencl/kernel/to_format.h | 2 +- .../runtime/kernel/opencl/kernel/transpose.cc | 40 +- .../runtime/kernel/opencl/kernel/transpose.h | 2 +- .../runtime/kernel/opencl/kernel/winograd.cc | 151 +++- .../runtime/kernel/opencl/kernel/winograd.h | 6 +- .../runtime/kernel/opencl/opencl_kernel.cc | 21 +- .../src/runtime/kernel/opencl/opencl_kernel.h | 4 +- .../runtime/kernel/opencl/opencl_subgraph.cc | 2 + mindspore/lite/src/runtime/runtime_pass.cc | 112 +-- mindspore/lite/src/runtime/runtime_pass.h | 14 +- mindspore/lite/src/scheduler.cc | 176 ++-- mindspore/lite/src/scheduler.h | 21 +- mindspore/lite/src/sub_graph_kernel.cc | 12 +- mindspore/lite/src/sub_graph_kernel.h | 10 +- mindspore/lite/src/tensor.cc | 4 +- mindspore/lite/src/tensor.h | 14 +- mindspore/lite/src/tensorlist.h | 4 +- mindspore/lite/src/train/train_session.cc | 114 ++- mindspore/lite/src/train/train_session.h | 3 + mindspore/lite/src/weight_decoder.cc | 22 +- mindspore/lite/src/weight_decoder.h | 2 + mindspore/lite/test/CMakeLists.txt | 3 +- .../lite/test/config/models_ms_train.cfg | 3 + mindspore/lite/test/config/models_npu.cfg | 4 + mindspore/lite/test/config/models_onnx.cfg | 5 + .../lite/test/config/models_onnx_fp16.cfg | 1 + .../lite/test/config/models_posttraining.cfg | 4 +- mindspore/lite/test/config/models_tf.cfg | 4 +- mindspore/lite/test/config/models_tf_fp16.cfg | 3 + mindspore/lite/test/runtest.sh | 3 + mindspore/lite/test/st/run_benchmark_nets.sh | 13 +- .../lite/test/st/scripts/base_functions.sh | 12 +- .../st/scripts/nnie/run_converter_nnie.sh | 4 +- 
.../test/st/scripts/run_benchmark_arm64.sh | 17 +- .../lite/test/st/scripts/run_benchmark_x86.sh | 22 +- .../lite/test/st/scripts/run_net_train.sh | 101 ++- .../custom_extract_features_infer_test.cc | 2 +- .../infer/custom_normalize_infer_test.cc | 2 +- .../nnacl/infer/custom_predict_infer_test.cc | 2 +- .../infer/hashtable_lookup_infer_test.cc | 2 +- .../nnacl/infer/lsh_projection_infer_test.cc | 2 +- .../ut/nnacl/infer/skip_gram_infer_test.cc | 2 +- .../infer/tensorlist_fromtensor_infer_test.cc | 2 +- .../infer/tensorlist_getitem_infer_test.cc | 2 +- .../infer/tensorlist_reserve_infer_test.cc | 2 +- .../infer/tensorlist_setitem_infer_test.cc | 2 +- .../infer/tensorlist_stack_infer_test.cc | 2 +- .../runtime/kernel/arm/fp32/skip_gram_fp32.cc | 2 +- .../runtime/kernel/arm/string/normalize.cc | 2 +- .../test/ut/src/runtime/runtime_pass_tests.cc | 62 +- .../lite/tools/benchmark/benchmark_base.cc | 4 +- .../lite/tools/benchmark_train/net_train.cc | 6 +- mindspore/lite/tools/common/flag_parser.cc | 4 + mindspore/lite/tools/common/flag_parser.h | 5 +- .../lite/tools/common/func_graph_subgraph.cc | 4 +- mindspore/lite/tools/common/graph_util.cc | 24 + mindspore/lite/tools/common/graph_util.h | 2 + mindspore/lite/tools/common/node_util.cc | 155 +--- mindspore/lite/tools/common/node_util.h | 2 + mindspore/lite/tools/converter/CMakeLists.txt | 6 +- .../lite/tools/converter/anf_transform.cc | 3 - .../lite/tools/converter/converter_flags.cc | 10 +- .../lite/tools/converter/export_model.cc | 2 +- .../converter/import/mindspore_importer.cc | 98 +-- .../converter/import/mindspore_importer.h | 3 +- .../graph/batchnorm_convert_scale_pass.cc | 41 +- .../parser/caffe/caffe_model_parser.cc | 111 +-- .../parser/caffe/caffe_model_parser.h | 4 - .../converter/parser/conv1d_inout_adjust.cc | 2 + .../parser/onnx/onnx_conv_transpose_parser.cc | 37 +- .../parser/onnx/onnx_inputs_adjust.cc | 1 + .../parser/onnx/onnx_model_parser.cc | 142 +--- .../converter/parser/onnx/onnx_model_parser.h | 5 
+- .../converter/parser/onnx/onnx_pad_adjust.cc | 4 +- .../tools/converter/parser/parser_utils.cc | 300 ++++--- .../tools/converter/parser/parser_utils.h | 17 +- .../converter/parser/tf/tf_model_parser.cc | 136 +-- .../converter/parser/tf/tf_model_parser.h | 7 - .../parser/tflite/tflite_model_parser.cc | 117 +-- .../parser/tflite/tflite_model_parser.h | 4 - .../tools/converter/parser/unify_format.cc | 182 +++- .../tools/converter/parser/unify_format.h | 10 +- .../quantizer/post_training_quantizer.cc | 10 +- .../tools/converter/quantizer/quant_cast.cc | 9 +- .../attention_quant_type_determiner.cc | 13 +- .../conv_quant_param_propogator.cc | 1 - ...default_quant_all_quant_type_determiner.cc | 1 - .../only_need_inputs_quant_type_determiner.cc | 1 - .../quant_helper/quant_node_helper.cc | 1 - .../converter/quantizer/weight_quantizer.cc | 2 +- .../registry/model_parser_registry.cc | 1 - .../tools/cropper/build_cropper_config.sh | 93 +- mindspore/lite/tools/cropper/cropper.cc | 4 +- .../lite/tools/dataset/cropper/build_lib.py | 6 +- .../dataset/cropper/cropper_configure.py | 8 +- .../lite/tools/optimizer/common/gllo_utils.cc | 13 +- .../format/delete_redundant_transpose.cc | 31 + .../format/delete_redundant_transpose.h | 1 + .../tools/optimizer/format/to_format_base.cc | 79 +- .../tools/optimizer/format/to_format_base.h | 8 +- .../tools/optimizer/format/to_nchw_format.cc | 24 +- .../tools/optimizer/format/to_nchw_format.h | 4 +- .../tools/optimizer/format/to_nhwc_format.cc | 23 +- .../tools/optimizer/format/to_nhwc_format.h | 4 +- .../optimizer/fusion/batchmatmul_fusion.cc | 1 - .../optimizer/fusion/conv_conv_fusion.cc | 4 + .../fusion/multi_head_attention_fusion.cc | 4 +- ...ite_rel_pos_multi_head_attention_fusion.cc | 24 +- .../tools/optimizer/graph/node_infershape.cc | 15 - .../optimizer/graph/slice_prepose_pass.cc | 4 + mindspore/nn/acc/base.py | 68 +- mindspore/nn/acc/grad_freeze.py | 1 + mindspore/nn/acc/less_batch_normalization.py | 1 + mindspore/nn/cell.py | 8 +- 
mindspore/nn/layer/activation.py | 47 ++ mindspore/nn/loss/__init__.py | 4 +- mindspore/nn/loss/loss.py | 53 +- mindspore/nn/wrap/grad_reducer.py | 8 - mindspore/nn/wrap/loss_scale.py | 31 +- mindspore/numpy/array_creations.py | 123 +-- mindspore/numpy/array_ops.py | 32 +- mindspore/numpy/math_ops.py | 163 ++-- mindspore/numpy/utils_const.py | 2 + mindspore/ops/_grad/grad_array_ops.py | 10 +- .../ops/_grad_experimental/grad_nn_ops.py | 25 + mindspore/ops/_op_impl/akg/ascend/__init__.py | 1 + mindspore/ops/_op_impl/cpu/__init__.py | 1 + mindspore/ops/_op_impl/tbe/__init__.py | 5 + .../ops/bprop_mindir/Identity_bprop.mindir | 12 +- mindspore/ops/bprop_mindir/ReLU_bprop.mindir | 16 +- mindspore/ops/composite/random_ops.py | 4 +- mindspore/ops/functional.py | 4 + mindspore/ops/operations/__init__.py | 50 +- mindspore/ops/operations/_grad_ops.py | 43 + mindspore/ops/operations/_thor_ops.py | 51 ++ mindspore/ops/operations/array_ops.py | 33 +- mindspore/ops/operations/inner_ops.py | 3 +- mindspore/ops/operations/math_ops.py | 84 +- mindspore/ops/operations/nn_ops.py | 99 ++- mindspore/ops/operations/other_ops.py | 3 +- mindspore/ops/operations/sponge_ops.py | 302 ------- mindspore/ops/operations/sponge_update_ops.py | 2 +- mindspore/ops/primitive.py | 28 +- .../profiler/common/exceptions/error_code.py | 4 +- .../profiler/common/exceptions/exceptions.py | 1 - .../profiler/parser/aicpu_data_parser.py | 11 +- mindspore/profiler/parser/container.py | 5 + mindspore/profiler/parser/flops_parser.py | 14 +- mindspore/profiler/parser/hccl_parser.py | 25 +- mindspore/profiler/parser/hwts_log_parser.py | 5 +- mindspore/profiler/parser/integrator.py | 3 + .../profiler/parser/memory_usage_parser.py | 8 +- .../profiler/parser/minddata_analyzer.py | 2 + mindspore/profiler/parser/minddata_parser.py | 2 + .../parser/minddata_pipeline_parser.py | 4 + mindspore/profiler/parser/optime_parser.py | 11 +- .../profiler/parser/step_trace_parser.py | 4 +- mindspore/profiler/profiling.py | 83 +- 
mindspore/run_check/_check_version.py | 2 +- .../train/callback/_lr_scheduler_callback.py | 2 +- .../train/callback/_summary_collector.py | 4 +- mindspore/train/loss_scale_manager.py | 3 +- mindspore/train/model.py | 12 +- mindspore/train/serialization.py | 19 +- mindspore/train/train_thor/convert_utils.py | 2 +- .../official/cv/centerface/src/centerface.py | 4 +- model_zoo/official/cv/cnnctc/src/cnn_ctc.py | 6 +- .../official/cv/crnn/src/crnn_for_train.py | 3 +- .../cv/crnn_seq2seq_ocr/src/attention_ocr.py | 3 +- .../cv/crnn_seq2seq_ocr/src/seq2seq.py | 2 +- .../official/cv/ctpn/default_config.yaml | 4 +- model_zoo/official/cv/ctpn/src/ctpn.py | 4 +- .../official/cv/ctpn/src/network_define.py | 4 +- .../cv/deeptext/src/network_define.py | 4 +- .../cv/faster_rcnn/default_config.yaml | 2 +- .../cv/faster_rcnn/default_config_101.yaml | 2 +- .../cv/faster_rcnn/default_config_152.yaml | 2 +- .../cv/faster_rcnn/src/network_define.py | 4 +- .../cv/maskrcnn/src/network_define.py | 5 +- .../src/network_define.py | 3 +- .../official/cv/nasnet/src/nasnet_a_mobile.py | 3 +- model_zoo/official/cv/openpose/src/loss.py | 3 +- model_zoo/official/cv/psenet/README.md | 181 ++-- model_zoo/official/cv/psenet/README_CN.md | 207 +++-- model_zoo/official/cv/psenet/requirements.txt | 2 + .../cv/psenet/src/ETSNET/pse/Makefile | 3 +- .../cv/psenet/src/ETSNET/pse/adaptor.cpp | 2 +- .../official/cv/psenet/src/network_define.py | 4 +- model_zoo/official/cv/psenet/train.py | 2 +- model_zoo/official/cv/resnet/README.md | 87 +- model_zoo/official/cv/resnet/README_CN.md | 86 +- .../cv/resnet/scripts/run_distribute_train.sh | 4 +- .../scripts/run_distribute_train_gpu.sh | 4 +- .../official/cv/resnet/scripts/run_eval.sh | 4 +- .../cv/resnet/scripts/run_eval_gpu.sh | 4 +- .../official/cv/resnet/scripts/run_infer.sh | 4 +- .../cv/resnet/scripts/run_infer_310.sh | 2 +- .../scripts/run_parameter_server_train.sh | 8 +- .../scripts/run_parameter_server_train_gpu.sh | 8 +- 
.../cv/resnet/scripts/run_standalone_train.sh | 4 +- .../scripts/run_standalone_train_gpu.sh | 4 +- .../cv/resnet/src/model_utils/config.py | 4 +- model_zoo/official/cv/resnet/src/resnet.py | 10 +- model_zoo/official/cv/resnet/train.py | 59 +- .../cv/retinaface_resnet50/src/network.py | 4 +- .../official/cv/retinanet/src/retinanet.py | 3 +- model_zoo/official/cv/shufflenetv1/eval.py | 2 +- model_zoo/official/cv/shufflenetv1/export.py | 2 +- model_zoo/official/cv/shufflenetv1/train.py | 2 +- model_zoo/official/cv/ssd/src/ssd.py | 3 +- model_zoo/official/cv/unet/README.md | 6 +- model_zoo/official/cv/unet/README_CN.md | 6 +- .../official/cv/unet/preprocess_dataset.py | 2 +- model_zoo/official/cv/vgg16/README.md | 35 + model_zoo/official/cv/vgg16/README_CN.md | 34 + .../ascend310_quant_infer/run_quant_infer.sh | 1 - .../cv/warpctc/src/warpctc_for_train.py | 3 +- .../official/cv/yolov3_darknet53/src/yolo.py | 3 +- .../cv/yolov3_darknet53_quant/src/yolo.py | 3 +- .../cv/yolov3_resnet18/scripts/run_eval.sh | 2 +- .../official/cv/yolov3_resnet18/src/yolov3.py | 3 +- model_zoo/official/cv/yolov4/src/yolo.py | 7 +- model_zoo/official/cv/yolov5/README.md | 2 +- model_zoo/official/cv/yolov5/README_CN.md | 88 +- model_zoo/official/cv/yolov5/src/yolo.py | 3 +- model_zoo/official/gnn/gat/src/utils.py | 4 +- model_zoo/official/nlp/bert/README.md | 12 + model_zoo/official/nlp/bert/README_CN.md | 8 + .../nlp/bert/src/bert_for_finetune.py | 18 +- .../nlp/bert/src/bert_for_pre_training.py | 31 +- .../bert_thor/src/bert_for_pre_training.py | 31 +- model_zoo/official/nlp/cpm/src/cpm_train.py | 17 +- .../official/nlp/dgu/src/bert_for_finetune.py | 18 +- .../nlp/dgu/src/bert_for_pre_training.py | 31 +- .../nlp/emotect/src/ernie_for_finetune.py | 9 +- .../nlp/fasttext/src/fasttext_train.py | 4 +- .../gnmt_v2/src/gnmt_model/gnmt_for_train.py | 9 +- .../official/nlp/gpt/src/gpt_wrapcell.py | 9 +- model_zoo/official/nlp/gru/README.md | 108 ++- .../official/nlp/gru/default_config.yaml | 2 + 
.../official/nlp/gru/model_utils/config.py | 24 +- .../nlp/gru/scripts/create_dataset.sh | 1 - .../scripts/run_distribute_train_ascend.sh | 5 +- .../official/nlp/gru/src/gru_for_train.py | 54 +- model_zoo/official/nlp/gru/src/seq2seq.py | 38 +- model_zoo/official/nlp/gru/src/weight_init.py | 16 +- model_zoo/official/nlp/gru/train.py | 66 +- .../src/transformer/transformer_for_train.py | 9 +- .../official/nlp/pangu_alpha/src/dataset.py | 5 +- .../pangu_alpha/src/pangu_alpha_wrapcell.py | 17 +- .../official/nlp/pangu_alpha/src/utils.py | 12 + model_zoo/official/nlp/pangu_alpha/train.py | 146 ++-- model_zoo/official/nlp/q8bert/src/q8bert.py | 26 +- .../nlp/tinybert/src/tinybert_for_gd_td.py | 26 +- .../transformer/src/transformer_for_train.py | 26 +- model_zoo/official/recommend/ncf/src/ncf.py | 4 +- model_zoo/official/rl/dqn/README.md | 39 +- model_zoo/official/rl/dqn/README_CN.md | 8 +- model_zoo/official/rl/dqn/eval.py | 42 +- .../dqn/scripts/run_standalone_train_gpu.sh | 3 +- model_zoo/official/rl/dqn/src/agent.py | 128 +-- model_zoo/official/rl/dqn/src/config.py | 11 +- model_zoo/official/rl/dqn/src/dqn.py | 17 +- model_zoo/official/rl/dqn/train.py | 84 +- .../cv/AVA_cifar/src/network_define.py | 4 +- .../cv/AVA_hpa/src/network_define_pretrain.py | 4 +- .../cv/AVA_hpa/src/network_define_train.py | 4 +- model_zoo/research/cv/AttGAN/src/cell.py | 6 +- .../cv/FaceDetection/src/network_define.py | 6 +- model_zoo/research/cv/ICNet/README.md | 41 +- model_zoo/research/cv/ICNet/eval.py | 5 +- .../research/cv/ICNet/scripts/run_eval.sh | 8 +- .../cv/ICNet/src/model_utils/icnet.yaml | 2 +- model_zoo/research/cv/IPT/src/loss.py | 9 +- model_zoo/research/cv/IPT/src/utils.py | 4 +- .../cv/LearningToSeeInTheDark/src/myutils.py | 10 +- .../cv/MaskedFaceRecognition/model/model.py | 4 +- model_zoo/research/cv/ProtoNet/README.md | 36 +- model_zoo/research/cv/ProtoNet/eval.py | 5 +- .../scripts/run_distribution_ascend.sh | 14 +- .../research/cv/ProtoNet/src/parser_util.py | 2 +- 
.../cv/SRGAN/src/trainonestep/train_gan.py | 6 +- .../cv/SRGAN/src/trainonestep/train_psnr.py | 3 +- .../research/cv/STGAN/src/models/networks.py | 6 +- model_zoo/research/cv/SiamFC/readme.md | 195 +++++ .../research/cv/advanced_east/src/model.py | 4 +- model_zoo/research/cv/arcface/README_CN.md | 10 +- .../arcface/scripts/run_distribute_train.sh | 2 +- model_zoo/research/cv/arcface/train.py | 27 +- .../cv/centernet/src/centernet_pose.py | 10 +- .../cv/centernet_det/src/centernet_det.py | 14 +- .../src/centernet_det.py | 14 +- model_zoo/research/cv/dem/src/demnet.py | 3 +- .../research/cv/glore_res200/README_CN.md | 22 +- .../scripts/run_distribute_train.sh | 35 +- .../research/cv/glore_res200/src/config.py | 2 +- model_zoo/research/cv/glore_res200/train.py | 2 + model_zoo/research/cv/hardnet/README_CN.md | 4 +- .../hardnet/scripts/run_distribute_train.sh | 42 +- model_zoo/research/cv/midas/src/midas_net.py | 4 +- .../research/cv/resnext152_64x4d/README.md | 74 +- .../research/cv/resnext152_64x4d/README_CN.md | 66 +- .../scripts/run_distribute_train.sh | 3 +- .../scripts/run_standalone_train.sh | 3 +- .../research/cv/resnext152_64x4d/train.py | 5 +- .../cv/retinanet_resnet101/src/retinahead.py | 3 +- .../cv/retinanet_resnet152/src/retinahead.py | 3 +- .../research/cv/simple_baselines/README.md | 14 +- .../scripts/run_distribute_train.sh | 31 +- model_zoo/research/cv/squeezenet1_1/README.md | 11 +- model_zoo/research/cv/squeezenet1_1/eval.py | 9 +- model_zoo/research/cv/squeezenet1_1/train.py | 4 +- .../cv/ssd_ghostnet/src/ssd_ghostnet.py | 3 +- .../research/cv/ssd_mobilenetV2/src/ssd.py | 3 +- .../cv/ssd_mobilenetV2_FPNlite/src/ssd.py | 3 +- model_zoo/research/cv/ssd_resnet50/src/ssd.py | 3 +- model_zoo/research/cv/wideresnet/README_CN.md | 36 +- model_zoo/research/hpc/sponge/main.py | 10 +- model_zoo/research/hpc/sponge/src/angle.py | 46 +- model_zoo/research/hpc/sponge/src/bond.py | 44 +- model_zoo/research/hpc/sponge/src/dihedral.py | 71 +- 
.../hpc/sponge/src/langevin_liujian_md.py | 65 +- .../research/hpc/sponge/src/lennard_jones.py | 99 ++- .../research/hpc/sponge/src/md_information.py | 203 ++++- model_zoo/research/hpc/sponge/src/nb14.py | 48 +- .../research/hpc/sponge/src/neighbor_list.py | 40 +- .../hpc/sponge/src/particle_mesh_ewald.py | 23 +- .../research/hpc/sponge/src/simulation.py | 799 ++++++++++++++---- .../nlp/gpt2/src/gpt2_for_finetune.py | 9 +- .../src/seq2seq_model/seq2seq_for_train.py | 12 +- .../nlp/seq2seq/src/utils/optimizer.py | 3 - model_zoo/research/nlp/seq2seq/train.py | 4 +- .../research/nlp/skipgram/src/dataset.py | 2 + .../recommend/Fat-DeepFFM/src/fat_deepffm.py | 4 +- .../research/recommend/autodis/src/autodis.py | 6 +- model_zoo/utils/hccl_tools/hccl_tools.py | 10 +- .../auto_monad/test_auto_monad_mindtester.py | 3 +- tests/st/control/inner/test_002_single_for.py | 3 +- tests/st/control/inner/test_010_if_in_if.py | 3 + tests/st/control/inner/test_012_if_in_for.py | 3 + tests/st/control/inner/test_032_for_in_for.py | 12 +- .../control/inner/test_101_if_after_while.py | 2 + .../inner/test_110_if_after_if_in_if.py | 10 +- .../inner/test_121_if_after_while_in_while.py | 7 +- .../inner/test_122_if_after_while_in_for.py | 53 +- .../inner/test_330_for_after_for_in_if.py | 2 + tests/st/control/test_cont_grad.py | 251 ++++-- tests/st/dump/test_data_dump.py | 2 +- tests/st/fl/albert/src/cell_wrapper.py | 4 +- .../resnet50/test_resnet50_cifar10.py | 4 +- .../transformer/test_transformer.py | 2 +- tests/st/model_zoo_tests/yolov3/src/yolov3.py | 3 +- .../yolov3_darknet53/src/yolo.py | 7 +- .../models/bert/src/bert_for_pre_training.py | 13 +- tests/st/networks/models/bert/src/utils.py | 9 +- tests/st/ops/cpu/test_softplus_grad_op.py | 18 +- tests/st/ops/cpu/test_softplus_op.py | 34 +- tests/ut/cpp/CMakeLists.txt | 1 + tests/ut/cpp/dataset/CMakeLists.txt | 3 + .../ut/cpp/dataset/c_api_audio_a_to_q_test.cc | 392 ++++++++- .../ut/cpp/dataset/c_api_audio_r_to_z_test.cc | 74 +- 
.../cpp/dataset/c_api_vision_a_to_q_test.cc | 96 +++ tests/ut/cpp/dataset/cmu_arctic_test.cc | 145 ++++ tests/ut/cpp/dataset/common/bboxop_common.cc | 4 +- tests/ut/cpp/dataset/common/cvop_common.cc | 6 +- tests/ut/cpp/dataset/common/cvop_common.h | 1 + tests/ut/cpp/dataset/deserialize_test.cc | 56 +- tests/ut/cpp/dataset/execute_test.cc | 278 ++++++ tests/ut/cpp/dataset/random_color_op_test.cc | 4 +- tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc | 2 +- tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc | 2 +- tests/ut/cpp/dataset/tensor_test.cc | 56 +- tests/ut/cpp/runtest.sh | 2 + .../stub/dynamic_shape/dynamic_shape_stub.cc | 6 - tests/ut/cpp/stub/ge/ge_mock.cc | 2 - tests/ut/cpp/stub/ge/ge_task_launch_stub.cc | 17 +- tests/ut/python/dataset/test_adjustgamma.py | 32 +- .../ut/python/dataset/test_allpass_biquad.py | 48 +- .../ut/python/dataset/test_amplitude_to_db.py | 19 +- tests/ut/python/dataset/test_angle.py | 23 +- .../ut/python/dataset/test_bandpass_biquad.py | 25 +- .../python/dataset/test_bandreject_biquad.py | 27 +- tests/ut/python/dataset/test_bass_biquad.py | 22 +- .../python/dataset/test_datasets_cmuarctic.py | 203 +++++ tests/ut/python/dataset/test_datasets_sbd.py | 2 +- tests/ut/python/dataset/test_schema.py | 2 +- .../ut/python/dataset/test_serdes_dataset.py | 40 +- tests/ut/python/dataset/test_skip.py | 6 +- tests/ut/python/dataset/test_slice_patches.py | 52 ++ tests/ut/python/dataset/test_take.py | 2 +- tests/ut/python/dataset/test_time_stretch.py | 39 +- tests/ut/python/exec/test_train_with_lars.py | 6 +- tests/ut/python/ops/test_ops.py | 15 + tests/ut/python/optimizer/test_auto_grad.py | 109 +++ tests/ut/python/parallel/test_conv2d.py | 27 + .../python/parallel/test_conv2d_transpose.py | 34 + .../python/parallel/test_dataset_interface.py | 5 +- .../parallel/test_gather_v2_primitive.py | 4 +- tests/ut/python/parallel/test_gatherd.py | 8 + tests/ut/python/parallel/test_loss_scale.py | 9 +- tests/ut/python/parallel/test_reshape.py | 4 +- 
third_party/patch/icu4c/icu4c.patch01 | 4 +- third_party/patch/sqlite/sqlite.patch001 | 138 ++- version.txt | 2 +- 1432 files changed, 21434 insertions(+), 10802 deletions(-) create mode 100644 mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc create mode 100644 mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h create mode 100644 mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc create mode 100644 mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h create mode 100644 mindspore/lite/src/registry/register_utils.cc create mode 100644 mindspore/lite/src/registry/register_utils.h create mode 100644 model_zoo/research/cv/SiamFC/readme.md create mode 100644 tests/ut/cpp/dataset/cmu_arctic_test.cc create mode 100644 tests/ut/python/dataset/test_datasets_cmuarctic.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b61c1987f1..098b8af2383 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,9 @@ if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") -Wno-return-std-move -Wno-unused-private-field -Wno-unused-lambda-capture -Wno-sign-compare \ -Wno-overloaded-virtual -Wno-unneeded-internal-declaration -Wno-unused-variable -Wno-pessimizing-move \ -Wno-inconsistent-missing-override -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") +elseif(ENABLE_SYM_FILE) + set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -g -ggdb -Wl,--allow-shlib-undefined \ + -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") else() set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O2 -Wl,--allow-shlib-undefined \ -DHALF_ENABLE_CPP11_USER_LITERALS=0 -D_FORTIFY_SOURCE=2") diff --git a/build.sh b/build.sh index 17dc8f19629..83ea081fca1 100755 --- a/build.sh +++ b/build.sh @@ -27,7 +27,7 @@ usage() echo " [-P on|off] [-z [on|off]] [-M on|off] [-V 10.1|11.1|310|910] [-I arm64|arm32|x86_64] [-K] \\" echo " [-B on|off] [-E] [-l on|off] [-n full|lite|off] [-H on|off] \\" echo " [-A on|off] [-S on|off] 
[-k on|off] [-W sse|neon|avx|avx512|off] \\" - echo " [-L Tensor-RT path] \\" + echo " [-L Tensor-RT path] [-y on|off] \\" echo "" echo "Options:" echo " -d Debug mode" @@ -64,6 +64,7 @@ usage() echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|neon|avx|avx512|off], default off for lite and avx for CPU" echo " -H Enable hidden" echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking" + echo " -y Compile the symbol table switch and save the symbol table to the directory output" } # check value of input is 'on' or 'off' @@ -122,8 +123,9 @@ checkopts() TENSORRT_HOME="" USER_ENABLE_DUMP_IR=false USER_ENABLE_DEBUGGER=false + ENABLE_SYM_FILE="off" # Process the options - while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:' opt + while getopts 'drvj:c:t:hb:s:a:g:p:ie:m:l:I:RP:D:zM:V:K:B:En:A:S:k:W:H:L:y' opt do CASE_SENSIVE_ARG=${OPTARG} OPTARG=$(echo ${OPTARG} | tr '[A-Z]' '[a-z]') @@ -140,6 +142,9 @@ checkopts() exit 1 fi ;; + y) + ENABLE_SYM_FILE="on" + ;; r) DEBUG_MODE="off" ;; @@ -442,6 +447,9 @@ build_mindspore() if [[ -n "$TRAIN_MODE" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_${TRAIN_MODE}=ON" fi + if [[ "X$ENABLE_SYM_FILE" = "Xon" ]]; then + CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_SYM_FILE=ON" + fi if [[ "X$ENABLE_ASAN" = "Xon" ]]; then CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_ASAN=ON" fi diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake index 182632b09f1..72b68bf6446 100644 --- a/cmake/external_libs/flatbuffers.cmake +++ b/cmake/external_libs/flatbuffers.cmake @@ -1,10 +1,10 @@ if(MSVC) set(flatbuffers_CXXFLAGS "${CMAKE_CXX_FLAGS}") - set(flatbuffers_CFLAGS "${CMAKE_CXX_FLAGS}") + set(flatbuffers_CFLAGS "${CMAKE_C_FLAGS}") set(flatbuffers_LDFLAGS "${CMAKE_SHARED_LINKER_FLAGS}") else() - set(flatbuffers_CXXFLAGS "-D_FORTIFY_SOURCE=2 -O2") - set(flatbuffers_CFLAGS "-D_FORTIFY_SOURCE=2 -O2") + set(flatbuffers_CXXFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 
-fstack-protector-strong") + set(flatbuffers_CFLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -fstack-protector-strong") endif() if(WIN32) diff --git a/cmake/mind_expression.cmake b/cmake/mind_expression.cmake index b1c6cf50ec8..69ee8b0c295 100644 --- a/cmake/mind_expression.cmake +++ b/cmake/mind_expression.cmake @@ -89,7 +89,6 @@ if(ENABLE_MINDDATA) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/tinyxml2.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/cppjieba.cmake) include(${CMAKE_SOURCE_DIR}/cmake/external_libs/sentencepiece.cmake) - include(${CMAKE_SOURCE_DIR}/cmake/external_libs/ffmpeg.cmake) endif() if(ENABLE_MINDDATA) diff --git a/cmake/options.cmake b/cmake/options.cmake index c4bd42b3223..59d5861c5ed 100644 --- a/cmake/options.cmake +++ b/cmake/options.cmake @@ -25,6 +25,7 @@ option(ENABLE_ACL "enable acl" OFF) option(ENABLE_GLIBCXX "enable_glibcxx" OFF) option(MODE_ASCEND_ALL "supports all ascend platform" OFF) option(MODE_ASCEND_ACL "supports ascend acl mode only" OFF) +option(ENABLE_SYM_FILE "enable sym file" OFF) if(NOT ENABLE_D AND NOT ENABLE_TESTCASES AND NOT ENABLE_ACL AND NOT ENABLE_GE) set(ENABLE_GLIBCXX ON) diff --git a/cmake/package.cmake b/cmake/package.cmake index 69b8ecbcd2a..506f5ee86dc 100644 --- a/cmake/package.cmake +++ b/cmake/package.cmake @@ -12,6 +12,8 @@ set(CPACK_TEMPORARY_PACKAGE_FILE_NAME ${BUILD_PATH}/package/mindspore) set(CPACK_TEMPORARY_INSTALL_DIRECTORY ${BUILD_PATH}/package/mindspore) set(CPACK_PACK_ROOT_DIR ${BUILD_PATH}/package/) set(CPACK_CMAKE_SOURCE_DIR ${CMAKE_SOURCE_DIR}) +set(CPACK_ENABLE_SYM_FILE ${ENABLE_SYM_FILE}) +set(CPACK_CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE}) if(ENABLE_GE) set(CPACK_MS_BACKEND "ge") set(CPACK_MS_TARGET "ascend or cpu") @@ -125,17 +127,6 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} RENAME libicudata.so.67 COMPONENT mindspore) install(FILES ${icu4c_LIBPATH}/libicui18n.so.67.1 DESTINATION ${INSTALL_LIB_DIR} RENAME libicui18n.so.67 COMPONENT mindspore) - - install(FILES 
${ffmpeg_LIBPATH}/libavcodec.so.58.91.100 - DESTINATION ${INSTALL_LIB_DIR} RENAME libavcodec.so.58 COMPONENT mindspore) - install(FILES ${ffmpeg_LIBPATH}/libavformat.so.58.45.100 - DESTINATION ${INSTALL_LIB_DIR} RENAME libavformat.so.58 COMPONENT mindspore) - install(FILES ${ffmpeg_LIBPATH}/libavutil.so.56.51.100 - DESTINATION ${INSTALL_LIB_DIR} RENAME libavutil.so.56 COMPONENT mindspore) - install(FILES ${ffmpeg_LIBPATH}/libswresample.so.3.7.100 - DESTINATION ${INSTALL_LIB_DIR} RENAME libswresample.so.3 COMPONENT mindspore) - install(FILES ${ffmpeg_LIBPATH}/libswscale.so.5.7.100 - DESTINATION ${INSTALL_LIB_DIR} RENAME libswscale.so.5 COMPONENT mindspore) endif() if(ENABLE_CPU) diff --git a/cmake/package_script.cmake b/cmake/package_script.cmake index edef651b414..bdfcd13314d 100644 --- a/cmake/package_script.cmake +++ b/cmake/package_script.cmake @@ -77,6 +77,48 @@ set(ENV{BACKEND_TARGET} ${CPACK_MS_TARGET}) set(ENV{MS_PACKAGE_NAME} ${CPACK_MS_PACKAGE_NAME}) set(ENV{COMMIT_ID} ${GIT_COMMIT_ID}) +file(GLOB DEBUG_SYM + ${MS_PACK_ROOT_DIR}/mindspore/*.so + ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so +) + +file(GLOB DEBUG_STRIP_SYM + ${MS_PACK_ROOT_DIR}/mindspore/*.so + ${MS_PACK_ROOT_DIR}/mindspore/lib/*.so* +) + +set(CMAKE_OBJCOPY $ENV{CROSS_COMPILE}objcopy) +set(CMAKE_STRIP $ENV{CROSS_COMPILE}strip) + +if(CPACK_ENABLE_SYM_FILE) + foreach(schema ${DEBUG_SYM}) + execute_process( + COMMAND ${CMAKE_OBJCOPY} "--only-keep-debug" ${schema} ${schema}.sym + WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} + ) + endforeach() +endif() + +if("${CPACK_CMAKE_BUILD_TYPE}" STREQUAL "Release") + foreach(schema ${DEBUG_STRIP_SYM}) + execute_process( + COMMAND ${CMAKE_STRIP} ${schema} + WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} + ) + endforeach() +endif() + +file(GLOB DEBUG_SYM_FILE + ${MS_PACK_ROOT_DIR}/mindspore/*.sym + ${MS_PACK_ROOT_DIR}/mindspore/lib/*.sym +) + +if(CPACK_ENABLE_SYM_FILE) + file(MAKE_DIRECTORY ${MS_ROOT_DIR}/debug_info) + file(COPY ${DEBUG_SYM_FILE} DESTINATION 
${MS_ROOT_DIR}/debug_info/) + file(REMOVE_RECURSE ${DEBUG_SYM_FILE}) +endif() + execute_process( COMMAND ${PYTHON} ${MS_ROOT_DIR}/setup.py "bdist_wheel" WORKING_DIRECTORY ${MS_PACK_ROOT_DIR} @@ -104,3 +146,16 @@ file(COPY ${MS_PACK_ROOT_DIR}/${NEW_FILE_NAME} DESTINATION ${MS_ROOT_DIR}/output file(SHA256 ${MS_ROOT_DIR}/output/${NEW_FILE_NAME} SHA256_VAR) file(WRITE ${MS_ROOT_DIR}/output/${NEW_FILE_NAME}.sha256 ${SHA256_VAR} " " ${NEW_FILE_NAME}) +set(CMAKE_TAR $ENV{CROSS_COMPILE}tar) +if(CPACK_ENABLE_SYM_FILE) + file(MAKE_DIRECTORY ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}) + file(COPY ${MS_ROOT_DIR}/debug_info/ DESTINATION + ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/) + execute_process(COMMAND + ${CMAKE_COMMAND} -E ${CMAKE_TAR} cfv + ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}.zip + ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}/ --format=zip + WORKING_DIRECTORY ${MS_ROOT_DIR}) + file(REMOVE_RECURSE ${MS_ROOT_DIR}/debug_info) + file(REMOVE_RECURSE ${MS_ROOT_DIR}/output/${PACKAGE_NAME}-${VERSION}-${PY_TAGS}-${PLATFORM_TAG}) +endif() diff --git a/cmake/package_tar.cmake b/cmake/package_tar.cmake index 57fc5deba9f..1f04942d82e 100644 --- a/cmake/package_tar.cmake +++ b/cmake/package_tar.cmake @@ -91,18 +91,6 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) - file(GLOB_RECURSE FFMPEG_LIB_LIST - ${ffmpeg_LIBPATH}/libavcodec* - ${ffmpeg_LIBPATH}/libavformat* - ${ffmpeg_LIBPATH}/libavutil* - ${ffmpeg_LIBPATH}/libswresample* - ${ffmpeg_LIBPATH}/libswscale* - ) - install( - FILES ${FFMPEG_LIB_LIST} - DESTINATION ${INSTALL_LIB_DIR} - COMPONENT mindspore - ) endif() # CPU mode diff --git a/cmake/package_win.cmake b/cmake/package_win.cmake index d17cf1236e9..bbed4e0ff07 100644 --- a/cmake/package_win.cmake +++ b/cmake/package_win.cmake @@ -42,7 +42,6 @@ set(opencv_LIBPATH ${opencv_LIBPATH}/../bin/) set(jpeg_turbo_LIBPATH 
${jpeg_turbo_LIBPATH}/../bin/) set(sqlite_LIBPATH ${sqlite_LIBPATH}/../bin/) set(tinyxml2_LIBPATH ${tinyxml2_LIBPATH}/../bin/) -set(ffmpeg_LIBPATH ${ffmpeg_LIBPATH}/../bin/) message("offline debugger does not support windows system temporarily") @@ -98,18 +97,6 @@ if(ENABLE_MINDDATA) DESTINATION ${INSTALL_LIB_DIR} COMPONENT mindspore ) - file(GLOB_RECURSE FFMPEG_LIB_LIST - ${ffmpeg_LIBPATH}/libavcodec* - ${ffmpeg_LIBPATH}/libavformat* - ${ffmpeg_LIBPATH}/libavutil* - ${ffmpeg_LIBPATH}/libswresample* - ${ffmpeg_LIBPATH}/libswscale* - ) - install( - FILES ${FFMPEG_LIB_LIST} - DESTINATION ${INSTALL_LIB_DIR} - COMPONENT mindspore - ) endif() if(ENABLE_CPU) diff --git a/docker/OWNERS b/docker/OWNERS index 36d9fc6ffe5..7c5cab59d6b 100644 --- a/docker/OWNERS +++ b/docker/OWNERS @@ -1,2 +1,4 @@ +approvers: +- zhoufeng54 reviewers: -- HW_KK +- HW_KK \ No newline at end of file diff --git a/docker/mindspore-cpu/devel/Dockerfile b/docker/mindspore-cpu/devel/Dockerfile index ec611bc7ea9..148265abbd0 100644 --- a/docker/mindspore-cpu/devel/Dockerfile +++ b/docker/mindspore-cpu/devel/Dockerfile @@ -58,8 +58,11 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ + && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ + && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/docker/mindspore-cpu/runtime/Dockerfile b/docker/mindspore-cpu/runtime/Dockerfile index b84ac946152..ad61f9b3bec 100644 --- a/docker/mindspore-cpu/runtime/Dockerfile +++ b/docker/mindspore-cpu/runtime/Dockerfile @@ -51,13 +51,16 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && tar -xvf v3.7.5.tar.gz \ && cd 
/tmp/cpython-3.7.5 \ && mkdir -p ${PYTHON_ROOT_PATH} \ - && ./configure --prefix=${PYTHON_ROOT_PATH} \ + && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \ && make -j4 \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ + && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ + && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/docker/mindspore-gpu/devel/Dockerfile b/docker/mindspore-gpu/devel/Dockerfile index f8f4bf7ffa0..9983f3ad8a9 100644 --- a/docker/mindspore-gpu/devel/Dockerfile +++ b/docker/mindspore-gpu/devel/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 +FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04 MAINTAINER leonwanghui @@ -43,7 +43,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y \ libnuma-dev # Configure cuDNN (v7.6.5) -RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7.6.5 /usr/local/cuda/lib64/libcudnn.so +RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.8.0.5 /usr/local/cuda/lib64/libcudnn.so # Set bash RUN echo "dash dash/sh boolean false" | debconf-set-selections @@ -62,8 +62,11 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ + && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ + && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/docker/mindspore-gpu/runtime/Dockerfile b/docker/mindspore-gpu/runtime/Dockerfile index 9ff9b71a246..5a2ed3cdbe1 100644 --- 
a/docker/mindspore-gpu/runtime/Dockerfile +++ b/docker/mindspore-gpu/runtime/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 +FROM nvidia/cuda:11.1-cudnn8-devel-ubuntu18.04 MAINTAINER leonwanghui @@ -53,13 +53,16 @@ RUN apt install -y libffi-dev libssl-dev zlib1g-dev libbz2-dev libncurses5-dev \ && tar -xvf v3.7.5.tar.gz \ && cd /tmp/cpython-3.7.5 \ && mkdir -p ${PYTHON_ROOT_PATH} \ - && ./configure --prefix=${PYTHON_ROOT_PATH} \ + && ./configure --prefix=${PYTHON_ROOT_PATH} --enable-shared \ && make -j4 \ && make install -j4 \ && rm -f /usr/local/bin/python \ && rm -f /usr/local/bin/pip \ + && rm -f /usr/local/lib/libpython3.7m.so.1.0 \ && ln -s ${PYTHON_ROOT_PATH}/bin/python3.7 /usr/local/bin/python \ && ln -s ${PYTHON_ROOT_PATH}/bin/pip3.7 /usr/local/bin/pip \ + && ln -s ${PYTHON_ROOT_PATH}/lib/libpython3.7m.so.1.0 /usr/local/lib/libpython3.7m.so.1.0 \ + && ldconfig \ && rm -rf /tmp/cpython-3.7.5 \ && rm -f /tmp/v3.7.5.tar.gz diff --git a/include/api/context.h b/include/api/context.h index ec02b93598c..d1b525ef713 100644 --- a/include/api/context.h +++ b/include/api/context.h @@ -38,12 +38,19 @@ class Allocator; class Delegate; class DeviceInfoContext; +/// \brief Context is used to store environment variables during execution. class MS_API Context { public: Context(); ~Context() = default; + /// \brief Set the number of threads at runtime. This option is only valid for MindSpore Lite. + /// + /// \param[in] thread_num the number of threads at runtime. void SetThreadNum(int32_t thread_num); + /// \brief Get the current thread number setting. + /// + /// \return The current thread number setting. int32_t GetThreadNum() const; /// \brief Set the thread affinity to CPU cores. @@ -60,6 +67,10 @@ class MS_API Context { void SetDelegate(const std::shared_ptr &delegate); std::shared_ptr GetDelegate() const; + /// \brief Get a mutable reference of DeviceInfoContext vector in this context. 
Only MindSpore Lite supports + /// heterogeneous scenarios with multiple members in the vector. + /// + /// \return Mutable reference of DeviceInfoContext vector in this context. std::vector> &MutableDeviceInfo(); private: @@ -67,14 +78,24 @@ class MS_API Context { std::shared_ptr data_; }; +/// \brief DeviceInfoContext defines different device contexts. class MS_API DeviceInfoContext : public std::enable_shared_from_this { public: struct Data; DeviceInfoContext(); virtual ~DeviceInfoContext() = default; + + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. virtual enum DeviceType GetDeviceType() const = 0; + /// \brief A similar function to RTTI is provided when the -fno-rtti compilation option is turned on, which converts + /// DeviceInfoContext to a shared pointer of type T, and returns nullptr if the conversion fails. + /// + /// \param T Type + /// \return A pointer of type T after conversion. If the conversion fails, it will be nullptr. template std::shared_ptr Cast() { static_assert(std::is_base_of::value, "Wrong cast type."); @@ -98,27 +119,60 @@ class MS_API DeviceInfoContext : public std::enable_shared_from_this data_; }; +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the CPU. This option is only valid +/// for MindSpore Lite. class MS_API CPUDeviceInfo : public DeviceInfoContext { public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kCPU; }; + /// \brief Set enables to perform the float16 inference + /// + /// \param[in] is_fp16 Enable float16 inference or not. void SetEnableFP16(bool is_fp16); + /// \brief Get enables to perform the float16 inference + /// + /// \return Whether enable float16 inference. bool GetEnableFP16() const; }; +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the NPU. 
This option is only valid +/// for MindSpore Lite. class MS_API KirinNPUDeviceInfo : public DeviceInfoContext { public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kKirinNPU; }; + /// \brief Set the NPU frequency. + /// + /// \param[in] frequency Can be set to 1 (low power consumption), 2 (balanced), 3 (high performance), 4 (extreme + /// performance), default as 3. void SetFrequency(int frequency); + /// \brief Get the NPU frequency. + /// + /// \return NPU frequency int GetFrequency() const; }; +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the GPU. class MS_API GPUDeviceInfo : public DeviceInfoContext { public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kGPU; }; + /// \brief Set device id. + /// + /// \param[in] device_id The device id. void SetDeviceID(uint32_t device_id); + /// \brief Get the device id. + /// + /// \return The device id. uint32_t GetDeviceID() const; void SetGpuTrtInferMode(bool gpu_trt_infer_mode); @@ -127,8 +181,15 @@ class MS_API GPUDeviceInfo : public DeviceInfoContext { inline void SetPrecisionMode(const std::string &precison_mode); inline std::string GetPrecisionMode() const; + /// \brief Set enables to perform the float16 inference + /// + /// \param[in] is_fp16 Enable float16 inference or not. void SetEnableFP16(bool is_fp16); + /// \brief Get enables to perform the float16 inference + /// + /// \return Whether enable float16 inference. 
bool GetEnableFP16() const; + private: void SetPrecisionMode(const std::vector &precision_mode); std::vector GetPrecisionModeChar() const; @@ -139,52 +200,113 @@ void GPUDeviceInfo::SetPrecisionMode(const std::string &precision_mode) { } std::string GPUDeviceInfo::GetPrecisionMode() const { return CharToString(GetPrecisionModeChar()); } +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend910. This option is +/// invalid for MindSpore Lite. class MS_API Ascend910DeviceInfo : public DeviceInfoContext { public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kAscend910; }; + /// \brief Set device id. + /// + /// \param[in] device_id The device id. void SetDeviceID(uint32_t device_id); + /// \brief Get the device id. + /// + /// \return The device id. uint32_t GetDeviceID() const; }; +/// \brief Derived from DeviceInfoContext, The configuration of the model running on the Ascend310. This option is +/// invalid for MindSpore Lite. class MS_API Ascend310DeviceInfo : public DeviceInfoContext { public: + /// \brief Get the type of this DeviceInfoContext. + /// + /// \return Type of this DeviceInfoContext. enum DeviceType GetDeviceType() const override { return DeviceType::kAscend310; }; + /// \brief Set device id. + /// + /// \param[in] device_id The device id. void SetDeviceID(uint32_t device_id); + /// \brief Get the device id. + /// + /// \return The device id. uint32_t GetDeviceID() const; inline void SetDumpConfigPath(const std::string &cfg_path); inline std::string GetDumpConfigPath() const; - // aipp config file + /// \brief Set AIPP configuration file path. + /// + /// \param[in] cfg_path AIPP configuration file path. inline void SetInsertOpConfigPath(const std::string &cfg_path); + /// \brief Get AIPP configuration file path. + /// + /// \return AIPP configuration file path. 
inline std::string GetInsertOpConfigPath() const; - // nchw or nhwc + /// \brief Set format of model inputs. + /// + /// \param[in] format Optional "NCHW", "NHWC", etc. inline void SetInputFormat(const std::string &format); + /// \brief Get format of model inputs. + /// + /// \return The format of model inputs. inline std::string GetInputFormat() const; - // Mandatory while dynamic batch: e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1" + /// \brief Set shape of model inputs. + /// + /// \param[in] shape e.g. "input_op_name1: 1,2,3,4;input_op_name2: 4,3,2,1". inline void SetInputShape(const std::string &shape); + /// \brief Get shape of model inputs. + /// + /// \return The shape of model inputs. inline std::string GetInputShape() const; + /// \brief Set shape of model inputs. + /// + /// \param[in] shape e.g. {{1, {1,2,3,4}}, {2, {4,3,2,1}}} means the first input shape 1,2,3,4 and the second input + /// shape 4,3,2,1. void SetInputShapeMap(const std::map> &shape); + /// \brief Get shape of model inputs. + /// + /// \return The shape of model inputs. std::map> GetInputShapeMap() const; void SetDynamicBatchSize(const std::vector &dynamic_batch_size); inline std::string GetDynamicBatchSize() const; - // FP32, UINT8 or FP16, default as FP32 + /// \brief Set type of model outputs. + /// + /// \param[in] output_type FP32, UINT8 or FP16, default as FP32. void SetOutputType(enum DataType output_type); + /// \brief Get type of model outputs. + /// + /// \return The set type of model outputs. enum DataType GetOutputType() const; - // "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" or "allow_mix_precision", default as "force_fp16" + /// \brief Set precision mode of model. + /// + /// \param[in] precision_mode Optional "force_fp16", "allow_fp32_to_fp16", "must_keep_origin_dtype" and + /// "allow_mix_precision", "force_fp16" is set as default inline void SetPrecisionMode(const std::string &precision_mode); + /// \brief Get precision mode of model. 
+ /// + /// \return The set type of model outputs inline std::string GetPrecisionMode() const; - // Optional "high_performance" and "high_precision", "high_performance" is set as default + /// \brief Set op select implementation mode. + /// + /// \param[in] op_select_impl_mode Optional "high_performance" and "high_precision", "high_performance" is set as + /// default. inline void SetOpSelectImplMode(const std::string &op_select_impl_mode); + /// \brief Get op select implementation mode. + /// + /// \return The set op select implementation mode. inline std::string GetOpSelectImplMode() const; inline void SetFusionSwitchConfigPath(const std::string &cfg_path); diff --git a/include/api/model.h b/include/api/model.h index 9c0b434f0ab..53dfdb0d51d 100644 --- a/include/api/model.h +++ b/include/api/model.h @@ -37,32 +37,75 @@ class Metrics; namespace dataset { class Dataset; } // namespace dataset - +/// \brief The Model class is used to define a MindSpore model, facilitating computational graph management. class MS_API Model { public: Model(); ~Model(); Model(const Model &) = delete; void operator=(const Model &) = delete; - + /// \brief Builds a model so that it can run on a device. + /// + /// \param[in] graph GraphCell is a derivative of Cell. Cell is not available currently. GraphCell can be constructed + /// from Graph, for example, model.Build(GraphCell(graph), context). + /// \param[in] model_context A context used to store options during execution. + /// \param[in] train_cfg A config used by training. + /// + /// \return Status. Status Build(GraphCell graph, const std::shared_ptr &model_context = nullptr, const std::shared_ptr &train_cfg = nullptr); + + /// \brief Resizes the shapes of inputs. + /// + /// \param[in] inputs A vector that includes all input tensors in order. + /// \param[in] dims Defines the new shapes of inputs, should be consistent with inputs. + /// + /// \return Status. 
Status Resize(const std::vector &inputs, const std::vector> &dims); + /// \brief Inference model. + /// + /// \param[in] inputs A vector where model inputs are arranged in sequence. + /// \param[out] outputs Which is a pointer to a vector. The model outputs are filled in the container in sequence. + /// \param[in] before CallBack before predict. + /// \param[in] after CallBack after predict. + /// + /// \return Status. Status Predict(const std::vector &inputs, std::vector *outputs, const MSKernelCallBack &before = nullptr, const MSKernelCallBack &after = nullptr); + /// \brief Obtains all input tensors of the model. + /// + /// \return The vector that includes all input tensors. std::vector GetInputs(); + /// \brief Obtains the input tensor of the model by name. + /// + /// \return The input tensor with the given name, if the name is not found, an invalid tensor is returned. inline MSTensor GetInputByTensorName(const std::string &tensor_name); Status InitMetrics(std::vector metrics); std::vector GetMetrics(); + /// \brief Obtains all output tensors of the model. + /// + /// \return The vector that includes all output tensors. std::vector GetOutputs(); + /// \brief Obtains names of all output tensors of the model. + /// + /// \return A vector that includes names of all output tensors. inline std::vector GetOutputTensorNames(); + /// \brief Obtains the output tensor of the model by name. + /// + /// \return The output tensor with the given name, if the name is not found, an invalid tensor is returned. inline MSTensor GetOutputByTensorName(const std::string &tensor_name); inline std::vector GetOutputsByNodeName(const std::string &tensor_name); + /// \brief Inference model. + /// + /// \param[in] device_type Device type,options are kGPU, kAscend910, etc. + /// \param[in] model_type The type of model file, options are ModelType::kMindIR, ModelType::kOM. + /// + /// \return Is supported or not. 
static bool CheckModelSupport(enum DeviceType device_type, ModelType model_type); Status SetTrainMode(bool train); diff --git a/include/api/serialization.h b/include/api/serialization.h index c56e67fc2e9..dcb0a4762ae 100644 --- a/include/api/serialization.h +++ b/include/api/serialization.h @@ -27,13 +27,43 @@ #include "include/api/dual_abi_helper.h" namespace mindspore { - +/// \brief The Serialization class is used to summarize methods for reading and writing model files. class MS_API Serialization { public: + /// \brief Loads a model file from memory buffer. + /// + /// \param[in] model_data A buffer filled by model file. + /// \param[in] data_size The size of the buffer. + /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM. + /// \param[out] graph The output parameter, an object saves graph data. + /// \param[in] dec_key The decryption key, key length is 16, 24, or 32. + /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC. + /// + /// \return Status. inline static Status Load(const void *model_data, size_t data_size, ModelType model_type, Graph *graph, const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm); + + /// \brief Loads a model file from path, is not supported on MindSpore Lite. + /// + /// \param[in] file The path of model file. + /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM. + /// \param[out] graph The output parameter, an object saves graph data. + /// \param[in] dec_key The decryption key, key length is 16, 24, or 32. + /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC. + /// + /// \return Status. inline static Status Load(const std::string &file, ModelType model_type, Graph *graph, const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm); + + /// \brief Load multiple models from multiple files, MindSpore Lite does not provide this feature. 
+ /// + /// \param[in] files The path of model files. + /// \param[in] model_type The Type of model file, options are ModelType::kMindIR, ModelType::kOM. + /// \param[out] graph The output parameter, an object saves graph data. + /// \param[in] dec_key The decryption key, key length is 16, 24, or 32. + /// \param[in] dec_mode The decryption mode, optional options are AES-GCM, AES-CBC. + /// + /// \return Status. inline static Status Load(const std::vector &files, ModelType model_type, std::vector *graphs, const Key &dec_key = {}, const std::string &dec_mode = kDecModeAesGcm); static Status SetParameters(const std::map ¶meters, Model *model); diff --git a/include/api/types.h b/include/api/types.h index 383ba5cf9ac..77f200bda5c 100644 --- a/include/api/types.h +++ b/include/api/types.h @@ -25,11 +25,17 @@ #include "include/api/dual_abi_helper.h" #include "include/api/format.h" +#ifndef MS_API #ifdef _WIN32 +#ifdef BUILDING_DLL #define MS_API __declspec(dllexport) #else +#define MS_API __declspec(dllimport) +#endif +#else #define MS_API __attribute__((visibility("default"))) #endif +#endif namespace mindspore { enum ModelType : uint32_t { @@ -64,18 +70,64 @@ struct QuantParam { }; class Allocator; +/// \brief The MSTensor class defines a tensor in MindSpore. class MS_API MSTensor { public: class Impl; - + /// \brief Creates a MSTensor object, whose data need to be copied before accessed by Model, must be used in pairs + /// with DestroyTensorPtr. + /// + /// \param[in] name The name of the MSTensor. + /// \param[in] type The data type of the MSTensor. + /// \param[in] shape The shape of the MSTensor. + /// \param[in] data The data pointer that points to allocated memory. + /// \param[in] data_len The length of the memory, in bytes. + /// + /// \return A pointer of MSTensor. 
static inline MSTensor *CreateTensor(const std::string &name, DataType type, const std::vector &shape, const void *data, size_t data_len) noexcept; + /// \brief Creates a MSTensor object, whose data can be directly accessed by Model, must be used in pairs with + /// DestroyTensorPtr. + /// + /// \param[in] name The name of the MSTensor. + /// \param[in] type The data type of the MSTensor. + /// \param[in] shape The shape of the MSTensor. + /// \param[in] data The data pointer that points to allocated memory. + /// \param[in] data_len The length of the memory, in bytes. + /// + /// \return A pointer of MSTensor. static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector &shape, const void *data, size_t data_len) noexcept; + /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with + /// DestroyTensorPtr. + /// + /// \param[in] name The name of the MSTensor. + /// \param[in] type The data type of the MSTensor. + /// \param[in] shape The shape of the MSTensor. + /// \param[in] data The data pointer that points to device memory. + /// \param[in] data_len The length of the memory, in bytes. + /// + /// \return A pointer of MSTensor. static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector &shape, const void *data, size_t data_len) noexcept; + /// \brief Create a string type MSTensor object whose data can be accessed by Model only after being copied, must be + /// used in pair with DestroyTensorPtr. + /// + /// \param[in] name The name of the MSTensor. + /// \param[in] str A vector container containing several strings. + /// + /// \return A pointer of MSTensor. static inline MSTensor *StringsToTensor(const std::string &name, const std::vector &str); + /// \brief Parse the string type MSTensor object into strings. + /// + /// \param[in] tensor A MSTensor object. + /// + /// \return A vector container containing several strings. 
static inline std::vector TensorToStrings(const MSTensor &tensor); + /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do + /// not use it to destroy MSTensor from other sources. + /// + /// \param[in] tensor A MSTensor object. static void DestroyTensorPtr(MSTensor *tensor) noexcept; MSTensor(); @@ -85,19 +137,51 @@ class MS_API MSTensor { explicit MSTensor(std::nullptr_t); ~MSTensor(); + /// \brief Obtains the name of the MSTensor. + /// + /// \return The name of the MSTensor. inline std::string Name() const; + /// \brief Obtains the data type of the MSTensor. + /// + /// \return The data type of the MSTensor. enum DataType DataType() const; + /// \brief Obtains the shape of the MSTensor. + /// + /// \return The shape of the MSTensor. const std::vector &Shape() const; + /// \brief Obtains the number of elements of the MSTensor. + /// + /// \return The number of elements of the MSTensor. int64_t ElementNum() const; + /// \brief Obtains a shared pointer to the copy of data of the MSTensor. The data can be read on host. + /// + /// \return A shared pointer to the copy of data of the MSTensor. std::shared_ptr Data() const; + /// \brief Obtains the pointer to the data of the MSTensor. If the MSTensor is a device tensor, the data cannot be + /// accessed directly on host. + /// + /// \return A pointer to the data of the MSTensor. void *MutableData(); + /// \brief Obtains the length of the data of the MSTensor, in bytes. + /// + /// \return The length of the data of the MSTensor, in bytes. size_t DataSize() const; - + /// \brief Gets the boolean value that indicates whether the memory of MSTensor is on device. + /// + /// \return The boolean value that indicates whether the memory of MSTensor is on device. bool IsDevice() const; - + /// \brief Gets a deep copy of the MSTensor, must be used in pair with DestroyTensorPtr. + /// + /// \return A pointer points to a deep copy of the MSTensor. 
MSTensor *Clone() const; + /// \brief Gets the boolean value that indicates whether the MSTensor is valid. + /// + /// \return The boolean value that indicates whether the MSTensor is valid. bool operator==(std::nullptr_t) const; + /// \brief Gets the boolean value that indicates whether the MSTensor is valid. + /// + /// \return The boolean value that indicates whether the MSTensor is valid. bool operator!=(std::nullptr_t) const; bool operator==(const MSTensor &tensor) const; diff --git a/mindspore/_checkparam.py b/mindspore/_checkparam.py index 978256756a1..58cec1666a4 100644 --- a/mindspore/_checkparam.py +++ b/mindspore/_checkparam.py @@ -23,6 +23,7 @@ from itertools import repeat, zip_longest from collections import deque from collections.abc import Iterable import numpy as np +from mindspore import context from mindspore import log as logger from mindspore.common import dtype as mstype from mindspore._c_expression import Tensor as Tensor_ @@ -846,6 +847,10 @@ class Validator: """Returns an empty Tensor.""" return Tensor_(dtype, shape) + @staticmethod + def check_type_support(dtype, device, supported_dtypes): + return dtype in supported_dtypes or not context.get_context('device_target') == device + def check_input_format(input_param): """Judge input format.""" diff --git a/mindspore/_extends/graph_kernel/parallel_estimate.py b/mindspore/_extends/graph_kernel/parallel_estimate.py index a1f7d7a0952..0cf1a954966 100644 --- a/mindspore/_extends/graph_kernel/parallel_estimate.py +++ b/mindspore/_extends/graph_kernel/parallel_estimate.py @@ -21,7 +21,7 @@ from . 
import model def estimate_ops(json_str: str): - """Call costmodel to estimate ops.""" + """Call cost model to estimate ops.""" try: json_obj = json.loads(json_str) graph_descs = json_obj["graph_desc"] @@ -38,7 +38,7 @@ def estimate_ops(json_str: str): def estimate_calulation_amount(json_str: str): - """Call costmodel to estimate calculation amount of op.""" + """Call cost model to estimate calculation amount of op.""" try: graph_desc = json.loads(json_str) comp = model.load_composite(graph_desc) diff --git a/mindspore/_extends/graph_kernel/splitter.py b/mindspore/_extends/graph_kernel/splitter.py index c622159ac1c..87b7da1260a 100644 --- a/mindspore/_extends/graph_kernel/splitter.py +++ b/mindspore/_extends/graph_kernel/splitter.py @@ -24,7 +24,7 @@ from . import utils def split_with_json(json_str, flags_str): - """Call costmodel to split GraphKernel""" + """Call cost model to split GraphKernel""" try: graph_desc = json.loads(json_str) flags = json.loads(flags_str) diff --git a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py index c6487c9f17c..d3f0bbf1641 100644 --- a/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +++ b/mindspore/_extends/parallel_compile/akg_compiler/akg_process.py @@ -50,11 +50,6 @@ def _compile_akg_task_gpu(json_strs, attrs): if not res: raise ValueError("Compile error, args: {}! 
build attrs: {}".format(json_str, attrs)) - pid_path = os.path.realpath("./cuda_meta_" + str(os.getpid())) - if os.path.exists(pid_path): - copy_json(pid_path, os.path.realpath("./cuda_meta_" + str(os.getppid()))) - shutil.rmtree(pid_path) - def _compile_akg_task_ascend(json_strs, attrs): """ diff --git a/mindspore/_extends/parse/parser.py b/mindspore/_extends/parse/parser.py index 3af474860cc..e3b0afee226 100644 --- a/mindspore/_extends/parse/parser.py +++ b/mindspore/_extends/parse/parser.py @@ -159,12 +159,17 @@ def resolve_symbol(namespace, symbol): if getattr(resolve_, "__hash__") is None: return resolve_ + # Raise NotImplementedError when parsing the numpy methods, but not the numpy constant. + if namespace.name == "numpy" and isinstance(resolve_, (types.FunctionType, types.MethodType, types.ModuleType)): + raise NotImplementedError( + f"MindSpore does not support to use the numpy methods in the function construct with the graph mode.") + # If need trope the obj if resolve_ in convert_object_map: resolve_ = convert_object_map.get(resolve_) logger.debug("convert resolve = %r", resolve_) if resolve_ == NO_IMPLEMENT: - raise NotImplementedError(f"Not support for `{symbol}`") + raise NotImplementedError(f"Not support for `{symbol}`.") except Exception as e: if isinstance(e, NotImplementedError): raise e diff --git a/mindspore/_extends/parse/standard_method.py b/mindspore/_extends/parse/standard_method.py index 40e13001493..efd29dfc760 100644 --- a/mindspore/_extends/parse/standard_method.py +++ b/mindspore/_extends/parse/standard_method.py @@ -1312,7 +1312,8 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab >>> print(input_x.sum(axis=1)) [10. 35.] 
""" - dtype = x.dtype if dtype is None else dtype + input_x = x.astype(mstype.int32) if x.dtype == mstype.bool_ else x + dtype = input_x.dtype if dtype is None else dtype if not isinstance(keepdims, int): const_utils.raise_type_error("integer argument expected") if initial is not None and not isinstance(initial, (int, float, bool)): @@ -1322,14 +1323,14 @@ def sum(x, axis=None, dtype=None, keepdims=False, initial=None): # pylint: disab else: axis = check_and_canonicalize_axes(axis, x.ndim) - if x.dtype == mstype.bool_: - x = x.astype("int32") + if not check_type_support(input_x.dtype, 'GPU', (mstype.float64, mstype.float32, mstype.float16)): + input_x = input_x.astype(mstype.float32) if 0 in x.shape: x = const_utils.make_tensor([0], x.dtype) if keepdims: - res = _reduce_sum_keepdims(x, axis) + res = _reduce_sum_keepdims(input_x, axis) else: - res = _reduce_sum_default(x, axis) + res = _reduce_sum_default(input_x, axis) if initial is not None: res += initial return res.astype(dtype) @@ -1648,6 +1649,7 @@ get_log2_size = constexpr(validator.get_log2_size) check_axis_type = constexpr(validator.check_axis_type) check_and_canonicalize_axes = constexpr(validator.check_and_canonicalize_axes) empty_compile = constexpr(validator.empty_compile) +check_type_support = constexpr(validator.check_type_support) def tensor_bool(x): diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 7027396063c..444d08a5edd 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -325,7 +325,7 @@ endif() set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) set_property(SOURCE "pipeline/jit/init.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_PIPELINE) -pybind11_add_module(_c_expression "pipeline/jit/init.cc") +pybind11_add_module(_c_expression NO_EXTRAS "pipeline/jit/init.cc") MESSAGE(STATUS "operation system is ${CMAKE_SYSTEM}") if(CMAKE_SYSTEM_NAME MATCHES "Linux") diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt index 5622013fa27..954402e5c9e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/CMakeLists.txt @@ -35,6 +35,7 @@ if(ENABLE_CPU) "cpu/fl/*.cc" "cpu/ps/*.cc" "cpu/quantum/*.cc" + "cpu/pyfunc/*.cc" ) if(NOT ENABLE_MPI) diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc index 8b047f153a0..500be4de4ad 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.cc @@ -16,6 +16,11 @@ #include "backend/kernel_compiler/akg/akg_kernel_build.h" +#include +#include +#include +#include + #include #include #include @@ -23,6 +28,7 @@ #include #include #include +#include #include "nlohmann/json.hpp" #include "ir/dtype.h" #include "ir/func_graph.h" @@ -34,9 +40,320 @@ namespace mindspore { namespace kernel { + +#define INIT_SET_FROM_2D_ARRAY(set_var, list_idx) \ + std::set set_var(kernel_lists_[list_idx], kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]); + +#define LIST_BEGIN(list_idx) kernel_lists_[list_idx] +#define LIST_END(list_idx) (kernel_lists_[list_idx] + kernel_lists_[list_idx][kMaxKernelNum_]) +#define RESET_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] = val + +#define INCREASE_LIST_SIZE(list_idx, val) kernel_lists_[list_idx][kMaxKernelNum_] += val + constexpr int32_t PROCESS_NUM = 16; constexpr int32_t TIME_OUT = 300; +bool AkgKernelPool::LockMng::TryLock() { + // Try to lock 100 times. 
Return errno if lock unsuccessfully + uint32_t trial = 100; + + int32_t ret = -1; + while (trial > 0) { + ret = lockf(fd_, F_TLOCK, 0); + if (ret == 0 || (errno != EACCES && errno != EAGAIN)) { + break; + } + + trial--; + usleep(5000); + } + + if (ret == -1) { + MS_LOG(ERROR) << "Failed to acquire the lock, errno:" << strerror(errno) << "."; + return false; + } + + return true; +} + +void AkgKernelPool::LockMng::Unlock() { + auto ret = lockf(fd_, F_ULOCK, 0); + if (ret == -1) { + MS_LOG(ERROR) << "Failed to release the lock, errno:" << strerror(errno); + } +} + +std::string AkgKernelPool::GetCurrentPath() { + char cwd[PATH_MAX]; + char *ret = getcwd(cwd, sizeof(cwd)); + if (ret == nullptr) { + MS_LOG(ERROR) << "Get current work directory failed, errno:" << strerror(errno); + return ""; + } + + char abspath[PATH_MAX]; + char *res = realpath(cwd, abspath); + if (res == nullptr) { + MS_LOG(ERROR) << "Change to realpath failed, errno:" << strerror(errno); + return ""; + } + + return std::string(abspath); +} + +void *AkgKernelPool::CreateSharedMem(const std::string &path) { + is_creator_ = false; + + auto hash_id = std::hash()(path); + auto key_id = static_cast(hash_id); + auto mem_size = sizeof(size_t) * kListNum_ * (kMaxKernelNum_ + 1) + 512; + + { + LockMng lock(fd_); + if (!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return nullptr; + } + + // check if the shared memory exists or not. 
+ // remove shared memory if exists and the nattach is 0 + struct shmid_ds buf; + auto id = shmget(key_id, mem_size, 0); + if (id != -1) { + auto ret = shmctl(id, IPC_STAT, &buf); + if (ret == -1) { + MS_LOG(ERROR) << "Failed to get the info of shared memory, errno:" << strerror(errno); + return nullptr; + } + + if (buf.shm_nattch == 0) { + ret = shmctl(id, IPC_RMID, nullptr); + if (ret < 0) { + MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno); + } + } + } + } + + LockMng lock(fd_); + if (!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return nullptr; + } + + shm_id_ = shmget(key_id, mem_size, IPC_CREAT | IPC_EXCL | 0600); + if (shm_id_ == -1) { + if (errno == EEXIST) { + shm_id_ = shmget(key_id, mem_size, 0); + } + + if (shm_id_ == -1) { + MS_LOG(ERROR) << "Create shared_mem failed, error no:" << strerror(errno); + return nullptr; + } + } else { + is_creator_ = true; + } + + auto local_addr = shmat(shm_id_, nullptr, 0); + if (local_addr == reinterpret_cast(-1)) { + MS_LOG(ERROR) << "Attach to shared_mem failed, error no:" << strerror(errno); + return nullptr; + } + + if (is_creator_) { + (void)memset(local_addr, 0, mem_size); + } + + return local_addr; +} + +int32_t AkgKernelPool::Init(const std::vector &build_args) { + auto cp = GetCurrentPath(); + if (cp.empty()) { + return -1; + } + + fd_ = open(kKeyName_, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); + if (fd_ == -1) { + MS_LOG(ERROR) << "open file <" << kKeyName_ << "> failed, errno:" << strerror(errno); + return -1; + } + + auto addr = CreateSharedMem(cp); + if (addr == nullptr) { + return -1; + } + + InitKernelLists(addr); + + auto ret = AddKernels(build_args); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool AddKernels failed."; + return false; + } + + return 0; +} + +AkgKernelPool::~AkgKernelPool() { + // Detach shared memory + auto ret = shmdt(reinterpret_cast(kernel_lists_[0])); + if (ret < 0) { + MS_LOG(EXCEPTION) << "Shared_mem detach failed, errno:" << 
strerror(errno); + } + + // Realse shared_memroy + if (is_creator_) { + ret = shmctl(shm_id_, IPC_RMID, nullptr); + if (ret < 0) { + MS_LOG(EXCEPTION) << "Realse shared_mem failed, errno:" << strerror(errno); + } + } + + // Close key file + if (fd_ != -1) { + (void)close(fd_); + } +} + +int32_t AkgKernelPool::AddKernels(const std::vector &build_args) { + LockMng lock(fd_); + if (!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return -1; + } + + INIT_SET_FROM_2D_ARRAY(todo_list, kToDoIdx_); + INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_); + INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_); + + for (const auto &[json_generator, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_name = json_generator.kernel_name(); + + auto hash_id = std::hash()(kernel_name); + if (self_kernel_ids_.count(hash_id) != 0) { + MS_LOG(ERROR) << "Duplicated hash_id in list."; + return -1; + } + + self_kernel_ids_.emplace(hash_id); + } + + std::set diff_from_todo; + std::set diff_from_doing; + std::set diff_from_done; + + // add the unique kernel only once, so need to check if it exists in todo_list, doing_list, or done_list + std::set_difference(self_kernel_ids_.begin(), self_kernel_ids_.end(), todo_list.begin(), todo_list.end(), + std::inserter(diff_from_todo, diff_from_todo.begin())); + std::set_difference(diff_from_todo.begin(), diff_from_todo.end(), doing_list.begin(), doing_list.end(), + std::inserter(diff_from_doing, diff_from_doing.begin())); + std::set_difference(diff_from_doing.begin(), diff_from_doing.end(), done_list.begin(), done_list.end(), + std::inserter(diff_from_done, diff_from_done.begin())); + + auto new_kernel_size = diff_from_done.size(); + if (new_kernel_size + todo_list.size() > static_cast(kMaxKernelNum_)) { + MS_LOG(ERROR) << "The size of kernels is " << new_kernel_size << ", while the left space of the pool is " + << kMaxKernelNum_ - todo_list.size(); + return -1; + } + + std::copy(diff_from_done.begin(), diff_from_done.end(), 
LIST_END(kToDoIdx_)); + INCREASE_LIST_SIZE(kToDoIdx_, new_kernel_size); + + return 0; +} + +int32_t AkgKernelPool::FetchKernels(std::set *out) { + LockMng lock(fd_); + if (!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return -1; + } + + std::set left_in_todo_list; + + // filter out kernels which belongs to other processes + auto FilterBySelfList = [&left_in_todo_list, &out, this](size_t id) { + if (this->self_kernel_ids_.count(id) != 0) { + out->emplace(id); + } else { + left_in_todo_list.emplace(id); + } + }; + + std::for_each(LIST_BEGIN(kToDoIdx_), LIST_END(kToDoIdx_), FilterBySelfList); + + std::copy(out->begin(), out->end(), LIST_END(kDoingIdx_)); + INCREASE_LIST_SIZE(kDoingIdx_, out->size()); + + std::copy(left_in_todo_list.begin(), left_in_todo_list.end(), LIST_BEGIN(kToDoIdx_)); + RESET_LIST_SIZE(kToDoIdx_, left_in_todo_list.size()); + + return 0; +} + +int32_t AkgKernelPool::UpdateAndWait(const std::set &ids) { + if (!ids.empty()) { + LockMng lock(fd_); + if (!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return -1; + } + + // update the state of finished kernels to `done` + std::copy(ids.begin(), ids.end(), LIST_END(kDoneIdx_)); + INCREASE_LIST_SIZE(kDoneIdx_, ids.size()); + + // delete the finished kernels from doing_list + std::vector left_in_doing_list; + INIT_SET_FROM_2D_ARRAY(doing_list, kDoingIdx_); + std::set_difference(doing_list.begin(), doing_list.end(), ids.begin(), ids.end(), + std::inserter(left_in_doing_list, left_in_doing_list.begin())); + + std::copy(left_in_doing_list.begin(), left_in_doing_list.end(), LIST_BEGIN(kDoingIdx_)); + RESET_LIST_SIZE(kDoingIdx_, left_in_doing_list.size()); + } + + auto ret = Wait(); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool Wait failed."; + return -1; + } + + return 0; +} + +int32_t AkgKernelPool::Wait() { + // wait until all the kernels which belong to this process finish compiling + uint32_t trials = 1000; + + while (trials > 0) { + { + LockMng lock(fd_); + if 
(!lock.locked_) { + MS_LOG(ERROR) << "Failed to acquire lock."; + return -1; + } + + INIT_SET_FROM_2D_ARRAY(done_list, kDoneIdx_); + + if (std::all_of(self_kernel_ids_.begin(), self_kernel_ids_.end(), + [&done_list](size_t id) { return done_list.count(id) != 0; })) { + return 0; + } + } + + usleep(1000000); + trials--; + } + + MS_LOG(ERROR) << "Time out while wait kernel compiling"; + return -1; +} + std::vector AkgKernelBuilder::GetNotCachedKernelJsons(const std::vector &build_args) { // Remove cached nodes, gether unique nodes, and collect repeated nodes which need postprecess. std::vector jsons; @@ -66,6 +383,31 @@ std::vector AkgKernelBuilder::GetNotCachedKernelJsons(const std::ve return jsons; } +std::vector AkgKernelBuilder::GetNotCachedKernels(const std::vector &build_args) { + std::unordered_set kernel_name_set; + std::vector new_build_args; + for (const auto &[json_generator, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_name = json_generator.kernel_name(); + + auto cached_kernel_pack = AkgSearchCache(kernel_name); + if (cached_kernel_pack != nullptr) { + MS_LOG(DEBUG) << "Use cached kernel, kernel_name[" << kernel_name << "], fullname_with_scope[" + << anf_node->fullname_with_scope() << "]."; + AkgSetKernelMod(cached_kernel_pack, json_generator, anf_node); + continue; + } + + if (kernel_name_set.count(kernel_name) != 0) { + repeat_nodes_.push_back({json_generator, anf_node}); + continue; + } + kernel_name_set.insert(kernel_name); + new_build_args.push_back({json_generator, anf_node}); + } + return new_build_args; +} + bool AkgKernelBuilder::InsertToCache(const std::vector &build_args) { for (const auto &[json_generator, anf_node] : build_args) { auto kernel_name = json_generator.kernel_name(); @@ -97,32 +439,77 @@ bool AkgKernelBuilder::HandleRepeatNodes() { return true; } +std::vector AkgKernelBuilder::GetKernelJsonsByHashId(const std::vector &build_args, + std::set fetched_ids) { + std::vector jsons; + for (const auto 
&[json_generator, anf_node] : build_args) { + MS_EXCEPTION_IF_NULL(anf_node); + auto kernel_name = json_generator.kernel_name(); + + auto hash_id = std::hash()(kernel_name); + + if (fetched_ids.count(hash_id) == 0) { + continue; + } + + auto kernel_json = json_generator.kernel_json_str(); + AkgSaveJsonInfo(kernel_name, kernel_json); + jsons.push_back(kernel_json); + } + return jsons; +} + bool AkgKernelBuilder::AkgOpParallelBuild(const std::vector &build_args) { repeat_nodes_.clear(); - auto jsons = GetNotCachedKernelJsons(build_args); - if (jsons.empty()) { + auto new_build_args = GetNotCachedKernels(build_args); + if (new_build_args.empty()) { return true; } - auto client = GetClient(); - MS_EXCEPTION_IF_NULL(client); - if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) { - MS_LOG(ERROR) << "Akg start failed."; + AkgKernelPool kp; + auto ret = kp.Init(new_build_args); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool init failed."; return false; } - auto attrs = CollectBuildAttrs(); - if (!attrs.empty() && !client->AkgSendAttr(attrs)) { - MS_LOG(ERROR) << "Akg send attr failed."; + + std::set fetched_ids; + ret = kp.FetchKernels(&fetched_ids); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool FetchKernels failed."; return false; } - if (!client->AkgSendData(jsons)) { - MS_LOG(ERROR) << "Akg send data failed."; - return false; - } - if (!client->AkgWait()) { - MS_LOG(ERROR) << "Akg compile failed."; + + if (!fetched_ids.empty()) { + auto jsons = GetKernelJsonsByHashId(new_build_args, fetched_ids); + + auto client = GetClient(); + MS_EXCEPTION_IF_NULL(client); + if (!client->AkgStart(PROCESS_NUM, TIME_OUT)) { + MS_LOG(ERROR) << "Akg start failed."; + return false; + } + auto attrs = CollectBuildAttrs(); + if (!attrs.empty() && !client->AkgSendAttr(attrs)) { + MS_LOG(ERROR) << "Akg send attr failed."; + return false; + } + if (!client->AkgSendData(jsons)) { + MS_LOG(ERROR) << "Akg send data failed."; + return false; + } + if (!client->AkgWait()) { + MS_LOG(ERROR) << 
"Akg compile failed."; + return false; + } + } + + ret = kp.UpdateAndWait(fetched_ids); + if (ret != 0) { + MS_LOG(ERROR) << "AkgKernelPool UpdateAndWait failed."; return false; } + // All unique done here, cache them and set kernel. if (!InsertToCache(build_args)) { MS_LOG(ERROR) << "Insert cache failed."; diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h index c0012ece6ff..9f9958f1464 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/akg_kernel_build.h @@ -17,10 +17,13 @@ #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_AKG_KERNEL_BUILD_H_ +#include + #include #include #include #include +#include #include "ir/anf.h" #include "backend/kernel_compiler/kernel.h" #include "backend/session/kernel_build_client.h" @@ -45,12 +48,84 @@ class AkgKernelBuilder { private: std::vector GetNotCachedKernelJsons(const std::vector &build_args); + std::vector GetNotCachedKernels(const std::vector &build_args); + std::vector GetKernelJsonsByHashId(const std::vector &build_args, + std::set fetched_ids); bool InsertToCache(const std::vector &build_args); bool HandleRepeatNodes(); bool AkgOpParallelBuild(const std::vector &build_args); std::vector repeat_nodes_; std::string CollectBuildAttrs(); }; + +class AkgKernelPool { + public: + class LockMng { + public: + explicit LockMng(int32_t fd) { + fd_ = fd; + locked_ = TryLock(); + } + + virtual ~LockMng() { + if (locked_) { + Unlock(); + } + } + + bool locked_{false}; + + private: + bool TryLock(); + void Unlock(); + + int32_t fd_{-1}; + }; + + public: + AkgKernelPool() = default; + virtual ~AkgKernelPool(); + + int32_t Init(const std::vector &build_args); + int32_t FetchKernels(std::set *out); + int32_t UpdateAndWait(const std::set &ids); + + constexpr inline static size_t kMaxKernelNum_{1000}; + 
constexpr inline static key_t kSharedMemKey_{0x57565845}; + + // allocate memory for todo_list, doing_list, done_list + constexpr inline static size_t kListNum_{3}; + + constexpr inline static auto kKeyName_ = "./akg_build_tmp.key"; + + constexpr inline static int32_t kToDoIdx_ = 0; + constexpr inline static int32_t kDoingIdx_ = 1; + constexpr inline static int32_t kDoneIdx_ = 2; + + private: + void *CreateSharedMem(const std::string &path); + std::string GetCurrentPath(); + + inline void InitKernelLists(void *addr) { + kernel_lists_[kToDoIdx_] = reinterpret_cast(addr); + kernel_lists_[kDoingIdx_] = kernel_lists_[kToDoIdx_] + kMaxKernelNum_ + 1; + kernel_lists_[kDoneIdx_] = kernel_lists_[kDoingIdx_] + kMaxKernelNum_ + 1; + } + + int32_t AddKernels(const std::vector &kernel_jsons); + int32_t Wait(); + + int32_t shm_id_{-1}; + bool is_creator_{false}; + int32_t fd_{-1}; + + // includes 3 lists: todo_list, doing_list, done_list. + // each list has kMaxKernelNum_ + 1 elements and, the count of elements in each list + // is stored in kernel_lists_[xx][kMaxKernelNum_] + size_t *kernel_lists_[kListNum_]{nullptr, nullptr, nullptr}; + + std::set self_kernel_ids_; +}; } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc index 34641fc481e..4f0b619848c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_build.cc @@ -44,8 +44,10 @@ KernelPackPtr AkgAscendKernelBuilder::AkgInsertCache(const std::string &kernel_n void AkgAscendKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) { auto kernel_mod_ptr = std::make_shared(kernel_pack); + auto kernel_json_info = kernel_pack->kernel_json_info(); 
kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list()); kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list()); + kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc index 4761f359ae5..856106fec7b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/ascend/akg_ascend_kernel_mod.cc @@ -49,7 +49,7 @@ const std::vector &AkgKernelMod::GetOutputSizeList() const { return outp const std::vector &AkgKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &, +bool AkgKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) { if (stream_ptr == nullptr) { MS_LOG(ERROR) << "stream_ptr should not be nullptr."; @@ -74,6 +74,10 @@ bool AkgKernelMod::Launch(const std::vector &inputs, const std::vect [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtime_args), [](const AddressPtr &output) -> void * { return output->addr; }); + if (!workspace.empty()) { + (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtime_args), + [](const AddressPtr &addr) -> void * { return addr->addr; }); + } rtL2Ctrl_t *l2ctrl = nullptr; auto stream = static_cast(stream_ptr); @@ -86,7 +90,8 @@ bool AkgKernelMod::Launch(const std::vector &inputs, const std::vect return true; } -std::vector AkgKernelMod::GenTask(const std::vector &inputs, const std::vector &, +std::vector AkgKernelMod::GenTask(const std::vector &inputs, + const std::vector &workspace, const std::vector &outputs, 
uint32_t stream_id) { if (kernel_pack_ == nullptr) { MS_LOG(EXCEPTION) << "kernel pack should not be nullptr."; @@ -107,6 +112,10 @@ std::vector AkgKernelMod::GenTask(const std::vector &in [](const AddressPtr &input) -> void * { return input->addr; }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(output_data_addrs), [](const AddressPtr &output) -> void * { return output->addr; }); + if (!workspace.empty()) { + (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(workspace_addrs), + [](const AddressPtr &workspace) -> void * { return workspace->addr; }); + } uint32_t block_dim = DEFAULT_BLOCK_DIM; // default blockdim equal to 1. auto func_stub = KernelManager::GenFuncStub(*kernel_pack_, false, &block_dim); diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc index d39e75e2917..47d5c0f31ba 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_build.cc @@ -39,8 +39,10 @@ KernelPackPtr AkgGpuKernelBuilder::AkgInsertCache(const std::string &kernel_name void AkgGpuKernelBuilder::AkgSetKernelMod(const KernelPackPtr &kernel_pack, const AkgKernelJsonGenerator &json_generator, const AnfNodePtr &anf_node) { auto kernel_mod_ptr = std::make_shared(kernel_pack); + auto kernel_json_info = kernel_pack->kernel_json_info(); kernel_mod_ptr->SetInputSizeList(json_generator.input_size_list()); kernel_mod_ptr->SetOutputSizeList(json_generator.output_size_list()); + kernel_mod_ptr->SetWorkspaceSizeList(kernel_json_info.workspaces); AnfAlgo::SetKernelMod(kernel_mod_ptr, anf_node.get()); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc index 3cdb095ab41..0971bdcf42b 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.cc @@ -92,13 +92,15 @@ void GpuKernelMod::SetInputSizeList(const std::vector &size_list) { inpu void GpuKernelMod::SetOutputSizeList(const std::vector &size_list) { output_size_list_ = size_list; } +void GpuKernelMod::SetWorkspaceSizeList(const std::vector &size_list) { workspace_size_list_ = size_list; } + const std::vector &GpuKernelMod::GetInputSizeList() const { return input_size_list_; } const std::vector &GpuKernelMod::GetOutputSizeList() const { return output_size_list_; } const std::vector &GpuKernelMod::GetWorkspaceSizeList() const { return workspace_size_list_; } -bool GpuKernelMod::Launch(const std::vector &inputs, const std::vector &, +bool GpuKernelMod::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) { if (stream_ptr == 0) { MS_LOG(ERROR) << "stream_ptr should not be nullptr."; @@ -122,6 +124,10 @@ bool GpuKernelMod::Launch(const std::vector &inputs, const std::vect [](const AddressPtr &input) -> void * { return reinterpret_cast(&(input->addr)); }); (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs), [](const AddressPtr &output) -> void * { return reinterpret_cast(&(output->addr)); }); + if (!workspace.empty()) { + (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs), + [](const AddressPtr &addr) -> void * { return addr->addr; }); + } result = cuLaunchKernel(kernel_addr, thread_info[0], thread_info[1], thread_info[2], thread_info[3], thread_info[4], thread_info[5], 0, reinterpret_cast(stream_ptr), reinterpret_cast(&runtimeargs[0]), 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h index b87d223f7f3..5e9d17acfd1 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h +++ b/mindspore/ccsrc/backend/kernel_compiler/akg/gpu/akg_gpu_kernel_mod.h @@ -60,6 +60,7 @@ class GpuKernelMod : public KernelMod { void SetInputSizeList(const std::vector &size_list); void SetOutputSizeList(const std::vector &size_list); + void SetWorkspaceSizeList(const std::vector &size_list); const std::vector &GetInputSizeList() const override; const std::vector &GetOutputSizeList() const override; const std::vector &GetWorkspaceSizeList() const override; diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc index edc94673083..b9124449dd8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.cc @@ -141,14 +141,8 @@ FusionType GetFusionTypeByName(const std::string &name) { return iter->first; } -void KernelMeta::Initialize(int pid) { - if (pid == -1) { - kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(getpid()) + "/"; - } else { - kernel_meta_path_ = std::string(kGpuKernelMeta) + "_" + std::to_string(pid) + "/"; - } - // remove old kernel cache - RemoveKernelCache(); +void KernelMeta::Initialize() { + kernel_meta_path_ = std::string(kGpuKernelMeta) + "/"; #if defined(_WIN32) || defined(_WIN64) auto ret = mkdir(kernel_meta_path_.c_str()); @@ -161,21 +155,6 @@ void KernelMeta::Initialize(int pid) { initialized_ = true; } -void KernelMeta::RemoveKernelCache() { - DIR *dir = opendir(kernel_meta_path_.c_str()); - if (dir == nullptr) { - return; - } - struct dirent *entry; - while ((entry = readdir(dir)) != nullptr) { - std::string kernel_file = entry->d_name; - std::string kernel_file_realpath = kernel_meta_path_ + kernel_file; - (void)remove(kernel_file_realpath.c_str()); - } - (void)closedir(dir); - (void)rmdir(kernel_meta_path_.c_str()); -} - std::string KernelMeta::Search(const std::string &kernel_name) const { if 
(!initialized_) { return ""; @@ -227,7 +206,7 @@ KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &pro KernelPackPtr kernel_pack = std::make_shared(); // just a tmp solution. if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(DEBUG) << "Read cache json and bin file failed[" << kernel_json << "]."; + MS_LOG(ERROR) << "Read cache json and bin file failed[" << kernel_json << "]."; return nullptr; } else { return kernel_pack; @@ -250,7 +229,7 @@ KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &pro (void)kernel_json.append(kernel_name).append(kJsonSuffix); KernelPackPtr kernel_pack = std::make_shared(); if (!kernel_pack->ReadFromJsonFile(kernel_json, processor)) { - MS_LOG(DEBUG) << "Read json and bin file failed[" << kernel_json << "]."; + MS_LOG(ERROR) << "Read json and bin file failed[" << kernel_json << "]."; return nullptr; } @@ -714,6 +693,9 @@ void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vectorinputs().size(); ++input_idx) { auto input_node = cnode->input(input_idx); MS_EXCEPTION_IF_NULL(input_node); + if (input_node->isa() && AnfAlgo::GetInputTensorNum(input_node) == 0) { + continue; + } output_list->push_back(AnfAlgo::VisitKernel(input_node, 0).first); } } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h index 9c50ea0213f..507517954bd 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/common_utils.h +++ b/mindspore/ccsrc/backend/kernel_compiler/common_utils.h @@ -55,8 +55,7 @@ using KernelMetaPtr = std::shared_ptr; class KernelMeta { public: KernelMeta() = default; - void Initialize(int pid); - void RemoveKernelCache(); + void Initialize(); std::string Search(const std::string &kernel_name) const; bool Insert(const std::string &kernel_name, const std::string &kernel_json); std::string kernel_meta_path() const { return kernel_meta_path_; } diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc index ae3182d97f7..2bdbc7fcc26 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.cc @@ -26,46 +26,26 @@ namespace mindspore { namespace kernel { constexpr size_t kSizeFloat16 = sizeof(float16); constexpr size_t kSizeFloat32 = sizeof(float); +constexpr size_t kScalarIndex = 0; constexpr size_t kAdamWeightDecayInputSize = 9; constexpr size_t kAdamWeightDecayOutputSize = 3; -void AdamWeightDecayCPUKernel::ParallelForAdam(const CTask &task, size_t count) { - auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); - const float block_size = 128.0; - const float align_size = 16.0; - size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num; - std::vector tasks; - size_t start = 0; - size_t once_compute_size = align_size * std::ceil(count / (align_size * thread_num)); - while (start < count) { - size_t end = (start + once_compute_size) > count ? 
count : (start + once_compute_size); - auto block = [&, start, end]() { - task(start, end); - return common::SUCCESS; - }; - tasks.emplace_back(block); - start += once_compute_size; - } - common::ThreadPool::GetInstance().SyncRun(tasks); -} - template -void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &inputs, - const std::vector &outputs) { - auto var = reinterpret_cast(inputs[0]->addr); - auto m = reinterpret_cast(inputs[1]->addr); - auto v = reinterpret_cast(inputs[2]->addr); - auto lr = reinterpret_cast(inputs[3]->addr)[0]; - auto beta1 = reinterpret_cast(inputs[4]->addr)[0]; - auto beta2 = reinterpret_cast(inputs[5]->addr)[0]; - auto epsilon = reinterpret_cast(inputs[6]->addr)[0]; - auto decay = reinterpret_cast(inputs[7]->addr)[0]; - auto gradient16 = reinterpret_cast(inputs[8]->addr); +void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &inputs, const std::vector &) { + auto var = reinterpret_cast(inputs[VAR]->addr); + auto m = reinterpret_cast(inputs[M]->addr); + auto v = reinterpret_cast(inputs[V]->addr); + auto lr = reinterpret_cast(inputs[LR]->addr)[kScalarIndex]; + auto beta1 = reinterpret_cast(inputs[BETA1]->addr)[kScalarIndex]; + auto beta2 = reinterpret_cast(inputs[BETA2]->addr)[kScalarIndex]; + auto epsilon = reinterpret_cast(inputs[EPSILON]->addr)[kScalarIndex]; + auto decay = reinterpret_cast(inputs[DECAY]->addr)[kScalarIndex]; + auto gradient16 = reinterpret_cast(inputs[GRAD]->addr); const auto beta1_minus = 1 - beta1; const auto beta2_minus = 1 - beta2; // multithreading - size_t lens = inputs[0]->size > 0 ? static_cast(inputs[0]->size / sizeof(float)) : 1; + size_t lens = inputs[VAR]->size > 0 ? 
static_cast(inputs[VAR]->size / sizeof(float)) : 1; std::function task; task = [&](size_t start, size_t end) { @@ -81,28 +61,27 @@ void AdamWeightDecayCPUKernel::LaunchFusedAdam(const std::vector &in var[i] -= lr * update; } }; - ParallelForAdam(task, lens); + CPUKernelUtils::ParallelFor(task, lens); } template void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector &inputs, - const std::vector &outputs) { - auto var = reinterpret_cast(inputs[0]->addr); - auto m = reinterpret_cast(inputs[1]->addr); - auto v = reinterpret_cast(inputs[2]->addr); - auto lr = reinterpret_cast(inputs[3]->addr)[0]; - auto beta1 = reinterpret_cast(inputs[4]->addr)[0]; - auto beta2 = reinterpret_cast(inputs[5]->addr)[0]; - auto epsilon = reinterpret_cast(inputs[6]->addr)[0]; - auto decay = reinterpret_cast(inputs[7]->addr)[0]; - auto gradient = reinterpret_cast(inputs[8]->addr); + const std::vector &) { + auto var = reinterpret_cast(inputs[VAR]->addr); + auto m = reinterpret_cast(inputs[M]->addr); + auto v = reinterpret_cast(inputs[V]->addr); + auto lr = reinterpret_cast(inputs[LR]->addr)[kScalarIndex]; + auto beta1 = reinterpret_cast(inputs[BETA1]->addr)[kScalarIndex]; + auto beta2 = reinterpret_cast(inputs[BETA2]->addr)[kScalarIndex]; + auto epsilon = reinterpret_cast(inputs[EPSILON]->addr)[kScalarIndex]; + auto decay = reinterpret_cast(inputs[DECAY]->addr)[kScalarIndex]; + auto gradient = reinterpret_cast(inputs[GRAD]->addr); const auto beta1_minus = 1 - beta1; const auto beta2_minus = 1 - beta2; // multithreading - size_t lens = inputs[0]->size > 0 ? static_cast(inputs[0]->size / sizeof(float)) : 1; + size_t lens = inputs[VAR]->size > 0 ? 
static_cast(inputs[VAR]->size / sizeof(float)) : 1; std::function task; - task = [&](size_t start, size_t end) { size_t i = AdamWeightDecayFp32(var, m, v, lr, beta1, beta2, epsilon, decay, gradient, start, end); // remaining @@ -114,14 +93,14 @@ void AdamWeightDecayCPUKernel::LaunchAdamWeightDecay(const std::vector var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0); - dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); - gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 8); + std::vector var_shape = AnfAlgo::GetInputDeviceShape(kernel_node, VAR); + dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, VAR); + gradient_dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, GRAD); size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); if (input_num != kAdamWeightDecayInputSize) { MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but AdamWeightDecay needs 9 inputs."; @@ -155,12 +134,12 @@ void AdamWeightDecayCPUKernel::CheckParam(const std::vector } size_t elem1_size = elem_num_ * kSizeFloat32; size_t elem2_size = gradient_dtype_ == kNumberTypeFloat16 ? 
elem_num_ * kSizeFloat16 : elem1_size; - if (inputs[0]->size != elem1_size || inputs[1]->size != elem1_size || inputs[2]->size != elem1_size || - inputs[8]->size != elem2_size) { + if (inputs[VAR]->size != elem1_size || inputs[M]->size != elem1_size || inputs[V]->size != elem1_size || + inputs[GRAD]->size != elem2_size) { MS_LOG(EXCEPTION) << "Error input data size!"; } - if (inputs[3]->size != kSizeFloat32 || inputs[4]->size != kSizeFloat32 || inputs[5]->size != kSizeFloat32 || - inputs[6]->size != kSizeFloat32 || inputs[7]->size != kSizeFloat32) { + if (inputs[LR]->size != kSizeFloat32 || inputs[BETA1]->size != kSizeFloat32 || inputs[BETA2]->size != kSizeFloat32 || + inputs[EPSILON]->size != kSizeFloat32 || inputs[DECAY]->size != kSizeFloat32) { MS_LOG(EXCEPTION) << "The attribute beta, lr, epsilon and weight decay must be float!"; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h index 34c56bed352..fe6f309e38e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/adam_weight_decay_cpu_kernel.h @@ -32,7 +32,6 @@ class AdamWeightDecayCPUKernel : public CPUKernel { const std::vector &outputs) override; private: - void ParallelForAdam(const CTask &task, size_t count); void CheckParam(const std::vector &inputs, const std::vector &outputs); template void LaunchFusedAdam(const std::vector &inputs, const std::vector &outputs); @@ -41,6 +40,7 @@ class AdamWeightDecayCPUKernel : public CPUKernel { size_t elem_num_{0}; TypeId dtype_{kTypeUnknown}; TypeId gradient_dtype_{kTypeUnknown}; + enum input_list_ { VAR, M, V, LR, BETA1, BETA2, EPSILON, DECAY, GRAD }; }; MS_REG_CPU_KERNEL(AdamWeightDecay, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc index 
238b5c5e9a3..578eda21a66 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/apply_adagrad_cpu_kernel.cc @@ -76,27 +76,10 @@ void ApplyAdagradCPUKernel::LaunchKernel(const std::vector &inputs, // multithreading size_t length = inputs[0]->size / sizeof(T); - size_t max_thread_num = std::thread::hardware_concurrency(); - size_t use_thread_num = length < 128 * max_thread_num ? std::ceil(length / 128.0) : max_thread_num; - std::vector threads; - threads.reserve(use_thread_num); - size_t start = 0; - const size_t batch_size = (length + use_thread_num - 1) / use_thread_num; - - if (batch_size == 0) { - MS_LOG(EXCEPTION) << "Error occur in launch kernel"; - return; - } - while (start < length) { - size_t end = (start + batch_size) > length ? length : (start + batch_size); - threads.emplace_back( - std::thread(&ApplyAdagradCPUKernel::LaunchApplyAdagrad, this, var, accum, lr, gradient, start, end)); - start += batch_size; - } - - for (auto &it : threads) { - it.join(); - } + auto task = [this, &var, &accum, lr, gradient](size_t start, size_t end) { + LaunchApplyAdagrad(var, accum, lr, gradient, start, end); + }; + CPUKernelUtils::ParallelForAutoSearch(task, length, ¶llel_search_info_); // Copy result to output tensor auto output_var = reinterpret_cast(outputs[0]->addr); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc index 85fdec3c565..5e85be5fe6c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.cc @@ -13,10 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h" #include #include #include -#include "backend/kernel_compiler/cpu/arithmetic_logic_cpu_kernel.h" +#include #include "runtime/device/cpu/cpu_device_address.h" namespace mindspore { @@ -29,7 +31,9 @@ void ArithmeticLogicCPUKernel::Less(const T *input1, const T *input2, bool *o auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - out[i] = input1[iter.GetInputPosA()] < input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::less()(x, y); iter.GenNextPos(); } }; @@ -37,7 +41,9 @@ void ArithmeticLogicCPUKernel::Less(const T *input1, const T *input2, bool *o } else { base_iter.SetPos(0); for (size_t i = 0; i < output_size_; i++) { - out[i] = input1[base_iter.GetInputPosA()] < input2[base_iter.GetInputPosB()]; + auto x = input1[base_iter.GetInputPosA()]; + auto y = input2[base_iter.GetInputPosB()]; + out[i] = std::less()(x, y); base_iter.GenNextPos(); } } @@ -50,7 +56,9 @@ void ArithmeticLogicCPUKernel::Equal(const T *input1, const T *input2, bool * auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - out[i] = input1[iter.GetInputPosA()] == input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::equal_to()(x, y); iter.GenNextPos(); } }; @@ -64,7 +72,9 @@ void ArithmeticLogicCPUKernel::NotEqual(const T *input1, const T *input2, boo auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - out[i] = input1[iter.GetInputPosA()] != input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::not_equal_to()(x, y); iter.GenNextPos(); } }; @@ -106,7 +116,9 @@ void ArithmeticLogicCPUKernel::Greater(const T *input1, const T *input2, bool auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - 
out[i] = input1[iter.GetInputPosA()] > input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::greater()(x, y); iter.GenNextPos(); } }; @@ -120,7 +132,9 @@ void ArithmeticLogicCPUKernel::GreaterEqual(const T *input1, const T *input2, auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - out[i] = input1[iter.GetInputPosA()] >= input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::greater_equal()(x, y); iter.GenNextPos(); } }; @@ -134,7 +148,9 @@ void ArithmeticLogicCPUKernel::LessEqual(const T *input1, const T *input2, bo auto iter = base_iter; iter.SetPos(start); for (size_t i = start; i < end; i++) { - out[i] = input1[iter.GetInputPosA()] <= input2[iter.GetInputPosB()]; + auto x = input1[iter.GetInputPosA()]; + auto y = input2[iter.GetInputPosB()]; + out[i] = std::less_equal()(x, y); iter.GenNextPos(); } }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index 7241f6163cf..b85568f505e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc index 12ae560be86..eee6e6f4985 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc @@ -21,6 +21,7 @@ #include #include "runtime/device/kernel_info.h" +#include "runtime/device/cpu/kernel_select_cpu.h" namespace mindspore { namespace kernel { @@ -111,6 +112,11 @@ std::pair CPUKernelFactory::CPUKernelAttrCheck(const std::string & MS_LOG(INFO) << "Not registered CPU kernel: op[" << kernel_name << "]!"; return std::make_pair(false, 0); } + + if (device::cpu::IsDynamicParamKernel(kernel_name)) { + return std::make_pair(true, 0); + } + auto kernel_attrs = GetSupportedKernelAttrList(kernel_name); if (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0) { auto op_info_ptr = mindspore::kernel::OpLib::FindOp(kernel_name, kernel::OpImplyType::kCPU); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc index 743fef0cdb0..2f458845f70 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.cc @@ -43,9 +43,9 @@ void DropoutGradCpuBwdKernel::InitKernel(const CNodePtr &kernel_node) { bool DropoutGradCpuBwdKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs) { if (dtype_ == kNumberTypeFloat16) { - DropoutBackwardKernel(inputs, outputs, num_count_, keep_prob_); + DropoutBackwardKernel(inputs, outputs, keep_prob_); } else if (dtype_ == kNumberTypeFloat32) { - DropoutBackwardKernel(inputs, outputs, num_count_, keep_prob_); + DropoutBackwardKernel(inputs, outputs, keep_prob_); } else { MS_LOG(ERROR) << "Input data type: " << dtype_ << " is not supported for DropoutGrad kernel for CPU."; } @@ -55,8 
+55,7 @@ bool DropoutGradCpuBwdKernel::Launch(const std::vector &inputs, cons template void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector &inputs, - const std::vector &outputs, size_t num_count, - float keep_prob) { + const std::vector &outputs, float keep_prob) { auto *output = reinterpret_cast(outputs[0]->addr); const auto *input = reinterpret_cast(inputs[0]->addr); const auto *mask = reinterpret_cast(inputs[1]->addr); @@ -70,7 +69,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector(input[i]); mask_tmp[i] = static_cast(mask[i]); } - DropoutGrad(input_tmp, mask_tmp, output_tmp, num_count_, scale); + DropoutGrad(input_tmp, mask_tmp, output_tmp, SizeToInt(num_count_), scale); for (size_t i = 0; i < num_count_; ++i) { output[i] = static_cast(output_tmp[i]); } @@ -78,7 +77,7 @@ void DropoutGradCpuBwdKernel::DropoutBackwardKernel(const std::vector) { - DropoutGrad(input, mask, output, num_count_, scale); + DropoutGrad(input, mask, output, SizeToInt(num_count_), scale); } } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h index ab5889dc4b8..e7931d08303 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/dropout_grad_kernel.h @@ -40,7 +40,7 @@ class DropoutGradCpuBwdKernel : public CPUKernel { TypeId dtype_{kTypeUnknown}; template void DropoutBackwardKernel(const std::vector &inputs, const std::vector &outputs, - size_t num_count, float keep_prob); + float keep_prob); }; MS_REG_CPU_KERNEL(DropoutGrad, KernelAttr(), DropoutGradCpuBwdKernel); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc index 394fcbbd786..926d8e172ef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,8 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include + #include "backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h" +#include +#include #include "common/thread_pool.h" #include "runtime/device/cpu/cpu_device_address.h" #include "nnacl/fp32_grad/activation_grad.h" @@ -25,50 +27,50 @@ namespace mindspore { namespace kernel { template void EltWiseGradCPUKernel::ReluGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (std::is_same_v) { - int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "ReLUGrad failed."; - } - } else { + if constexpr (!std::is_same::value) { MS_LOG(EXCEPTION) << "ReLUGrad only support float"; } + + int ret = ::ReluGrad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLUGrad execute failed."; + } } template void EltWiseGradCPUKernel::ReLU6Grad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (std::is_same_v) { - int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "ReLU6Grad failed."; - } - } else { + if constexpr (!std::is_same::value) { MS_LOG(EXCEPTION) << "ReLU6Grad only support float"; } + + int ret = ::Relu6Grad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "ReLU6Grad execute failed."; + } } template void EltWiseGradCPUKernel::AbsGrad(const T *input1, const T *input2, T *out, 
size_t start, size_t end) const { - if constexpr (std::is_same_v) { - int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "AbsGrad failed."; - } - } else { + if constexpr (!std::is_same::value) { MS_LOG(EXCEPTION) << "AbsGrad only support float"; } + + int ret = ::ElementAbsGrad(input1 + start, input2 + start, out + start, end - start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "AbsGrad execute failed."; + } } template void EltWiseGradCPUKernel::SigmoidGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (std::is_same_v) { - int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "SigmoidGrad failed."; - } - } else { + if constexpr (!std::is_same::value) { MS_LOG(EXCEPTION) << "SigmoidGrad only support float"; } + + int ret = ::SigmoidGrad(input2 + start, input1 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "SigmoidGrad execute failed."; + } } template @@ -80,14 +82,14 @@ void EltWiseGradCPUKernel::SqrtGrad(const T *input1, const T *input2, T *out, template void EltWiseGradCPUKernel::TanhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const { - if constexpr (std::is_same_v) { - int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start); - if (ret == NNACL_ERR) { - MS_LOG(EXCEPTION) << "TanhGrad failed."; - } - } else { + if constexpr (!std::is_same::value) { MS_LOG(EXCEPTION) << "TanhGrad only support float"; } + + int ret = ::TanhGrad(input2 + start, input1 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "TanhGrad execute failed."; + } } template @@ -207,6 +209,18 @@ void EltWiseGradCPUKernel::AcoshGrad(const T *input1, const T *input2, T *out } } +template +void EltWiseGradCPUKernel::SoftplusGrad(const T *input1, const T *input2, T *out, 
size_t start, size_t end) const { + if constexpr (!std::is_same::value) { + MS_LOG(EXCEPTION) << "SoftplusGrad only support float"; + } + + int ret = ::SoftplusGrad(input1 + start, input2 + start, end - start, out + start); + if (ret == NNACL_ERR) { + MS_LOG(EXCEPTION) << "SoftplusGrad execute failed."; + } +} + template void EltWiseGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { MS_EXCEPTION_IF_NULL(kernel_node); @@ -219,12 +233,19 @@ bool EltWiseGradCPUKernel::Launch(const std::vector &inpu const std::vector &outputs) { static const std::map> - elt_map{{"ReluGrad", &EltWiseGradCPUKernel::ReluGrad}, {"ReLU6Grad", &EltWiseGradCPUKernel::ReLU6Grad}, - {"SigmoidGrad", &EltWiseGradCPUKernel::SigmoidGrad}, {"AbsGrad", &EltWiseGradCPUKernel::AbsGrad}, - {"TanhGrad", &EltWiseGradCPUKernel::TanhGrad}, {"SqrtGrad", &EltWiseGradCPUKernel::SqrtGrad}, - {"GeLUGrad", &EltWiseGradCPUKernel::GeluGrad}, {"AsinGrad", &EltWiseGradCPUKernel::AsinGrad}, - {"ACosGrad", &EltWiseGradCPUKernel::ACosGrad}, {"AtanGrad", &EltWiseGradCPUKernel::AtanGrad}, - {"AsinhGrad", &EltWiseGradCPUKernel::AsinhGrad}, {"AcoshGrad", &EltWiseGradCPUKernel::AcoshGrad}}; + elt_map{{prim::kPrimReluGrad->name(), &EltWiseGradCPUKernel::ReluGrad}, + {prim::kPrimRelu6Grad->name(), &EltWiseGradCPUKernel::ReLU6Grad}, + {prim::kPrimSigmoidGrad->name(), &EltWiseGradCPUKernel::SigmoidGrad}, + {prim::kPrimAbsGrad->name(), &EltWiseGradCPUKernel::AbsGrad}, + {prim::kPrimTanhGrad->name(), &EltWiseGradCPUKernel::TanhGrad}, + {prim::kPrimSqrtGrad->name(), &EltWiseGradCPUKernel::SqrtGrad}, + {prim::kPrimGeLUGrad->name(), &EltWiseGradCPUKernel::GeluGrad}, + {prim::kPrimAsinGrad->name(), &EltWiseGradCPUKernel::AsinGrad}, + {prim::kPrimACosGrad->name(), &EltWiseGradCPUKernel::ACosGrad}, + {prim::kPrimAtanGrad->name(), &EltWiseGradCPUKernel::AtanGrad}, + {prim::kPrimAsinhGrad->name(), &EltWiseGradCPUKernel::AsinhGrad}, + {prim::kPrimAcoshGrad->name(), &EltWiseGradCPUKernel::AcoshGrad}, + 
{prim::kPrimSoftplusGrad->name(), &EltWiseGradCPUKernel::SoftplusGrad}}; if (inputs.size() < 2 || outputs.size() != 1) { MS_LOG(ERROR) << kernel_name_ << " requires at least 2 inputs and 1 output, but got " << inputs.size() << " inputs and " << outputs.size() << " output."; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h index f085a9a80d6..9f434981f75 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/eltwise_grad_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -48,6 +48,7 @@ class EltWiseGradCPUKernel : public CPUKernel { void AtanGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; void AsinhGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; void AcoshGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; + void SoftplusGrad(const T *input1, const T *input2, T *out, size_t start, size_t end) const; std::string kernel_name_ = ""; }; @@ -103,6 +104,10 @@ MS_REG_CPU_KERNEL_T( AcoshGrad, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseGradCPUKernel, float); +MS_REG_CPU_KERNEL_T( + SoftplusGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseGradCPUKernel, float); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc index ecb66469d0d..0d76cff47a9 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.cc @@ -13,39 +13,47 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + #include "backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h" +#include +#include #include "backend/kernel_compiler/cpu/mkldnn/mkl_kernel_engine.h" #include "runtime/device/cpu/cpu_device_address.h" #include "utils/ms_utils.h" namespace mindspore { namespace kernel { +namespace { +struct DescParam { + dnnl::algorithm algorithm; + float alpha = 0.f; + float beta = 0.f; +}; +} // namespace + dnnl::eltwise_forward::desc EltWiseCPUKernel::GetForwardEltwiseDesc(const CNodePtr &kernel_node, const dnnl::memory::desc src_desc) { + static const std::unordered_map eltWiseOpDescMap{ + {prim::kPrimRelu->name(), DescParam{dnnl::algorithm::eltwise_relu}}, + {prim::kPrimRelu6->name(), DescParam{dnnl::algorithm::eltwise_clip, 0.f, 6.f}}, + {prim::kPrimAbs->name(), DescParam{dnnl::algorithm::eltwise_abs}}, + {prim::kPrimExp->name(), DescParam{dnnl::algorithm::eltwise_exp}}, + {prim::kPrimLog->name(), DescParam{dnnl::algorithm::eltwise_log}}, + {prim::kPrimSigmoid->name(), DescParam{dnnl::algorithm::eltwise_logistic}}, + {prim::kPrimSqrt->name(), DescParam{dnnl::algorithm::eltwise_sqrt}}, + {prim::kPrimSquare->name(), DescParam{dnnl::algorithm::eltwise_square}}, + {prim::kPrimTanh->name(), DescParam{dnnl::algorithm::eltwise_tanh}}, + {prim::kPrimElu->name(), DescParam{dnnl::algorithm::eltwise_elu, 1.f, 0.f}}, + {prim::kPrimSoftplus->name(), DescParam{dnnl::algorithm::eltwise_soft_relu}}, + }; + std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node); - if (kernel_name == "ReLU") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_relu, src_desc, 0.0); - } else if (kernel_name == "ReLU6") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_clip, 
src_desc, 0.0, 6.0); - } else if (kernel_name == "Abs") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_abs, src_desc); - } else if (kernel_name == "Exp") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_exp, src_desc); - } else if (kernel_name == "Log") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_log, src_desc); - } else if (kernel_name == "Sigmoid") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_logistic, src_desc); - } else if (kernel_name == "Sqrt") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_sqrt, src_desc); - } else if (kernel_name == "Square") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_square, src_desc); - } else if (kernel_name == "Tanh") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_tanh, src_desc); - } else if (kernel_name == "Elu") { - return dnnl::eltwise_forward::desc(DnnlForward, dnnl::algorithm::eltwise_elu, src_desc, 1.0); - } else { - MS_LOG(EXCEPTION) << "Eltwise operators don't support " << kernel_name; + const auto desc_pair = eltWiseOpDescMap.find(kernel_name); + if (desc_pair == eltWiseOpDescMap.end()) { + MS_LOG(EXCEPTION) << "EltWiseCPUKernel does not support " << kernel_name; } + return dnnl::eltwise_forward::desc(DnnlForward, desc_pair->second.algorithm, src_desc, desc_pair->second.alpha, + desc_pair->second.beta); } void EltWiseCPUKernel::InitKernel(const CNodePtr &kernel_node) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h index 18d0ae24548..cd695e2a9e6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/eltwise_cpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2021 Huawei 
Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,6 +56,8 @@ MS_REG_CPU_KERNEL(Square, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutpu EltWiseCPUKernel); MS_REG_CPU_KERNEL(Tanh, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), EltWiseCPUKernel); +MS_REG_CPU_KERNEL(Softplus, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + EltWiseCPUKernel); } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt index c9986d8a7bb..1b4f1e4d969 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/CMakeLists.txt @@ -36,6 +36,24 @@ file(GLOB KERNEL_SRC ${NNACL_DIR}/fp32_grad/*.c ) +if(MSLITE_STRING_KERNEL) + file(GLOB KERNEL_SRC_INFER_STRING + ${NNACL_DIR}/infer/string/*.c + ) + set(KERNEL_SRC + ${KERNEL_SRC} + ${KERNEL_SRC_INFER_STRING} + ) +endif() +if(MSLITE_CONTROL_TENSORLIST) + file(GLOB KERNEL_SRC_INFER_CONTROL_TENSORLIST + ${NNACL_DIR}/infer/control/*.c + ) + set(KERNEL_SRC + ${KERNEL_SRC} + ${KERNEL_SRC_INFER_CONTROL_TENSORLIST} + ) +endif() if(PLATFORM_ARM64) file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S) set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S index 8dceae7ac54..8bfaa90a5b1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8.S @@ -5,7 +5,8 @@ //void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, // const int *a_sums, 
const int *bias, int act_min, int act_max, int out_zp, -// int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, int peroc); +// const int *multiplier, const int *left_shift, const int *right_shift, int row, +// int col, int stride, int peroc); // x0: a(left matrix ptr) // x1: b(right matrix ptr) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S index c3f473880b1..36546f26853 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/opt/MatmulDpInt8Opt.S @@ -4,8 +4,9 @@ .align 5 //void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int col, int deep4, const int *a_sums, -// const int *bias, int act_min, int act_max, int out_zp, int32_t *multiplier, int32_t *left_shift, -// int32_t *right_shift, size_t stride, size_t filter_peroc, int32_t *filter_zp) +// const int *bias, int act_min, int act_max, int out_zp, const int32_t *multiplier, +// const int32_t *left_shift, const int32_t *right_shift, size_t stride, size_t filter_peroc, +// const int32_t *filter_zp) // x0: a(left matrix ptr) // x1: b(right matrix ptr) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c index fe6bb74906e..d8900df0b44 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/batch_to_space_base.c @@ -23,19 +23,19 @@ void BatchToSpaceNoCropForNHWC(const void *input, void *output, const int *in_sh int in_h = in_shape[1]; int in_w = in_shape[2]; int in_c = in_shape[3]; - size_t stride_h = block_w * out_n; - size_t output_offset = 0; - size_t copy_size = in_c * data_size; - size_t in_stride_h = in_w * in_c; - size_t 
in_stride_n = in_stride_h * in_h; + int stride_h = block_w * out_n; + int output_offset = 0; + int copy_size = in_c * data_size; + int in_stride_h = in_w * in_c; + int in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { for (int h = 0; h < in_h; ++h) { - size_t h_offset = h * in_stride_h; + int h_offset = h * in_stride_h; for (int bh = 0; bh < block_h; ++bh) { for (int w = 0; w < in_w; ++w) { - size_t w_offset = w * in_c; + int w_offset = w * in_c; for (int bw = 0; bw < block_w; ++bw) { - size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size); output_offset += copy_size; } @@ -49,6 +49,9 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i const int *crops, int data_size) { int block_h = block[0]; int block_w = block[1]; + if (block_h == 0 || block_w == 0) { + return; + } int in_h = in_shape[1]; int in_w = in_shape[2]; int in_c = in_shape[3]; @@ -61,27 +64,27 @@ void BatchToSpaceForNHWC(const void *input, void *output, const int *in_shape, i int w_end = MSMIN((in_w * block_w - crops[3]) / block_w + 1, in_w); int w_valid_end = in_w * block_w - crops[3] - 1; - size_t stride_h = block_w * out_n; - size_t output_offset = 0; - size_t copy_size = in_c * data_size; - size_t in_stride_h = in_w * in_c; - size_t in_stride_n = in_stride_h * in_h; + int stride_h = block_w * out_n; + int output_offset = 0; + int copy_size = in_c * data_size; + int in_stride_h = in_w * in_c; + int in_stride_n = in_stride_h * in_h; for (int n = 0; n < out_n; ++n) { for (int h = h_start; h < h_end; ++h) { - size_t h_offset = h * in_stride_h; + int h_offset = h * in_stride_h; for (int bh = 0; bh < block_h; ++bh) { - size_t h_index = h * block_h + bh; + int h_index = h * block_h + bh; if (h_index < h_valid_begin || h_index > h_valid_end) { 
continue; } for (int w = w_start; w < w_end; ++w) { - size_t w_offset = w * in_c; + int w_offset = w * in_c; for (int bw = 0; bw < block_w; ++bw) { - size_t w_index = w * block_w + bw; + int w_index = w * block_w + bw; if (w_index < w_valid_begin || w_index > w_valid_end) { continue; } - size_t in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; + int in_offset = in_stride_n * (bh * stride_h + bw * out_n + n) + w_offset + h_offset; memcpy((int8_t *)output + output_offset, (int8_t *)input + in_offset * data_size, copy_size); output_offset += copy_size; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c index ede7fc7166a..a4ea4318d58 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/broadcast_to.c @@ -62,7 +62,7 @@ void pad_input_shape(int *input_shape, int input_shape_len, int output_shape_len shape_info->input_shape_size_ = dim_max + 1; \ \ size_t before_dim_elements_num = accumulate(input_shape, 0, dim_max - 1); \ - size_t after_dim_elements_num = input_shape[dim_max]; \ + size_t after_dim_elements_num = (size_t)(input_shape[dim_max]); \ size_t dim_broadcast_rate = (size_t)(output_shape[dim_max] / input_shape[dim_max]); \ for (size_t i = 0; i < before_dim_elements_num; ++i) { \ const type *in_ptr = input + i * after_dim_elements_num; \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c index 747139835dc..bfef2732099 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/concat_base.c @@ -24,15 +24,18 @@ void Concat(void **input, int input_num, int axis, int **inputs_output_shape, si } int after_axis_size = data_size; - for (size_t i = axis + 1; i < 
shape_size; ++i) { + for (size_t i = (size_t)(axis) + 1; i < shape_size; ++i) { after_axis_size *= inputs_output_shape[0][i]; } int axis_offset = 0; uint8_t *dst_base = (output); - size_t output_stride = after_axis_size * inputs_output_shape[input_num][axis]; + int output_stride = after_axis_size * inputs_output_shape[input_num][axis]; for (int i = 0; i < input_num; ++i) { const uint8_t *src_base = (input[i]); - size_t input_stride = after_axis_size * inputs_output_shape[i][axis]; + if (inputs_output_shape[i] == NULL) { + continue; + } + int input_stride = after_axis_size * inputs_output_shape[i][axis]; int offset = UP_DIV(input_stride, thread_num); int count = input_stride - offset * task_id; if (count <= 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c index e2b16837e44..bc3d3a3c1fe 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/depth_to_space_base.c @@ -22,17 +22,17 @@ void DepthToSpaceForNHWC(const void *input, void *output, const int *in_shape, c int32_t in_shape_dim1 = in_shape[1]; size_t copy_size = block_size * param->out_stride_dim2_ * param->data_type_size_; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_offset_n = i * param->in_stride_dim0_; - size_t out_offset_n = i * param->out_stride_dim0_; + int in_offset_n = i * param->in_stride_dim0_; + int out_offset_n = i * param->out_stride_dim0_; for (int j = 0; j < in_shape_dim1; ++j) { - size_t in_offset_h = in_offset_n + j * param->in_stride_dim1_; - size_t out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; + int in_offset_h = in_offset_n + j * param->in_stride_dim1_; + int out_offset_h = out_offset_n + j * block_size * param->out_stride_dim1_; for (int k = 0; k < in_shape_dim2; ++k) { - size_t in_offset_w = in_offset_h + k * param->in_stride_dim2_; - size_t 
out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; + int in_offset_w = in_offset_h + k * param->in_stride_dim2_; + int out_offset_w = out_offset_h + k * block_size * param->out_stride_dim2_; for (int l = 0; l < block_size; ++l) { - size_t out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_; - size_t in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_; + int out_offset = (out_offset_w + l * param->out_stride_dim1_) * param->data_type_size_; + int in_offset = (in_offset_w + l * block_size * param->out_stride_dim2_) * param->data_type_size_; memcpy((int8_t *)output + out_offset, (int8_t *)input + in_offset, copy_size); } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c index b17000d3573..85d7c630562 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/minimal_filtering_generator.c @@ -118,7 +118,9 @@ int B(const float *poly_array, float *matrix_b, int in_unit) { float matrix_t[MAX_LEN]; // n * in_unit T(poly_array, matrix_t, n); - LT(poly_array, matrix_lt, n); + if (LT(poly_array, matrix_lt, n) != NNACL_OK) { + return NNACL_ERR; + } MatrixTranspose(matrix_lt, matrix_l, n, n); MatrixMultiply(matrix_l, matrix_t, matrix_b, n, n, in_unit); matrix_b[in_unit * in_unit - 1] = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c index e252a696165..5773c6d74c7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/slice_base.c @@ -47,43 +47,43 @@ void DoSlice(const void *input, void *output, SliceParameter *param, int thread_ int8_t *int8_in = (int8_t *)input; 
int8_t *int8_out = (int8_t *)output; - size_t out_stride[8]; + int out_stride[8]; out_stride[7] = 1; for (int i = 6; i >= 0; --i) { out_stride[i] = out_stride[i + 1] * param->size_[i + 1]; } - size_t count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_); - size_t thread_begin = thread_id * count_per_thread; - size_t thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread); - size_t copy_size = param->size_[7] * data_size; - size_t in_stride[8]; + int count_per_thread = UP_DIV(param->size_[5], param->op_parameter_.thread_num_); + int thread_begin = thread_id * count_per_thread; + int thread_end = MSMIN(param->size_[5], thread_begin + count_per_thread); + int copy_size = param->size_[7] * data_size; + int in_stride[8]; in_stride[7] = 1; for (int i = 6; i >= 0; --i) { in_stride[i] = param->shape_[i + 1] * in_stride[i + 1]; } for (int ii = 0; ii < param->size_[0]; ++ii) { - size_t out_offset0 = ii * out_stride[0]; - size_t in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7]; + int out_offset0 = ii * out_stride[0]; + int in_offset0 = (ii + param->begin_[0]) * in_stride[0] + param->begin_[7]; for (int jj = 0; jj < param->size_[1]; ++jj) { - size_t out_offset1 = jj * out_stride[1] + out_offset0; - size_t in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0; + int out_offset1 = jj * out_stride[1] + out_offset0; + int in_offset1 = (jj + param->begin_[1]) * in_stride[1] + in_offset0; for (int kk = 0; kk < param->size_[2]; ++kk) { - size_t out_offset2 = kk * out_stride[2] + out_offset1; - size_t in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1; + int out_offset2 = kk * out_stride[2] + out_offset1; + int in_offset2 = (kk + param->begin_[2]) * in_stride[2] + in_offset1; for (int ll = 0; ll < param->size_[3]; ++ll) { - size_t out_offset3 = ll * out_stride[3] + out_offset2; - size_t in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2; + int out_offset3 = ll * out_stride[3] + 
out_offset2; + int in_offset3 = (ll + param->begin_[3]) * in_stride[3] + in_offset2; for (int i = 0; i < param->size_[4]; ++i) { - size_t out_offset4 = i * out_stride[4] + out_offset3; - size_t in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3; - for (size_t j = thread_begin; j < thread_end; ++j) { - size_t out_offset5 = j * out_stride[5] + out_offset4; - size_t in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4; + int out_offset4 = i * out_stride[4] + out_offset3; + int in_offset4 = (i + param->begin_[4]) * in_stride[4] + in_offset3; + for (int j = thread_begin; j < thread_end; ++j) { + int out_offset5 = j * out_stride[5] + out_offset4; + int in_offset5 = (j + param->begin_[5]) * in_stride[5] + in_offset4; for (int k = 0; k < param->size_[6]; ++k) { - size_t out_offset6 = k * out_stride[6] + out_offset5; - size_t in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5; + int out_offset6 = k * out_stride[6] + out_offset5; + int in_offset6 = (k + param->begin_[6]) * in_stride[6] + in_offset5; memcpy(int8_out + out_offset6 * data_size, int8_in + in_offset6 * data_size, copy_size); } } @@ -105,8 +105,8 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i int8_t *int8_in = (int8_t *)input; int8_t *int8_out = (int8_t *)output; - size_t copy_size = param->size_[7] * data_size; - size_t in_stride[8]; + int copy_size = param->size_[7] * data_size; + int in_stride[8]; in_stride[7] = 1; for (int i = 6; i >= 0; --i) { in_stride[i] = param->shape_[i + 1] * in_stride[i + 1]; @@ -115,9 +115,9 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i for (int i = 0; i < DIMENSION_8D; ++i) { axis_copy_flag[i] = WhetherCopyByAxis(param->begin_, param->end_, param->shape_, i); } - size_t out_offset = 0; + int out_offset = 0; for (int32_t dim0 = param->begin_[0]; dim0 < param->end_[0]; ++dim0) { - size_t in_offset0 = dim0 * in_stride[0] + param->begin_[7]; + int in_offset0 = dim0 * 
in_stride[0] + param->begin_[7]; #define FAST_COPY_IF_NEED(rank) \ if (axis_copy_flag[rank]) { \ int left_block_num = param->end_[rank] - dim##rank; \ @@ -128,24 +128,24 @@ void DoSliceNoParallel(const void *input, void *output, SliceParameter *param, i continue; \ } FAST_COPY_IF_NEED(0); - for (size_t dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { - size_t in_offset1 = dim1 * in_stride[1] + in_offset0; + for (int dim1 = param->begin_[1]; dim1 < param->end_[1]; ++dim1) { + int in_offset1 = dim1 * in_stride[1] + in_offset0; FAST_COPY_IF_NEED(1); for (int32_t dim2 = param->begin_[2]; dim2 < param->end_[2]; ++dim2) { - size_t in_offset2 = in_offset1 + dim2 * in_stride[2]; + int in_offset2 = in_offset1 + dim2 * in_stride[2]; FAST_COPY_IF_NEED(2); for (int32_t dim3 = param->begin_[3]; dim3 < param->end_[3]; ++dim3) { - size_t in_offset3 = in_offset2 + dim3 * in_stride[3]; + int in_offset3 = in_offset2 + dim3 * in_stride[3]; FAST_COPY_IF_NEED(3); for (int32_t dim4 = param->begin_[4]; dim4 < param->end_[4]; ++dim4) { - size_t in_offset4 = in_offset3 + dim4 * in_stride[4]; + int in_offset4 = in_offset3 + dim4 * in_stride[4]; FAST_COPY_IF_NEED(4); for (int32_t dim5 = param->begin_[5]; dim5 < param->end_[5]; ++dim5) { - size_t in_offset5 = in_offset4 + dim5 * in_stride[5]; + int in_offset5 = in_offset4 + dim5 * in_stride[5]; FAST_COPY_IF_NEED(5); #undef FAST_COPY_IF_NEED for (int32_t dim6 = param->begin_[6]; dim6 < param->end_[6]; ++dim6) { - size_t in_offset6 = in_offset5 + dim6 * in_stride[6]; + int in_offset6 = in_offset5 + dim6 * in_stride[6]; memcpy(int8_out + out_offset * data_size, int8_in + in_offset6 * data_size, copy_size); out_offset += param->size_[7]; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c index 9c20b5af481..9f7f70bab58 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/split_base.c @@ -21,10 +21,6 @@ int DoSplit(void *in_data, void **out_data, const int *input_shape, int offset, int num_unit, SplitParameter *split_param, int data_size) { - if (in_data == NULL || out_data == NULL) { - return NNACL_ERR; - } - int8_t *int8_in = (int8_t *)in_data; int num_split = split_param->num_split_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c index dc2711237df..b7771693ce3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.c @@ -26,15 +26,15 @@ void DoCopyData(const uint8_t *input_data, uint8_t *output_data, size_t size, si } int DoTileOneDimension(uint8_t *input_data, uint8_t *output_data, size_t dim, const TileParameter *parameter) { - size_t src_dim_size = parameter->in_shape_[dim]; + int src_dim_size = parameter->in_shape_[dim]; if (dim == parameter->in_dim_ - 1) { DoCopyData(input_data, output_data, src_dim_size, parameter->data_size_, parameter->multiples_[dim]); return 0; } - for (size_t i = 0; i < src_dim_size; ++i) { - for (size_t j = 0; j < parameter->multiples_[dim]; ++j) { - size_t in_pos = parameter->in_strides_[dim] * i; - size_t out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size); + for (int i = 0; i < src_dim_size; ++i) { + for (int j = 0; j < parameter->multiples_[dim]; ++j) { + int in_pos = parameter->in_strides_[dim] * i; + int out_pos = parameter->out_strides_[dim] * (i + j * src_dim_size); DoTileOneDimension(input_data + in_pos * parameter->data_size_, output_data + out_pos * parameter->data_size_, dim + 1, parameter); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h index ccd91d1663d..b91bae0ced9 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/tile_base.h @@ -18,20 +18,20 @@ #define MINDSPORE_NNACL_BASE_TILE_H_ #include "nnacl/op_base.h" - +#define MAX_TILE_DIM_SIZE 8 typedef struct TileParameter { // primitive parameter OpParameter op_parameter_; - int multiples_[8]; - int dims_[8]; + int multiples_[MAX_TILE_DIM_SIZE]; + int dims_[MAX_TILE_DIM_SIZE]; size_t dims_size_; size_t multiples_size_; // shape correlative - int in_shape_[8]; - int out_shape_[8]; - int in_strides_[8]; - int out_strides_[8]; + int in_shape_[MAX_TILE_DIM_SIZE]; + int out_shape_[MAX_TILE_DIM_SIZE]; + int in_strides_[MAX_TILE_DIM_SIZE]; + int out_strides_[MAX_TILE_DIM_SIZE]; // other parameter int in_dim_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c index d47051d981b..de5c507a14d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/base/transpose_base.c @@ -184,7 +184,7 @@ for (int i = dims - 1; i > 0; --i) { \ *(size + i - 1) = *(size + i) * output_shape[i]; \ } \ - for (size_t idx = 0; idx < (*size) * output_shape[0]; ++idx) { \ + for (int idx = 0; idx < (*size) * output_shape[0]; ++idx) { \ int pos = idx; \ int output_idx = 0; \ int input_idx = 0; \ @@ -215,7 +215,7 @@ return; \ } \ count = MSMIN(offset_size, count); \ - for (size_t idx = task_offset; idx < task_offset + count; ++idx) { \ + for (int idx = task_offset; idx < task_offset + count; ++idx) { \ int pos = idx; \ int output_idx = 0; \ int input_idx = 0; \ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c index a6e3f265939..7f4e7817a93 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.c @@ -16,15 +16,19 @@ #include "nnacl/common_func.h" -int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) { +int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3) { return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3] + dim3; } -int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2) { +int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2) { return ((dim0 * shape[1] + dim1) * shape[2] + dim2) * shape[3]; } -int offset4d(const int *shape, const int *dims) { return offset(shape, dims[0], dims[1], dims[2], dims[3]); } +int Offset4d(const int *shape, const int *dims) { return Offset(shape, dims[0], dims[1], dims[2], dims[3]); } + +int Offset6d(const int *shape, const int *dims) { + return ((OffsetComm(shape, dims[0], dims[1], dims[2]) + dims[3]) * shape[4] + dims[4]) * shape[5]; +} int8_t MinInt8(int8_t a, int8_t b) { return b ^ ((a ^ b) & -(a < b)); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h index f7ca4f0b2c6..74f418d430a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/common_func.h @@ -36,9 +36,10 @@ void ReluFp32C8(float *data, float *dst, int ele_num); void Relu6Fp32C8(float *data, float *dst, int ele_num); #endif #endif -int offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); -int offsetComm(const int *shape, const int dim0, const int dim1, const int dim2); -int offset4d(const int *shape, const int *dims); +int Offset(const int *shape, const int dim0, const int dim1, const int dim2, const int dim3); +int OffsetComm(const int *shape, const int dim0, const int dim1, const int dim2); +int Offset4d(const int *shape, const int *dims); +int 
Offset6d(const int *shape, const int *dims); static inline bool isAddOverflow(int32_t x, int32_t y) { int32_t sum = x + y; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c index 0dd833af6bc..e0d69be8409 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp16/pad_fp16.c @@ -19,16 +19,22 @@ void PadFp16(const float16_t *input_data, float16_t *output_data, const int *input_shape, const int *output_shape, const int *paddings, const int tid, const int thread_num) { - int in[4], out[4]; + int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS]; for (in[0] = 0; in[0] < input_shape[0]; in[0]++) { out[0] = in[0] + paddings[0]; for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) { out[1] = in[1] + paddings[2]; for (in[2] = 0; in[2] < input_shape[2]; in[2]++) { out[2] = in[2] + paddings[4]; - float16_t *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]); - const float16_t *src = input_data + offset(input_shape, in[0], in[1], in[2], 0); - memcpy(dst, src, input_shape[3] * sizeof(float16_t)); + for (in[3] = 0; in[3] < input_shape[3]; in[3]++) { + out[3] = in[3] + paddings[6]; + for (in[4] = 0; in[4] < input_shape[4]; in[4]++) { + out[4] = in[4] + paddings[8]; + float16_t *dst = output_data + Offset6d(output_shape, out) + paddings[10]; + const float16_t *src = input_data + Offset6d(input_shape, in); + memcpy(dst, src, input_shape[5] * sizeof(float16_t)); + } + } } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c index 728a38964a7..19b0b7bd428 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.c @@ -152,17 +152,15 @@ int AdamDeltaFp32(float *delta, float *m, float *v, 
float lr, float beta1, float return NNACL_OK; } -int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, - const float *gradient, size_t start, size_t end) { +size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, + float decay, const float *gradient, size_t start, size_t end) { size_t c1 = start; #ifdef ENABLE_AVX512 - const float beta1_minus = 1 - beta1; - const float beta2_minus = 1 - beta2; struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r; beta1_r.data = _mm512_set1_ps(beta1); beta2_r.data = _mm512_set1_ps(beta2); - beta1_minus_r.data = _mm512_set1_ps(beta1_minus); - beta2_minus_r.data = _mm512_set1_ps(beta2_minus); + beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1); + beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2); lr_neg_r.data = _mm512_set1_ps(-lr); epsilon_r.data = _mm512_set1_ps(epsilon); decay_r.data = _mm512_set1_ps(decay); @@ -260,17 +258,15 @@ int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, f return c1; } -int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, - const int16_t *gradient16, size_t start, size_t end) { +size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, + const int16_t *gradient16, size_t start, size_t end) { size_t c1 = start; #ifdef ENABLE_AVX512 - const float beta1_minus = 1 - beta1; - const float beta2_minus = 1 - beta2; struct AVX_Data beta1_r, beta2_r, beta1_minus_r, beta2_minus_r, lr_neg_r, epsilon_r, decay_r; beta1_r.data = _mm512_set1_ps(beta1); beta2_r.data = _mm512_set1_ps(beta2); - beta1_minus_r.data = _mm512_set1_ps(beta1_minus); - beta2_minus_r.data = _mm512_set1_ps(beta2_minus); + beta1_minus_r.data = _mm512_set1_ps(1.0f - beta1); + beta2_minus_r.data = _mm512_set1_ps(1.0f - beta2); lr_neg_r.data = 
_mm512_set1_ps(-lr); epsilon_r.data = _mm512_set1_ps(epsilon); decay_r.data = _mm512_set1_ps(decay); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h index 3690cd646e6..b4f02754d27 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/adam_fp32.h @@ -71,10 +71,10 @@ int AdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, size_t start, size_t end, bool use_nesterov); int AdamDeltaFp32(float *delta, float *m, float *v, float lr, float beta1, float beta2, float epsilon, const float *gradient, size_t start, size_t end, bool use_nesterov); -int AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, - const float *gradient, size_t start, size_t end); -int FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, - const int16_t *gradient16, size_t start, size_t end); +size_t AdamWeightDecayFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, + float decay, const float *gradient, size_t start, size_t end); +size_t FusedAdamFp32(float *var, float *m, float *v, float lr, float beta1, float beta2, float epsilon, float decay, + const int16_t *gradient16, size_t start, size_t end); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c index 21ea9658088..cb3523edfea 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arg_min_max_fp32.c @@ -49,8 +49,8 @@ void ArgMaxTopK1(const float *input, void *output, float *output_value, const Ar float *outputfp32 = (float *)output; int *outputint = (int 
*)output; for (int i = 0; i < pre_axis_count; ++i) { - size_t output_offset = i * after_axis_count; - size_t input_offset = output_offset * axis_count; + int output_offset = i * after_axis_count; + int input_offset = output_offset * axis_count; for (int j = 0; j < after_axis_count; ++j) { float value = -FLT_MAX; int index = 0; @@ -79,8 +79,8 @@ void ArgMinTopK1(const float *input, void *output, float *output_value, const Ar float *outputfp32 = (float *)output; int *outputint = (int *)output; for (int i = 0; i < pre_axis_count; ++i) { - size_t output_offset = i * after_axis_count; - size_t input_offset = output_offset * axis_count; + int output_offset = i * after_axis_count; + int input_offset = output_offset * axis_count; for (int j = 0; j < after_axis_count; ++j) { float value = FLT_MAX; int index = 0; @@ -109,13 +109,13 @@ void ArgMinMaxDim0(const float *input, void *output, float *output_value, const int *outputint = (int *)output; for (int32_t i = 0; i < param->in_strides_[0]; ++i) { for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; + int offset = param->in_strides_[0] * j + i; param->arg_elements_[j].index_ = j; param->arg_elements_[j].data_.f_data_ = input[offset]; } qsort(param->arg_elements_, in_shape[0], sizeof(ArgElement), *compare_func); for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; + int out_offset = j * param->out_strides_[0] + i; if (param->out_value_) { outputfp32[out_offset] = param->arg_elements_[j].data_.f_data_; } else { @@ -135,17 +135,17 @@ void ArgMinMaxDim1(const float *input, void *output, float *output_value, const int *outputint = (int *)output; int in_shape1 = in_shape[1]; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < 
param->in_strides_[1]; ++j) { for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; + int offset = param->in_strides_[1] * k + in_dim0_offset + j; param->arg_elements_[k].index_ = k; param->arg_elements_[k].data_.f_data_ = input[offset]; } qsort(param->arg_elements_, in_shape1, sizeof(ArgElement), *compare_func); for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; + int out_offset = out_dim0_offset + j + k * param->out_strides_[1]; if (param->out_value_) { outputfp32[out_offset] = param->arg_elements_[k].data_.f_data_; } else { @@ -167,20 +167,20 @@ void ArgMinMaxDim2(const float *input, void *output, float *output_value, const float *outputfp32 = (float *)output; int *outputint = (int *)output; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < param->in_strides_[2]; ++k) { for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; + int offset = param->in_strides_[2] * l + k + in_dim1_offset; param->arg_elements_[l].index_ = l; param->arg_elements_[l].data_.f_data_ = input[offset]; } qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), *compare_func); for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; + int out_offset = out_dim1_offset + k + l * param->out_strides_[2]; if (param->out_value_) { outputfp32[out_offset] 
= param->arg_elements_[l].data_.f_data_; } else { @@ -203,26 +203,26 @@ void ArgMinMaxDim3(const float *input, void *output, float *output_value, const float *outputfp32 = (float *)output; int *outputint = (int *)output; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; + int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; + int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; + int offset = l + in_dim2_offset; param->arg_elements_[l].index_ = l; param->arg_elements_[l].data_.f_data_ = input[offset]; } qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), *compare_func); for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; + int out_offset = out_dim2_offset + l; if (param->out_value_) { outputfp32[out_offset] = param->arg_elements_[l].data_.f_data_; } else { - outputint[out_offset] = param->arg_elements_[l].index_; + outputint[out_offset] = (int)(param->arg_elements_[l].index_); } if (output_value != NULL) { output_value[out_offset] = param->arg_elements_[l].data_.f_data_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c index 
a7040ce33ee..1379226d7ef 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/common_func_fp32.c @@ -21,10 +21,10 @@ void PostConvFuncComm(const float *src_ptr_, float *out_ptr, const float *bias_p if (size == 0) { return; } - for (int oc = 0; oc < output_channel; oc++) { + for (size_t oc = 0; oc < output_channel; oc++) { int oc_div = oc / size; int oc_mod = oc % size; - for (int hw = 0; hw < plane_size; hw++) { + for (int hw = 0; hw < (int)plane_size; hw++) { int src_index = oc_div * size * plane_stride + hw * size + oc_mod; int dst_index = hw * oc_stride + oc; float value = src_ptr_[src_index]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c index 4b4bfa43257..621abed1dcc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/conv_depthwise_fp32.c @@ -52,7 +52,8 @@ int ConvDw(float *output_data, const float *input_data, const float *weight_data int end_kh = MSMIN(conv_param->kernel_h_, UP_DIV(conv_param->input_h_ - ih_origin, conv_param->dilation_h_)); for (int ow = 0; ow < conv_param->output_w_; ow++) { - memcpy(dst_data + ow * conv_param->output_channel_, bias_data, conv_param->output_channel_ * sizeof(float)); + memcpy(dst_data + ow * conv_param->output_channel_, bias_data, + conv_param->output_channel_ * (int)(sizeof(float))); } for (int kh = start_kh; kh < end_kh; kh++) { int ih = ih_origin + conv_param->dilation_w_ * kh; @@ -764,10 +765,10 @@ void ConvDwFp32IndirectRow(float *output, float **input, const float *weights, c int output_width, int input_stride, bool relu, bool relu6, int kernel) { do { float **in = input; - size_t c = channels; + size_t c = (size_t)channels; const float *w = weights; float *out = output; - memcpy(out, bias, channels * 
sizeof(float)); + memcpy(out, bias, channels * (int)sizeof(float)); for (; c >= C4NUM; c -= C4NUM) { for (int i = 0; i < C4NUM; i++) { for (int k = 0; k < kernel; k++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c index 31cc38b5606..c7c457c5fe9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_fp32.c @@ -61,7 +61,7 @@ void DeConvPostFp32C8(const float *src, float *tmp, const float *bias, float *ds for (int c = 0; c < oc8; c += 8) { float *dst_ptr = tmp + c * output_plane; const float *src_ptr = src + c * in_plane_round * kernel_plane; - memset(dst_ptr, 0, output_plane * C8NUM * sizeof(float)); + memset(dst_ptr, 0, output_plane * C8NUM * (int)sizeof(float)); for (int ih = 0; ih < conv_param->input_h_; ih++) { for (int iw = 0; iw < conv_param->input_w_; iw++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c index 8664ec56c5f..9fdfd4eae5b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/embedding_lookup_fp32.c @@ -43,7 +43,7 @@ int CopyData(float *input_data, const int *ids, float *output_data, int num, parameter->is_regulated_[ids[num]] = true; } - memcpy(out_data, in_data, sizeof(float) * parameter->layer_size_); + memcpy(out_data, in_data, sizeof(float) * (size_t)(parameter->layer_size_)); return NNACL_OK; } @@ -52,7 +52,7 @@ int EmbeddingLookup(float *input_data, const int *ids, float *output_data, const if (parameter->op_parameter_.thread_num_ == 0) { return NNACL_PARAM_INVALID; } - for (size_t i = task_id; i < parameter->ids_size_; i += parameter->op_parameter_.thread_num_) { + for (int i = task_id; i < parameter->ids_size_; i += 
parameter->op_parameter_.thread_num_) { int ret = CopyData(input_data, ids, output_data, i, parameter); if (ret != NNACL_OK) { return ret; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c index 88cfdacf2a0..d1165298265 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/gatherNd_fp32.c @@ -21,7 +21,7 @@ int GatherNd(const float *input, float *output, const int *in_offset, int area, int count) { int i = 0; for (i = 0; i < count; i++) { - (void)memcpy(output + area * i, input + in_offset[i], area * sizeof(float)); + (void)memcpy(output + area * i, input + in_offset[i], (size_t)(area) * sizeof(float)); } return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c index 9a94c35e46e..41a9b1ffaf4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/lstm_fp32.c @@ -41,7 +41,7 @@ void PackLstmBias(float *dst, const float *src, int batch, int col, int col_alig for (int i = 0; i < unidirectional_batch; i++) { const float *src_batch = src + i * col; float *dst_batch = dst + i * col_align; - memcpy(dst_batch, src_batch, col * sizeof(float)); + memcpy(dst_batch, src_batch, col * (int)sizeof(float)); } if (is_bidirectional) { const float *backward_src = src + batch * col; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c index 530fd6c6ac3..4b1702e55b5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c @@ -263,9 +263,9 @@ void RowMajor2Col12Major_arm32(const float *src_c, float *dst_c, 
size_t col) { void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) { const float *src_r = src_ptr; float *dst_r = dst_ptr; - size_t ri = 0; + int ri = 0; for (; ri < (row / C12NUM * C12NUM); ri += C12NUM) { - size_t ci = 0; + int ci = 0; for (; ci < (col / C4NUM * C4NUM); ci += C4NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; @@ -340,7 +340,7 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; - for (size_t i = 0; i < C12NUM; i++) { + for (int i = 0; i < C12NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -348,16 +348,15 @@ void RowMajor2Col12Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C12NUM * col; } for (; ri < row; ri++, dst_r++, src_r += col) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C12NUM] = src_r[i]; } } for (; ri < UP_ROUND(row, C12NUM); ri++, dst_r++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C12NUM] = 0; } } - return; } #ifdef ENABLE_ARM64 @@ -532,20 +531,20 @@ void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) { #endif #endif void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) { - size_t row8 = row / C8NUM * C8NUM; + int row8 = row / C8NUM * C8NUM; #ifdef ENABLE_ARM64 - size_t col_skip = col / C8NUM * C8NUM; + int col_skip = col / C8NUM * C8NUM; int skip_size = C8NUM; #else - size_t col_skip = col / C4NUM * C4NUM; + int col_skip = col / C4NUM * C4NUM; int skip_size = C4NUM; #endif const float *src_r = src_ptr; float *dst_r = dst_ptr; - size_t ri = 0; + int ri = 0; for (; ri < row8; ri += C8NUM) { - size_t ci = 0; + int ci = 0; for (; ci < col_skip; ci += skip_size) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; @@ -593,7 +592,7 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int 
row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; - for (size_t i = 0; i < C8NUM; i++) { + for (int i = 0; i < C8NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -601,29 +600,28 @@ void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C8NUM * col; } for (; ri < row; ri++, src_r += col, dst_r++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C8NUM] = src_r[i]; } } for (; ri < UP_ROUND(row, C8NUM); ri++, dst_r++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C8NUM] = 0; } } - return; } void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) { - size_t row16 = row / C16NUM * C16NUM; - size_t col_skip = col / C4NUM * C4NUM; + int row16 = row / C16NUM * C16NUM; + int col_skip = col / C4NUM * C4NUM; int skip_size = C4NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - size_t ri = 0; + int ri = 0; for (; ri < row16; ri += C16NUM) { - size_t ci = 0; + int ci = 0; for (; ci < col_skip; ci += skip_size) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C16NUM; @@ -636,7 +634,7 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C16NUM; - for (size_t i = 0; i < C16NUM; i++) { + for (int i = 0; i < C16NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -644,21 +642,20 @@ void RowMajor2Col16Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C16NUM * col; } for (; ri < row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C16NUM] = src_r[i]; } src_r += col; dst_r += 1; } - size_t total_row = UP_ROUND(row, C16NUM); + int total_row = UP_ROUND(row, C16NUM); for (; ri < total_row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C16NUM] = 0; } dst_r += 1; } - return; } 
void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col) { @@ -680,15 +677,15 @@ void RowMajor2Col32Major(const float *src_ptr, float *dst_ptr, int row, int col) } void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) { - size_t totalRow = UP_ROUND(row, C6NUM); - size_t row6 = row / C6NUM * C6NUM; - size_t col8 = col / C8NUM * C8NUM; + int totalRow = UP_ROUND(row, C6NUM); + int row6 = row / C6NUM * C6NUM; + int col8 = col / C8NUM * C8NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - size_t ri = 0; + int ri = 0; for (; ri < row6; ri += C6NUM) { - size_t ci = 0; + int ci = 0; for (; ci < col8; ci += C8NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C6NUM; @@ -753,7 +750,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C6NUM; - for (size_t i = 0; i < C6NUM; i++) { + for (int i = 0; i < C6NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -762,7 +759,7 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) } for (; ri < row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C6NUM] = src_r[i]; } src_r += col; @@ -770,30 +767,29 @@ void RowMajor2Col6Major(const float *src_ptr, float *dst_ptr, int row, int col) } for (; ri < totalRow; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C6NUM] = 0; } dst_r += 1; } - return; } void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) { - size_t total_row = UP_ROUND(row, C4NUM); - size_t row4 = row / C4NUM * C4NUM; - size_t col4 = col / C4NUM * C4NUM; + int total_row = UP_ROUND(row, C4NUM); + int row4 = row / C4NUM * C4NUM; + int col4 = col / C4NUM * C4NUM; const float *src_r = src_ptr; float *dst_r = dst_ptr; - size_t ri = 0; + int ri = 0; for (; ri < row4; ri += C4NUM) { - size_t ci = 0; + int ci = 0; for 
(; ci < col4; ci += C4NUM) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C4NUM; #ifdef ENABLE_ARM32 - size_t stride = col * 4; + int stride = col * 4; asm volatile( "mov r10, %[src_c]\n" "mov r12, %[dst_c]\n" @@ -840,8 +836,8 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) _mm_storeu_ps(dst_c + 8, dst2); _mm_storeu_ps(dst_c + 12, dst3); #else - for (int tr = 0; tr < C4NUM; tr++) { - for (int tc = 0; tc < C4NUM; tc++) { + for (size_t tr = 0; tr < C4NUM; tr++) { + for (size_t tc = 0; tc < C4NUM; tc++) { dst_c[tc * C4NUM + tr] = src_c[tr * col + tc]; } } @@ -850,7 +846,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C4NUM; - for (size_t i = 0; i < C4NUM; i++) { + for (int i = 0; i < C4NUM; i++) { dst_c[i] = src_c[i * col]; } } @@ -858,7 +854,7 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) dst_r += C4NUM * col; } for (; ri < row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C4NUM] = src_r[i]; } src_r += col; @@ -866,12 +862,11 @@ void RowMajor2Col4Major(const float *src_ptr, float *dst_ptr, int row, int col) } for (; ri < total_row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C4NUM] = 0; } dst_r += 1; } - return; } #ifndef ENABLE_ARM diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c index f80bb5657d3..2daaed1bf27 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/pad_fp32.c @@ -23,16 +23,22 @@ void Pad(const float *input_data, float *output_data, const int *input_shape, co if (thread_num == 0) { return; } - int in[4], out[4]; + int in[DEFAULT_PAD_NDIMS], out[DEFAULT_PAD_NDIMS]; for (in[0] = 
0; in[0] < input_shape[0]; in[0]++) { out[0] = in[0] + paddings[0]; for (in[1] = tid; in[1] < input_shape[1]; in[1] += thread_num) { out[1] = in[1] + paddings[2]; for (in[2] = 0; in[2] < input_shape[2]; in[2]++) { out[2] = in[2] + paddings[4]; - float *dst = output_data + offset(output_shape, out[0], out[1], out[2], paddings[6]); - const float *src = input_data + offset(input_shape, in[0], in[1], in[2], 0); - memcpy(dst, src, input_shape[3] * sizeof(float)); + for (in[3] = 0; in[3] < input_shape[3]; in[3]++) { + out[3] = in[3] + paddings[6]; + for (in[4] = 0; in[4] < input_shape[4]; in[4]++) { + out[4] = in[4] + paddings[8]; + float *dst = output_data + Offset6d(output_shape, out) + paddings[10]; + const float *src = input_data + Offset6d(input_shape, in); + memcpy(dst, src, input_shape[5] * (int)(sizeof(float))); + } + } } } } @@ -57,8 +63,7 @@ int TransOut2InputDimIndex(int out_dim_index, int left_pad, int in_dim, int offs int GetInputFlattenIndex(int out_flatten_index, const int *input_shape, const PadParameter *pad_param) { int in_flatten_index = 0; - int i; - for (i = 0; i < COMM_SHAPE_SIZE; ++i) { + for (int i = 0; i < DEFAULT_PAD_NDIMS; ++i) { int left_pad = pad_param->paddings_[i * 2]; NNACL_CHECK_ZERO_RETURN_ERR(pad_param->out_strides[i]) int out_dim_index = out_flatten_index / pad_param->out_strides[i]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c index 13f98915e35..89de95ff7f5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/resize_fp32.c @@ -510,8 +510,8 @@ int ResizeNearestNeighbor(const float *input_data, float *output_data, const int } else { input_x = (int)(floorf(actual_x)); } - int in_offset = offset(input_shape, batch, input_y, input_x, 0); - int out_offset = offset(output_shape, batch, y, x, 0); + int in_offset = Offset(input_shape, batch, 
input_y, input_x, 0); + int out_offset = Offset(output_shape, batch, y, x, 0); memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(float)); } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c index 45aa7179d6d..7125f13a19b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/reverse_fp32.c @@ -20,10 +20,8 @@ #include "nnacl/nnacl_utils.h" int Reverse(const float *input, float *output, size_t elem_size, int *index) { - for (int i = 0; i < elem_size; i++) { + for (size_t i = 0; i < elem_size; i++) { NNACL_ASSERT(index[i] >= 0); - } - for (int i = 0; i < elem_size; i++) { output[index[i]] = input[i]; } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c index 33db0194d73..3ad61bf142a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/scatter_nd_fp32.c @@ -23,7 +23,7 @@ int DoScatterND(float *output_ptr, const float *update, int *output_unit_offsets return NNACL_ERR; } for (int i = 0; i < num_units; i++) { - (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, unit_size * sizeof(float)); + (void)memcpy(output_ptr + output_unit_offsets[i], update + unit_size * i, (size_t)(unit_size) * sizeof(float)); } return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c index f0c1ca8c711..a329c448248 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/splice_fp32.c @@ -25,7 +25,7 @@ void SpliceFp32(const float *src_data, int 
src_row, int src_col, const SplicePar forward_index++; const float *tmp_src_data = src_data + r_off * src_col; float *tmp_dst_data = dst_row_data + off * src_col; - memcpy(tmp_dst_data, tmp_src_data, src_col * sizeof(float)); + memcpy(tmp_dst_data, tmp_src_data, (size_t)(src_col) * sizeof(float)); } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c index d510cacccd1..1e63955173c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/strided_slice_fp32.c @@ -70,7 +70,7 @@ int DoStridedSliceIntFp64Bool(const void *in_data, void *out_data, StridedSliceP if (param->num_axes_ < DIMENSION_8D) { PadStridedSliceParameterTo8D(param); } - size_t dim_offset[DIMENSION_8D - 1]; + int dim_offset[DIMENSION_8D - 1]; dim_offset[6] = in_shape[7]; dim_offset[5] = in_shape[6] * dim_offset[6]; dim_offset[4] = in_shape[5] * dim_offset[5]; @@ -132,7 +132,7 @@ int DoStridedSlice(const void *in_data, void *out_data, StridedSliceParameter *p if (param->num_axes_ < DIMENSION_8D) { PadStridedSliceParameterTo8D(param); } - size_t dim_offset[DIMENSION_8D - 1]; + int dim_offset[DIMENSION_8D - 1]; dim_offset[6] = in_shape[7]; dim_offset[5] = in_shape[6] * dim_offset[6]; dim_offset[4] = in_shape[5] * dim_offset[5]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c index fa73291a318..820f6a8b2ed 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/transpose_fp32.c @@ -180,15 +180,15 @@ void TransposeDimsFp32(const float *in_data, float *out_data, const int *output_ int *strides = (int *)(transpose_param->strides_); int *out_strides = (int *)(transpose_param->out_strides_); int num_axes = 
transpose_param->num_axes_; - size_t data_size = (*out_strides) * output_shape[0]; - size_t offset_size = UP_DIV(data_size, thread_num); - size_t task_offset = offset_size * task_id; + int data_size = (*out_strides) * output_shape[0]; + int offset_size = UP_DIV(data_size, thread_num); + int task_offset = offset_size * task_id; int count = data_size - task_offset; if (count <= 0) { return; } count = MSMIN(offset_size, count); - for (size_t idx = task_offset; idx < task_offset + count; ++idx) { + for (int idx = task_offset; idx < task_offset + count; ++idx) { int pos = idx; int output_idx = 0; int input_idx = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c index afdd1ab3b73..e23023dfa2f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/winograd_transform.c @@ -45,7 +45,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * int dst_plane_offset = c * in_channel; for (int ic = 0; ic < ic4; ic++) { // clear tmp buffer - memset(tmp_data, 0, input_unit * input_unit * C4NUM * sizeof(float)); + memset(tmp_data, 0, input_unit * input_unit * C4NUM * (int)(sizeof(float))); int real_c = in_channel - ic * C4NUM; real_c = real_c > C4NUM ? 
C4NUM : real_c; @@ -87,7 +87,7 @@ void WinogradInputTransform(const float *input_data, float *trans_input, float * // input transform const int tile_num = C12NUM; int dst_ic4_offset = dst_plane_offset + ic * C4NUM; - size_t dst_step = tile_num * in_channel; + int dst_step = tile_num * in_channel; float *trans_input_ptr = trans_input + dst_ic4_offset; func(tmp_data, trans_input_ptr, C4NUM, dst_step, real_c); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c index 488d413727b..366d1a9cf6a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.c @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ #include #include "nnacl/op_base.h" #include "nnacl/fp32/arithmetic_fp32.h" +#include "nnacl/fp32/exp_fp32.h" #include "nnacl/fp32_grad/activation_grad.h" #include "nnacl/errorcode.h" @@ -110,3 +111,27 @@ int GeluGrad(const float *src0, const float *src1, size_t length, float *dst) { } return NNACL_OK; } + +int SoftplusGrad(const float *src0, const float *src1, int length, float *dst) { + int i = 0; +#if defined(ENABLE_AVX) + for (; i <= length - C8NUM; i += C8NUM) { + simd_exp_avx(-(MS_LD256_F32(src1 + i)), dst + i); + MS_ST256_F32(dst + i, + MS_DIV256_F32(MS_LD256_F32(src0 + i), MS_ADD256_F32(MS_MOV256_F32(1.0f), MS_LD256_F32(dst + i)))); + } +#endif + +#if defined(ENABLE_ARM) || defined(ENABLE_SSE) + for (; i <= length - C4NUM; i += C4NUM) { + simd_exp(MS_SUBQ_F32(MS_MOVQ_F32(0.0f), MS_LDQ_F32(src1 + i)), dst + i); + MS_STQ_F32(dst + i, MS_DIVQ_F32(MS_LDQ_F32(src0 + i), MS_ADDQ_F32(MS_MOVQ_F32(1.0f), MS_LDQ_F32(dst + i)))); + } +#endif + + for (; i < length; ++i) { + single_exp(-src1[i], dst + i); + dst[i] = src0[i] / (1.0f + dst[i]); + } + return NNACL_OK; +} diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h index e88b27addb5..7f493215fe3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/activation_grad.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,6 +39,7 @@ int HSwishGrad(const float *src0, const float *src1, size_t length, float *dst); int HSigmoidGrad(const float *src0, const float *src1, size_t length, float *dst); int EluGrad(const float *src0, const float *src1, size_t length, float *dst, float alpha); int GeluGrad(const float *src0, const float *src1, size_t length, float *dst); +int SoftplusGrad(const float *src, const float *src1, int length, float *dst); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c index 3523c1476c6..8df87bc4bdb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c @@ -231,7 +231,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C12NUM; - for (size_t i = 0; i < C12NUM; i++) { + for (int i = 0; i < C12NUM; i++) { dst_c[i] = src_c[i * lead]; } } @@ -240,7 +240,7 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size } for (; ri < row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C12NUM] = src_r[i]; } src_r += lead; @@ -248,12 +248,11 @@ static void RowMajor2Col12MajorStride(const float *src_ptr, float *dst_ptr, size } for (; ri < row_up_12; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C12NUM] = 0; } dst_r += 1; } - return; } #endif @@ -261,10 +260,10 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ size_t row8 = row / C8NUM * C8NUM; #ifdef ENABLE_ARM64 size_t col_skip = col / C8NUM * C8NUM; - int skip_size = C8NUM; + size_t skip_size = C8NUM; #else size_t col_skip = col / C4NUM * C4NUM; - int skip_size = C4NUM; + size_t skip_size = C4NUM; #endif const float *src_r = src_ptr; float *dst_r 
= dst_ptr; @@ -450,7 +449,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ for (; ci < col; ci++) { const float *src_c = src_r + ci; float *dst_c = dst_r + ci * C8NUM; - for (size_t i = 0; i < C8NUM; i++) { + for (int i = 0; i < C8NUM; i++) { dst_c[i] = src_c[i * lead]; } } @@ -458,7 +457,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ dst_r += C8NUM * col; } for (; ri < row; ri++) { - for (size_t i = 0; i < col; i++) { + for (int i = 0; i < col; i++) { dst_r[i * C8NUM] = src_r[i]; } src_r += lead; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c index d71b8356972..f7bb275591c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/reduce_grad.c @@ -64,11 +64,11 @@ void ReduceSumByAxes(const float *input, const int *input_dims, float *output, c if (output_dims[idx] != input_dims[idx]) same_shape = 0; } if (same_shape) { - memcpy(output, input, num_outputs * sizeof(float)); + memcpy(output, input, (size_t)(num_outputs) * sizeof(float)); return; } - memset(output, 0, num_outputs * sizeof(float)); // zero output + memset(output, 0, (size_t)(num_outputs) * sizeof(float)); // zero output int input_iter[8] = {0}; int axes[5] = {0}; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c index 1e5ac7ccc76..d6991dbb071 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/softmax_grad.c @@ -37,13 +37,13 @@ void SoftmaxGrad(const float *input_ptr, const float *yt_ptr, float *output_ptr, for (int i = 0; i < inner_size * input_shape[axis]; i++) sum_mul[i] = 1.0; for (int i = 0; i < n_dim; 
i++) dim *= input_shape[i]; dim /= outter_size; - memcpy(output_ptr, yt_ptr, ele_size * sizeof(float)); + memcpy(output_ptr, yt_ptr, (size_t)(ele_size) * sizeof(float)); const int M = input_shape[axis]; const int N = inner_size; for (int i = 0; i < outter_size; i++) { int outter_offset = i * dim; - memset(sum_data, 0.0f, inner_size * sizeof(float)); + memset(sum_data, 0, (size_t)(inner_size) * sizeof(float)); for (int k = 0; k < inner_size; k++) { int inner_offset = outter_offset + k; for (int j = 0; j < input_shape[axis]; j++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c index b7f1f94b3b8..e3ed62cb9e0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/strided_slice_grad.c @@ -20,7 +20,7 @@ static size_t CalcIndex(const int *shape, size_t size, int i, size_t pos) { size_t res = 1; for (size_t j = 0; j < size; j++) { - res *= shape[(i + 1) + j]; + res *= shape[((size_t)(i) + 1) + j]; } return (pos / res % shape[i]); } @@ -37,7 +37,7 @@ int DoStridedSliceGrad(const float *inputs, float *output, const int *dx_shape, const int *s = param->strides_; const int *b = param->begins_; for (int i = 0; i < DIMENSION_8D; i++) { - size *= param->in_shape_[i]; + size *= (size_t)(param->in_shape_[i]); } for (size_t pos = 0; pos < size; pos++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c index b92fe1fd2e7..b0609b97abf 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/addn_infer.c @@ -56,13 +56,13 @@ int AddnInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o for (size_t d = 0; d < inputs[max_dims_idx]->shape_size_; ++d) { 
size_t max_dim = 0; for (size_t i = 0; i < inputs_size; ++i) { - size_t shift = max_dims - inputs[i]->shape_size_; - size_t dim = (i < shift) ? 1 : inputs[i]->shape_[d]; + size_t shift = max_dims - (size_t)(inputs[i]->shape_size_); + size_t dim = (i < shift) ? 1 : (size_t)(inputs[i]->shape_[d]); if (dim > max_dim) { max_dim = dim; } } - output->shape_[d] = max_dim; // set the biggest dimension in the output tensor + output->shape_[d] = (int)(max_dim); // set the biggest dimension in the output tensor } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c index 07ad84871ae..1513b841778 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/affine_infer.c @@ -17,8 +17,8 @@ #include "nnacl/infer/affine_infer.h" #include "nnacl/infer/infer_register.h" -int MatmulInfer(AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size, int b_shape[MAX_SHAPE_SIZE], - size_t b_shape_size) { +int MatmulInfer(const AffineParameter *param, int a_shape[MAX_SHAPE_SIZE], size_t a_shape_size, + int b_shape[MAX_SHAPE_SIZE], size_t b_shape_size) { MatMulParameter *matmul_param = param->matmul_parameter_; if (matmul_param->a_transpose_) { if (a_shape_size < 2) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c index 44cae261f29..3608e762e1d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/argmin_max_infer.c @@ -56,8 +56,8 @@ int ArgMinMaxInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); - 
size_t input_shape_size = input->shape_size_; - int axis = param->axis_ < 0 ? param->axis_ + (int)input_shape_size : param->axis_; + int input_shape_size = (int)input->shape_size_; + int axis = param->axis_ < 0 ? param->axis_ + input_shape_size : param->axis_; if (axis >= input_shape_size || axis < 0) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c index 9971a6c2cd6..83987ccfe2f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/arithmetic_grad_infer.c @@ -55,10 +55,10 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T if (GetElementNum(dx1) < GetElementNum(dx2)) { param->ndim_ = in_shape1_size; - param->in_elements_num0_ = param->ndim_; - param->in_elements_num1_ = param->ndim_; - param->out_elements_num_ = param->ndim_; - int fill_dim_num = in_shape1_size - in_shape0_size; // This will not work for batch! + param->in_elements_num0_ = (int)param->ndim_; + param->in_elements_num1_ = (int)param->ndim_; + param->out_elements_num_ = (int)param->ndim_; + size_t fill_dim_num = in_shape1_size - in_shape0_size; // This will not work for batch! 
int j = 0; for (unsigned int i = 0; i < in_shape1_size; i++) { if (i < fill_dim_num) { @@ -76,7 +76,7 @@ int ArithmeticGradInferShape(const TensorC *const *inputs, size_t inputs_size, T param->out_elements_num_ = param->ndim_; param->broadcasting_ = true; int j = 0; - int fill_dim_num = in_shape0_size - in_shape1_size; + size_t fill_dim_num = in_shape0_size - in_shape1_size; for (unsigned int i = 0; i < in_shape0_size; i++) { if (i < fill_dim_num) { param->in_shape1_[i] = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c index af020b85d57..959a4af64d6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/audio_spectrogram_infer.c @@ -66,7 +66,7 @@ int AudioSpectrogramInferShape(const TensorC *const *inputs, size_t inputs_size, int sample_sub_window = input->shape_[0] - param->window_size_; output_shape[1] = sample_sub_window < 0 ? 
0 : 1 + sample_sub_window / param->stride_; // compute fft length - int fft_length = GetFftLength(param->window_size_); + int fft_length = (int)GetFftLength(param->window_size_); output_shape[2] = fft_length / 2 + 1; SetShapeArray(output, output_shape, 3); return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c index fb3f72300d0..5a78919b6ff 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/bias_grad_infer.c @@ -33,8 +33,8 @@ int BiasGradInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC int inshape[MAX_SHAPE_SIZE]; size_t inshape_size = 0; ShapeSet(inshape, &inshape_size, in0->shape_, in0->shape_size_); - int ndim = inshape_size; - for (int i = 0; i < ndim - 1; i++) { + size_t ndim = inshape_size; + for (size_t i = 0; i < ndim - 1; i++) { inshape[i] = 1; } SetDataTypeFormat(out, in0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c index 5caedb299e8..31e36427ad0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/broadcast_to_infer.c @@ -111,12 +111,12 @@ int BroadcastToInferShape(const TensorC *const *inputs, size_t inputs_size, Tens const int *input_shape = input->shape_; size_t input_shape_size = input->shape_size_; int shape[MAX_SHAPE_SIZE]; - int input_shape_index = input_shape_size - 1; + int input_shape_index = (int)(input_shape_size)-1; if (input_shape_size > dst_shape_size) { return NNACL_ERR; } - for (int i = dst_shape_size - 1; i >= 0; --i) { + for (int i = (int)(dst_shape_size)-1; i >= 0; --i) { if (dst_shape[i] < 0) { return NNACL_ERR; } diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c index 497287eaa94..30c75d62d1e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c @@ -18,6 +18,7 @@ #include #include "nnacl/infer/infer_register.h" +#ifdef ENABLE_CONTROL_TENSORLIST int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape) { // This function will create a new tensors_ // Your must to set shape(param2: tensor_shape) and data_type_(tensors_data_type_ = param1: dtype) of each tensor in @@ -35,7 +36,7 @@ int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector return NNACL_NULL_PTR; } memset(tensor_list->tensors_, 0, tensor_list->element_num_ * sizeof(TensorC)); - for (int i = 0; i < tensor_list->element_num_; ++i) { + for (size_t i = 0; i < tensor_list->element_num_; ++i) { tensor_list->tensors_[i].format_ = Format_NHWC; tensor_list->tensors_[i].data_type_ = dtype; ShapeSet(tensor_list->tensors_[i].shape_, &(tensor_list->tensors_[i].shape_size_), tensor_shape->shape_[i], @@ -69,6 +70,7 @@ bool TensorListIsFullyDefined(const int *shape, size_t shape_size) { } return true; } +#endif int CheckAugmentNull(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, const OpParameter *parameter) { @@ -157,7 +159,7 @@ void SetShapeTensor(TensorC *dst, const TensorC *src) { } void SetShapeArray(TensorC *dst, const int *src, size_t src_size) { - for (size_t i = 0; i < src_size; i++) { + for (size_t i = 0; i < src_size && i < MAX_SHAPE_SIZE; i++) { dst->shape_[i] = src[i]; } dst->shape_size_ = src_size; @@ -286,13 +288,17 @@ int GetDimensionSize(const TensorC *tensor, const size_t index) { } void ShapeSet(int *dst_shape, size_t *dst_shape_size, const int *src_shape, size_t src_shape_size) { - for (size_t i = 0; i < 
src_shape_size; i++) { + size_t i = 0; + for (; i < src_shape_size && i < MAX_SHAPE_SIZE; i++) { dst_shape[i] = src_shape[i]; } - *dst_shape_size = src_shape_size; + *dst_shape_size = i; } void ShapePush(int *shape, size_t *shape_size, int value) { + if (*shape_size >= MAX_SHAPE_SIZE) { + return; + } shape[*shape_size] = value; *shape_size = *shape_size + 1; } @@ -301,6 +307,9 @@ int ShapeInsert(int *shape, size_t *shape_size, int index, int value) { if (index < 0 || index > *shape_size) { return NNACL_ERR; } + if (*shape_size >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } for (int i = *shape_size; i > index; i--) { shape[i] = shape[i - 1]; } @@ -325,7 +334,7 @@ bool ShapeEqual(const int *shape0, size_t shape0_size, const int *shape1, size_t if (shape0_size != shape1_size) { return false; } - for (int i = 0; i < shape0_size; i++) { + for (size_t i = 0; i < shape0_size; i++) { if (shape0[i] != shape1[i]) { return false; } @@ -401,96 +410,6 @@ int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou return NNACL_OK; } -int VectorCInit(VectorC *vc, size_t per_malloc_size) { - if (per_malloc_size == 0) { - return NNACL_ERR; - } - vc->data_ = (int *)malloc(per_malloc_size * sizeof(int)); - if (vc->data_ == NULL) { - return NNACL_ERR; - } - vc->size_ = 0; - vc->max_size_ = per_malloc_size; - vc->per_malloc_size_ = per_malloc_size; - return NNACL_OK; -} - -int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size) { - if (src_shape_size == 0) { - vc->size_ = 0; - } else { - free(vc->data_); - if (vc->per_malloc_size_ == 0) { - return NNACL_ERR; - } - vc->max_size_ = (src_shape_size / vc->per_malloc_size_ + 1) * vc->per_malloc_size_; - vc->data_ = (int *)malloc(sizeof(int) * vc->max_size_); - if (vc->data_ == NULL) { - return NNACL_ERR; - } - for (size_t i = 0; i < src_shape_size; i++) { - vc->data_[i] = src_shape[i]; - } - vc->size_ = src_shape_size; - } - return NNACL_OK; -} - -int VectorCPush(VectorC *vc, int value) { - if (vc->size_ 
+ 1 > vc->max_size_) { - int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int)); - if (tmp == NULL) { - return NNACL_ERR; - } - memcpy(tmp, vc->data_, vc->size_ * sizeof(int)); - free(vc->data_); - vc->data_ = tmp; - vc->max_size_ = vc->max_size_ + vc->per_malloc_size_; - } - vc->data_[vc->size_] = value; - vc->size_++; - return NNACL_OK; -} - -int VectorCInsert(VectorC *vc, int index, int value) { - if (vc->size_ + 1 > vc->max_size_) { - int *tmp = (int *)malloc(vc->per_malloc_size_ * sizeof(int) + vc->max_size_ * sizeof(int)); - if (tmp == NULL) { - return NNACL_ERR; - } - memcpy(tmp, vc->data_, vc->size_ * sizeof(int)); - free(vc->data_); - vc->data_ = tmp; - vc->max_size_ = vc->max_size_ + vc->per_malloc_size_; - } - memmove(vc->data_ + index + 1, vc->data_ + index, (vc->size_ - index) * sizeof(int)); - vc->data_[index] = value; - vc->size_++; - return NNACL_OK; -} - -void VectorCErase(VectorC *vc, int index) { - memmove(vc->data_ + index, vc->data_ + index + 1, (vc->size_ - index - 1) * sizeof(int)); - vc->size_--; -} - -bool VectorCEqual(const VectorC *vc1, const VectorC *vc2) { - if (vc1->size_ != vc2->size_) { - return false; - } - for (size_t i = 0; i < vc1->size_; i++) { - if (vc1->data_[i] != vc2->data_[i]) { - return false; - } - } - return true; -} - -void VectorCFree(VectorC *vc) { - free(vc->data_); - vc->data_ = NULL; -} - bool InferFlag(const TensorC *const *inputs, size_t inputs_size) { if (inputs == NULL) { return false; @@ -499,18 +418,22 @@ bool InferFlag(const TensorC *const *inputs, size_t inputs_size) { if (inputs[i] == NULL) { return false; } +#ifdef ENABLE_CONTROL_TENSORLIST if (inputs[i]->data_type_ == kObjectTypeTensorType) { TensorListC *input_tensor_list = (TensorListC *)inputs[i]; if (input_tensor_list->shape_value_ == -1) { return false; } } else { +#endif for (size_t j = 0; j < inputs[i]->shape_size_; ++j) { if (inputs[i]->shape_[j] == -1) { return false; } } +#ifdef ENABLE_CONTROL_TENSORLIST } 
+#endif } return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h index 8e5a867cfd0..63e95a1203e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.h @@ -138,6 +138,7 @@ typedef struct vvector { size_t size_; // number of shapes } vvector; +#ifdef ENABLE_CONTROL_TENSORLIST typedef struct TensorListC { bool is_ready_; int data_type_; @@ -150,6 +151,7 @@ typedef struct TensorListC { size_t element_shape_size_; TensorC *tensors_; } TensorListC; +#endif typedef struct VectorC { int *data_; @@ -158,9 +160,11 @@ typedef struct VectorC { size_t per_malloc_size_; } VectorC; +#ifdef ENABLE_CONTROL_TENSORLIST int MallocTensorListData(TensorListC *tensor_list, TypeIdC dtype, const vvector *tensor_shape); int TensorListMergeShape(int *element_shape, size_t *element_shape_size, const int *tmp, size_t tmp_size); bool TensorListIsFullyDefined(const int *shape, size_t shape_size); +#endif int GetBatch(const TensorC *tensor); int GetHeight(const TensorC *tensor); @@ -202,13 +206,6 @@ int CommonInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int FftInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, const OpParameter *parameter); -int VectorCInit(VectorC *vc, size_t per_malloc_size); -int VectorCSet(VectorC *vc, const int *src_shape, size_t src_shape_size); -int VectorCPush(VectorC *vc, int value); -int VectorCInsert(VectorC *vc, int index, int value); -void VectorCErase(VectorC *vc, int index); -bool VectorCEqual(const VectorC *vc1, const VectorC *vc2); -void VectorCFree(VectorC *vc); bool InferFlag(const TensorC *const *inputs, size_t inputs_size); #ifdef __cplusplus diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c index 638e4a1a5fd..92692403fcc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/concat_infer.c @@ -54,8 +54,13 @@ int ConcatInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * } int output_axis_dim = input0_shape[axis]; for (size_t i = 1; i < inputs_size; ++i) { - if (inputs[i]->shape_size_ != input0_shape_size) { - return NNACL_PARAM_INVALID; + size_t input_i_shape_size = inputs[i]->shape_size_; + if (input_i_shape_size != input0_shape_size) { + if (input_i_shape_size != 0) { + return NNACL_PARAM_INVALID; + } else { + continue; + } } int shape_tmp[MAX_SHAPE_SIZE] = {0}; size_t shape_tmp_size = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c index 967eb87c451..258fc03ffa7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/constant_of_shape_infer.c @@ -37,7 +37,7 @@ int ConstantOfShapeInferShape(const TensorC *const *inputs, size_t inputs_size, return NNACL_ERR; } int out_shape[MAX_SHAPE_SIZE]; - size_t out_shape_size = size; + int out_shape_size = size; switch (in_tensor->data_type_) { case kNumberTypeInt32: { int32_t *in_data = (int32_t *)(in_tensor->data_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c index fd40ccab871..c02ba325a62 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_filter_infer.c @@ -34,7 +34,10 @@ int Conv2dGradFilterInferShape(const TensorC *const *inputs, size_t 
inputs_size, if (inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) { return NNACL_ERR; } - size_t filter_shape_size = inputs[2]->shape_[0]; + if (inputs[2]->shape_[0] < 0) { + return NNACL_ERR; + } + size_t filter_shape_size = (size_t)(inputs[2]->shape_[0]); if (filter_shape_size != 4) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c index f6f5ec00109..60609c6f0e4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_grad_input_infer.c @@ -40,16 +40,16 @@ int Conv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_size, if (inputs[2]->shape_size_ < 1 || inputs[2]->data_ == NULL) { return NNACL_ERR; } - size_t shape_size = inputs[2]->shape_[0]; - if (shape_size != 4) { + size_t data_size = (size_t)inputs[2]->shape_[0]; + if (data_size != 4) { return NNACL_ERR; } int shape[MAX_SHAPE_SIZE]; const int nchw2nhwc[4] = {0, 2, 3, 1}; - for (int i = 0; i < shape_size; i++) { + for (size_t i = 0; i < data_size; i++) { shape[i] = *((int *)(inputs[2]->data_) + nchw2nhwc[i]); } - SetShapeArray(out, shape, shape_size); + SetShapeArray(out, shape, data_size); return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c index 79678b7176c..4193630893e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/conv2d_infer.c @@ -89,6 +89,8 @@ int Conv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } + param->kernel_h_ = param->kernel_h_ != -1 ? 
param->kernel_h_ : weight_tensor->shape_[1]; + param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : weight_tensor->shape_[2]; ConvInferShape(input_h, input_w, &output_h, &output_w, param); int out_shape[MAX_SHAPE_SIZE]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c index f030c7ce9b4..9c7d7a2fbf4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/deconv2d_infer.c @@ -51,8 +51,8 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC output_c = GetBatch(weight); /* depthwise */ } - int kernel_w = param->kernel_w_; - int kernel_h = param->kernel_h_; + int kernel_w = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(weight); + int kernel_h = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(weight); int stride_w = param->stride_w_; int stride_h = param->stride_h_; int dilate_w = param->dilation_w_; @@ -97,6 +97,8 @@ int Deconv2dInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC param->output_h_ = output_h; param->output_w_ = output_w; param->output_channel_ = output_c; + param->kernel_h_ = kernel_h; + param->kernel_w_ = kernel_w; return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c index dff2324da77..f480f23bbc6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/dedepthwise_conv2d_infer.c @@ -38,6 +38,8 @@ int DeDepthwiseConv2DInferShape(const TensorC *const *inputs, size_t inputs_size if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } + param->kernel_h_ = param->kernel_h_ != -1 ? 
param->kernel_h_ : GetHeight(inputs[kWeightIndex]); + param->kernel_w_ = param->kernel_w_ != -1 ? param->kernel_w_ : GetWidth(inputs[kWeightIndex]); output_h = param->stride_h_ * (input_h - 1) + param->kernel_h_ - param->pad_u_ - param->pad_d_; output_w = param->stride_w_ * (input_w - 1) + param->kernel_w_ - param->pad_l_ - param->pad_r_; if ((output_h + param->pad_u_ + param->pad_d_ - param->kernel_h_) % param->stride_h_ != 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c index 6c79ffe945c..ba809ee8f38 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/depthwise_conv2d_infer.c @@ -43,6 +43,8 @@ int DepthwiseConv2dInferShape(const TensorC *const *inputs, size_t inputs_size, if (param->stride_h_ == 0 || param->stride_w_ == 0) { return NNACL_PARAM_INVALID; } + param->kernel_h_ = param->kernel_h_ != -1 ? param->kernel_h_ : GetHeight(inputs[kWeightIndex]); + param->kernel_w_ = param->kernel_w_ != -1 ? 
param->kernel_w_ : GetWidth(inputs[kWeightIndex]); if (param->pad_mode_ == Pad_same) { output_h = ceil((float)(input_h) / (float)(param->stride_h_)); output_w = ceil((float)(input_w) / (float)(param->stride_w_)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c index bcaecf4c583..110612f0fba 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/embedding_lookup_infer.c @@ -49,6 +49,9 @@ int EmbeddingLookupInferShape(const TensorC *const *inputs, size_t inputs_size, size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, ids->shape_, ids->shape_size_); for (size_t i = 0; i < embedding_shape_size; ++i) { + if (output_shape_size >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } ShapePush(output_shape, &output_shape_size, embedding_shape[i]); } for (size_t i = 1; i < inputs_size - 1; ++i) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c index 39ed749343b..e80e648a65e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/expand_dims_infer.c @@ -34,9 +34,12 @@ int ExpandDimsInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso if (inputs[1]->data_ == NULL) { return NNACL_INPUT_TENSOR_ERROR; } + if (GetElementNum(inputs[1]) < 1) { + return NNACL_ERR; + } int dim = ((int32_t *)(inputs[1]->data_))[0]; if (dim < 0) { - dim += input->shape_size_ + 1; + dim += (int)(input->shape_size_) + 1; } if (dim > (int)(input->shape_size_)) { return NNACL_INPUT_TENSOR_ERROR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c 
index bd889bf1897..583b281e0c0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/fill_infer.c @@ -29,7 +29,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o SetDataTypeFormat(output, input); const TensorC *dst_shape_tensor = inputs[1]; const int32_t *dst_shape = (int32_t *)(dst_shape_tensor->data_); - size_t num_dims = 1; + int num_dims = 1; for (size_t i = 0; i < dst_shape_tensor->shape_size_; ++i) { num_dims *= dst_shape_tensor->shape_[i]; } @@ -44,7 +44,7 @@ int FillInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o } int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; - for (size_t i = 0; i < num_dims; i++) { + for (int i = 0; i < num_dims; i++) { ShapePush(output_shape, &output_shape_size, dst_shape[i]); } SetShapeArray(output, output_shape, output_shape_size); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c index eccdd195b3a..7e3d7f66a16 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/full_connection_infer.c @@ -40,7 +40,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T } int new_k = 1; if (param->use_axis_) { - for (size_t i = param->axis_; i < input0->shape_size_; ++i) { + for (size_t i = (size_t)(param->axis_); i < input0->shape_size_; ++i) { new_k *= input0->shape_[i]; } if (new_k != input1->shape_[1]) { @@ -61,7 +61,7 @@ int FullConnectionInferShape(const TensorC *const *inputs, size_t inputs_size, T size_t out_shape_size = 0; ShapeSet(out_shape, &out_shape_size, inputs[0]->shape_, inputs[0]->shape_size_); if (param->use_axis_) { - out_shape_size = param->axis_ + 1; + out_shape_size = (size_t)(param->axis_) + 1; 
out_shape[param->axis_] = input1->shape_[0]; } else { int total = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c index b8ca877d4c2..66f1b2f6061 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_infer.c @@ -43,6 +43,9 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (inputs[2]->data_ == NULL) { return NNACL_NULL_PTR; } + if (GetElementNum(inputs[2]) < 1) { + return NNACL_ERR; + } int axis = *((int *)inputs[2]->data_); if (axis < 0) { axis += input->shape_size_; @@ -50,12 +53,11 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int indices_shape[MAX_SHAPE_SIZE]; size_t indices_shape_size = 0; ShapeSet(indices_shape, &indices_shape_size, indices->shape_, indices->shape_size_); - int indices_rank = indices_shape_size; + size_t indices_rank = indices_shape_size; int in_shape[MAX_SHAPE_SIZE] = {0}; size_t in_shape_size = 0; ShapeSet(in_shape, &in_shape_size, input->shape_, input->shape_size_); - int in_rank = in_shape_size; - if (in_rank < axis + 1) { + if ((size_t)(in_shape_size) < axis + 1) { return NNACL_ERR; } int out_shape[MAX_SHAPE_SIZE] = {0}; @@ -65,7 +67,7 @@ int GatherInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * if (erase_ret != NNACL_OK) { return NNACL_ERR; } - for (int i = indices_rank - 1; i >= 0; --i) { + for (int i = (int)(indices_rank - 1); i >= 0; --i) { ret = ShapeInsert(out_shape, &out_shape_size, axis, indices_shape[i]); if (ret != NNACL_OK) { return NNACL_ERR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c index 3511190718e..37c9fb88e27 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/gather_nd_infer.c @@ -35,8 +35,8 @@ int GatherNdInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (input->shape_size_ > MAX_SHAPE_SIZE || indices->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - int in_rank = input->shape_size_; - int indices_rank = indices->shape_size_; + int in_rank = (int)(input->shape_size_); + int indices_rank = (int)(indices->shape_size_); if (indices->shape_[indices_rank - 1] > in_rank) { return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c index 64ac57b30a1..de5bf3faaf6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/group_conv2d_grad_input_infer.c @@ -31,15 +31,15 @@ int GroupConv2dGradInputInferShape(const TensorC *const *inputs, size_t inputs_s SetDataTypeFormat(out, in0); - size_t shape_size_ = in0->shape_size_; - if (shape_size_ > MAX_SHAPE_SIZE) { + size_t shape_size = in0->shape_size_; + if (shape_size > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } int shape_[MAX_SHAPE_SIZE]; - for (int i = 0; i < shape_size_; i++) { + for (size_t i = 0; i < shape_size; i++) { shape_[i] = in0->shape_[i]; } - SetShapeArray(out, shape_, shape_size_); + SetShapeArray(out, shape_, shape_size); return NNACL_OK; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c index 3943ca45903..84c5ffe545f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c @@ -43,9 +43,9 @@ #include "nnacl/infer/crop_and_resize_infer.h" #include "nnacl/infer/crop_infer.h" #include 
"nnacl/infer/cumsum_infer.h" -#include "nnacl/infer/custom_extract_features_infer.h" -#include "nnacl/infer/custom_normalize_infer.h" -#include "nnacl/infer/custom_predict_infer.h" +#include "nnacl/infer/string/custom_extract_features_infer.h" +#include "nnacl/infer/string/custom_normalize_infer.h" +#include "nnacl/infer/string/custom_predict_infer.h" #include "nnacl/infer/deconv2d_infer.h" #include "nnacl/infer/dedepthwise_conv2d_infer.h" #include "nnacl/infer/depth_to_space_infer.h" @@ -66,18 +66,18 @@ #include "nnacl/infer/gather_nd_infer.h" #include "nnacl/infer/group_conv2d_grad_input_infer.h" #include "nnacl/infer/gru_infer.h" -#include "nnacl/infer/hashtable_lookup_infer.h" +#include "nnacl/infer/string/hashtable_lookup_infer.h" #include "nnacl/infer/invert_permutation_infer.h" #include "nnacl/infer/layer_norm_grad_infer.h" #include "nnacl/infer/layer_norm_infer.h" #include "nnacl/infer/lin_space_infer.h" #include "nnacl/infer/log_softmax_infer.h" -#include "nnacl/infer/lsh_projection_infer.h" +#include "nnacl/infer/string/lsh_projection_infer.h" #include "nnacl/infer/lstm_infer.h" #include "nnacl/infer/matmul_infer.h" #include "nnacl/infer/max_min_grad_infer.h" #include "nnacl/infer/mean_infer.h" -#include "nnacl/infer/merge_infer.h" +#include "nnacl/infer/control/merge_infer.h" #include "nnacl/infer/mfcc_infer.h" #include "nnacl/infer/non_max_suppression_infer.h" #include "nnacl/infer/one_hot_infer.h" @@ -102,7 +102,7 @@ #include "nnacl/infer/sgd_infer.h" #include "nnacl/infer/shape_infer.h" #include "nnacl/infer/size_infer.h" -#include "nnacl/infer/skip_gram_infer.h" +#include "nnacl/infer/string/skip_gram_infer.h" #include "nnacl/infer/slice_infer.h" #include "nnacl/infer/softmax_cross_entropy_infer.h" #include "nnacl/infer/softmax_infer.h" @@ -117,12 +117,12 @@ #include "nnacl/infer/stack_infer.h" #include "nnacl/infer/strided_slice_grad_infer.h" #include "nnacl/infer/strided_slice_infer.h" -#include "nnacl/infer/switch_infer.h" -#include 
"nnacl/infer/tensorlist_fromtensor_infer.h" -#include "nnacl/infer/tensorlist_getitem_infer.h" -#include "nnacl/infer/tensorlist_reserve_infer.h" -#include "nnacl/infer/tensorlist_setitem_infer.h" -#include "nnacl/infer/tensorlist_stack_infer.h" +#include "nnacl/infer/control/switch_infer.h" +#include "nnacl/infer/control/tensorlist_fromtensor_infer.h" +#include "nnacl/infer/control/tensorlist_getitem_infer.h" +#include "nnacl/infer/control/tensorlist_reserve_infer.h" +#include "nnacl/infer/control/tensorlist_setitem_infer.h" +#include "nnacl/infer/control/tensorlist_stack_infer.h" #include "nnacl/infer/tile_infer.h" #include "nnacl/infer/topk_infer.h" #include "nnacl/infer/transpose_infer.h" diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h index 351e4f70086..5929f8e3f4f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.h @@ -227,8 +227,9 @@ enum PrimType { PrimType_Affine = 200, PrimType_Attention = 201, PrimType_LSTMGrad = 202, + PrimType_ScatterNdUpdate = 203, PrimType_MIN = PrimType_NONE, - PrimType_MAX = PrimType_LSTMGrad + 1 + PrimType_MAX = PrimType_ScatterNdUpdate + 1 }; void RegInfer(int prim_type, InferShape func); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c index f8791c23582..90d7bc6f5c0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_grad_infer.c @@ -35,13 +35,16 @@ int LayerNormGradInferShape(const TensorC *const *inputs, size_t inputs_size, Te SetShapeTensor(output_dx, input_x); int begin_params_axis = param->begin_params_axis_; if (param->begin_params_axis_ < 0) { - begin_params_axis 
+= input_x->shape_size_; + begin_params_axis += (int)(input_x->shape_size_); } int size = 0; if (input_x->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } for (int i = begin_params_axis; i < input_x->shape_size_; i++) { + if (size >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } output_dg->shape_[size] = input_x->shape_[i]; output_db->shape_[size] = input_x->shape_[i]; size++; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c index ed8103ef080..dd8a42782e4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/layer_norm_infer.c @@ -39,7 +39,7 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor return NNACL_INPUT_TENSOR_ERROR; } param->begin_norm_axis_ = - param->begin_norm_axis_ < 0 ? param->begin_norm_axis_ + input->shape_size_ : param->begin_norm_axis_; + param->begin_norm_axis_ < 0 ? 
param->begin_norm_axis_ + ((int)(input->shape_size_)) : param->begin_norm_axis_; SetShapeTensor(output, input); // take care of other outputs if (outputs_size == 3) { @@ -52,8 +52,8 @@ int LayerNormInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor output_mean->shape_[size] = input->shape_[size]; output_var->shape_[size] = input->shape_[size]; } - output_mean->shape_size_ = size; - output_var->shape_size_ = size; + output_mean->shape_size_ = (size_t)size; + output_var->shape_size_ = (size_t)size; } return NNACL_OK; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c index 4ec6388b56c..7ab34e59d08 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/lin_space_infer.c @@ -32,6 +32,9 @@ int LinSpaceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } + if (GetElementNum(inputs[2]) < 1) { + return NNACL_ERR; + } int *num = (int *)(inputs[2]->data_); if (num == NULL) { return NNACL_INFER_INVALID; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c index 31f169c242d..a252684f19b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/matmul_infer.c @@ -18,7 +18,10 @@ #include "nnacl/infer/infer_register.h" int CheckMatmulInputShape(int *a_shape, size_t a_shape_size, int *b_shape, size_t b_shape_size, - MatMulParameter *param) { + const MatMulParameter *param) { + if (a_shape_size < 2 || b_shape_size < 2) { + return NNACL_PARAM_INVALID; + } for (size_t i = 0; i < (a_shape_size - 2) && i < (b_shape_size - 2); ++i) { if (a_shape[i] != b_shape[i]) { return 
NNACL_INPUT_TENSOR_ERROR; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c index fe84e5a1a8b..37b3f387731 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/max_min_grad_infer.c @@ -41,11 +41,11 @@ int MaxMinGradInferShape(const TensorC *const *inputs, size_t inputs_size, Tenso ArithmeticParameter *param = (ArithmeticParameter *)parameter; param->ndim_ = dy->shape_size_; - param->in_elements_num0_ = param->ndim_; - param->in_elements_num1_ = param->ndim_; - param->out_elements_num_ = param->ndim_; - int fillDimNum0 = dy->shape_size_ - x1->shape_size_; - int fillDimNum1 = dy->shape_size_ - x2->shape_size_; + param->in_elements_num0_ = (int)(param->ndim_); + param->in_elements_num1_ = (int)(param->ndim_); + param->out_elements_num_ = (int)(param->ndim_); + int fillDimNum0 = (int)(dy->shape_size_ - x1->shape_size_); + int fillDimNum1 = (int)(dy->shape_size_ - x2->shape_size_); int j0 = 0; int j1 = 0; for (unsigned int i = 0; i < dy->shape_size_; i++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c index b2300f4ecb3..2c669ac4a9b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/mean_infer.c @@ -52,7 +52,10 @@ int MeanInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o // reduce on selected axes for (size_t i = 0; i < input->shape_size_; i++) { bool reduce_axis = false; - for (size_t idx = 0; idx < num_axes; ++idx) { + if (num_axes > MAX_SHAPE_SIZE || num_axes < 0) { + return NNACL_ERR; + } + for (int idx = 0; idx < num_axes; ++idx) { if (((size_t)(axes[idx])) == i) { reduce_axis = true; break; diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c index a436621f49c..f26ece39dc1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/pad_infer.c @@ -32,7 +32,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou return NNACL_INFER_INVALID; } - if (input->shape_size_ > 4) { + if (input->shape_size_ > DEFAULT_PAD_NDIMS) { return NNACL_INPUT_TENSOR_ERROR; } const TensorC *paddings = inputs[1]; @@ -48,7 +48,7 @@ int PadInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **ou param->paddings_[i] = ((int *)paddings->data_)[i]; } - int output_shape[MAX_SHAPE_SIZE] = {0}; + int output_shape[DEFAULT_PAD_NDIMS] = {0}; size_t output_shape_size = 0; for (size_t i = 0; i < input->shape_size_; i++) { int shape = input->shape_[i] + param->paddings_[2 * i] + param->paddings_[2 * i + 1]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c index a1aaee328b6..795ee4b019e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/prior_box_infer.c @@ -38,8 +38,11 @@ int PriorBoxInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC PriorBoxParameter *param = (PriorBoxParameter *)parameter; float *aspect_ratios = param->aspect_ratios; - size_t aspect_ratios_size = param->aspect_ratios_size; - for (size_t i = 0; i < aspect_ratios_size; i++) { + if (aspect_ratios == NULL) { + return NNACL_NULL_PTR; + } + int32_t aspect_ratios_size = param->aspect_ratios_size; + for (int32_t i = 0; i < aspect_ratios_size; i++) { float ratio = aspect_ratios[i]; if (ratio == 0) { return NNACL_ERR; @@ -62,8 +65,8 @@ int PriorBoxInferShape(const TensorC *const *inputs, 
size_t inputs_size, TensorC } } - size_t min_sizes_size = param->min_sizes_size; - size_t max_sizes_size = param->max_sizes_size; + int32_t min_sizes_size = param->min_sizes_size; + int32_t max_sizes_size = param->max_sizes_size; int32_t num_priors_box = min_sizes_size * different_aspect_ratios_size + max_sizes_size; const int kPriorBoxPoints = 4; const int kPriorBoxN = 1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c index 0d8bb85785c..0c2114f02c0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/range_infer.c @@ -40,7 +40,9 @@ int RangeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } - + if (GetElementNum(inputs[0]) < 1 || GetElementNum(inputs[1]) < 1 || GetElementNum(inputs[2]) < 1) { + return NNACL_ERR; + } int shape_size = 0; if (inputs_size == 3) { if ((inputs[0]->data_ == NULL) || (inputs[1]->data_ == NULL) || (inputs[2]->data_ == NULL)) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c index 5058ab1c3ec..936339bf22d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reduce_infer.c @@ -33,7 +33,7 @@ int ReduceOnSelectedAxes(const TensorC *input, size_t num_axes, const int *actua for (size_t i = 0; i < input->shape_size_; i++) { bool reduce_axis = false; for (size_t idx = 0; idx < num_axes; ++idx) { - if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx] + input->shape_size_) == i) { + if ((size_t)(actual_axes[idx]) == i || (size_t)(actual_axes[idx]) + input->shape_size_ == i) { reduce_axis = true; break; } @@ -79,7 +79,7 @@ int ReduceInferShape(const 
TensorC *const *inputs, size_t inputs_size, TensorC * if (axes == NULL) { return NNACL_NULL_PTR; } - size_t num_axes; + int num_axes; if (axes_input->shape_size_ == 1) { num_axes = axes_input->shape_[0]; } else if (axes_input->shape_size_ == 0) { @@ -102,7 +102,10 @@ int ReduceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * int begin_axis; begin_axis = axes[0] < 0 ? axes[0] + rank : axes[0]; - for (size_t i = begin_axis + 1; i < rank; ++i) { + if (rank > MAX_SHAPE_SIZE || rank < 0) { + return NNACL_ERR; + } + for (int i = begin_axis + 1; i < rank; ++i) { ShapePush(actual_axes, &actual_axes_size, i); } num_axes = rank - begin_axis; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c index d04cc280158..6fc571263e0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/reshape_infer.c @@ -17,7 +17,7 @@ #include "nnacl/infer/reshape_infer.h" #include "nnacl/infer/infer_register.h" -void CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) { +int CalShape(const int *data, const TensorC *const *inputs, int *out_shape, size_t *out_shape_size, int shape_size) { int input_count = GetElementNum(inputs[0]); int index = 0; int size = 1; @@ -32,24 +32,28 @@ void CalShape(const int *data, const TensorC *const *inputs, int *out_shape, siz ShapePush(out_shape, out_shape_size, data[i]); } if (size == 0) { - return; + return NNACL_ERR; } if ((int)(data[index]) == -1) { + if (index >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } out_shape[index] = input_count / size; } + return NNACL_OK; } int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) { size_t in_shape_size = 1; for (size_t i = 0; i < in_tensor->shape_size_; i++) { - in_shape_size *= in_tensor->shape_[i]; + in_shape_size *= 
(size_t)(in_tensor->shape_[i]); } int64_t infer_index = -1; size_t out_shape_size_new = 1; for (size_t i = 0; i < out_shape_size; i++) { if (out_shape[i] == -1) { if (infer_index == -1) { - infer_index = i; + infer_index = (int64_t)(i); } else { return NNACL_ERR; } @@ -64,7 +68,7 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) break; } } else { - out_shape_size_new *= out_shape[i]; + out_shape_size_new *= (size_t)(out_shape[i]); } } if (infer_index == -1 && out_shape_size_new != in_shape_size) { @@ -74,7 +78,10 @@ int CalNewShape(const TensorC *in_tensor, int *out_shape, size_t out_shape_size) if (out_shape_size_new == 0) { return NNACL_ERR; } - out_shape[infer_index] = in_shape_size / out_shape_size_new; + if (infer_index >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } + out_shape[infer_index] = (int)(in_shape_size / out_shape_size_new); } return NNACL_OK; } @@ -94,35 +101,55 @@ int CalShapeByType(const TensorC *const *inputs, size_t shape_size, int *out_sha for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + if (cal_ret != NNACL_OK) { + free(data_int); + return NNACL_ERR; + } } break; case kNumberTypeInt32: { int32_t *data = (int32_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + if (cal_ret != NNACL_OK) { + free(data_int); + return NNACL_ERR; + } } break; case kNumberTypeInt64: { int64_t *data = (int64_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + if (cal_ret != 
NNACL_OK) { + free(data_int); + return NNACL_ERR; + } } break; case kNumberTypeFloat: { float *data = (float *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + if (cal_ret != NNACL_OK) { + free(data_int); + return NNACL_ERR; + } } break; case kNumberTypeUInt32: { uint32_t *data = (uint32_t *)(shape_tensor->data_); for (size_t i = 0; i < shape_size; i++) { data_int[i] = data[i]; } - CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + int cal_ret = CalShape(data_int, inputs, out_shape, out_shape_size, shape_size); + if (cal_ret != NNACL_OK) { + free(data_int); + return NNACL_ERR; + } } break; default: { free(data_int); @@ -162,7 +189,10 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (shape_tensor->data_ == NULL) { return NNACL_INFER_INVALID; } - size_t shape_size = GetElementNum(shape_tensor); + int shape_size = GetElementNum(shape_tensor); + if (shape_size > MAX_SHAPE_SIZE) { + return NNACL_ERR; + } int calRet = CalShapeByType(inputs, shape_size, out_shape, &out_shape_size); if (calRet != NNACL_OK) { return calRet; @@ -171,7 +201,7 @@ int ReshapeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC if (param->shape_dim_ > MAX_SHAPE_SIZE) { return NNACL_PARAM_INVALID; } - for (size_t i = 0; i < param->shape_dim_; ++i) { + for (int i = 0; i < param->shape_dim_; ++i) { ShapePush(out_shape, &out_shape_size, param->shape_[i]); } } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c index 2e718166262..da8d02756fa 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/resize_infer.c @@ -24,7 +24,7 @@ int 
HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) { if (shape_tensor->data_ == NULL) { return NNACL_INFER_INVALID; } - size_t shape_size = GetElementNum(shape_tensor); + int shape_size = GetElementNum(shape_tensor); switch (shape_size) { case 4: { if (shape_tensor->data_type_ == kNumberTypeInt32) { @@ -32,6 +32,9 @@ int HandleTwoInputs(const TensorC *const *inputs, ResizeParameter *param) { if (data == NULL) { return NNACL_INFER_INVALID; } + if (GetElementNum(shape_tensor) < 4) { + return NNACL_ERR; + } switch (shape_tensor->format_) { case Format_NCHW: param->new_height_ = data[2]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h index 699405e831f..7b035b15a0e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/scatter_nd_infer.h @@ -17,7 +17,6 @@ #define MINDSPORE_NNACL_SCATTER_ND_INFER_H #include "nnacl/infer/common_infer.h" -#include "nnacl/softmax_parameter.h" #ifdef __cplusplus extern "C" { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c index d1f9a695d13..9708755ea64 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/select_infer.c @@ -34,6 +34,7 @@ int SelectInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC * TensorC *output = outputs[i]; SetDataTypeFormat(output, input); if (input->data_type_ == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST TensorListC *input_tensorlist = (TensorListC *)(input); TensorListC *output_tensorlist = (TensorListC *)(output); output_tensorlist->element_shape_size_ = input_tensorlist->element_shape_size_; @@ -47,6 +48,9 @@ int SelectInferShape(const TensorC *const 
*inputs, size_t inputs_size, TensorC * for (size_t j = 0; j < output_tensorlist->element_num_; j++) { memcpy(&output_tensorlist->tensors_[j], &input_tensorlist->tensors_[j], sizeof(TensorC)); } +#else + return NNACL_ERR; +#endif } else { SetShapeTensor(output, input); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c index 91a3121c048..b2d1e6678b3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/slice_infer.c @@ -32,6 +32,35 @@ static bool CheckInputsDataType(const TensorC *const *inputs, size_t inputs_size return true; } +int InitBeginAndSizeParam(const TensorC *const *inputs, SliceParameter *param) { + /* init begin parameter */ + int slice_begin_size = GetElementNum(inputs[1]); + int *begin_ptr = (int *)(inputs[1]->data_); + if (slice_begin_size != param->param_length_ || begin_ptr == NULL) { + return NNACL_INFER_INVALID; + } + if (slice_begin_size > MAX_AXIS_SIZE) { + return NNACL_ERR; + } + for (size_t i = 0; i < slice_begin_size; i++) { + param->begin_[i] = begin_ptr[i]; + } + + /* init size parameter */ + int slice_size_size = GetElementNum(inputs[2]); + int *size_ptr = (int *)(inputs[2]->data_); + if (slice_size_size != param->param_length_ || size_ptr == NULL) { + return NNACL_INFER_INVALID; + } + if (slice_size_size > MAX_AXIS_SIZE) { + return NNACL_ERR; + } + for (size_t i = 0; i < slice_size_size; i++) { + param->size_[i] = size_ptr[i]; + } + return NNACL_OK; +} + int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, OpParameter *parameter) { int ret = CheckAugmentWithMinSize(inputs, inputs_size, outputs, outputs_size, parameter, 3, 1); @@ -54,38 +83,22 @@ int SliceInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** return NNACL_INPUT_TENSOR_ERROR; } SliceParameter *param = 
(SliceParameter *)parameter; - param->param_length_ = input->shape_size_; + param->param_length_ = (int)(input->shape_size_); output->shape_size_ = input->shape_size_; - /* init begin parameter */ - size_t slice_begin_size = GetElementNum(inputs[1]); - int *begin_ptr = (int *)(inputs[1]->data_); - if (slice_begin_size != param->param_length_ || begin_ptr == NULL) { - return NNACL_INFER_INVALID; - } - for (int i = 0; i < slice_begin_size; i++) { - param->begin_[i] = begin_ptr[i]; - } - - /* init size parameter */ - size_t slice_size_size = GetElementNum(inputs[2]); - int *size_ptr = (int *)(inputs[2]->data_); - if (slice_size_size != param->param_length_ || size_ptr == NULL) { - return NNACL_INFER_INVALID; - } - for (int i = 0; i < slice_size_size; i++) { - param->size_[i] = size_ptr[i]; + if (InitBeginAndSizeParam(inputs, param) != NNACL_OK) { + return NNACL_ERR; } /* infer output shape information */ int begin[MAX_SHAPE_SIZE]; int size[MAX_SHAPE_SIZE]; - for (size_t i = 0; i < param->param_length_; ++i) { + for (int32_t i = 0; i < param->param_length_; ++i) { begin[param->axis_[i]] = param->begin_[i]; size[param->axis_[i]] = param->size_[i]; } - for (size_t i = 0; i < param->param_length_; ++i) { + for (int32_t i = 0; i < param->param_length_; ++i) { if (size[i] < 0 && size[i] != -1) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c index c9b29b4415e..9a13e40c73a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_infer.c @@ -38,7 +38,7 @@ int SpaceToBatchInferShape(const TensorC *const *inputs, size_t inputs_size, Ten } int *block_shape = param->block_sizes_; - size_t block_shape_size = param->m_; + int block_shape_size = param->m_; int *paddings = param->paddings_; int padding_left = 0; int 
padding_right = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c index 612d1408f1f..036168cd0cb 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/space_to_batch_nd_infer.c @@ -29,7 +29,7 @@ int SpaceSetOutputShapeFromParam(const TensorC *const *inputs, size_t inputs_siz } SpaceToBatchParameter *param = (SpaceToBatchParameter *)parameter; int *block_shape = param->block_sizes_; - size_t block_shape_size = param->m_; + int block_shape_size = param->m_; int *padding = param->paddings_; int padding_left = 0; int padding_right = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c index 533a32824f7..007b50d1d71 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/split_infer.c @@ -31,7 +31,7 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** SplitParameter *param = (SplitParameter *)parameter; - size_t num_split_ = param->num_split_ == 0 ? (int)(outputs_size) : param->num_split_; + int num_split_ = param->num_split_ == 0 ? (int)(outputs_size) : param->num_split_; if (num_split_ == 0) { return NNACL_ERR; } @@ -43,8 +43,8 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** if (input->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - size_t split_dim = param->split_dim_ < 0 ? input->shape_size_ + param->split_dim_ : param->split_dim_; - if (split_dim > input->shape_size_) { + int split_dim = param->split_dim_ < 0 ? 
((int)(input->shape_size_)) + param->split_dim_ : param->split_dim_; + if (split_dim > (int)(input->shape_size_)) { return NNACL_ERR; } if ((int)(outputs_size) != num_split_) { @@ -64,7 +64,10 @@ int SplitInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); int split_dim_i = input->shape_[split_dim]; if (i == num_split_ - 1 && param->split_sizes_[i] == -1) { - for (size_t j = 0; j < param->num_split_ - 1; ++j) { + if (param->num_split_ - 1 < 0) { + return NNACL_ERR; + } + for (int j = 0; j < param->num_split_ - 1; ++j) { split_dim_i -= param->split_sizes_[j]; } param->split_sizes_[i] = split_dim_i; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c index cf2137f8095..2d35201add1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/squeeze_infer.c @@ -40,7 +40,7 @@ int SqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC size_t out_shape_size = 0; for (size_t i = 0; i < param->axis_size_; i++) { - param->axis_[i] = param->axis_[i] >= 0 ? param->axis_[i] : param->axis_[i] + input->shape_size_; + param->axis_[i] = param->axis_[i] >= 0 ? 
param->axis_[i] : param->axis_[i] + (int)input->shape_size_; } if (param->axis_size_ == 0) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c index d533441390d..340284aeae1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/stack_infer.c @@ -41,8 +41,8 @@ int StackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** int32_t output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapeSet(output_shape, &output_shape_size, input->shape_, input->shape_size_); - int axis = param->axis_ < 0 ? param->axis_ + input->shape_size_ + 1 : param->axis_; - if (axis < 0 || axis > input->shape_size_) { + int axis = param->axis_ < 0 ? (int)(param->axis_) + (int)(input->shape_size_) + 1 : param->axis_; + if (axis < 0 || axis > (int)(input->shape_size_)) { return NNACL_PARAM_INVALID; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c index b4be741c3d4..ea124e94763 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_grad_infer.c @@ -32,8 +32,9 @@ bool StridedSliceCheckInputs(const TensorC *const *inputs, size_t inputs_size) { return true; // note: the original code is ndim_ <= in_shape_size } -void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, uint32_t *begins_mask, int *ends, uint32_t *ends_mask, - uint32_t *ellipsis_mask, int *in_shape) { +void ApplyBeginEndEllipsisMask(size_t ndim, int *begins, const uint32_t *const begins_mask, int *ends, + const uint32_t *const ends_mask, const uint32_t *const ellipsis_mask, + const int *const in_shape) { for (size_t i = 0; i < ndim; i++) { if (begins_mask[i]) { 
begins[i] = 0; @@ -84,8 +85,8 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, int *end_data = (int *)(inputs[3]->data_); int *stride_data = (int *)(inputs[4]->data_); - size_t ndim_ = GetElementNum(begin_tensor); - for (int i = 0; i < ndim_; ++i) { + size_t ndim_ = (size_t)GetElementNum(begin_tensor); + for (size_t i = 0; i < ndim_; ++i) { ShapePush(begins_, &begins_size, begin_data[i]); ShapePush(ends_, &ends_size, end_data[i]); ShapePush(strides_, &strides_size, stride_data[i]); @@ -104,9 +105,9 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i); new_axis_mask_[i] = (unsigned)(param->newAxisMask_) & (1 << i); } - param->num_axes_ = in_shape_size; - param->in_shape_length_ = in_shape_size; - for (int i = 0; i < ndim_; ++i) { + param->num_axes_ = (int)(in_shape_size); + param->in_shape_length_ = (int)(in_shape_size); + for (size_t i = 0; i < ndim_; ++i) { param->begins_[i] = begins_[i]; param->ends_[i] = ends_[i]; param->strides_[i] = strides_[i]; @@ -138,13 +139,16 @@ int StridedSliceGradInferShape(const TensorC *const *inputs, size_t inputs_size, return NNACL_OK; } - size_t output_size = inputs[1]->shape_[0]; + int output_size = inputs[1]->shape_[0]; int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; if (inputs[1]->data_ == NULL) { return NNACL_ERR; } + if (output_size > MAX_SHAPE_SIZE) { + return NNACL_ERR; + } for (int i = 0; i < output_size; i++) { ShapePush(output_shape, &output_shape_size, ((int *)(inputs[1]->data_))[i]); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c index 442d95624d3..c8c6bf067b2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/strided_slice_infer.c @@ -70,7 +70,7 
@@ int HandleAxesInputNotExist(const TensorC *const *inputs, struct StridedSliceTra return NNACL_ERR; } transfer_buffer->ndim_ = GetElementNum(begin_tensor); - for (int i = 0; i < transfer_buffer->ndim_; ++i) { + for (int i = 0; i < (size_t)(transfer_buffer->ndim_); ++i) { ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, begin_data[i]); ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, end_data[i]); ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, stride_data[i]); @@ -94,7 +94,7 @@ int GenerateAxes(const TensorC *axes_tensor, int *axes, int num, int ndim) { axes[i] = i; } } else { - for (size_t i = 0; i < num; i++) { + for (int i = 0; i < num; i++) { axes[i] = axes_data[i]; } for (int i = 0; i < num; ++i) { @@ -132,23 +132,29 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, } const TensorC *axes_tensor = inputs[3]; - int axes[MAX_SHAPE_SIZE]; + int axes[MAX_SHAPE_SIZE] = {0}; int ret = GenerateAxes(axes_tensor, axes, begin_ndim, *ndim); if (ret != NNACL_OK) { return ret; } - for (size_t i = 0; i < *ndim; i++) { + if (*ndim > MAX_SHAPE_SIZE || *ndim < 0) { + return NNACL_ERR; + } + for (int i = 0; i < *ndim; i++) { in_shape[i] = 0; begins[i] = 0; strides[i] = 0; } - for (size_t i = 0; i < *ndim; ++i) { + for (int i = 0; i < *ndim; ++i) { in_shape[i] = input_tensor->shape_[i]; } - for (size_t i = 0; i < *ndim; ++i) { + for (int i = 0; i < *ndim; ++i) { int axes_it = 0; - for (size_t j = 0; j < begin_ndim; j++) { + if (begin_ndim > MAX_SHAPE_SIZE || begin_ndim < 0) { + return NNACL_ERR; + } + for (int j = 0; j < begin_ndim; j++) { if (axes[j] == i) { axes_it = j; break; @@ -158,8 +164,12 @@ int HandleAxesInputExist(const TensorC *const *inputs, int *ndim, int *in_shape, } if (axes_it != begin_ndim) { int axis = axes_it; - // begins or ends exceed limit will be set to limit - begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]); + if 
(begin_data[axis] > input_tensor->shape_[i] - 1) { + begins[i] = begin_data[axis]; + } else { + begins[i] = imax(imin(begin_data[axis], input_tensor->shape_[i] - 1), -input_tensor->shape_[i]); + } + // ends exceed limit will be set to limit ends[i] = imax(imin(end_data[axis], input_tensor->shape_[i]), -input_tensor->shape_[i] - 1); if (stride_data == NULL) { return NNACL_ERR; @@ -190,7 +200,7 @@ int StrideSlicePreCheck(const TensorC *const *inputs, size_t inputs_size, Tensor } void Bit2Vector(StridedSliceTransferBuffer *transfer_buffer, const StridedSliceParameter *param) { - for (unsigned i = 0; i < (unsigned)transfer_buffer->ndim_; i++) { + for (unsigned i = 0; i < (unsigned)(size_t)(transfer_buffer->ndim_); i++) { transfer_buffer->begins_mask_[i] = (unsigned)(param->begins_mask_) & (1 << i); transfer_buffer->ends_mask_[i] = (unsigned)(param->ends_mask_) & (1 << i); transfer_buffer->ellipsis_mask_[i] = (unsigned)(param->ellipsisMask_) & (1 << i); @@ -215,7 +225,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa transfer_buffer->strides_[i] = 1; ShapePush(transfer_buffer->begins_, &transfer_buffer->begins_size_, 0); - ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[transfer_buffer->ndim_ - 1]); + ShapePush(transfer_buffer->ends_, &transfer_buffer->ends_size_, in_shape[(size_t)(transfer_buffer->ndim_) - 1]); ShapePush(transfer_buffer->strides_, &transfer_buffer->strides_size_, 1); transfer_buffer->begins_mask_[i] = false; @@ -228,7 +238,7 @@ int ApplyNewAxisMask(StridedSliceTransferBuffer *transfer_buffer, StridedSlicePa } void ApplyBeginMask(StridedSliceTransferBuffer *transfer_buffer) { - for (int i = 0; i < transfer_buffer->ndim_; i++) { + for (int i = 0; i < (size_t)(transfer_buffer->ndim_); i++) { if (transfer_buffer->begins_mask_[i]) { transfer_buffer->begins_[i] = 0; } @@ -296,7 +306,7 @@ void ApplyShrinkMask(StridedSliceTransferBuffer *transfer_buffer, int *output_sh int TransferBuffer2Param(const 
StridedSliceTransferBuffer *transfer_buffer, StridedSliceParameter *param, const int *in_shape, size_t in_shape_size) { - if (transfer_buffer->ndim_ >= in_shape_size || param->in_shape_length_ >= in_shape_size) { + if (transfer_buffer->ndim_ >= (int)(in_shape_size) || param->in_shape_length_ >= (int)(in_shape_size)) { return NNACL_ERR; } for (int i = 0; i < transfer_buffer->ndim_; i++) { @@ -325,12 +335,12 @@ void InitStridedSliceTransferBuffer(StridedSliceTransferBuffer *transfer_buffer) } void SetMaskSize(StridedSliceTransferBuffer *transfer_buffer) { - transfer_buffer->ellipsis_mask_size_ = transfer_buffer->ndim_; - transfer_buffer->new_axis_mask_size_ = transfer_buffer->ndim_; - transfer_buffer->shrink_axis_mask_size_ = transfer_buffer->ndim_; - transfer_buffer->begins_size_ = transfer_buffer->ndim_; - transfer_buffer->ends_size_ = transfer_buffer->ndim_; - transfer_buffer->strides_size_ = transfer_buffer->ndim_; + transfer_buffer->ellipsis_mask_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->new_axis_mask_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->shrink_axis_mask_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->begins_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->ends_size_ = (size_t)(transfer_buffer->ndim_); + transfer_buffer->strides_size_ = (size_t)(transfer_buffer->ndim_); } // note: begin, end, stride length are equal, but may less than rank of input @@ -359,8 +369,8 @@ int StridedSliceInferShape(const TensorC *const *inputs, size_t inputs_size, Ten InitStridedSliceTransferBuffer(&transfer_buffer); StridedSliceParameter *param = (StridedSliceParameter *)parameter; - param->num_axes_ = in_shape_size; - param->in_shape_length_ = in_shape_size; + param->num_axes_ = (int)(in_shape_size); + param->in_shape_length_ = (int)(in_shape_size); transfer_buffer.ndim_ = 0; if (inputs_size == kStridedSliceInputNum) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c index ecf1db30156..77609e8b1a2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/tile_infer.c @@ -25,6 +25,9 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) { multiples_size_tmp[i] = 1; } for (size_t i = 0; i < param->dims_size_; i++) { + if (i >= MAX_TILE_DIM_SIZE) { + return; + } multiples_size_tmp[param->dims_[i]] = param->multiples_[i]; } for (size_t i = 0; i < 5; i++) { @@ -35,13 +38,10 @@ void TileParamCaffe2Tflite(TileParameter *param, size_t out_shape_size) { int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **outputs, size_t outputs_size, OpParameter *parameter) { - int check_ret = CheckAugmentNull(inputs, inputs_size, outputs, outputs_size, parameter); + int check_ret = CheckAugmentNullSize(inputs, inputs_size, outputs, outputs_size, parameter, 2, 1); if (check_ret != NNACL_OK) { return check_ret; } - if (inputs_size != 2 || outputs_size < 1) { - return NNACL_INPUT_TENSOR_ERROR; - } const TensorC *input = inputs[0]; TensorC *output = outputs[0]; @@ -51,7 +51,7 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o return NNACL_INFER_INVALID; } - int out_shape[MAX_SHAPE_SIZE]; + int out_shape[MAX_SHAPE_SIZE] = {0}; size_t out_shape_size = 0; TileParameter *param = (TileParameter *)parameter; @@ -60,7 +60,10 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o if (data_num > (int)(input->shape_size_) || input->shape_size_ > MAX_SHAPE_SIZE) { return NNACL_INPUT_TENSOR_ERROR; } - multiples_size = data_num; + if (data_num > MAX_TILE_DIM_SIZE) { + return NNACL_ERR; + } + multiples_size = (size_t)(data_num); if (inputs[1]->data_type_ != kNumberTypeInt && inputs[1]->data_type_ != kNumberTypeInt32) { return NNACL_INPUT_TENSOR_ERROR; } @@ -68,7 +71,7 @@ int TileInferShape(const TensorC 
*const *inputs, size_t inputs_size, TensorC **o if (input1_data == NULL) { return NNACL_INFER_INVALID; } - for (size_t i = 0; i < data_num; i++) { + for (int i = 0; i < data_num; i++) { param->multiples_[i] = input1_data[i]; } @@ -91,6 +94,9 @@ int TileInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC **o ShapePush(out_shape, &out_shape_size, input->shape_[i]); } for (size_t i = 0; i < dims_size; ++i) { + if (dims[i] >= MAX_SHAPE_SIZE || input->shape_[dims[i]] == 0) { + return NNACL_ERR; + } if (input->shape_[dims[i]] != 0 && param->multiples_[i] > INT_MAX / input->shape_[dims[i]]) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c index 36b083ca301..0f00b7280f1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/transpose_infer.c @@ -17,8 +17,8 @@ #include "nnacl/infer/transpose_infer.h" #include "nnacl/infer/infer_register.h" -bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const size_t size) { - for (size_t i = 0; i < size; ++i) { +bool CheckPermTransFormat(const int *perm, const int *perm_transformat, const int size) { + for (int i = 0; i < size; ++i) { if (perm[i] != perm_transformat[i]) { return false; } @@ -64,7 +64,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor SetDataTypeFormat(output, input); const TensorC *perm_tensor = inputs[1]; const int32_t *perm_data = (int32_t *)perm_tensor->data_; - const size_t perms_num = (size_t)perm_tensor->shape_[0]; + const int perms_num = perm_tensor->shape_[0]; if (perm_tensor->shape_size_ == 0) { return NNACL_INFER_INVALID; } @@ -73,7 +73,7 @@ int TransposeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor } int perm[MAX_TRANSPOSE_DIM_SIZE] = {0}; size_t perm_size = 0; - for (size_t i = 0; i < 
perms_num; i++) { + for (int i = 0; i < perms_num; i++) { if (perm_data[i] >= perms_num) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c index c5e9b8db1f3..57f1a0b06c1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/uniform_real_infer.c @@ -37,7 +37,7 @@ int UniformRealInferShape(const TensorC *const *inputs, size_t inputs_size, Tens return NNACL_INPUT_TENSOR_ERROR; } int output_shape[MAX_SHAPE_SIZE]; - size_t output_shape_size = input_num; + size_t output_shape_size = (size_t)(input_num); for (int i = 0; i < input_num; i++) { output_shape[i] = input_data[i]; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c index 77a696baf1c..cee24a5e5f4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsorted_segment_sum_infer.c @@ -35,7 +35,7 @@ int UnsortedSegmentSumInferShape(const TensorC *const *inputs, size_t inputs_siz int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; ShapePush(output_shape, &output_shape_size, num_segments); - for (int index = segment_id->shape_size_; index < (int)(x->shape_size_); index++) { + for (int index = (int)(segment_id->shape_size_); index < (int)(x->shape_size_); index++) { if (output_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c index 0119718058e..930aff8c54e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unsqueeze_infer.c @@ -33,7 +33,7 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor } UnSqueezeParameter *param = (UnSqueezeParameter *)parameter; - int in_rank = input->shape_size_; + int in_rank = (int)(input->shape_size_); int dim_rank = param->num_dim_; int out_shape[MAX_SHAPE_SIZE] = {0}; size_t out_shape_size = 0; @@ -50,14 +50,17 @@ int UnsqueezeInferShape(const TensorC *const *inputs, size_t inputs_size, Tensor int sz = in_rank + dim_rank; size_t in_itr = 0; size_t ax_itr = 0; - for (size_t i = 0; i < sz; i++) { + if (sz < 0) { + return NNACL_ERR; + } + for (int i = 0; i < sz; i++) { if (out_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } - if (ax_itr < dim_rank && param->dims_[ax_itr] == (int)(i)) { + if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] == (int)(i)) { ShapePush(out_shape, &out_shape_size, 1); ax_itr++; - } else if (ax_itr < dim_rank && param->dims_[ax_itr] + sz == i) { + } else if (ax_itr < (size_t)(dim_rank) && param->dims_[ax_itr] + sz == i) { ShapePush(out_shape, &out_shape_size, 1); ax_itr++; } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c index b4fd6165ed5..33eb635e278 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/unstack_infer.c @@ -26,8 +26,8 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC const TensorC *input = inputs[0]; UnstackParameter *param = (UnstackParameter *)parameter; - int axis = param->axis_ < 0 ? param->axis_ + input->shape_size_ : param->axis_; - if (axis < 0 || axis >= input->shape_size_) { + int axis = param->axis_ < 0 ? 
param->axis_ + (int)(input->shape_size_) : param->axis_; + if (axis < 0 || axis >= (int)(input->shape_size_)) { return NNACL_PARAM_INVALID; } for (size_t i = 0; i < outputs_size; i++) { @@ -40,7 +40,7 @@ int UnstackInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC int output_shape[MAX_SHAPE_SIZE] = {0}; size_t output_shape_size = 0; for (size_t i = 0; i < input->shape_size_; ++i) { - if (i != axis) { + if (i != (size_t)(axis)) { if (output_shape_size >= MAX_SHAPE_SIZE) { return NNACL_ERR; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c index b20e1e07e01..4c05f58bced 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/where_infer.c @@ -38,14 +38,14 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** return NNACL_INPUT_TENSOR_ERROR; } - SetDataTypeFormat(output, input); + const TensorC *input0 = inputs[0]; + const TensorC *input1 = inputs[1]; + const TensorC *input2 = inputs[2]; + SetDataTypeFormat(output, input1); if (!InferFlag(inputs, inputs_size)) { return NNACL_INFER_INVALID; } - const TensorC *input0 = inputs[0]; - const TensorC *input1 = inputs[1]; - const TensorC *input2 = inputs[2]; int num = GetElementNum(input0); int num1 = GetElementNum(input1); int num2 = GetElementNum(input2); @@ -53,6 +53,9 @@ int WhereInferShape(const TensorC *const *inputs, size_t inputs_size, TensorC ** int axisout = 0; size_t temp = 0; for (size_t j = 0; j < input0->shape_size_; j++) { + if (j >= MAX_SHAPE_SIZE) { + return NNACL_ERR; + } if (input0->shape_[j] == input1->shape_[j] && input0->shape_[j] != input2->shape_[j]) { axisout = j; break; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c index 3b03088b3a5..6314b6b0d6f 
100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arg_min_max_int8.c @@ -39,8 +39,8 @@ void DoArgMinMaxQuant(const int8_t *input, int8_t *output, const ArgMinMaxParame float bias = -in_quant_arg->zp_ * in_quant_arg->scale_; int32_t output_zp = out_quant_arg->zp_; for (int i = 0; i < pre_axis_count; ++i) { - size_t output_offset = i * after_axis_count; - size_t input_offset = output_offset * axis_count; + int output_offset = i * after_axis_count; + int input_offset = output_offset * axis_count; for (int j = 0; j < after_axis_count; ++j) { float value = -FLT_MAX; if (!param->get_max_) { @@ -97,8 +97,8 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, int32_t output_zp = out_quant_arg->zp_; for (int32_t i = 0; i < param->in_strides_[0]; ++i) { for (int j = 0; j < in_shape[0]; ++j) { - size_t offset = param->in_strides_[0] * j + i; - param->arg_elements_[j].index_ = j; + int offset = param->in_strides_[0] * j + i; + param->arg_elements_[j].index_ = (uint32_t)j; param->arg_elements_[j].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -108,7 +108,7 @@ void Int8ArgMinMaxDim0(const int8_t *input, int8_t *output, const int *in_shape, } for (int j = 0; j < param->topk_; ++j) { - size_t out_offset = j * param->out_strides_[0] + i; + int out_offset = j * param->out_strides_[0] + i; float real_out = out_value ? 
param->arg_elements_[j].data_.f_data_ : param->arg_elements_[j].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -123,12 +123,12 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, int32_t output_zp = out_quant_arg->zp_; int in_shape1 = in_shape[1]; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < param->in_strides_[1]; ++j) { for (int k = 0; k < in_shape1; ++k) { - size_t offset = param->in_strides_[1] * k + in_dim0_offset + j; - param->arg_elements_[k].index_ = k; + int offset = param->in_strides_[1] * k + in_dim0_offset + j; + param->arg_elements_[k].index_ = (size_t)k; param->arg_elements_[k].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -138,7 +138,7 @@ void Int8ArgMinMaxDim1(const int8_t *input, int8_t *output, const int *in_shape, } for (int k = 0; k < param->topk_; ++k) { - size_t out_offset = out_dim0_offset + j + k * param->out_strides_[1]; + int out_offset = out_dim0_offset + j + k * param->out_strides_[1]; float real_out = out_value ? 
param->arg_elements_[k].data_.f_data_ : param->arg_elements_[k].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -155,15 +155,15 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, int in_shape1 = in_shape[1]; int in_shape2 = in_shape[2]; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < param->in_strides_[2]; ++k) { for (int l = 0; l < in_shape2; ++l) { - size_t offset = param->in_strides_[2] * l + k + in_dim1_offset; - param->arg_elements_[l].index_ = l; + int offset = param->in_strides_[2] * l + k + in_dim1_offset; + param->arg_elements_[l].index_ = (uint32_t)l; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -172,7 +172,7 @@ void Int8ArgMinMaxDim2(const int8_t *input, int8_t *output, const int *in_shape, qsort(param->arg_elements_, in_shape2, sizeof(ArgElement), ArgCompareAscInt8); } for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim1_offset + k + l * param->out_strides_[2]; + int out_offset = out_dim1_offset + k + l * param->out_strides_[2]; float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } @@ -191,17 +191,17 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, int in_shape2 = in_shape[2]; int in_shape3 = in_shape[3]; for (int i = 0; i < in_shape[0]; ++i) { - size_t in_dim0_offset = i * param->in_strides_[0]; - size_t out_dim0_offset = i * param->out_strides_[0]; + int in_dim0_offset = i * param->in_strides_[0]; + int out_dim0_offset = i * param->out_strides_[0]; for (int j = 0; j < in_shape1; ++j) { - size_t in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; - size_t out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; + int in_dim1_offset = j * param->in_strides_[1] + in_dim0_offset; + int out_dim1_offset = j * param->out_strides_[1] + out_dim0_offset; for (int k = 0; k < in_shape2; ++k) { - size_t in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; - size_t out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; + int in_dim2_offset = k * param->in_strides_[2] + in_dim1_offset; + int out_dim2_offset = k * param->out_strides_[2] + out_dim1_offset; for (int l = 0; l < in_shape3; ++l) { - size_t offset = l + in_dim2_offset; - param->arg_elements_[l].index_ = l; + int offset = l + in_dim2_offset; + param->arg_elements_[l].index_ = (uint32_t)l; param->arg_elements_[l].data_.f_data_ = input[offset] * in_quant_arg->scale_ + bias; } if (param->get_max_) { @@ -210,7 +210,7 @@ void Int8ArgMinMaxDim3(const int8_t *input, int8_t *output, const int *in_shape, qsort(param->arg_elements_, in_shape3, sizeof(ArgElement), ArgCompareAscInt8); } for (int l = 0; l < param->topk_; ++l) { - size_t out_offset = out_dim2_offset + l; + int out_offset = out_dim2_offset + l; float real_out = out_value ? 
param->arg_elements_[l].data_.f_data_ : param->arg_elements_[l].index_; output[out_offset] = GetInt8Output(real_out, output_inverse_scale, output_zp); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c index 181dc9815f3..982d7c4da8a 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/arithmetic_self_int8.c @@ -218,7 +218,7 @@ int16x4_t ClacSumHalfWord(int32x4_t scaled_input, int32x4_t left_shift_out_vec, void SquareInt8NEON(const int8_t *input_data, int8_t *output_data, int64_t element_size, ArithSelfQuantArg para, int *index) { int32x4_t output_multiplier_vec = vdupq_n_s32(para.output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << para.shift_left_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)para.shift_left_); for (; (*index) <= element_size - 8; (*index) += 8) { int16x8_t input_val = LoadAndAddOffset(input_data, *index, para.in_args_.zp_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c index e3d6840d2b3..018346effb5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.c @@ -18,7 +18,8 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int *filter_zp) { + int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, + const int *filter_zp) { int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1; matmul_func(packed_input, packed_weight, 
dst, row, col, deep4, conv_param->output_channel_, input_sum, bias, left_shift, right_shift, multiplier, conv_param->conv_quant_arg_.output_quant_args_[0].zp_, @@ -29,7 +30,7 @@ void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, int32_t *filter_zp) { + int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp) { int is_per_oc = (int)conv_param->conv_quant_arg_.filter_arg_num_ != 1; MatmulInt8Opt(packed_input, packed_weight, dst, row, col, deep16, input_sum, bias, conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0], diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h index f8339b54198..6cc8d0d22e4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv1x1_int8.h @@ -33,10 +33,11 @@ extern "C" { void Conv1x1Int8(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep16, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, int32_t *filter_zp); + int32_t *multiplier, ConvParameter *conv_param, const int32_t *filter_zp); void Conv1x1Int8Opt(const int8_t *packed_input, const int8_t *packed_weight, int8_t *dst, const int32_t *input_sum, const int32_t *bias, int row, int col, int deep4, int32_t *left_shift, int32_t *right_shift, - int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, int32_t *filter_zp); + int32_t *multiplier, ConvParameter *conv_param, MATMUL_OPT_DP_FUNC matmul_func, + const 
int32_t *filter_zp); #ifdef __cplusplus } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c index b2f3da19cd1..6ad20cade63 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/conv3x3_int8.c @@ -812,11 +812,11 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input, for (int j = real_y_start; j < real_y_end; j++) { const int16_t *src = input_data + src_c8_offset + C8NUM * (j * input_width + real_x_start); int16_t *dst = tmp_data + C8NUM * (C4NUM * j + real_x_start); - memcpy(dst, src, (real_x_end - real_x_start) * C8NUM * sizeof(int16_t)); + memcpy(dst, src, (size_t)(real_x_end - real_x_start) * C8NUM * sizeof(int16_t)); } // input transform int dst_ic8_offset = dst_plane_offset + ic * TILE_NUM * C8NUM; - size_t dst_step = ic8 * C8NUM * TILE_NUM; + size_t dst_step = (size_t)ic8 * C8NUM * TILE_NUM; int16_t *trans_input_ptr = trans_input + dst_ic8_offset; Conv3x3Int8InputUnit(tmp_data, trans_input_ptr, dst_step, input_zp); } @@ -826,7 +826,7 @@ void Conv3x3Int8InputTransform(const int16_t *input_data, int16_t *trans_input, void Conv3x3Int8Gemm(int32_t *dst, const int16_t *src, const int16_t *weight, int oc, int ic8, size_t real_cal_num) { int oc4 = UP_DIV(oc, C4NUM); #ifdef ENABLE_ARM - IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, oc4 * 4 * 16 * sizeof(int32_t)); + IndirectGemmInt16to32_8x4(dst, src, weight, 16, ic8, oc4, (size_t)oc4 * 4 * 16 * sizeof(int32_t)); #else const int input_unit_square = 16; for (int c = 0; c < oc4; c++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c index 6b679514ed2..cfa160e0ac5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/deconv_int8.c @@ -20,9 +20,9 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8_t *out, int output_channel, const ConvParameter *conv_param) { /* row4x4-major(ih*iw x oc*kh*kw) -> row4-major(oh*ow x oc) */ - size_t input_plane = conv_param->input_w_ * conv_param->input_h_; - size_t kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; - size_t output_plane = conv_param->output_w_ * conv_param->output_h_; + int input_plane = conv_param->input_w_ * conv_param->input_h_; + int kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; + int output_plane = conv_param->output_w_ * conv_param->output_h_; int oc4 = UP_DIV(output_channel, C4NUM); int in_plane4 = UP_ROUND(input_plane, C4NUM); @@ -38,7 +38,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8 for (int c = 0; c < oc4; c++) { int32_t *dst_ptr = tmp + c * output_plane * C4NUM; const int32_t *src_ptr = src + c * in_plane4 * kernel_plane * C4NUM; - memset(dst_ptr, 0, output_plane * C4NUM * sizeof(int32_t)); + memset(dst_ptr, 0, (size_t)output_plane * C4NUM * sizeof(int32_t)); for (int ih = 0; ih < conv_param->input_h_; ih++) { for (int iw = 0; iw < conv_param->input_w_; iw++) { @@ -81,7 +81,7 @@ int DeConvPostInt8C4(const int32_t *src, const int32_t *bias, int32_t *tmp, int8 } /*ih*/ } /*oc*/ - PostFuncInt8C4(tmp, bias, out, output_channel, output_plane, conv_param->output_channel_, + PostFuncInt8C4(tmp, bias, out, output_channel, (size_t)output_plane, conv_param->output_channel_, conv_param->conv_quant_arg_.quant_multiplier_[0], conv_param->conv_quant_arg_.left_shift_[0], conv_param->conv_quant_arg_.right_shift_[0], conv_param->conv_quant_arg_.output_quant_args_[0].zp_, conv_param->conv_quant_arg_.out_act_min_[0], conv_param->conv_quant_arg_.out_act_max_[0]); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c index 7635dfef316..ea6138ff8fa 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/fixed_point.c @@ -71,7 +71,7 @@ int MultiplyByMultiplierAndRightShift(int32_t value, int32_t multiplier, int32_t return RoundingDivideByPOT(SaturatingRoundingDoublingHighMul(value, multiplier), right_shift); } -int FractionsBits(int integer_bits) { return 8 * sizeof(int32_t) - 1 - integer_bits; } +int FractionsBits(int integer_bits) { return 8 * (int)(sizeof(int32_t)) - 1 - integer_bits; } int FixedPoint_One(int integer_bits, int fractions_bits) { return (integer_bits == 0 ? INT32_MAX : ((1) << (uint32_t)(integer_bits == 0 ? 0 : fractions_bits))); @@ -129,7 +129,7 @@ int SaturatingRoundingMultiplyByPOT(int32_t x, int exponent) { if (exponent > 0) { const int min = INT32_MIN; const int max = INT32_MAX; - const int scalar_int_bits = 8 * sizeof(int32_t); + const int scalar_int_bits = 8 * (int)(sizeof(int32_t)); const int threshold = ((1 << (uint32_t)(scalar_int_bits - 1 - exponent)) - 1); const int positive_mask = x > threshold ? BitNot(0) : 0; const int negative_mask = x < -threshold ? 
BitNot(0) : 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c index 21d8909195e..3bd9bc88f39 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/hswish_int8.c @@ -39,7 +39,7 @@ int HSwishInt8(const int8_t *src, int length, int8_t *dst, HswishQuantArg *arg) if (arg->relu6_multiplier_exponent < 0) { relu6_value = RoundingDivideByPOT(relu6_value, -arg->relu6_multiplier_exponent); } - relu6_value = (relu6_value + (1 << 15)) >> 1; + relu6_value = (size_t)(relu6_value + (1 << 15)) >> 1; const int16_t preshift_output_value = SaturatingRoundingDoublingHighMulInt16(relu6_value, input_value_on_preshift_output_scale); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c index 3b9a893707d..ac2c3b04d13 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c @@ -104,7 +104,7 @@ void RowMajor2Row16x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, for (int ri = 0; ri < row_4div; ri += C4NUM) { for (int ci = 0; ci < col_16div; ci += C16NUM) { - size_t col_offset = col; + size_t col_offset = (size_t)col; int8_t *src_c = src_r + ci; int8_t *dst_c = dst_r + ci * C4NUM; #ifdef ENABLE_ARM64 @@ -207,7 +207,7 @@ void MatMulInt8_4x2_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, int c2div = c / C2NUM, c2mod = c % C2NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < deep_16; d++) { + for (int d = 0; d < (int)deep_16; d++) { int d16div = d / C16NUM, d16mod = d % C16NUM; size_t ai = r4div * deep_16 * C4NUM + d16div * C4NUM * C16NUM + r4mod * C16NUM + d16mod; size_t bi = c2div * deep_16 * C2NUM + d16div * C2NUM * C16NUM + c2mod * C16NUM + d16mod; @@ -269,9 
+269,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c #endif void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, - size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, + const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel) { /* row8x4-major * row4x8-major => (int8)row-major */ for (int r = 0; r < row; r++) { for (int c = 0; c < col; c++) { @@ -279,7 +279,7 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, int c8div = c / C8NUM, c8mod = c % C8NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < deep_4; d++) { + for (int d = 0; d < (int)deep_4; d++) { int d4div = d / C4NUM, d4mod = d % C4NUM; size_t ai = r8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + r8mod * C4NUM + d4mod; size_t bi = c8div * deep_4 * C8NUM + d4div * C8NUM * C4NUM + c8mod * C4NUM + d4mod; @@ -302,9 +302,9 @@ void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, } void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, - size_t per_channel, int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, + const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, const int32_t *filter_zp) { /* row4x4-major * row4x16-major => (int8)row-major */ for (int r = 0; r < row; r++) { for (int c = 
0; c < col; c++) { @@ -312,7 +312,7 @@ void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row int c16div = c / C16NUM, c16mod = c % C16NUM; size_t ci = r * stride + c; int32_t value = 0; - for (int d = 0; d < deep_4; d++) { + for (int d = 0; d < (int)deep_4; d++) { int d4div = d / C4NUM, d4mod = d % C4NUM; size_t ai = r4div * deep_4 * C4NUM + d4div * C4NUM * C4NUM + r4mod * C4NUM + d4mod; size_t bi = c16div * deep_4 * C16NUM + d4div * C16NUM * C4NUM + c16mod * C4NUM + d4mod; @@ -453,7 +453,7 @@ void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, #else int32_t tmp_sum_value[4] = {0}; for (int ici = 0; ici < ic_4div; ici += C4NUM) { - for (int i = 0; i < C4NUM; i++) { + for (size_t i = 0; i < C4NUM; i++) { tmp_sum_value[i] += src_ic[0 + i * input_channel]; tmp_sum_value[i] += src_ic[1 + i * input_channel]; tmp_sum_value[i] += src_ic[2 + i * input_channel]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h index f8fa9a85d72..87424e20098 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.h @@ -42,9 +42,9 @@ void MatmulInt8Opt(const int8_t *a, const int8_t *b, int8_t *dst, int row, int c /* optimize conv */ void RowMajor2Row8x4MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, int col); void MatMulInt8_8x8_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, - size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, + const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel); /* 
4x16 16x2 -> 4x2 */ /* arm32 conv1x1 */ @@ -61,9 +61,9 @@ void RowMajor2Row4x16MajorInt8(const int8_t *src_ptr, int8_t *dst_ptr, int row, void PackInput4x4AndInputSumPert(const int8_t *src_input, int8_t *packed_input, int32_t *input_sum, size_t input_channel, size_t plane_size, int32_t filter_zp); void MatMulInt8_4x16_r(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, int32_t maxi, - size_t per_channel, int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, const int32_t *left_shift, + const int32_t *right_shift, const int32_t *multiplier, int32_t output_zp, int32_t mini, + int32_t maxi, size_t per_channel, const int32_t *filter_zp); #ifdef ENABLE_ARM64 void MatmulInt8Neon64(const int8_t *a, const int8_t *b, int8_t *dst, int row4, int col4, int deep16, const int *a_sums, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c index fbda674d0cb..4ef53e8db1b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.c @@ -27,10 +27,10 @@ int16x4_t ClacSumHalfWordMul(int16x4_t scaled_input0, int16x4_t scaled_input1, i return vqmovn_s32(raw_sum); } -void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - MulQuantArg *quant_arg, int *index) { +void MulInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + const MulQuantArg *quant_arg, int *index) { int32x4_t output_multiplier_vec = vdupq_n_s32(quant_arg->output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << quant_arg->shift_left_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << 
(size_t)quant_arg->shift_left_); int32x4_t right_shift_out_vec = vdupq_n_s32(-quant_arg->shift_right_); int16x8_t out_zp_vec = vdupq_n_s16(quant_arg->out_quant_arg_.zp_); int8x16_t out_min_vec = vdupq_n_s8(quant_arg->output_activation_min_); @@ -104,8 +104,8 @@ void MulInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, } #endif -void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, - bool input1_broad, MulQuantArg *quant_arg) { +void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth, + int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg) { // input0 need broadcast int32_t zp1 = quant_arg->in_quant_args_[0].zp_; int32_t zp2 = quant_arg->in_quant_args_[1].zp_; @@ -215,8 +215,8 @@ void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int return; } -void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - MulQuantArg *quant_arg) { +void Mul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + const MulQuantArg *quant_arg) { int index = 0; #ifdef ENABLE_NEON MulInt8NEON(input0_data, input1_data, output_data, real_dst_count, quant_arg, &index); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h index f19d8e40f84..a02363a1e67 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/mul_int8.h @@ -28,9 +28,10 @@ #ifdef __cplusplus extern "C" { #endif -void Mul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, MulQuantArg *quant_arg); -void FastMul(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int depth, int64_t real_dst_count, - bool input1_broad, MulQuantArg *quant_arg); +void Mul(const int8_t 
*input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + const MulQuantArg *quant_arg); +void FastMul(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int depth, + int64_t real_dst_count, bool input1_broad, const MulQuantArg *quant_arg); #ifdef __cplusplus } #endif diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c index cd5ffe72ce2..ea1b4c45731 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.c @@ -849,7 +849,8 @@ void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvPara } } -void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param) { +void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, + const ConvParameter *conv_param) { // origin weight format : ohwi int input_channel = conv_param->input_channel_; int ic8 = input_channel / C8NUM * C8NUM; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h index e63127b066d..ecac124e689 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pack_int8.h @@ -40,7 +40,7 @@ void PackInputSum16x4Int8(const int8_t *input, int32_t *input_sum, const int32_t const ConvParameter *conv_param); void PackInputSum16x4PerLayer(const int8_t *src, int32_t *dst, int32_t filter_zp, size_t row4, size_t col16); void PackInputToC8Int8(const int8_t *input_data, int16_t *packed_input, ConvParameter *conv_param); -void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t *packed_weight_data, ConvParameter *conv_param); +void PackWeightToC8Int8(const int8_t *origin_weight_data, int16_t 
*packed_weight_data, const ConvParameter *conv_param); void Im2ColPackUnitInt8Opt(const int8_t *input_data, int8_t *packed_input, int8_t *matmul_input, int real_cal_num, int block_index, const int32_t *filter_zp, int32_t *input_sum, const ConvParameter *conv_param, bool per_channel, bool is_optimize); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c index 10f648882a7..e7c0c0eaad6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/pad_int8.c @@ -24,9 +24,9 @@ int PadConstant4D(const int8_t *in_data, int8_t *out_data, const int32_t *in_dim for (int n = 0; n < in_dims[0]; n++) { for (int h = tid; h < in_dims[1]; h += thread_num) { for (int w = 0; w < in_dims[2]; w++) { - const int8_t *in = in_data + offset(in_dims, n, h, w, 0); - int8_t *out = out_data + offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]); - memcpy(out, in, copy_size * sizeof(int8_t)); + const int8_t *in = in_data + Offset(in_dims, n, h, w, 0); + int8_t *out = out_data + Offset(out_dims, n + paddings[0], h + paddings[2], w + paddings[4], paddings[6]); + memcpy(out, in, (size_t)copy_size * sizeof(int8_t)); } } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c index 0ec6fc72f52..6c0620a6350 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/quant_dtype_cast_int8.c @@ -112,7 +112,7 @@ int UInt8ToInt8(const uint8_t *real_values, int8_t *quant_values, int size) { } for (int i = 0; i < size; ++i) { - int temp = real_values[i] - 128; + int temp = (int)real_values[i] - 128; if (temp > 127) { quant_values[i] = 127; } else if (temp < -128) { diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c index 31dd3e92b1d..1e7cb91c2a9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/resize_int8.c @@ -173,8 +173,8 @@ int ResizeNearestNeighborInt8Simple(const int8_t *input_data, int8_t *output_dat for (x = 0; x < output_shape[2]; x++) { int input_x = 0; ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); - int in_offset = offset(input_shape, batch, input_y, input_x, 0); - int out_offset = offset(output_shape, batch, y, x, 0); + int in_offset = Offset(input_shape, batch, input_y, input_x, 0); + int out_offset = Offset(output_shape, batch, y, x, 0); memcpy(output_data + out_offset, input_data + in_offset, c * sizeof(int8_t)); } } @@ -214,8 +214,8 @@ int ResizeNearestNeighborInt8(const int8_t *input_data, int8_t *output_data, con int input_x = 0; ComputeNearestNeighborInt(x, in_w, new_width, align_corners, &input_x); for (c = 0; c < output_shape[3]; c++) { - int in_offset = offset(input_shape, batch, input_y, input_x, c); - int out_offset = offset(output_shape, batch, y, x, c); + int in_offset = Offset(input_shape, batch, input_y, input_x, c); + int out_offset = Offset(output_shape, batch, y, x, c); int32_t out_value = MultiplyByQuantizedMultiplier( input_data[in_offset] - quant_in->zp_, multiplier->multiplier_, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c index bb33c643f17..e007e6a7754 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/scale_int8.c @@ -34,8 +34,8 @@ int16x4_t ClacSumHalfWordMul3(int32x4_t scaled_input0, int32x4_t scaled_input1, const ScaleParameter *scale_param) { int32x4_t output_multiplier_vec = 
vdupq_n_s32(scale_param->scale_mul_arg_.multiplier_); int32x4_t output_multiplier_vec2 = vdupq_n_s32(scale_param->offset_mul_arg_.multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32(1 << scale_param->scale_mul_arg_.left_shift_); - int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << scale_param->offset_mul_arg_.left_shift_); + int32x4_t left_shift_out_vec = vdupq_n_s32(1 << (size_t)(scale_param->scale_mul_arg_.left_shift_)); + int32x4_t left_shift_out_vec2 = vdupq_n_s32(1 << (size_t)(scale_param->offset_mul_arg_.left_shift_)); int32x4_t input_scale = vmulq_s32(scaled_input0, scaled_input1); int32x4_t raw_sum = RoundingDivideByPOTInt32x4( SaturatingRoundingDoublingHighMulInt32x4(vmulq_s32(input_scale, left_shift_out_vec), output_multiplier_vec), diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c index ace1417b287..64a62152168 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/sub_int8.c @@ -24,7 +24,7 @@ #ifdef ENABLE_NEON int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, int32x4_t left_shift_out_vec, - int32x4_t output_multiplier_vec, SubQuantArg *para) { + int32x4_t output_multiplier_vec, const SubQuantArg *para) { int32x4_t raw_data = vsubq_s32(scaled_input0, scaled_input1); raw_data = RoundingDivideByPOTInt32x4(vqrdmulhq_s32(vmulq_s32(raw_data, left_shift_out_vec), output_multiplier_vec), @@ -35,14 +35,14 @@ int16x4_t DoClacSumHalfWord(int32x4_t scaled_input0, int32x4_t scaled_input1, in return vqmovn_s32(raw_data); } -void SubInt8NEON(int8_t *input0_data, int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, - SubQuantArg *para, int *index) { +void SubInt8NEON(const int8_t *input0_data, const int8_t *input1_data, int8_t *output_data, int64_t real_dst_count, + const SubQuantArg *para, int *index) { int32x4_t left_shift_result0_vec = 
vdupq_n_s32(para->left_shift_result0_); int32x4_t left_shift_result1_vec = vdupq_n_s32(para->left_shift_result1_); int32x4_t input0_multiplier_vec = vdupq_n_s32(para->input0_multiplier_); int32x4_t input1_multiplier_vec = vdupq_n_s32(para->input1_multiplier_); int32x4_t output_multiplier_vec = vdupq_n_s32(para->output_multiplier_); - int32x4_t left_shift_out_vec = vdupq_n_s32((1 << para->left_shift_out_)); + int32x4_t left_shift_out_vec = vdupq_n_s32((1 << (size_t)para->left_shift_out_)); int32x4_t right_shift0_vec = vdupq_n_s32(-para->right_shift0_); int32x4_t right_shift1_vec = vdupq_n_s32(-para->right_shift1_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c index bf3fd14d2c9..873c11857cf 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/transpose_int8.c @@ -226,16 +226,16 @@ void TransposeDimsInt8(const int8_t *in_data, int8_t *out_data, const int *outpu const int *strides = transpose_param->strides_; const int *out_strides = transpose_param->out_strides_; int num_axes = transpose_param->num_axes_; - size_t data_size = (*out_strides) * output_shape[0]; + size_t data_size = (size_t)((*out_strides) * output_shape[0]); size_t offset_size = UP_DIV(data_size, thread_num); size_t task_offset = offset_size * task_id; - int count = data_size - task_offset; - if (count <= 0) { + size_t count = data_size - task_offset; + if (data_size < task_offset) { return; } count = MSMIN(offset_size, count); for (size_t idx = task_offset; idx < task_offset + count; ++idx) { - int pos = idx; + int pos = (int)idx; int output_idx = 0; int input_idx = 0; for (int i = 0; i < num_axes; ++i) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c index a46a3dfe864..e5f8c0aab59 100644 
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/unsqueeze_int8.c @@ -24,7 +24,7 @@ int Int8Unsqueeze(const int8_t *input_ptr, int8_t *output_ptr, UnSqueezeParamete float input_scale = para_->quant_arg.in_quant_args_.scale_; int8_t input_zp = para_->quant_arg.in_quant_args_.zp_; - for (int i = task_id; i < data_size; i += para_->thread_count_) { + for (int i = task_id; i < (int)data_size; i += para_->thread_count_) { output_ptr[i] = output_zp + round(1 / output_scale * input_scale * (input_ptr[i] - input_zp)); } return 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h index a513f4608b6..d11feea2207 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/matmul_parameter.h @@ -23,14 +23,15 @@ typedef void (*MATMUL_OPT_R4_FUNC)(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); typedef void (*MATMUL_OPT_R_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); typedef void (*MATMUL_OPT_DP_FUNC)(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, int *filter_zp); + size_t stride, const 
int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, + const int *filter_zp); typedef enum OutType { OutType_C8 = 0, OutType_Nhwc = 1, OutType_TileC8 = 2 } OutType; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h index 1891dd7ff00..52241ba13c8 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/op_base.h @@ -61,6 +61,8 @@ #define DIMENSION_6D 6 #define DIMENSION_7D 7 #define DIMENSION_8D 8 +#define DIMENSION_10D 10 +#define DIMENSION_11D 11 #define kInputIndex 0 #define kWeightIndex 1 #define kBiasIndex 2 diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h index a6f2a1b5e41..c741599512c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/pad_parameter.h @@ -18,8 +18,8 @@ #include "nnacl/op_base.h" -#define MAX_PAD_SIZE 8 -#define DEFAULT_PAD_NDIMS 4 +#define MAX_PAD_SIZE 12 +#define DEFAULT_PAD_NDIMS 6 typedef struct PadQuantArg { QuantArg *in_quant_args_; @@ -30,13 +30,13 @@ typedef struct PadQuantArg { typedef struct PadParameter { // Primitive parameter OpParameter op_parameter_; - int paddings_[MAX_SHAPE_SIZE]; + int paddings_[MAX_PAD_SIZE]; int pad_mode_; float constant_value_; // shape correlative int padding_length; // other parameter - int in_strides[COMM_SHAPE_SIZE]; + int in_strides[DEFAULT_PAD_NDIMS]; int out_strides[DEFAULT_PAD_NDIMS]; int mirror_offset_; PadQuantArg pad_quant_arg_; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc index e2074c70a4a..b3f879dde5e 100644 --- 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/ps/pserver_kernel.cc @@ -20,6 +20,11 @@ namespace mindspore { namespace kernel { namespace ps { void PServerKernel::Shard(std::vector *shape, int axis) { + MS_EXCEPTION_IF_NULL(shape); + if ((*shape).size() <= IntToSize(axis)) { + MS_LOG(EXCEPTION) << "Shape size is invalid."; + return; + } (*shape)[IntToSize(axis)] = LongToSize(Util::LocalShard(SizeToLong((*shape)[IntToSize(axis)]), SizeToLong(rank_id_), SizeToLong(pserver_num_))); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc index c790b4d926c..0d81da83a2e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pyfunc/py_func_cpu_kernel.cc @@ -135,8 +135,7 @@ void ScalarToRawMemory(const py::object &obj, const TypePtr &type, const Address void ArrayToRawMemory(const py::array &array, const AddressPtr &address) { if (static_cast(array.flags()) & pybind11::detail::npy_api::NPY_ARRAY_C_CONTIGUOUS_) { const py::buffer_info &buf_info = array.request(); - CHECK_RET_WITH_EXCEPT(memcpy_s(address->addr, address->size, buf_info.ptr, buf_info.size * buf_info.itemsize), EOK, - "memcpy failed."); + CHECK_RET_WITH_EXCEPT(memcpy_s(address->addr, address->size, buf_info.ptr, buf_info.size), EOK, "memcpy failed."); } else { // Transform numpy array to row major buffer. 
Py_buffer pybuf; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc index 5ba93e43fcb..2a987be45d2 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.cc @@ -39,7 +39,7 @@ void SearchSortedCPUKernel::InitKernel(const CNodePtr &kernel_node) { template const S *SearchSortedCPUKernel::CustomizedLowerBound(const S *seq_start, const S *seq_end, const S key) { while (seq_start < seq_end) { - const S *mid = seq_start + ((seq_end - seq_start) >> 1); + const S *mid = seq_start + ((seq_end - seq_start) / 2); if (!(key <= *mid)) { seq_start = mid + 1; } else { @@ -61,11 +61,12 @@ bool SearchSortedCPUKernel::Launch(const std::vector & size_t seq_dim = sequence_shape_.size(); size_t search_repeat = values_shape_.back(); - auto task = [&](size_t start, size_t end) { + auto task = [this, &sequence, &values, &output, seq_dim, search_repeat](size_t start, size_t end) { for (size_t i = start; i < end; i++) { auto seq_start = (seq_dim == 1) ? sequence : sequence + (i / search_repeat) * search_len; - output[i] = right_ ? std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start - : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start; + auto result = right_ ? 
std::upper_bound(seq_start, seq_start + search_len, values[i]) - seq_start + : CustomizedLowerBound(seq_start, seq_start + search_len, values[i]) - seq_start; + output[i] = static_cast(result); } }; CPUKernelUtils::ParallelFor(task, elem_num); @@ -92,8 +93,8 @@ void SearchSortedCPUKernel::CheckParam(const std::vector &inpu } auto sequence = reinterpret_cast(inputs[0]->addr); - size_t list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies()); - auto task = [&](size_t start, size_t end) { + int list_count = accumulate(sequence_shape_.begin(), sequence_shape_.end() - 1, 1, std::multiplies()); + auto task = [this, &sequence](size_t start, size_t end) { for (size_t i = start; i < end; i++) { for (size_t j = 0; j < search_len - 1; j++) { if (sequence[i * search_len + j] > sequence[i * search_len + j + 1]) { @@ -104,6 +105,5 @@ void SearchSortedCPUKernel::CheckParam(const std::vector &inpu }; CPUKernelUtils::ParallelFor(task, list_count); } - } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h index 87cea83a5ef..9333e72dc96 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/searchsorted_cpu_kernel.h @@ -42,7 +42,7 @@ class SearchSortedCPUKernel : public CPUKernel { std::vector sequence_shape_; std::vector values_shape_; std::vector output_shape_; - size_t search_len; + size_t search_len{0}; }; MS_REG_CPU_KERNEL_T_S( @@ -104,8 +104,6 @@ MS_REG_CPU_KERNEL_T_S( SearchSorted, KernelAttr().AddInputAttr(kNumberTypeInt8).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt64), SearchSortedCPUKernel, int8_t, int64_t); - } // namespace kernel } // namespace mindspore - #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SEARCHSORTED_CPU_KERNEL_H_ diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc index 40814707d1d..32606a9a4e7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.cc @@ -35,12 +35,12 @@ void SGDCPUKernel::InitKernel(const CNodePtr &kernel_node) { template void SGDCPUKernel::CheckParam(const std::vector &inputs, const std::vector &outputs) { - // inputs: params, grad, lr, accum, momentum, stat + // inputs: param, grad, lr, accum, momentum, stat if (inputs.size() != kInputSize) { MS_LOG(EXCEPTION) << "Input number is " << inputs.size() << ", but SGD needs 6 inputs."; } - // output: param + // output: output_param if (outputs.size() != kOutputSize) { MS_LOG(EXCEPTION) << "Output number is " << outputs.size() << ", but SGD needs 1 outputs."; } @@ -60,18 +60,20 @@ bool SGDCPUKernel::Launch(const std::vector &inputs, const std::v auto output_param = reinterpret_cast(outputs[0]->addr); size_t elem_num = inputs[0]->size / sizeof(T); - auto task = [&](size_t start, size_t end) { + auto task = [this, ¶m, &grad, &lr, &accum, &momentum, &stat, &output_param](size_t start, size_t end) { + T ZERO = static_cast(0); + T ONE = static_cast(1); for (size_t i = start; i < end; i++) { T grad_new = grad[i]; - if (weight_decay_ > 0) { + if (weight_decay_ > static_cast(0.0)) { grad_new += param[i] * static_cast(weight_decay_); } - if (momentum[0] > static_cast(0)) { - if (stat[i] > static_cast(0)) { + if (momentum[0] > ZERO) { + if (stat[i] > ZERO) { accum[i] = grad_new; - stat[i] = static_cast(0); + stat[i] = ZERO; } else { - accum[i] = accum[i] * momentum[0] + static_cast(1.0 - dampening_) * grad_new; + accum[i] = accum[i] * momentum[0] + (ONE - static_cast(dampening_)) * grad_new; } if (nesterov_) { grad_new += accum[i] * momentum[0]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h index 93f25d1b657..95fb461f440 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sgd_cpu_kernel.h @@ -36,8 +36,8 @@ class SGDCPUKernel : public CPUKernel { private: static void CheckParam(const std::vector &inputs, const std::vector &outputs); - float dampening_; - float weight_decay_; + float dampening_{0.0}; + float weight_decay_{0.0}; bool nesterov_{true}; }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc index 837ddeca56c..10b605d4ad0 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sort_cpu_kernel.cc @@ -24,12 +24,12 @@ template void SortCpuKernel::InitKernel(const CNodePtr &kernel_node) { size_t input_count = AnfAlgo::GetInputTensorNum(kernel_node); if (input_count != 1) { - MS_LOG(EXCEPTION) << input_count << " inputs were provided, but SortCpuKernel expects 1."; + MS_LOG(EXCEPTION) << input_count << " inputs were provided, but Sort expects 1."; } size_t output_count = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_count != 2) { - MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for SortCpuKernel."; + MS_LOG(EXCEPTION) << "Number of outputs is " << output_count << ", but should be 2 for Sort."; } auto x_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); @@ -64,7 +64,7 @@ template bool SortCpuKernel::Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs) { if (inputs.size() != 1 || outputs.size() != 2) { - MS_LOG(EXCEPTION) << "TopK needs 1 input and 2 outputs, but get inputs: " << inputs.size() + MS_LOG(EXCEPTION) << "Sort needs 1 input and 2 outputs, but get inputs: " << inputs.size() << "outputs: " << outputs.size(); } if (inputs[0]->size != outer_size_ * axis_size_ 
* inner_size_ * sizeof(T)) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc index 8f1dc225320..338ff4b405c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/split_cpu_kernel.cc @@ -64,7 +64,8 @@ void SplitCPUKernel::LaunchSplit(T *input, T **output, size_t size) { param.split_count_ *= input_shape_[i]; } auto task = [&](size_t start, size_t end) { - (void)DoSplit(input, reinterpret_cast(output), &input_shape_[0], start, end - start, ¶m, sizeof(T)); + (void)DoSplit(input, reinterpret_cast(output), &input_shape_[0], SizeToInt(start), SizeToInt(end - start), + ¶m, SizeToInt(sizeof(T))); }; CPUKernelUtils::ParallelForAutoSearch(task, param.split_count_ * param.num_split_, ¶llel_search_info_); return; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc index 4dba82b928b..a142c9ab695 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/transpose_cpu_kernel.cc @@ -46,8 +46,8 @@ void TransposeCPUFwdKernel::InitKernel(const CNodePtr &kernel_node) { transpose_param_.strides_[num_axes - 1] = 1; transpose_param_.out_strides_[num_axes - 1] = 1; for (int i = num_axes - 2; i >= 0; i--) { - transpose_param_.strides_[i] = input_shape_[i + 1] * transpose_param_.strides_[i + 1]; - transpose_param_.out_strides_[i] = output_shape_[i + 1] * transpose_param_.out_strides_[i + 1]; + transpose_param_.strides_[i] = SizeToInt(input_shape_[i + 1]) * transpose_param_.strides_[i + 1]; + transpose_param_.out_strides_[i] = SizeToInt(output_shape_[i + 1]) * transpose_param_.out_strides_[i + 1]; } launch_map_[kNumberTypeInt8] = &TransposeCPUFwdKernel::LaunchKernel; launch_map_[kNumberTypeInt16] = &TransposeCPUFwdKernel::LaunchKernel; @@ -87,7 +87,7 @@ void 
TransposeCPUFwdKernel::LaunchKernel(const std::vector &inputs, } size_t data_count = (inputs[0]->size) / sizeof(T); if (axes_.size() <= DIMENSION_6D && data_count < MAX_TRANSPOSE_SERIAL_SIZE) { - int res = NNACL_ERR; + int res = static_cast(NNACL_ERR); if constexpr (std::is_same_v) { res = DoTransposeInt8(input_addr, output_addr, output_shape, &transpose_param_); } else if constexpr (std::is_same_v) { @@ -121,7 +121,7 @@ template void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, const int *output_shape, size_t count) { auto max_thread_num = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); const float block_size = 128.0; - size_t thread_num = count < block_size * max_thread_num ? std::ceil(count / block_size) : max_thread_num; + size_t thread_num = count < block_size * max_thread_num ? FloatToSize(std::ceil(count / block_size)) : max_thread_num; std::vector tasks; std::function TransposeDims; @@ -147,13 +147,13 @@ void TransposeCPUFwdKernel::ParallelRun(const T *input_addr, T *output_addr, con TransposeDims = &TransposeDimsBool; } for (int task_id = 0; task_id < SizeToInt(thread_num); ++task_id) { - auto task = [&, task_id, thread_num]() { + auto task = [this, &TransposeDims, &input_addr, &output_addr, &output_shape, task_id, thread_num]() { TransposeDims(input_addr, output_addr, output_shape, &transpose_param_, task_id, SizeToInt(thread_num)); return common::SUCCESS; }; - tasks.emplace_back(task); + (void)tasks.emplace_back(task); } - common::ThreadPool::GetInstance().SyncRun(tasks); + (void)common::ThreadPool::GetInstance().SyncRun(tasks); } } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc index 39c113c26dd..d8ce599babc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unpack_cpu_kernel.cc @@ -29,18 +29,18 @@ 
void UnpackCPUKernel::InitKernel(const CNodePtr &kernel_node) { } output_num_ = LongToSize(AnfAlgo::GetNodeAttr(kernel_node, "num")); unstack_param_.num_ = SizeToInt(output_num_); - unstack_param_.axis_ = LongToSize(axis_tmp); + unstack_param_.axis_ = LongToInt(axis_tmp); unstack_param_.pre_dims_ = 1; unstack_param_.axis_dim_ = 1; unstack_param_.after_dims_ = 1; for (size_t i = 0; i < input_shape.size(); i++) { - if (static_cast(i) < unstack_param_.axis_) { - unstack_param_.pre_dims_ *= input_shape[i]; - } else if (static_cast(i) > unstack_param_.axis_) { - unstack_param_.after_dims_ *= input_shape[i]; + if (i < IntToSize(unstack_param_.axis_)) { + unstack_param_.pre_dims_ *= SizeToInt(input_shape[i]); + } else if (i > IntToSize(unstack_param_.axis_)) { + unstack_param_.after_dims_ *= SizeToInt(input_shape[i]); } else { - unstack_param_.axis_dim_ = input_shape[i]; + unstack_param_.axis_dim_ = SizeToInt(input_shape[i]); } } dtype_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h index f38e1cace11..496c59e1392 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h @@ -49,7 +49,7 @@ class ArrayReduceGpuKernel : public GpuKernel { } T *input_addr = GetDeviceAddress(inputs, 0); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = GetDeviceAddress(workspace, 0); + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); T alpha = static_cast(1.0f); T beta = static_cast(0.0f); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h index 78dc29941e5..d91c0514091 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/cast_gpu_kernel.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,10 +36,18 @@ class CastGpuKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) override { - S *input_addr = GetDeviceAddress(inputs, 0); - T *output_addr = GetDeviceAddress(outputs, 0); + S *input_addr = GetPossiblyNullDeviceAddress(inputs, 0); + T *output_addr = GetPossiblyNullDeviceAddress(outputs, 0); + + if (input_addr == nullptr && output_addr == nullptr) { + return true; + } else if (input_addr != nullptr && output_addr != nullptr) { + Cast(input_size_, input_addr, output_addr, reinterpret_cast(stream_ptr)); + } else { + MS_LOG(EXCEPTION) + << "The input and output device addresses for CastGpuKernel should be both null or both not null."; + } - Cast(input_size_, input_addr, output_addr, reinterpret_cast(stream_ptr)); return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h index e261fcdfa00..0331cd85a20 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/concatv2_gpu_kernel.h @@ -43,11 +43,20 @@ class ConcatV2GpuFwdKernel : public GpuKernel { bool Launch(const std::vector &inputs, const std::vector &workspace, const std::vector &outputs, void *stream_ptr) override { + if (input_num_ == 0) { + return true; + } + T *output = GetDeviceAddress(outputs, 0); T **inputs_device = GetDeviceAddress(workspace, 0); int *len_axis_device = GetDeviceAddress(workspace, 1); + int current_dim = 0; for (size_t i = 0; i < inputs.size(); i++) { - inputs_host_[i] = 
GetDeviceAddress(inputs, i); + T *input = GetPossiblyNullDeviceAddress(inputs, i); + if (input != nullptr) { + inputs_host_[current_dim] = input; + current_dim++; + } } CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_, cudaMemcpyAsync(inputs_device, inputs_host_.get(), sizeof(T *) * input_num_, @@ -83,14 +92,21 @@ class ConcatV2GpuFwdKernel : public GpuKernel { input_num_ = SizeToInt(AnfAlgo::GetInputTensorNum(kernel_node)); inputs_host_ = std::make_unique(input_num_); len_axis_ = std::make_unique(input_num_); + int current_dim = 0; for (int i = 0; i < input_num_; i++) { size_t input_size = 1; auto input_shape = AnfAlgo::GetInputDeviceShape(kernel_node, i); for (size_t j = 0; j < input_shape.size(); j++) { input_size *= input_shape[j]; } - input_size_list_.push_back(input_size * sizeof(T)); - len_axis_[i] = SizeToInt(input_shape[axis_]); + + if (input_size == 0) { + input_num_--; + } else { + input_size_list_.push_back(input_size * sizeof(T)); + len_axis_[current_dim] = SizeToInt(input_shape[axis_]); + current_dim++; + } } workspace_size_list_.push_back(sizeof(T *) * input_num_); workspace_size_list_.push_back(sizeof(int) * input_num_); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h index c3384f34e7e..474858fbe52 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_range_gpu_kernel.h @@ -102,7 +102,7 @@ class DynamicRangeGpuKernel : public GpuKernel { "cudaStreamSynchronize failed"); std::vector output_type = {AnfAlgo::GetOutputInferDataType(kernel_node_.lock(), 0)}; - std::vector> output_shape = {{(size_t)output_shape_}}; + std::vector> output_shape = {{static_cast(output_shape_)}}; AnfAlgo::SetOutputInferTypeAndShape(output_type, output_shape, kernel_node_.lock().get()); } diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h index dd15fa7d221..ba3c3230d52 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/dynamic_shape_gpu_kernel.h @@ -75,8 +75,8 @@ class DynamicShapeGpuKernel : public GpuKernel { } void ResetResource() noexcept override { - input_size_ = -1; - output_size_ = -1; + input_size_ = 0; + output_size_ = 0; prev_node_output_shape_.clear(); input_size_list_.clear(); output_size_list_.clear(); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h index a6e25cac507..e5e64323aec 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/one_hot_gpu_kernel.h @@ -49,8 +49,10 @@ class OneHotGpuFwdKernel : public GpuKernel { auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); int64_t input_dims = static_cast(input_shape.size()); - if (axis >= input_dims) { - MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size(); + int64_t output_dims = static_cast(output_shape.size()); + if (axis >= input_dims || axis >= output_dims) { + MS_LOG(ERROR) << "invalid one hot axis value: " << axis << " for input dims size: " << input_shape.size() + << " or output dims size: " << output_dims; return false; } const int64_t default_axis = -1; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h index 28ede23470d..0aa4d397b3f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h +++ 
b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_gpu_kernel.h @@ -146,7 +146,7 @@ class SliceGpuFwdKernel : public GpuKernel { begin_ = GetAttr>(kernel_node, "begin"); for (size_t i = 0; i < input_shape.size(); i++) { - if (input_shape[i] <= 0 || size_[i] <= 0) { + if (i >= size_.size() || input_shape[i] <= 0 || size_[i] <= 0) { MS_LOG(WARNING) << "Slice output is null."; is_null_input_ = true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h index 9ede49565c8..1866fe071fc 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/slice_grad_gpu_kernel.h @@ -105,12 +105,12 @@ class SliceGradGpuKernel : public GpuKernel { std::swap(size_[1], size_[2]); } for (size_t i = 0; i < begin_.size(); i++) { - if (begin_[i] < 0) { + if (begin_[i] < 0 && i < input_shape_.size()) { begin_[i] = begin_[i] + input_shape_[i]; } } for (size_t i = 0; i < size_.size(); i++) { - if (size_[i] < 0) { + if (size_[i] < 0 && i < input_shape_.size()) { size_[i] = (size_[i] + input_shape_[i]) > 0 ? (size_[i] + input_shape_[i]) : 0; } } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h index 71b3b9876be..5e22fe09f35 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_common.h @@ -43,6 +43,10 @@ class StridedSliceGpuCommon { strides_ = AnfAlgo::GetNodeAttr>(kernel_node, "strides"); for (size_t i = 0; i < MAX_DIMS; i++) { + if (i >= input_shape_.size()) { + input_shape_.push_back(1); + } + if (i < begin_.size()) { int64_t dim = input_shape_[i]; begin_[i] = std::min(begin_[i] < 0 ? 
std::max(begin_[i] + dim, static_cast(0)) : begin_[i], dim - 1); @@ -60,10 +64,6 @@ class StridedSliceGpuCommon { if (i >= strides_.size()) { strides_.push_back(1); } - - if (i >= input_shape_.size()) { - input_shape_.push_back(1); - } } } @@ -71,7 +71,7 @@ class StridedSliceGpuCommon { auto begin_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "begin_mask"); auto begin_mask = Dec2Bin(begin_mask_int); for (size_t i = 0; i < begin_mask.size(); i++) { - if (begin_mask[i]) { + if (begin_mask[i] && i < MAX_DIMS) { begin_[i] = 0; } } @@ -79,7 +79,7 @@ class StridedSliceGpuCommon { auto end_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "end_mask"); auto end_mask = Dec2Bin(end_mask_int); for (size_t j = 0; j < end_mask.size(); j++) { - if (end_mask[j]) { + if (end_mask[j] && j < MAX_DIMS) { end_[j] = input_shape_[j]; } } @@ -87,7 +87,7 @@ class StridedSliceGpuCommon { auto ellipsis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "ellipsis_mask"); auto ellipsis_mask = Dec2Bin(ellipsis_mask_int); for (size_t k = 0; k < ellipsis_mask.size(); k++) { - if (ellipsis_mask[k]) { + if (ellipsis_mask[k] && k < MAX_DIMS) { begin_[k] = 0; end_[k] = input_shape_[k]; strides_[k] = 1; @@ -97,7 +97,7 @@ class StridedSliceGpuCommon { auto new_axis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "new_axis_mask"); auto new_axis_mask = Dec2Bin(new_axis_mask_int); for (size_t l = 0; l < new_axis_mask.size(); l++) { - if (new_axis_mask[l]) { + if (new_axis_mask[l] && l < MAX_DIMS) { begin_[l] = 0; end_[l] = input_shape_[l]; strides_[l] = 1; @@ -107,7 +107,7 @@ class StridedSliceGpuCommon { auto shrink_axis_mask_int = AnfAlgo::GetNodeAttr(kernel_node, "shrink_axis_mask"); auto shrink_axis_mask = Dec2Bin(shrink_axis_mask_int); for (size_t m = 0; m < shrink_axis_mask.size(); m++) { - if (shrink_axis_mask[m]) { + if (shrink_axis_mask[m] && m < MAX_DIMS) { end_[m] = end_[m] > begin_[m] ? begin_[m] + 1 : begin_[m] - 1; strides_[m] = end_[m] > begin_[m] ? 
1 : -1; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h index fc483c6f985..56e0aea1e60 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/strided_slice_gpu_kernel.h @@ -65,13 +65,13 @@ class StridedSliceGpuKernel : public GpuKernel, public StridedSliceGpuCommon { protected: void InitSizeLists() override { size_t size = sizeof(T); - for (size_t i = 0; i < MAX_DIMS; i++) { + for (size_t i = 0; i < input_shape_.size(); i++) { size *= input_shape_[i]; } input_size_list_.push_back(size); size_t size1 = sizeof(T); - for (size_t i = 0; i < MAX_DIMS; i++) { + for (size_t i = 0; i < output_shape_.size(); i++) { size1 *= output_shape_[i]; } output_size_list_.push_back(size1); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h index 2083dc63c17..c2f597c2958 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/gpu_kernel.h @@ -99,6 +99,19 @@ class GpuKernel : public KernelMod { if (index >= addr_list.size()) { MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")"; } + + if ((addr_list[index] == nullptr) || (addr_list[index]->addr == nullptr) || (addr_list[index]->size == 0)) { + MS_LOG(EXCEPTION) << "The device address is empty, address index: " << index; + } + + return reinterpret_cast(addr_list[index]->addr); + } + + template + inline T *GetPossiblyNullDeviceAddress(const std::vector &addr_list, size_t index) { + if (index >= addr_list.size()) { + MS_LOG(EXCEPTION) << "Address index(" << index << ") out of range(" << addr_list.size() << ")"; + } // Kernels may run normally without workspace, the addr_list[index] maybe nullptr. 
if ((addr_list[index] == nullptr) || (addr_list[index]->size == 0)) { return nullptr; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h index e1d5277e7a1..1f27011014b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_gpu_kernel.h @@ -52,18 +52,16 @@ class BatchNormGpuKernel : public GpuKernel { auto running_variance = GetDeviceAddress(inputs, 4); T *z = nullptr; if (bn_ops_ == CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION) { - z = GetDeviceAddress(inputs, 5); + z = GetPossiblyNullDeviceAddress(inputs, 5); } auto y = GetDeviceAddress(outputs, 0); - auto reserve_addr = GetDeviceAddress(outputs, 2); - T *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 0); - } + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + const float alpha = 1; const float beta = 0; if (is_train_) { + auto reserve_addr = GetPossiblyNullDeviceAddress(outputs, 2); auto save_mean = GetDeviceAddress(outputs, 3); auto save_variance = GetDeviceAddress(outputs, 4); CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h index ae36b56df85..69720b5a854 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/batch_norm_grad_gpu_kernel.h @@ -71,8 +71,6 @@ class BatchNormGradGpuKernel : public GpuKernel { auto scale = GetDeviceAddress(inputs, 2); auto save_mean = GetDeviceAddress(inputs, 3); auto save_variance = GetDeviceAddress(inputs, 4); - auto reserve_addr = GetDeviceAddress(inputs, 5); - reserve_size_ = inputs[5]->size; void *bias = nullptr; T *y = nullptr; if (bn_ops_ != CUDNN_BATCHNORM_OPS_BN) { @@ -88,11 +86,11 @@ 
class BatchNormGradGpuKernel : public GpuKernel { dz = GetDeviceAddress(outputs, 3); } - void *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 0); - } if (is_train_) { + auto reserve_addr = GetPossiblyNullDeviceAddress(inputs, 5); + reserve_size_ = inputs[5]->size; + void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + const float alpha_data_diff = 1; const float alpha_param_diff = 1; const float beta_param_diff = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h index a50f2a38f3c..f007d5073a7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/bias_add_grad_gpu_kenel.h @@ -58,8 +58,8 @@ class BiasAddGradGpuKernel : public GpuKernel { "cudaMemcpyAsync failed."); } else { if (use_cudnn_) { // shared memory not satisfied or num_dim > 4 - T *indices_addr = GetDeviceAddress(workspace, 0); - T *workspace_addr = GetDeviceAddress(workspace, 1); + T *indices_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); const float alpha = 1; const float beta = 0; CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h index 46cd0de59ef..2c5708c3b24 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_gpu_kernel.h @@ -46,10 +46,7 @@ class Conv2dGpuFwdKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 0); - } + T *workspace_addr = 
GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; const float beta = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h index 7e5b955cc37..5ef67871fde 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_filter_gpu_kernel.h @@ -71,16 +71,13 @@ class ConvGradFilterGpuBkwKernel : public GpuKernel { T *dy = GetDeviceAddress(inputs, 0); T *x = GetDeviceAddress(inputs, 1); T *dw = GetDeviceAddress(outputs, 0); - T *work_space = nullptr; - if (workspace_size_ != 0) { - work_space = GetDeviceAddress(workspace, 0); - } + T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded = GetDeviceAddress(workspace, 1); + T *padded = GetPossiblyNullDeviceAddress(workspace, 1); if (data_format_ == kOpFormat_NHWC) { CalPadNHWC(padded_size_ / sizeof(T), x, n_, old_height_, old_width_, c_, old_height_ + pad_height_, old_width_ + pad_width_, pad_top_, pad_left_, pad_value_, padded, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h index 2453d50df71..a4b64bac75c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv2d_grad_input_gpu_kernel.h @@ -74,14 +74,11 @@ class ConvGradInputGpuBkwKernel : public GpuKernel { T *dy = GetDeviceAddress(inputs, 0); T *w = GetDeviceAddress(inputs, 1); T *dx = GetDeviceAddress(outputs, 0); - T *work_space = nullptr; - if (workspace_size_ != 0) { - work_space = GetDeviceAddress(workspace, 0); - } + T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; if (use_pad_) { - T *padded = 
GetDeviceAddress(workspace, 1); + T *padded = GetPossiblyNullDeviceAddress(workspace, 1); CHECK_CUDNN_RET_WITH_EXCEPT( kernel_node_, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h index 3c01afaa5e4..3ba26473ad1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_gpu_kernel.h @@ -45,15 +45,12 @@ class Conv3dGpuKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 0); - } + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded_addr = GetDeviceAddress(workspace, 1); + T *padded_addr = GetPossiblyNullDeviceAddress(workspace, 1); CalPad3d(padded_size_ / sizeof(T), input_addr, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded_addr, reinterpret_cast(stream_ptr)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h index 7d04d31e098..f88e58ad5e7 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_filter_gpu_kernel.h @@ -47,10 +47,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { T *x = GetDeviceAddress(inputs, 0); T *dy = GetDeviceAddress(inputs, 1); - T *work_space = nullptr; - if (workspace_size_ != 0) { - work_space = GetDeviceAddress(workspace, 0); - } + T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); T *dw = nullptr; float 
*dw_float32 = nullptr; @@ -64,7 +61,7 @@ class Conv3dGradFilterGpuKernel : public GpuKernel { const float alpha = 1; const float beta = 0; if (use_pad_) { - T *padded = GetDeviceAddress(workspace, 1); + T *padded = GetPossiblyNullDeviceAddress(workspace, 1); CalPad3d(padded_size_ / sizeof(T), x, n_, c_, old_depth_, old_height_, old_width_, old_depth_ + pad_depth_, old_height_ + pad_height_, old_width_ + pad_width_, pad_head_, pad_top_, pad_left_, pad_value_, padded, reinterpret_cast(stream_ptr)); diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h index 15020b4edd1..5c525a3992b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_grad_input_gpu_kernel.h @@ -46,10 +46,7 @@ class Conv3dGradInputGpuKernel : public GpuKernel { T *w = GetDeviceAddress(inputs, 0); T *dy = GetDeviceAddress(inputs, 1); T *dx = GetDeviceAddress(outputs, 0); - T *work_space = nullptr; - if (workspace_size_ != 0) { - work_space = GetDeviceAddress(workspace, 0); - } + T *work_space = GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; if (use_pad_) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h index b6c4b985b15..6f8b2970e36 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/conv3d_transpose_gpu_kernel.h @@ -46,14 +46,11 @@ class Conv3dTransposeGpuFwdKernel : public GpuKernel { T *input_addr = GetDeviceAddress(inputs, 0); T *filter_addr = GetDeviceAddress(inputs, 1); T *output_addr = GetDeviceAddress(outputs, 0); - T *work_space = nullptr; - if (workspace_size_ != 0) { - work_space = GetDeviceAddress(workspace, 0); - } + T *work_space = 
GetPossiblyNullDeviceAddress(workspace, 0); const float alpha = 1; if (use_pad_) { - T *padded = GetDeviceAddress(workspace, 1); + T *padded = GetPossiblyNullDeviceAddress(workspace, 1); CHECK_CUDNN_RET_WITH_EXCEPT(kernel_node_, cudnnConvolutionBackwardData(cudnn_handle_, &alpha, filter_desc_, filter_addr, input_desc_, input_addr, conv_desc_, algo_, work_space, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h index 60809ab376b..faf49129eb1 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_gpu_kernel.h @@ -75,10 +75,7 @@ class InstanceNormGpuKernel : public GpuKernel { float *ws_beta = GetDeviceAddress(workspace, 1); float *ws_mean = GetDeviceAddress(workspace, 2); float *ws_var = GetDeviceAddress(workspace, 3); - T *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 4); - } + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 4); size_t N = input_shape_[0]; size_t C = input_shape_[1]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h index c1741ed1322..d4dfbbab94b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/instance_norm_grad_gpu_kernel.h @@ -78,10 +78,7 @@ class InstanceNormGradGpuKernel : public GpuKernel { float *ws_gamma = GetDeviceAddress(workspace, 0); float *ws_dgamma = GetDeviceAddress(workspace, 1); float *ws_dbeta = GetDeviceAddress(workspace, 2); - void *workspace_addr = nullptr; - if (workspace_size_ != 0) { - workspace_addr = GetDeviceAddress(workspace, 3); - } + void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 3); size_t N = input_shape_[0]; 
size_t C = input_shape_[1]; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h index 02c3586a603..31074c7f399 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_gpu_kernel.h @@ -59,8 +59,8 @@ class L2NormalizeGpuKernel : public GpuKernel { } T *input_addr = GetDeviceAddress(inputs, 0); T *output_addr = GetDeviceAddress(outputs, 0); - T *reduce_workspace_addr = GetDeviceAddress(workspace, 0); - T *workspace_addr = GetDeviceAddress(workspace, 1); + T *reduce_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); const float alpha = 1; const float beta = 0; diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h index e1a9598cada..e4221c11cb6 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/l2normalize_grad_gpu_kernel.h @@ -62,10 +62,10 @@ class L2NormalizeGradGpuKernel : public GpuKernel { T *y_addr = GetDeviceAddress(inputs, 1); T *dy_addr = GetDeviceAddress(inputs, 2); T *dx_addr = GetDeviceAddress(outputs, 0); - T *reduce_workspace_addr = GetDeviceAddress(workspace, 0); - T *reduce_y_dy_workspace_addr = GetDeviceAddress(workspace, 1); - T *workspace_addr = GetDeviceAddress(workspace, 2); - T *workspace_y_dy_addr = GetDeviceAddress(workspace, 3); + T *reduce_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); + T *reduce_y_dy_workspace_addr = GetPossiblyNullDeviceAddress(workspace, 1); + T *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 2); + T *workspace_y_dy_addr = GetPossiblyNullDeviceAddress(workspace, 3); const float alpha = 1; const float beta = 0; diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h index 8d2c0c073db..537ef654057 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_gpu_kernel.h @@ -70,7 +70,7 @@ class LstmGpuKernel : public GpuKernel { auto cy_addr = GetDeviceAddress(outputs, 2); auto reserved_addr = GetDeviceAddress(outputs, 3); auto states_addr = GetDeviceAddress(outputs, 4); - void *workspace_addr = GetDeviceAddress(workspace, 0); + void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); if (!states_init_) { CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h index 30e454a08be..4ca32d73634 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/lstm_grad_data_gpu_kernel.h @@ -75,7 +75,7 @@ class LstmGradDataGpuKernel : public GpuKernel { auto dx_addr = GetDeviceAddress(outputs, 0); auto dhx_addr = GetDeviceAddress(outputs, 1); auto dcx_addr = GetDeviceAddress(outputs, 2); - void *workspace_addr = GetDeviceAddress(workspace, 0); + void *workspace_addr = GetPossiblyNullDeviceAddress(workspace, 0); if (!states_init_) { CHECK_CUDNN_RET_WITH_EXCEPT( diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h index f3ab1a2b90e..da2edb2178f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/nn/nll_loss_gpu_kernel.h @@ -44,7 +44,7 @@ class NLLLossGpuKernel : public GpuKernel { T *loss_device = GetDeviceAddress(outputs, 0); S *total_weight_device = GetDeviceAddress(outputs, 1); - T *tmp_loss_device = GetDeviceAddress(workspace, 0); 
+ T *tmp_loss_device = GetPossiblyNullDeviceAddress(workspace, 0); S *tmp_target_weight_device = GetDeviceAddress(workspace, 1); NLLLoss(n_, c_, reduction_, input_device, target_device, weight_device, loss_device, total_weight_device, diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc index 06a329dc2d0..73f8c19639d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/other/assign_gpu_kernel.cc @@ -36,5 +36,8 @@ MS_REG_GPU_KERNEL_ONE( MS_REG_GPU_KERNEL_ONE( Assign, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), AssignGpuKernel, int64_t) +MS_REG_GPU_KERNEL_ONE( + Assign, KernelAttr().AddInputAttr(kNumberTypeUInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), + AssignGpuKernel, unsigned int) } // namespace kernel } // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc index 9a54908849f..dcd61375060 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/sponge/neighbor_list/neighbor_list_update_new_kernel.cc @@ -18,7 +18,7 @@ namespace mindspore { namespace kernel { -MS_REG_GPU_KERNEL_TWO(NeighborListUpdateNew, +MS_REG_GPU_KERNEL_TWO(NeighborListUpdate, KernelAttr() .AddInputAttr(kNumberTypeInt32) .AddInputAttr(kNumberTypeInt32) diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc index 01ba0fdae9f..77edb57eaba 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.cc @@ -47,11 +47,11 @@ std::string MsOpNameToHcomOpType(const std::string &ms_op_type) { namespace mindspore { namespace kernel { void HcclKernelFactory::Register(const std::string &name, HcclKernelCreater &&fun) { - hcclKernelMap_.emplace(name, std::move(fun)); + hccl_kernel_map_.emplace(name, fun); } std::shared_ptr HcclKernelFactory::Get(const std::string &name) { - const auto &map = Get().hcclKernelMap_; + const auto &map = Get().hccl_kernel_map_; auto it = map.find(name); if (it != map.end() && it->second) { return (it->second)(); @@ -64,14 +64,15 @@ HcclKernelFactory &HcclKernelFactory::Get() { return _this; } -HcclKernel::HcclKernel() : hccl_count_(0), op_type_(HCCL_REDUCE_SUM), root_id_(0) {} +HcclKernel::HcclKernel() + : hccl_count_(0), op_type_(::HcclReduceOp::HCCL_REDUCE_SUM), root_id_(0), src_rank_(0), dest_rank_(0) {} HcclKernel::~HcclKernel() { hccl_kernel_input_shape_list_.clear(); hccl_kernel_output_shape_list_.clear(); hccl_data_type_list_.clear(); hccl_count_ = 0; - op_type_ = HCCL_REDUCE_SUM; + op_type_ = ::HcclReduceOp::HCCL_REDUCE_SUM; root_id_ = 0; input_size_list_.clear(); output_size_list_.clear(); @@ -81,6 +82,18 @@ HcclKernel::~HcclKernel() { bool HcclKernel::Init(const AnfNodePtr &anf_node) { MS_EXCEPTION_IF_NULL(anf_node); op_name_ = AnfAlgo::GetCNodeName(anf_node); + if (op_name_ == kHcomSend) { + if (!HcomUtil::GetHcomDestRank(anf_node, &dest_rank_)) { + MS_LOG(ERROR) << "GetHcomDestRank fail!"; + return false; + } + } + if (op_name_ == kReceive) { + if (!HcomUtil::GetHcomSrcRank(anf_node, &src_rank_)) { + MS_LOG(ERROR) << "GetHcomSrcRank fail!"; + return false; + } + } if (!HcomUtil::GetKernelInputShape(anf_node, &hccl_kernel_input_shape_list_)) { MS_LOG(ERROR) << "GetKernelInputShape fail!"; return false; @@ -126,6 +139,10 @@ const std::vector &HcclKernel::GetInputSizeList() const { if (!input_size_list_.empty()) { return input_size_list_; } + if (hccl_data_type_list_.size() 
!= hccl_kernel_input_shape_list_.size()) { + MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size " + << hccl_kernel_input_shape_list_.size(); + } for (ulong i = 0; i < hccl_data_type_list_.size(); ++i) { if (!HcomUtil::GetHcclOpSize(hccl_data_type_list_[i], hccl_kernel_input_shape_list_[i], &size)) { MS_LOG(ERROR) << "GetHcclOpInputSize failed"; @@ -145,6 +162,7 @@ const std::vector &HcclKernel::GetOutputSizeList() const { return output_size_list_; } auto cnode = anf_node->cast(); + MS_EXCEPTION_IF_NULL(cnode); auto op_name = AnfAlgo::GetCNodeName(cnode); int64_t rank_size = 1; if (AnfAlgo::HasNodeAttr(kAttrRankSize, cnode)) { @@ -154,6 +172,10 @@ const std::vector &HcclKernel::GetOutputSizeList() const { if (AnfAlgo::HasNodeAttr(kAttrFusion, cnode)) { fusion = AnfAlgo::GetNodeAttr(cnode, kAttrFusion); } + if (hccl_data_type_list_.size() != hccl_kernel_input_shape_list_.size()) { + MS_LOG(EXCEPTION) << "Invalid data type size " << hccl_data_type_list_.size() << " diff shape size " + << hccl_kernel_input_shape_list_.size(); + } ulong loop_size = hccl_data_type_list_.size(); if (AnfAlgo::GetInputTensorNum(anf_node) > 1 && op_name == kAllGatherOpName && fusion >= 1) { loop_size *= static_cast(rank_size); @@ -171,10 +193,13 @@ const std::vector &HcclKernel::GetOutputSizeList() const { } const std::vector &HcclKernel::GetWorkspaceSizeList() const { - if (!workspace_size_list_.empty() || hccl_data_type_list_.empty()) { + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + bool is_task_sink = context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK); + auto mode = context_ptr->get_param(MS_CTX_EXECUTION_MODE); + if (!workspace_size_list_.empty() || hccl_data_type_list_.empty() || (!is_task_sink && mode == kGraphMode)) { return workspace_size_list_; } - workspace_size_list_.emplace_back( hccl::HcclAdapter::GetInstance().CalcWorkspaceSize(anf_node_.lock(), hccl_data_type_list_[0])); return 
workspace_size_list_; @@ -204,6 +229,9 @@ std::vector HcclKernel::GenTask(const std::vector &inpu MS_EXCEPTION_IF_NULL(outputs.at(0)); auto output_data_addr = outputs.at(0)->addr; std::vector private_def; + if (hccl_data_type_list_.empty()) { + MS_LOG(EXCEPTION) << "Hccl data type list is empty"; + } HcclDataType data_type = hccl_data_type_list_[0]; std::vector task_info; bool ret = hccl::HcclAdapter::GetInstance().GenTask(anf_node, data_type, &task_info); diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h index 7cf960dcad1..493ca33fc8e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel.h @@ -51,6 +51,8 @@ class HcclKernel : public AscendKernelMod { uint64_t hccl_count_; HcclReduceOp op_type_; uint32_t root_id_; + uint32_t src_rank_; + uint32_t dest_rank_; mutable std::vector input_size_list_; mutable std::vector output_size_list_; mutable std::vector workspace_size_list_; @@ -71,7 +73,7 @@ class HcclKernelFactory { static std::shared_ptr Get(const string &name); private: - std::map hcclKernelMap_; + std::map hccl_kernel_map_; }; class _HcclKernelRegister { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc index d9a46984163..7b94ca5e659 100755 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hccl_kernel_metadata.cc @@ -30,7 +30,9 @@ namespace { constexpr size_t N_nchw = 0; constexpr size_t C_nchw = 1; std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) { - const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, kOpFormat_C1HWNCoC0}; + static const std::set kReduceNoSupportedSet = {kOpFormat_FRAC_Z, kOpFormat_FRACTAL_Z_C04, + kOpFormat_C1HWNCoC0}; + 
MS_EXCEPTION_IF_NULL(kernel_node); auto op_name = AnfAlgo::GetCNodeName(kernel_node); auto parallel_context_instance = parallel::ParallelContext::GetInstance(); MS_EXCEPTION_IF_NULL(parallel_context_instance); @@ -61,8 +63,8 @@ std::string GetKernelFormat(const CNodePtr &kernel_node, size_t index) { } } // namespace void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector> *kernel_info_list) { - const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, - kNumberTypeFloat32, kNumberTypeInt16}; + static const std::vector kHcclSupportTypes = {kNumberTypeInt8, kNumberTypeInt32, kNumberTypeFloat16, + kNumberTypeFloat32, kNumberTypeInt16}; MS_EXCEPTION_IF_NULL(kernel_info_list); MS_EXCEPTION_IF_NULL(kernel_node); std::string op_name = AnfAlgo::GetCNodeName(kernel_node); @@ -76,7 +78,7 @@ void HcclMetadataInfo(const CNodePtr &kernel_node, std::vector &inputs, const std::vector &, const std::vector &, void *stream_ptr) { + MS_LOG(DEBUG) << "HcomAllBroadCast launch"; if (inputs.empty() || hccl_data_type_list_.empty()) { MS_LOG(ERROR) << "BroadCast param is empty"; return false; diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc index 99c57736b52..014323de5c4 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.cc @@ -16,13 +16,27 @@ #include "backend/kernel_compiler/hccl/hcom_all_gather.h" #include -#include "utils/ms_context.h" +#include "runtime/hccl_adapter/hccl_adapter.h" namespace mindspore { namespace kernel { -bool HcomAllGatherKernel::Launch(const std::vector &, const std::vector &, - const std::vector &, void *) { - MS_LOG(INFO) << "HcomAllGather launch"; +bool HcomAllGatherKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + MS_LOG(DEBUG) << "HcomAllGather launch"; + if 
(inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "Invalid AllGather input, output or data type size(" << inputs.size() << ", " << outputs.size() + << ", " << hccl_data_type_list_.size() << ")."; + return false; + } + MS_EXCEPTION_IF_NULL(inputs[0]); + MS_EXCEPTION_IF_NULL(outputs[0]); + MS_EXCEPTION_IF_NULL(stream_ptr); + auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllGather(inputs[0]->addr, outputs[0]->addr, hccl_count_, + hccl_data_type_list_[0], stream_ptr, group_); + if (hccl_result != HCCL_SUCCESS) { + MS_LOG(ERROR) << "HcclAllGather faled, ret:" << hccl_result; + return false; + } return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h index 36a11d70c42..c729de94e6b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_gather.h @@ -19,7 +19,6 @@ #include #include -#include "hccl/hcom.h" #include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc index fbf95b00fb2..dcafbb0e1de 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce.cc @@ -22,16 +22,17 @@ namespace mindspore { namespace kernel { bool HcomAllReduceKernel::Launch(const std::vector &inputs, const std::vector &, const std::vector &outputs, void *stream_ptr) { - MS_LOG(INFO) << "HcclAllReduce launch"; - if (inputs.empty() || outputs.empty()) { - MS_LOG(ERROR) << "Invalid AllReduce input output size(" << inputs.size() << ", " << outputs.size() << ")."; + MS_LOG(DEBUG) << "HcclAllReduce launch"; + if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "Invalid AllReduce 
input, output or data type size (" << inputs.size() << ", " << outputs.size() + << ", " << hccl_data_type_list_.size() << ")."; return false; } MS_EXCEPTION_IF_NULL(inputs[0]); MS_EXCEPTION_IF_NULL(outputs[0]); MS_EXCEPTION_IF_NULL(stream_ptr); - auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce(inputs[0]->addr, outputs[0]->addr, hccl_count_, - hccl_data_type_list_[0], op_type_, stream_ptr); + auto hccl_result = hccl::HcclAdapter::GetInstance().HcclAllReduce( + inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_); if (hccl_result != HCCL_SUCCESS) { MS_LOG(ERROR) << "HcclAllReduce faled, ret:" << hccl_result; return false; diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc index 557022bebd7..ad45b54046e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.cc @@ -16,13 +16,27 @@ #include "backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h" #include -#include "utils/ms_context.h" +#include "runtime/hccl_adapter/hccl_adapter.h" namespace mindspore { namespace kernel { -bool HcomAllReduceScatterKernel::Launch(const std::vector &, const std::vector &, - const std::vector &, void *) { - MS_LOG(INFO) << "HcomAllReduceScatter launch"; +bool HcomAllReduceScatterKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs, void *stream_ptr) { + MS_LOG(DEBUG) << "HcomAllReduceScatter launch"; + if (inputs.empty() || outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "Invalid AllReduceScatter input, output or data type size(" << inputs.size() << ", " + << outputs.size() << ", " << hccl_data_type_list_.size() << ")."; + return false; + } + MS_EXCEPTION_IF_NULL(inputs[0]); + MS_EXCEPTION_IF_NULL(outputs[0]); + MS_EXCEPTION_IF_NULL(stream_ptr); + auto 
hccl_result = hccl::HcclAdapter::GetInstance().HcclReduceScatter( + inputs[0]->addr, outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], op_type_, stream_ptr, group_); + if (hccl_result != HCCL_SUCCESS) { + MS_LOG(ERROR) << "HcclReduceScatter faled, ret:" << hccl_result; + return false; + } return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h index 987982a73c8..fcddfa34e8d 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_all_reduce_scatter.h @@ -19,7 +19,6 @@ #include #include -#include "hccl/hcom.h" #include "backend/kernel_compiler/hccl/hccl_kernel.h" namespace mindspore { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc index 2b49199b7ef..1f0fbda2101 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_receive.cc @@ -16,12 +16,26 @@ #include "backend/kernel_compiler/hccl/hcom_receive.h" #include -#include "utils/ms_context.h" +#include "runtime/hccl_adapter/hccl_adapter.h" + namespace mindspore { namespace kernel { bool HcomReceiveKernel::Launch(const std::vector &, const std::vector &, - const std::vector &, void *) { - MS_LOG(INFO) << "HcomReceive launch"; + const std::vector &outputs, void *stream_ptr) { + MS_LOG(DEBUG) << "HcomReceive launch"; + if (outputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "Invalid HcomReceive outputs size or data type size (" << outputs.size() << ", " + << hccl_data_type_list_.size() << ")."; + return false; + } + MS_EXCEPTION_IF_NULL(outputs[0]); + MS_EXCEPTION_IF_NULL(stream_ptr); + auto hccl_result = hccl::HcclAdapter::GetInstance().HcclRecv(outputs[0]->addr, hccl_count_, hccl_data_type_list_[0], + src_rank_, 
stream_ptr, group_); + if (hccl_result != HCCL_SUCCESS) { + MS_LOG(ERROR) << "HcomReceive failed, ret:" << hccl_result; + return false; + } return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc index 9951cdeb61f..2349e363323 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_send.cc @@ -16,13 +16,26 @@ #include "backend/kernel_compiler/hccl/hcom_send.h" #include -#include "utils/ms_context.h" +#include "runtime/hccl_adapter/hccl_adapter.h" namespace mindspore { namespace kernel { -bool HcomSendKernel::Launch(const std::vector &, const std::vector &, - const std::vector &, void *) { - MS_LOG(INFO) << "HcomSend launch"; +bool HcomSendKernel::Launch(const std::vector &inputs, const std::vector &, + const std::vector &, void *stream_ptr) { + MS_LOG(DEBUG) << "HcomSend launch"; + if (inputs.empty() || hccl_data_type_list_.empty()) { + MS_LOG(ERROR) << "Invalid HcomSend input size or data type size (" << inputs.size() << ", " + << hccl_data_type_list_.size() << ")."; + return false; + } + MS_EXCEPTION_IF_NULL(inputs[0]); + MS_EXCEPTION_IF_NULL(stream_ptr); + auto hccl_result = hccl::HcclAdapter::GetInstance().HcclSend(inputs[0]->addr, hccl_count_, hccl_data_type_list_[0], + dest_rank_, stream_ptr, group_); + if (hccl_result != HCCL_SUCCESS) { + MS_LOG(ERROR) << "HcomSend faled, ret:" << hccl_result; + return false; + } return true; } } // namespace kernel diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc index d5814fcbfe6..033f20ee234 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.cc @@ -22,11 +22,13 @@ #include "utils/utils.h" namespace mindspore { +namespace { bool IsPyNativeMode() { auto ms_context = 
MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); return ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode; } +} // namespace bool HcomUtil::GetKernelInputShape(const AnfNodePtr &anf_node, vector> *hccl_kernel_intput_shape_list) { MS_EXCEPTION_IF_NULL(anf_node); @@ -67,8 +69,8 @@ bool HcomUtil::GetHcomDataType(const AnfNodePtr &anf_node, vector } else { type_ptr = AnfAlgo::GetInputDeviceDataType(anf_node, i); } - auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type_ptr); - if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) { + auto iter = kConstOpHcomDataTypeMap.find(type_ptr); + if (iter == kConstOpHcomDataTypeMap.end()) { MS_LOG(EXCEPTION) << "HcomDataType can't support Current Ascend Data Type : " << type_ptr; } data_type_list->emplace_back(iter->second); @@ -102,8 +104,8 @@ bool HcomUtil::GetHcclOpSize(const HcclDataType &data_type, const vector bool HcomUtil::GetHcomTypeSize(const HcclDataType &data_type, uint32_t *size) { MS_EXCEPTION_IF_NULL(size); - auto iter = CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.find(data_type); - if (iter == CONST_OP_HCOM_DATA_TYPE_SIZE_MAP.end()) { + auto iter = kConstOpHcomDataTypeSizeMap.find(data_type); + if (iter == kConstOpHcomDataTypeSizeMap.end()) { MS_LOG(ERROR) << "HcomUtil::HcomDataTypeSize, No DataTypeSize!"; return false; } @@ -123,6 +125,7 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vectorcast(); + MS_EXCEPTION_IF_NULL(cnode); if (AnfAlgo::GetCNodeName(anf_node) == kReceiveOpName) { size = AnfAlgo::GetOutputTensorNum(anf_node); } @@ -140,8 +143,8 @@ bool HcomUtil::GetHcomCount(const AnfNodePtr &anf_node, const vectorGetAttr("rank_size") != nullptr) { - rank_size = GetValue(primitive->GetAttr("rank_size")); + if (primitive->GetAttr(kAttrRankSize) != nullptr) { + rank_size = GetValue(primitive->GetAttr(kAttrRankSize)); } else { MS_LOG(ERROR) << "Get rank size failed"; return false; @@ -181,11 +184,11 @@ bool HcomUtil::GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op 
MS_EXCEPTION_IF_NULL(op_type); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr("op") == nullptr) { + if (primitive->GetAttr(kAttrOp) == nullptr) { MS_LOG(ERROR) << "Get HCOM_ATTR_REDUCE_TYPE fail, not support!"; return false; } - auto hcom_op_type = GetValue(primitive->GetAttr("op")); + auto hcom_op_type = GetValue(primitive->GetAttr(kAttrOp)); if (hcom_op_type == "min") { *op_type = HCCL_REDUCE_MIN; } else if (hcom_op_type == "max") { @@ -206,8 +209,8 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) { MS_EXCEPTION_IF_NULL(root_id); auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - if (primitive->GetAttr("root_rank") != nullptr) { - *root_id = (uint32_t)GetValue(primitive->GetAttr("root_rank")); + if (primitive->GetAttr(kAttrRootRank) != nullptr) { + *root_id = (uint32_t)GetValue(primitive->GetAttr(kAttrRootRank)); } else { MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_ROOT_INDEX fail, not support!"; return false; @@ -215,6 +218,34 @@ bool HcomUtil::GetHcomRootId(const AnfNodePtr &anf_node, uint32_t *root_id) { return true; } +bool HcomUtil::GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(src_rank); + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + if (primitive->GetAttr("src_rank") != nullptr) { + *src_rank = static_cast(GetValue(primitive->GetAttr("src_rank"))); + } else { + MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_SRC_RANK fail, not support!"; + return false; + } + return true; +} + +bool HcomUtil::GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank) { + MS_EXCEPTION_IF_NULL(anf_node); + MS_EXCEPTION_IF_NULL(dest_rank); + auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); + MS_EXCEPTION_IF_NULL(primitive); + if (primitive->GetAttr("dest_rank") != nullptr) { + *dest_rank = 
static_cast(GetValue(primitive->GetAttr("dest_rank"))); + } else { + MS_LOG(ERROR) << "HcomUtil::Get HCOM_ATTR_DEST_RANK fail, not support!"; + return false; + } + return true; +} + bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type) { MS_EXCEPTION_IF_NULL(anf_node); MS_EXCEPTION_IF_NULL(receive_type); @@ -232,7 +263,7 @@ bool HcomUtil::GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_ty void HcomUtil::GetHcomGroup(NotNull anf_node, NotNull group) { auto primitive = AnfAlgo::GetCNodePrimitive(anf_node); MS_EXCEPTION_IF_NULL(primitive); - auto attr = primitive->GetAttr("group"); + auto attr = primitive->GetAttr(kAttrGroup); if (attr != nullptr) { *group = GetValue(attr); } else { diff --git a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h index 13427e852b7..c08c6762386 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h +++ b/mindspore/ccsrc/backend/kernel_compiler/hccl/hcom_util.h @@ -40,7 +40,7 @@ constexpr auto kReduceScatter = "ReduceScatter"; constexpr auto kAllToAllv = "AllToAllv"; /* Correspondence between data_type and hcom data type in Ascend */ -static map CONST_OP_HCOM_DATA_TYPE_MAP = { +static map kConstOpHcomDataTypeMap = { {TypeId::kNumberTypeFloat32, HCCL_DATA_TYPE_FP32}, {TypeId::kNumberTypeFloat16, HCCL_DATA_TYPE_FP16}, {TypeId::kNumberTypeInt8, HCCL_DATA_TYPE_INT8}, @@ -48,7 +48,7 @@ static map CONST_OP_HCOM_DATA_TYPE_MAP = { }; /* Correspondence between data_type and occupied byte size in hcom */ -static map CONST_OP_HCOM_DATA_TYPE_SIZE_MAP = { +static map kConstOpHcomDataTypeSizeMap = { {HCCL_DATA_TYPE_FP32, sizeof(float)}, {HCCL_DATA_TYPE_FP16, sizeof(float) / 2}, {HCCL_DATA_TYPE_INT8, sizeof(int8_t)}, @@ -66,6 +66,8 @@ class HcomUtil { const vector> &shape_list, uint64_t *total_count); static bool GetHcomOperationType(const AnfNodePtr &anf_node, HcclReduceOp *op_type); static bool GetHcomRootId(const AnfNodePtr 
&anf_node, uint32_t *root_id); + static bool GetHcomSrcRank(const AnfNodePtr &anf_node, uint32_t *src_rank); + static bool GetHcomDestRank(const AnfNodePtr &anf_node, uint32_t *dest_rank); static void GetHcomGroup(NotNull anf_node, NotNull group); static bool GetHcomReceiveType(const AnfNodePtr &anf_node, TypeId *receive_type); }; diff --git a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc index 9651eea3e69..88442c17511 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kash/kernel_pack.cc @@ -118,6 +118,16 @@ bool KernelPack::ReadFromJsonFile(const std::string &json_f, const std::string & if (!CheckHash(json_f, bin_f, js)) { return false; } + + // cuda json file may have workspace information + if (js.find("workspace") != js.end()) { + auto workspace = js.at("workspace"); + std::vector sizes = workspace.at("size"); + for (auto size : sizes) { + kernel_json_info_.workspaces.push_back(size); + } + } + return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc index 533d8660685..dee08117266 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/kernel_query.cc @@ -112,6 +112,12 @@ void KernelQuery(const CNodePtr &kernel_node, std::vector("LoadIm2Col"); + if (IsPrimitiveCNode(kernel_node, kPrimLoadIm2Col)) { + kernel_type = KernelType::AKG_KERNEL; + } // use LoadIm2Col only for THOR optimizer + switch (kernel_type) { case KernelType::AKG_KERNEL: AkgMetadataInfo(kernel_node, kernel_info_list); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc index 44902348473..27861c773c9 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc +++ 
b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.cc @@ -372,7 +372,7 @@ std::shared_ptr TbeDynamicShapeUtil::FindOp(const std::string &op_name, RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, size_t index, const std::string &def_format) { MS_EXCEPTION_IF_NULL(anf_node); - auto kernel_info = static_cast(anf_node->kernel_info()); + auto kernel_info = dynamic_cast(anf_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto format = kernel_info->select_kernel_build_info() == nullptr ? def_format : AnfAlgo::GetInputFormat(anf_node, index); @@ -396,7 +396,7 @@ RangePair TbeDynamicShapeUtil::GetInputDynamicRange(const AnfNodePtr &anf_node, RangePair TbeDynamicShapeUtil::GetOutputDynamicRange(const AnfNodePtr &anf_node, size_t index, const std::string &def_format) { MS_EXCEPTION_IF_NULL(anf_node); - auto kernel_info = static_cast(anf_node->kernel_info()); + auto kernel_info = dynamic_cast(anf_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto format = kernel_info->select_kernel_build_info() == nullptr ? 
def_format : AnfAlgo::GetOutputFormat(anf_node, index); diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc index 6d230e078b8..f194b8f2a81 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.cc @@ -192,7 +192,7 @@ bool TbeJsonCreator::GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json * void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string prefix) { json_name_.clear(); - size_t hash_id = GenJsonHash((*kernel_json)); + json_hash_ = GenJsonHash((*kernel_json)); auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); json_name_ = std::move(prefix); @@ -203,7 +203,7 @@ void TbeJsonCreator::GenFusionOpName(nlohmann::json *kernel_json, std::string pr json_name_.append("_"); } } - json_name_ = json_name_ + std::to_string(hash_id) + "_" + std::to_string(device_id); + json_name_ = json_name_ + std::to_string(json_hash_) + "_" + std::to_string(device_id); MS_LOG(DEBUG) << "Generate Json name: " << json_name_; (*kernel_json)[kJFusionOpName] = json_name_; } @@ -231,7 +231,7 @@ size_t TbeJsonCreator::GenJsonHash(nlohmann::json tbe_json) { DeleteDescName(&op.at(kJInputDesc)); } } - return std::hash()(tbe_json.dump()); + return std::hash()(op_lists.dump()); } void TbeJsonCreator::AddOpNameForComputeNode(nlohmann::json *kernel_json) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h index 83c3bfdc90f..e71838dfa0e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h +++ b/mindspore/ccsrc/backend/kernel_compiler/tbe/tbe_json/tbe_json_creator.h @@ -48,6 +48,7 @@ class TbeJsonCreator { virtual bool GenJson(const AnfNodePtr &anf_node, nlohmann::json *kernel_json) { return 
false; } virtual bool GenJson(const FusionScopeInfo &fusion_scope_info, nlohmann::json *fusion_json) { return false; } std::string GetJsonName() { return json_name_; } + size_t GetJsonHash() { return json_hash_; } protected: bool GenComputeJson(const AnfNodePtr &anf_node, nlohmann::json *compute_json); @@ -72,6 +73,7 @@ class TbeJsonCreator { private: std::string json_name_; + size_t json_hash_; }; } // namespace mindspore::kernel diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc index 09aa7ea04f9..e7538b6fc04 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/concat_outputs_for_all_gather.cc @@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) { auto type_ptr = node->Type(); auto shape_ptr = node->Shape(); size_t output_num = AnfAlgo::GetOutputTensorNum(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc index fd66dd7ecc3..07957ee3334 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.cc @@ -27,7 +27,7 @@ namespace opt { namespace { // insert tensormove for some cnode even if not a Ref cnode const std::set kNeedInsertTensorMoveOpSet = {kLambNextMVOpName, kLambNextMVWithDecayOpName, - kLambUpdateWithLROpName}; + kLambUpdateWithLROpName, kGetNextOpName}; bool IsParameterOrValueNode(const AnfNodePtr &node) { 
MS_EXCEPTION_IF_NULL(node); @@ -41,15 +41,18 @@ bool IsParameterOrValueNode(const AnfNodePtr &node) { } // NodeUsersMap, for node B input i use node A, it will be one item in map with key: A, and value: (B, i) -bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users) { +bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users, const CNodePtr &known_user, + size_t known_index) { if (node_users.size() == 1) { MS_LOG(INFO) << "This node only used once, no need to insert tensormove node."; return false; } for (const auto &node_pair : node_users) { - auto node = node_pair.first; - if (AnfAlgo::IsRealKernel(node) && !AnfAlgo::IsCommunicationOp(node)) { - MS_LOG(INFO) << "This node only used other real kernel: " << node->fullname_with_scope(); + auto &node = node_pair.first; + size_t idx = IntToSize(node_pair.second); + if (AnfAlgo::IsRealKernel(node) && !(known_user == node && known_index == idx)) { + MS_LOG(INFO) << "User " << node->DebugString() << " idx " << idx << " is real kernel and diff with known " + << known_user->DebugString() << " idx " << known_index; return true; } } @@ -58,11 +61,13 @@ bool IsNodeOutPutUsedByOtherRealKernel(const AnfNodeIndexSet &node_users) { } } // namespace -bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input, - const CNodePtr &cur_node) const { +bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node, + size_t input_idx) const { MS_EXCEPTION_IF_NULL(graph); - MS_EXCEPTION_IF_NULL(input); MS_EXCEPTION_IF_NULL(cur_node); + auto input = cur_node->input(input_idx); + MS_EXCEPTION_IF_NULL(input); + if (IsPrimitiveCNode(cur_node, prim::kPrimReceive)) { return false; } @@ -81,9 +86,10 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, if (kernel_query_->IsTbeRef(input)) { return true; } - + auto kernel_with_index = AnfAlgo::VisitKernelWithReturnType(input, 0, true); + auto 
real_node = kernel_with_index.first; // when input is some special cnodes - if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(input)) != kNeedInsertTensorMoveOpSet.end()) { + if (kNeedInsertTensorMoveOpSet.find(AnfAlgo::GetCNodeName(real_node)) != kNeedInsertTensorMoveOpSet.end()) { return true; } @@ -93,7 +99,7 @@ bool InsertTensorMoveForHcclOp::NeedInsertTensorMove(const FuncGraphPtr &graph, MS_LOG(EXCEPTION) << "node has no output in manager" << " trace: " << trace::DumpSourceLines(input); } - if (IsNodeOutPutUsedByOtherRealKernel(iter->second)) { + if (IsNodeOutPutUsedByOtherRealKernel(iter->second, cur_node, input_idx)) { return true; } } @@ -107,7 +113,7 @@ void InsertTensorMoveForHcclOp::InsertTensorMove(const FuncGraphPtr &graph, cons std::vector new_inputs = {hccl_node->input(0)}; for (size_t i = 1; i < hccl_node->size(); ++i) { auto input = hccl_node->input(i); - if (NeedInsertTensorMove(graph, input, hccl_node)) { + if (NeedInsertTensorMove(graph, hccl_node, i)) { auto tensor_move = CreateTensorMoveOp(graph, input); if (tensor_move == nullptr) { MS_LOG(EXCEPTION) << "Create tensor_move op failed."; diff --git a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h index 98856375c24..7f3c146b339 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h +++ b/mindspore/ccsrc/backend/optimizer/ascend/enhancer/insert_tensor_move_for_hccl_op.h @@ -32,7 +32,7 @@ class InsertTensorMoveForHcclOp : public PatternProcessPass { private: void InsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &hccl_node) const; - bool NeedInsertTensorMove(const FuncGraphPtr &graph, const AnfNodePtr &input, const CNodePtr &cur_node) const; + bool NeedInsertTensorMove(const FuncGraphPtr &graph, const CNodePtr &cur_node, size_t input_idx) const; KernelQueryPtr kernel_query_; }; } // namespace opt diff --git 
a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc index 30d899f3916..8e240b308fd 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/format_type/change_axis_of_reduce_kernel.cc @@ -53,6 +53,15 @@ void SafeCheckFunction(const CNodePtr &cnode, const std::vector &reduce } } +void DynamicAttrUpdate(const AnfNodePtr &node) { + MS_EXCEPTION_IF_NULL(node); + auto primitive = AnfAlgo::GetCNodePrimitive(node); + MS_EXCEPTION_IF_NULL(primitive); + auto axis_attr = primitive->GetAttr(kAttrAxis); + AnfAlgo::SetNodeAttr(kAttrAxes, axis_attr, node); + AnfAlgo::EraseNodeAttr(kAttrAxis, node); +} + void ConvertReduceAttrFraczAnd6HD(const CNodePtr &cnode) { auto axis = kernel::GetReduceAttrAxis(cnode); std::vector convert_axis; @@ -95,9 +104,15 @@ const AnfNodePtr ChangeAxisOfReduceKernel::Process(const FuncGraphPtr &, const A } auto convert_map = kReduceConvertMap.find(AnfAlgo::GetInputFormat(node, 0)); if (convert_map == kReduceConvertMap.end()) { + if (AnfAlgo::IsDynamicShape(node)) { + DynamicAttrUpdate(node); + } return nullptr; } convert_map->second(node->cast()); + if (AnfAlgo::IsDynamicShape(node)) { + DynamicAttrUpdate(node); + } return nullptr; } } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc index 08cf18be42c..6b2d57b9a18 100644 --- a/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc +++ b/mindspore/ccsrc/backend/optimizer/ascend/mindir/all_to_all_unify_mindir.cc @@ -69,7 +69,7 @@ CNodePtr CreateSplitNode(const FuncGraphPtr &graph, const CNodePtr &all_to_all) if (SizeToLong(shape.size()) <= split_dim) { MS_LOG(EXCEPTION) << "Invalid split dim " << split_dim << " is over the shape size " << 
shape.size(); } - if (shape[LongToSize(split_dim)] % split_count != 0) { + if (split_count == 0 || shape[LongToSize(split_dim)] % split_count != 0) { MS_LOG(EXCEPTION) << "Invalid split count " << split_count << " cannot be divisible by shape[" << split_dim << "] = " << shape[LongToSize(split_dim)]; } diff --git a/mindspore/ccsrc/backend/optimizer/common/helper.cc b/mindspore/ccsrc/backend/optimizer/common/helper.cc index f39ed59a594..a59499da83d 100644 --- a/mindspore/ccsrc/backend/optimizer/common/helper.cc +++ b/mindspore/ccsrc/backend/optimizer/common/helper.cc @@ -484,6 +484,7 @@ bool IsNotRealUsedByOthers(const FuncGraphPtr &graph, const AnfNodePtr &node) { } CNodePtr CreatTupleGetItemNode(const FuncGraphPtr &func_graph, const AnfNodePtr &node, size_t output_idx) { + MS_EXCEPTION_IF_NULL(func_graph); auto idx = NewValueNode(SizeToLong(output_idx)); MS_EXCEPTION_IF_NULL(idx); auto imm = std::make_shared(SizeToLong(output_idx)); @@ -713,8 +714,17 @@ AbstractBasePtrList RectifyAbstractFromRegAttr(const PrimitivePtr &primitive, if (!opt::ConstInputToAttrInfoRegistry::Instance().GetRegisterByOpName(primitive->name(), ®)) { return input_abstract; } - if (AnfAlgo::HasDynamicShapeFlag(primitive) || - DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) { + if (AnfAlgo::HasDynamicShapeFlag(primitive)) { + return input_abstract; + } + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + auto device = ms_context->get_param(MS_CTX_DEVICE_TARGET); + if (device == kGPUDevice) { + if (DynamicShapeConstInputToAttrGPU.find(primitive->name()) != DynamicShapeConstInputToAttrGPU.end()) { + return input_abstract; + } + } else if (DynamicShapeConstInputToAttr.find(primitive->name()) != DynamicShapeConstInputToAttr.end()) { return input_abstract; } auto convert_input_list = reg.GetConstInputAttrInfo(); diff --git a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc 
b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc index 7859345fbb7..0ed7c6ca663 100644 --- a/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc +++ b/mindspore/ccsrc/backend/optimizer/cpu/insert_cast_cpu.cc @@ -20,6 +20,7 @@ #include #include #include +#include "backend/optimizer/common/helper.h" #include "backend/kernel_compiler/kernel_build_info.h" #include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" #include "backend/session/anf_runtime_algorithm.h" @@ -89,6 +90,34 @@ void InsertCast(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { } } } + +void InsertCastForGraphOutput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const AnfNodePtr &func_output) { + MS_EXCEPTION_IF_NULL(cnode); + size_t output_num = AnfAlgo::GetOutputTensorNum(cnode); + for (size_t i = 0; i < output_num; i++) { + auto infer_type = AnfAlgo::GetOutputInferDataType(cnode, i); + auto device_type = AnfAlgo::GetOutputDeviceDataType(cnode, i); + const std::string dev_fmt = AnfAlgo::GetOutputFormat(cnode, i); + if (infer_type != device_type) { + auto used_node_list = GetRealNodeUsedListByOutputIdx(func_graph, cnode, i); + for (size_t j = 0; j < used_node_list->size(); j++) { + auto used_node = used_node_list->at(j).first; + if (used_node != func_output) { + continue; + } + auto used_node_index = static_cast(used_node_list->at(j).second - 1); + auto cur_input = AnfAlgo::GetInputNode(utils::cast(used_node), used_node_index); + const std::vector origin_shape = + AnfAlgo::GetPrevNodeOutputInferShape(utils::cast(used_node), i); + auto cast = + AddCastOpNodeToGraph(func_graph, cur_input, dev_fmt, device_type, infer_type, origin_shape, infer_type); + MS_EXCEPTION_IF_NULL(cast); + cast->set_scope(used_node->scope()); + utils::cast(used_node)->set_input(used_node_index + 1, cast); + } + } + } +} } // namespace bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) { @@ -100,6 +129,15 @@ bool InsertCastCPU::Run(const FuncGraphPtr &func_graph) { InsertCast(func_graph, 
cnode); } } + AnfNodePtrList outputs; + kernel::GetFuncGraphOutputNodes(func_graph, &outputs); + auto func_output = func_graph->output(); + for (auto node : outputs) { + if (node != nullptr && node->isa() && AnfAlgo::IsRealKernel(node)) { + auto cnode = node->cast(); + InsertCastForGraphOutput(func_graph, cnode, func_output); + } + } return true; } } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc index 0f105b9090b..f7ea32119aa 100644 --- a/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/concat_outputs_for_all_gather.cc @@ -33,7 +33,7 @@ OutputInfo GetNodeOutputInfo(const AnfNodePtr &node) { auto type_ptr = node->Type(); auto shape_ptr = node->Shape(); size_t output_num = AnfAlgo::GetOutputTensorNum(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); diff --git a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc index 95733a9b4f6..0aff8d507b9 100644 --- a/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc +++ b/mindspore/ccsrc/backend/optimizer/gpu/insert_format_transform_op.cc @@ -100,9 +100,13 @@ CNodePtr InsertTransposeOp(const FuncGraphPtr &graph, const AnfNodePtr &node, co MS_EXCEPTION_IF_NULL(transpose_op); // 3.Set the output info of transpose. 
auto transpose_type = {AnfAlgo::GetPrevNodeOutputInferDataType(used_node, used_node_index)}; - auto transpose_shape = {AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index)}; - AnfAlgo::SetOutputInferTypeAndShape(transpose_type, transpose_shape, transpose_op.get()); - if (!is_fake) { + auto transpose_shape = AnfAlgo::GetPrevNodeOutputInferShape(used_node, used_node_index); + AnfAlgo::SetOutputInferTypeAndShape(transpose_type, {transpose_shape}, transpose_op.get()); + if (is_fake) { + std::vector shape; + std::transform(transpose_shape.begin(), transpose_shape.end(), std::back_inserter(shape), SizeToLong); + AnfAlgo::SetNodeAttr("shape", MakeValue(shape), transpose_op); + } else { AnfAlgo::SetNodeAttr(kAttrPerm, MakeValue(transpose_perm), transpose_op); } // 4. Set the new edge of transpose op. diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc index 8d99cbb24b6..2cbe882d595 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_atomic_clean.cc @@ -230,7 +230,7 @@ bool AtomicAddCheckerAscend::SuitableForAtomicAdd(const AnfNodePtr &node) { void AtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) { // Change kernel build info. 
- auto kernel_info = static_cast(composite_node->kernel_info()); + auto kernel_info = dynamic_cast(composite_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo(); auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats(); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc index 740e97f0b45..ab181401fdb 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/add_stitch_atomic_clean_gpu.cc @@ -41,7 +41,7 @@ namespace mindspore { namespace opt { void StitchAtomicCleanInsertter::CorrectKernelBuildInfo(const AnfNodePtr &composite_node, const AnfNodePtr &new_input) { // Change kernel build info. - auto kernel_info = static_cast(composite_node->kernel_info()); + auto kernel_info = dynamic_cast(composite_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); const auto &origin_kernel_build_info = kernel_info->GetMutableSelectKernelBuildInfo(); auto origin_inputs_format = origin_kernel_build_info->GetAllInputFormats(); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc index e4c2e59ec2f..4ea6b813056 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/arithmetic_simplify.cc @@ -643,29 +643,33 @@ bool ArithmeticSimplify::Run(const FuncGraphPtr &func_graph) { expressions_map_ = GetExpressions(); for (auto node : func_graph->GetOrderedCnodes()) { if (AnfAlgo::IsGraphKernel(node)) { - auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); - graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph); - bool find_pattern = true; - bool change_anf_graph = false; - while (find_pattern) 
{ - find_pattern = false; - find_pattern = DoArithmeticTrans(lg) || find_pattern; - find_pattern = DoConstantFold(lg) || find_pattern; - change_anf_graph = change_anf_graph || find_pattern; + try { + auto sub_graph = AnfAlgo::GetCNodeFuncGraphPtr(node); + graphkernel::LiteGraphPtr lg = AnfGraph2LiteGraph(sub_graph); + bool find_pattern = true; + bool change_anf_graph = false; + while (find_pattern) { + find_pattern = false; + find_pattern = DoArithmeticTrans(lg) || find_pattern; + find_pattern = DoConstantFold(lg) || find_pattern; + change_anf_graph = change_anf_graph || find_pattern; + } + if (!change_anf_graph) continue; + ReorganizeEmptyGraph(lg); + AnfNodePtrList outputs; + auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs); + new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); + auto cnode = node->cast(); + AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end()); + EliminateRedundantParameters(new_funcgraph, &inputs); + auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs); + SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs); + mng->Replace(node, new_node); + mng->AddFuncGraph(new_funcgraph); + do_simplify = true; + } catch (const graphkernel::GKException &e) { + MS_LOG(WARNING) << e.what() << ", so we undo airthmetic simplify for this graph"; } - if (!change_anf_graph) continue; - ReorganizeEmptyGraph(lg); - AnfNodePtrList outputs; - auto new_funcgraph = LiteGraph2AnfGraph(lg, &outputs); - new_funcgraph->set_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL, sub_graph->get_attr(FUNC_GRAPH_ATTR_GRAPH_KERNEL)); - auto cnode = node->cast(); - AnfNodePtrList inputs(cnode->inputs().begin() + 1, cnode->inputs().end()); - EliminateRedundantParameters(new_funcgraph, &inputs); - auto new_node = CreateNewFuseCNode(func_graph, new_funcgraph, inputs, outputs); - SetNewKernelInfo(new_node, new_funcgraph, inputs, outputs); - mng->Replace(node, new_node); - 
mng->AddFuncGraph(new_funcgraph); - do_simplify = true; } } return do_simplify; diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc index 01c004a06f8..12ffe5ee75c 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_cse.cc @@ -78,8 +78,8 @@ bool GraphKernelBackendCSE::CheckEqualKernelBuildInfo(const AnfNodePtr &main, co return BackendCSE::CheckEqualKernelBuildInfo(main, node); } - auto main_kernel_info = static_cast(main->kernel_info()); - auto node_kernel_info = static_cast(node->kernel_info()); + auto main_kernel_info = dynamic_cast(main->kernel_info()); + auto node_kernel_info = dynamic_cast(node->kernel_info()); if (main_kernel_info == nullptr && node_kernel_info == nullptr) { return true; } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc index bf25889d1e5..d5a16a15b51 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_helper.cc @@ -613,7 +613,7 @@ void ResetKernelInfo(const AnfNodePtr &node, KernelType kernel_type) { } std::string GetFormat(const AnfNodePtr &node) { - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto kernel_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_info); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc index 9ace0cb9a6b..30e160ee01d 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc +++ 
b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_optimization.cc @@ -42,8 +42,11 @@ #include "backend/optimizer/graph_kernel/reorder_ops.h" #include "backend/optimizer/graph_kernel/update_state_formatter.h" #include "backend/optimizer/graph_kernel/axis_normalizer.h" +#include "backend/optimizer/graph_kernel/decrease_compute_precision.h" +#include "backend/optimizer/graph_kernel/decrease_transfer_precision.h" #include "backend/optimizer/pass/getitem_tuple.h" #include "backend/optimizer/graph_kernel/graph_kernel_pass_manager.h" +#include "backend/optimizer/graph_kernel/rewrite_output_shape.h" namespace mindspore { namespace opt { @@ -60,6 +63,9 @@ PassManagerPtr GraphKernelOptimizer::PreProcess() const { // Do cse before all passes of graphkernel pm->AddPass(std::make_shared("cse1"), OptLevel_1); + // Save the original output info + pm->AddPass(std::make_shared(), OptLevel_1); + // Change Assign(p, a, U) to Assign(Depend(p, U), a) pm->AddPass(std::make_shared(), OptLevel_1, is_gpu); @@ -152,6 +158,10 @@ PassManagerPtr GraphKernelOptimizer::HighLevelOpt2() const { auto level = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_stitch_fusion); pm->AddPass(std::make_shared(), level, is_gpu); + // Enable low precision + auto level_low_precision = GetPassLevelByFlag(context::GraphKernelFlags::GetInstance().enable_low_precision); + pm->AddPass(std::make_shared(), level_low_precision); + pm->AddPass(std::make_shared(), level_low_precision, is_ascend); return pm; } @@ -166,11 +176,15 @@ PassManagerPtr GraphKernelOptimizer::Combine() const { PassManagerPtr GraphKernelOptimizer::PostProcess() const { auto pm = std::make_shared(6, "postprocess"); - // Add the new tensors to the kernel_graph - pm->AddPass(std::make_shared(), OptLevel_1); - // Make Tuple for the inputs of UpdateState. 
(the reverse of SpreadUpdateState) pm->AddPass(std::make_shared(), OptLevel_1); + + // Recover the original output info + pm->AddPass(std::make_shared(), OptLevel_1); + pm->AddPass(std::make_shared(), OptLevel_1); + + // Add the new tensors to the kernel_graph + pm->AddPass(std::make_shared(), OptLevel_1); return pm; } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc index eacbff0907d..f8917fa0092 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/graph_kernel_splitter.cc @@ -425,7 +425,12 @@ class AreaGraph { AnfNodePtrList getitem_inputs = {NewValueNode(prim::kPrimTupleGetItem), main_cnodes[input_area], idx}; TraceGuard g_sub(std::make_shared(main_cnodes[input_area]->debug_info())); auto getitem_node = main_func_graph->NewCNode(getitem_inputs); - getitem_node->set_abstract(main_cnodes[input_area]->abstract()); + auto abs_tuple = dyn_cast(main_cnodes[input_area]->abstract()); + if (idx_val < SizeToLong(abs_tuple->size())) { + getitem_node->set_abstract(abs_tuple->elements()[idx_val]); + } else { + getitem_node->set_abstract(main_cnodes[input_area]->abstract()); + } main_cnode_inputs.emplace_back(getitem_node); } else { main_cnode_inputs.emplace_back(main_cnodes[input_area]); diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc index 6a0f9168c03..d113064a337 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.cc @@ -27,6 +27,7 @@ #include "backend/optimizer/graph_kernel/model/node.h" #include "backend/optimizer/graph_kernel/model/op_node.h" +#include "backend/optimizer/graph_kernel/model/op_register.h" namespace mindspore { namespace opt { @@ -107,36 +108,15 @@ NodePtr 
LiteGraph::GraphBuilder::Emit(const std::string &op, const NodePtrList & NodePtr LiteGraph::GraphBuilder::Op(const std::string &op, const NodeBase &baseinfo, const NodePtrList &inputs, const DAttrs &attrs, std::string node_name) { - auto op_ptr = Emit(op, inputs, attrs, node_name); + PrimOpPtr op_ptr = CreateOp(op, node_name); + op_ptr->SetInputs(inputs); + op_ptr->SetAttrs(attrs); op_ptr->SetBaseInfo(baseinfo); - return op_ptr; + return graph_->Add(op_ptr); } PrimOpPtr LiteGraph::GraphBuilder::CreateOp(const std::string &op, const std::string &node_name) { - static std::map> creators; - if (creators.empty()) { - creators = {{"Add", Elemwise}, - {"Sub", Elemwise}, - {"RealDiv", Elemwise}, - {"Mul", Elemwise}, - {"Log", Elemwise}, - {"Exp", Elemwise}, - {"Pow", Elemwise}, - {"Sqrt", Elemwise}, - {"Rsqrt", Elemwise}, - {"Neg", Elemwise}, - {"Reciprocal", Elemwise}, - {"Abs", Elemwise}, - {"BroadcastTo", BroadcastTo}, - {"Reshape", Reshape}, - {"ReduceSum", Reduce}, - {"ReduceMax", Reduce}, - {"ReduceMin", Reduce}, - {"Conv2D", Conv2d}}; - } - auto iter = creators.find(op); - auto creator = (iter == creators.end() ? 
Opaque : iter->second); - return creator(op, node_name); + return OpRegistry::Instance().NewOp(op, node_name); } } // namespace graphkernel } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h index 439a172fc58..fc1cb42475e 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/lite_graph.h @@ -81,28 +81,6 @@ class LiteGraph::GraphBuilder { LiteGraphPtr Get() { return graph_; } private: - static PrimOpPtr Elemwise(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - - static PrimOpPtr BroadcastTo(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - - static PrimOpPtr Reshape(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - - static PrimOpPtr Reduce(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - static PrimOpPtr Opaque(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - static PrimOpPtr Conv2d(const std::string &op, const std::string &name) { - return std::make_shared(op, name); - } - PrimOpPtr CreateOp(const std::string &id, const std::string &name); std::string NewName(std::string prefix = "output_") { return prefix + std::to_string(graph_->name_id_++); } diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h index 50dd34fb5e4..7c34218f14e 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/node.h @@ -26,6 +26,7 @@ #include #include #include +#include #include "mindspore/core/ir/dtype/type_id.h" #include "mindspore/core/ir/value.h" @@ -85,6 +86,8 @@ class Node : public NodeBase { void SetInput(size_t i, 
const NodePtr &new_input); void SetInputs(const NodePtrList &inputs); void ReplaceWith(const NodePtr &other_node); + void SetAttrs(const DAttrs &attrs) { attrs_ = attrs; } + void SetAttr(const std::string &key, const ValuePtr &value) { attrs_[key] = value; } template T *As() { @@ -146,6 +149,15 @@ class OutputNode : public Node { void Dump(std::ostringstream &os) const override { ; } NType NodeType() override { return NType::Output; } }; + +class GKException : public std::exception { + public: + explicit GKException(const std::string &message) : msg_(message) {} + const char *what() const noexcept override { return msg_.c_str(); } + + protected: + std::string msg_; +}; } // namespace graphkernel } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc index 3a03f3cf4b5..0d8a073c0a5 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.cc @@ -31,7 +31,58 @@ namespace mindspore { namespace opt { namespace graphkernel { +std::vector GetListInt(const ValuePtr &attr_value) { + bool is_int64 = true; + auto get_int_value = [&is_int64](const ValuePtr &value) -> int64_t { + if (value->isa()) { + return GetValue(value); + } + is_int64 = false; + return static_cast(GetValue(value)); + }; + std::vector list_int; + const auto &vals = attr_value->cast()->value(); + (void)std::transform(vals.begin(), vals.end(), std::back_inserter(list_int), get_int_value); + if (!is_int64) { + MS_LOG(WARNING) << "Vector type should be 'int64_t' but got 'int'"; + } + return list_int; +} + +void PrimOp::Check(const NodePtrList &inputs, const DAttrs &attrs) { + CheckShape(inputs, attrs); + CheckType(inputs, attrs); + CheckFormat(inputs, attrs); +} + +// check all type to be identical +void PrimOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { + TypeId tid = 
inputs[0]->type; + for (size_t i = 1; i < inputs.size(); i++) { + if (inputs[i]->type != tid) { + MS_LOG(EXCEPTION) << "Incompatible dtype between input " << 0 << "and" << i; + } + } +} + +// check all formats are compatible, only DefaultForant is compatible with others +void PrimOp::CheckFormat(const NodePtrList &inputs, const DAttrs &attrs) { + DFormat res = inputs[0]->format; + size_t i = 0; + for (size_t j = 1; j < inputs.size(); j++) { + if (inputs[j]->format != res) { + if (inputs[j]->format != kOpFormat_DEFAULT && res != kOpFormat_DEFAULT) { + MS_LOG(EXCEPTION) << "Incompatible format between input " << i << "and" << (j + 1); + } + if (res == kOpFormat_DEFAULT) { + res = inputs[j]->format; + i = j + 1; + } + } + } +} void PrimOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { + Check(inputs, attrs); this->shape = InferShape(inputs, attrs); this->type = InferType(inputs, attrs); this->format = InferFormat(inputs, attrs); @@ -146,6 +197,88 @@ NodePtr PrimOp::InferValue(const NodePtrList &inputs, const DAttrs &attrs, const return res == nullptr ? 
nullptr : std::make_shared(res); } +// default format shape to fractal_Nz format shape +DShape ToNz(const DShape &default_shape) { + if (default_shape.size() != 1 && default_shape.size() != 2) { + throw GKException("shape is too long"); + } + DShape output_shape; + if (default_shape.size() == 1 || (default_shape.size() == 2 && default_shape[0] == 1)) { + output_shape = {default_shape[default_shape.size() - 1] / 16, 1, 1, 16}; + if (default_shape[default_shape.size() - 1] % 16 != 0) { + throw GKException("should be multiplies of 16"); + } + + } else if (default_shape.size() == 2 || default_shape[1] == 1) { + output_shape = {1, default_shape[0] / 16, 16, 1}; + if (default_shape[0] % 16 != 0) { + throw GKException("should be multiplies of 16"); + } + + } else { + output_shape = {default_shape[1] / 16, default_shape[0] / 16, 16, 16}; + if (default_shape[0] % 16 != 0 || default_shape[1] % 16 != 0) { + throw GKException("should be multiplies of 16"); + } + } + return output_shape; +} + +DShape BroadcastShape(const NodePtrList &inputs, bool to_nz = false) { + std::vector> shapes; + for (auto &input : inputs) { + if (to_nz && input->format != kOpFormat_FRAC_NZ) { + shapes.emplace_back(ToNz(input->shape)); + } else { + shapes.emplace_back(input->shape); + } + } + auto max_dim_input = + std::max_element(shapes.begin(), shapes.end(), + [](const std::vector &a, const std::vector &b) { return a.size() < b.size(); }); + auto max_dim = max_dim_input->size(); + std::vector> align_shapes; + for (auto &s : shapes) { + std::vector cur(max_dim - s.size(), 1); + cur.insert(cur.end(), s.begin(), s.end()); + align_shapes.emplace_back(cur); + } + std::vector output_shape(max_dim, 1); + for (size_t i = 0; i < max_dim; i++) { + for (auto &align_shape : align_shapes) { + if (align_shape[i] > 1) { + if (output_shape[i] == 1) { + output_shape[i] = align_shape[i]; + } + if (output_shape[i] != align_shape[i]) { + throw GKException("shape broadcast failed"); + } + } + } + } + return output_shape; 
+} + +DShape ElemwiseOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) { + return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC || input->format == kOpFormat_NCHW; + })) { + return BroadcastShape(inputs, false); + } + if (std::all_of(inputs.begin(), inputs.end(), [](const NodePtr &input) { + return input->format == kOpFormat_DEFAULT || input->format == kOpFormat_NHWC || + input->format == kOpFormat_NCHW || input->format == kOpFormat_FRAC_NZ; + })) { + return BroadcastShape(inputs, true); + } + throw GKException("Only support default and fractal_nz"); +} + +DFormat ElemwiseOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { + auto it = std::find_if(inputs.begin(), inputs.end(), [](const NodePtr &i) { return i->format != kOpFormat_DEFAULT; }); + return it == inputs.end() ? kOpFormat_DEFAULT : (*it)->format; +} + void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { PrimOp::Infer(inputs, attrs); auto IsBroadcast = [this](const NodePtrList &inputs) -> bool { @@ -160,26 +293,64 @@ void ElemwiseOp::Infer(const NodePtrList &inputs, const DAttrs &attrs) { compute_type_ = IsBroadcast(inputs) ? 
BROADCAST : ELEMWISE; } -DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - return GetValue>(attrs.find("shape")->second); +TypeId CastOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { + CHECK_ATTR(attrs, "dst_type"); + auto dst_type = attrs.find("dst_type")->second; + if (dst_type->isa()) { + return dst_type->cast()->type_id(); + } + return kernel::DtypeToTypeId(GetValue(dst_type)); +} + +void SelectOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { + if (inputs[0]->type != TypeId::kNumberTypeBool) { + MS_LOG(EXCEPTION) << "Select's input[0] should be bool type"; + } + if (inputs[1]->type != inputs[2]->type) { + MS_LOG(EXCEPTION) << "Select's input[1] and input[2]'s type doesn't match"; + } } DShape ReshapeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - auto new_shape = GetValue>(attrs.find("shape")->second); + CHECK_ATTR(attrs, "shape"); + auto new_shape = GetListInt(attrs.find("shape")->second); auto origin_shape = inputs[0]->shape; + auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies()); + auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies()); for (size_t i = 0; i < new_shape.size(); i++) { if (new_shape[i] == -1) { - auto origin_product = std::accumulate(origin_shape.begin(), origin_shape.end(), 1, std::multiplies()); - auto new_product = std::accumulate(new_shape.begin(), new_shape.end(), 1, std::multiplies()); new_shape[i] = origin_product / new_product * (-1); - break; + return new_shape; } } + if (origin_product != new_product) { + MS_LOG(EXCEPTION) << "The shape product before and after reshaping should be equal"; + } return new_shape; } +DShape BroadcastToOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + CHECK_ATTR(attrs, "shape"); + return GetListInt(attrs.find("shape")->second); +} + +// check rudece axis in range [-size,size) +void ReduceOp::Check(const NodePtrList 
&inputs, const DAttrs &attrs) { + PrimOp::Check(inputs, attrs); + CHECK_ATTR(attrs, "axis"); + auto axis = GetListInt(attrs.find("axis")->second); + int64_t size = static_cast(inputs[0]->shape.size()); + auto it = std::find_if(axis.begin(), axis.end(), [&size](const int64_t &i) { return (i >= size || i < (-size)); }); + if (it != axis.end()) { + MS_LOG(EXCEPTION) << "reduce_axis should be in range [" << (-size) << "," << size << ")" + << ",but got " << (*it); + } +} + DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { - auto axis = GetValue>(attrs.find("axis")->second); + CHECK_ATTR(attrs, "axis"); + CHECK_ATTR(attrs, "keep_dims"); + auto axis = GetListInt(attrs.find("axis")->second); auto keepdims = GetValue(attrs.find("keep_dims")->second); if (keepdims) { DShape new_shape = inputs[0]->shape; @@ -200,6 +371,171 @@ DShape ReduceOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { } return new_shape; } + +void CheckNd(const std::vector &shape, size_t n) { + if (shape.size() != n) { + std::ostringstream info; + info << "input dimension should be " << n << ", but got " << shape.size(); + throw GKException(info.str()); + } +} + +DShape Conv2dOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + auto shape0 = inputs[0]->shape; + auto shape1 = inputs[1]->shape; + CheckNd(shape0, 4); + CheckNd(shape1, 4); + if (inputs[0]->format != kOpFormat_NHWC && inputs[1]->format != kOpFormat_NHWC && + GetValue(attrs.find("format")->second) != kOpFormat_NHWC) { + throw GKException("check NHWC format failed"); + } + auto n = shape0[0]; + auto h = shape0[1]; + auto w = shape0[2]; + auto out_channel = shape1[0]; + CHECK_ATTR(attrs, "pad_list"); + CHECK_ATTR(attrs, "pad_mode"); + CHECK_ATTR(attrs, "kernel_size"); + CHECK_ATTR(attrs, "stride"); + CHECK_ATTR(attrs, "dilation"); + auto pad_list = GetListInt(attrs.find("pad_list")->second); + auto pad_mode = GetValue(attrs.find("pad_mode")->second); + auto kernel_size = 
GetListInt(attrs.find("kernel_size")->second); + auto stride = GetListInt(attrs.find("stride")->second); + auto dilation = GetListInt(attrs.find("dilation")->second); + CheckNd(pad_list, 4); + CheckNd(kernel_size, 2); + CheckNd(stride, 4); + CheckNd(dilation, 4); + bool has_pad = false; + if (pad_list[0] != pad_list[1] || pad_list[2] != pad_list[3]) { + has_pad = true; + } else { + if (pad_mode == "VALID" || pad_mode == "valid") { + if (std::any_of(pad_list.begin(), pad_list.end(), [](int i) { return i == 0; })) { + has_pad = true; + } + } + } + if (!has_pad) { + pad_list = {0, 0, 0, 0}; + } + auto k_h = (kernel_size[0] - 1) * dilation[2] + 1; + auto k_w = (kernel_size[1] - 1) * dilation[3] + 1; + auto out_h = (h + pad_list[0] + pad_list[1] - k_h) / stride[2] + 1; + auto out_w = (w + pad_list[2] + pad_list[3] - k_w) / stride[3] + 1; + std::vector output = {n, out_h, out_w, out_channel}; + return output; +} + +TypeId Conv2dOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { + if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type; + auto dst_type = attrs.find("dst_type")->second; + if (dst_type->isa()) { + return dst_type->cast()->type_id(); + } + return kernel::DtypeToTypeId(GetValue(dst_type)); +} + +DShape TransposeOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + CHECK_ATTR(attrs, "perm"); + auto perm = GetListInt(attrs.find("perm")->second); + auto &old_shape = inputs[0]->shape; + DShape new_shape; + if (perm.size() != old_shape.size()) { + MS_LOG(EXCEPTION) << "perm.size() != old_shape.size(). 
" << perm.size() << " vs " << old_shape.size(); + } + std::transform(perm.begin(), perm.end(), std::back_inserter(new_shape), + [&old_shape](int64_t p) { return old_shape[p]; }); + return new_shape; +} + +DFormat TransposeOp::InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { + if (inputs[0]->shape.size() != 4) return kOpFormat_DEFAULT; + CHECK_ATTR(attrs, "perm"); + auto perm = GetListInt(attrs.find("perm")->second); + const auto &ori_format = inputs[0]->format; + if (ori_format == kOpFormat_DEFAULT || ori_format == kOpFormat_NCHW) { + std::vector nchw2nhwc = {0, 2, 3, 1}; + if (perm == nchw2nhwc) return kOpFormat_NHWC; + } else if (ori_format == kOpFormat_NHWC) { + std::vector nhwc2nchw = {0, 3, 1, 2}; + if (perm == nhwc2nchw) return kOpFormat_DEFAULT; + } + std::ostringstream info; + info << "Unsupported Transpose. ori_format = " << ori_format << ", perm = " << attrs.find("perm")->second->ToString(); + throw GKException(info.str()); +} + +DShape MatMulOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + std::vector shape0 = inputs[0]->shape; + std::vector shape1 = inputs[1]->shape; + if (shape0.size() != 2 || shape1.size() != 2) { + std::ostringstream info; + info << "MatMul's input's dimension must be 2, but got " << shape0.size() << " and " << shape1.size(); + throw GKException(info.str()); + } + auto transpose_a = GetValue(attrs.find("transpose_a")->second); + auto transpose_b = GetValue(attrs.find("transpose_b")->second); + int64_t m = transpose_a ? shape0[1] : shape0[0]; + int64_t k1 = transpose_a ? shape0[0] : shape0[1]; + int64_t k2 = transpose_b ? shape1[1] : shape1[0]; + int64_t n = transpose_b ? 
shape1[0] : shape1[1]; + if (k1 != k2) { + MS_LOG(EXCEPTION) << "MatMul's inputs have different k value " << k1 << " vs " << k2; + } + std::vector output = {m, n}; + return output; +} + +TypeId MatMulOp::InferType(const NodePtrList &inputs, const DAttrs &attrs) { + if (attrs.find("dst_type") == attrs.end()) return inputs[0]->type; + auto dst_type = attrs.find("dst_type")->second; + if (dst_type->isa()) { + return dst_type->cast()->type_id(); + } + return kernel::DtypeToTypeId(GetValue(dst_type)); +} + +DShape PadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + std::vector shape0 = inputs[0]->shape; + size_t n = shape0.size(); + std::vector pad_before = GetListInt(attrs.find("head")->second); + std::vector pad_after = GetListInt(attrs.find("tail")->second); + if (pad_before.size() != n || pad_after.size() != n) { + MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << pad_before.size() << " vs " + << pad_after.size(); + } + std::vector output; + for (size_t i = 0; i < n; i++) { + output.emplace_back(shape0[i] + pad_before[i] + pad_after[i]); + } + return output; +} + +DShape UnPadAkgOp::InferShape(const NodePtrList &inputs, const DAttrs &attrs) { + std::vector shape0 = inputs[0]->shape; + size_t n = shape0.size(); + std::vector unpad_after = GetListInt(attrs.find("tail")->second); + if (unpad_after.size() != n) { + MS_LOG(EXCEPTION) << "Input dimension and pad mismatch: " << n << " vs " << unpad_after.size(); + } + std::vector output; + for (size_t i = 0; i < n; i++) { + output.emplace_back(shape0[i] - unpad_after[i]); + } + return output; +} + +void ComplexOp::CheckType(const NodePtrList &inputs, const DAttrs &attrs) { + if (inputs[0]->type != TypeId::kNumberTypeFloat32) { + throw GKException("Complex's input[0] should be float32"); + } + if (inputs[0]->type != inputs[1]->type) { + MS_LOG(EXCEPTION) << "Complex's input[0] and inputs[1]'s type mismatch"; + } +} + } // namespace graphkernel } // namespace opt } // namespace 
mindspore diff --git a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h index c477bd08488..fd59c677ce8 100644 --- a/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h +++ b/mindspore/ccsrc/backend/optimizer/graph_kernel/model/op_node.h @@ -20,12 +20,23 @@ #include #include #include +#include +#include #include "backend/optimizer/graph_kernel/model/node.h" +#include "backend/kernel_compiler/common_utils.h" +#include "ir/dtype/type.h" namespace mindspore { namespace opt { namespace graphkernel { +#define CHECK_ATTR(attrs, attr_name) \ + do { \ + if (attrs.count(attr_name) == 0) { \ + MS_LOG(EXCEPTION) << "The attr [" << attr_name << "] does not exist in [" << #attrs << "]"; \ + } \ + } while (0) + class PrimOp : public Node { public: enum ComputeType { @@ -39,43 +50,109 @@ class PrimOp : public Node { PrimOp(const std::string &op, const std::string &node_name, ComputeType compute) : Node({{}, TypeId::kNumberTypeBegin, kOpFormat_DEFAULT}, node_name), op_(op), compute_type_(compute) {} + virtual void Check(const NodePtrList &inputs, const DAttrs &attrs); + virtual void CheckShape(const NodePtrList &inputs, const DAttrs &attrs) {} + virtual void CheckType(const NodePtrList &inputs, const DAttrs &attrs); + virtual void CheckFormat(const NodePtrList &inputs, const DAttrs &attrs); + virtual void Infer(const NodePtrList &inputs, const DAttrs &attrs); + virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op); + virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; } + virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; } + virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; } + void Dump(std::ostringstream &os) const override; NType NodeType() override { return NType::Primitive; } const std::string 
&op() const { return op_; } ComputeType compute_type() const { return compute_type_; } - virtual NodePtr InferValue(const NodePtrList &inputs, const DAttrs &attrs, const std::string &op); protected: std::string op_; ComputeType compute_type_; - virtual DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->shape; } - virtual TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->type; } - virtual DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) { return inputs[0]->format; } }; using PrimOpPtr = std::shared_ptr; class ElemwiseOp : public PrimOp { public: ElemwiseOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, ELEMWISE) {} + void Infer(const NodePtrList &inputs, const DAttrs &attrs) override; - // TODO(dayschan) rewrite InferShape/InferFormat + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class CastOp : public ElemwiseOp { + public: + CastOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Cast", node_name) {} + + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class InplaceAssignOp : public ElemwiseOp { + public: + InplaceAssignOp(const std::string &op, const std::string &node_name) : ElemwiseOp("InplaceAssign", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->shape; } + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->type; } + DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[2]->format; } +}; + +class SelectOp : public ElemwiseOp { + public: + SelectOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Select", node_name) {} + + void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override; + TypeId 
InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return inputs[1]->type; } +}; + +class CompareOp : public ElemwiseOp { + public: + CompareOp(const std::string &op, const std::string &node_name) : ElemwiseOp(op, node_name) {} + + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeBool; } +}; + +class LessOp : public CompareOp { + public: + LessOp(const std::string &op, const std::string &node_name) : CompareOp("Less", node_name) {} +}; + +class EqualOp : public CompareOp { + public: + EqualOp(const std::string &op, const std::string &node_name) : CompareOp("Equal", node_name) {} +}; + +class LessEqualOp : public CompareOp { + public: + LessEqualOp(const std::string &op, const std::string &node_name) : CompareOp("LessEqual", node_name) {} +}; + +class GreaterOp : public CompareOp { + public: + GreaterOp(const std::string &op, const std::string &node_name) : CompareOp("Greater", node_name) {} +}; + +class GreaterEqualOp : public CompareOp { + public: + GreaterEqualOp(const std::string &op, const std::string &node_name) : CompareOp("GreaterEqual", node_name) {} }; class ReshapeOp : public PrimOp { public: ReshapeOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, RESHAPE) {} - protected: DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { + return attrs.find("format") == attrs.end() ? 
kOpFormat_DEFAULT + : GetValue(attrs.find("format")->second); + } }; class BroadcastToOp : public PrimOp { public: BroadcastToOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, BROADCAST) {} - protected: DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; }; @@ -83,8 +160,10 @@ class ReduceOp : public PrimOp { public: ReduceOp(const std::string &op, const std::string &node_name) : PrimOp(op, node_name, REDUCE) {} - protected: + void Check(const NodePtrList &inputs, const DAttrs &attrs) override; + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override { return kOpFormat_DEFAULT; }; }; class OpaqueOp : public PrimOp { @@ -95,6 +174,74 @@ class OpaqueOp : public PrimOp { class Conv2dOp : public OpaqueOp { public: Conv2dOp(const std::string &op, const std::string &node_name) : OpaqueOp("Conv2D", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class TransposeOp : public OpaqueOp { + public: + TransposeOp(const std::string &op, const std::string &node_name) : OpaqueOp("Transpose", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + DFormat InferFormat(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class MatMulOp : public OpaqueOp { + public: + MatMulOp(const std::string &op, const std::string &node_name) : OpaqueOp("MatMul", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class PadAkgOp : public OpaqueOp { + public: + PadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("PadAkg", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class 
UnPadAkgOp : public OpaqueOp { + public: + UnPadAkgOp(const std::string &op, const std::string &node_name) : OpaqueOp("UnPadAkg", node_name) {} + + DShape InferShape(const NodePtrList &inputs, const DAttrs &attrs) override; +}; + +class CImagOp : public ElemwiseOp { + public: + CImagOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CImag", node_name) {} + + void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override { + if (inputs[0]->type != TypeId::kNumberTypeComplex64) { + throw GKException("CImag's input[0] should be complex64"); + } + }; + + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; } +}; + +class CRealOp : public ElemwiseOp { + public: + CRealOp(const std::string &op, const std::string &node_name) : ElemwiseOp("CReal", node_name) {} + + void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override { + if (inputs[0]->type != TypeId::kNumberTypeComplex64) { + throw GKException("CReal's input[0] should be complex64"); + } + }; + + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeFloat32; } +}; + +class ComplexOp : public ElemwiseOp { + public: + ComplexOp(const std::string &op, const std::string &node_name) : ElemwiseOp("Complex", node_name) {} + + void CheckType(const NodePtrList &inputs, const DAttrs &attrs) override; + + TypeId InferType(const NodePtrList &inputs, const DAttrs &attrs) override { return TypeId::kNumberTypeComplex64; } }; } // namespace graphkernel } // namespace opt diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc index 47b1d74de2a..8551e59f098 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.cc @@ -47,14 +47,17 @@ std::vector 
DynamicMemPoolBestFit::AllocContinuousTensorMem(size_t } std::lock_guard locker(mutex_); // Remove the pre-alloc memory. - auto mem_block = FindMemBlock(device_addr); + const auto &mem_block = FindMemBlock(device_addr); MS_EXCEPTION_IF_NULL(mem_block); - auto iter = mem_block->block_all_mem_buf_map_.find(device_addr); + const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr); if (iter == mem_block->block_all_mem_buf_map_.end()) { MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "]."; } auto mem_buf = iter->second; MS_EXCEPTION_IF_NULL(mem_buf); + if (mem_buf->size_ < total_size) { + MS_LOG(EXCEPTION) << "The size of membuf is less than total_size."; + } auto rest_size = mem_buf->size_ - total_size; (void)mem_block->block_all_mem_buf_map_.erase(iter); // Split the pre-alloc memory into continuous memory by the size list. @@ -79,7 +82,7 @@ size_t DynamicMemPoolBestFit::AlignMemorySize(size_t size) const { } DeviceMemPtr DynamicMemPoolBestFit::FindIdleMemBuf(size_t size) { - auto iter = global_idle_mem_buf_map_.lower_bound(size); + const auto &iter = global_idle_mem_buf_map_.lower_bound(size); if (iter != global_idle_mem_buf_map_.end()) { auto mem_buf = iter->second; MS_EXCEPTION_IF_NULL(mem_buf); @@ -120,7 +123,8 @@ DeviceMemPtr DynamicMemPoolBestFit::AddMemBlockAndMemBuf(size_t size) { mem_alloc_unit_size_ = DYNAMIC_MEM_ALLOC_UNIT_SIZE; auto mem_block = std::make_shared(device_addr, real_alloc_size); MS_EXCEPTION_IF_NULL(mem_block); - auto iter = std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); + const auto &iter = + std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); (void)global_mem_block_list_.insert(iter, mem_block); // Add new memory buf auto mem_buf = std::make_shared(device_addr, kMemBufUsed, real_alloc_size); @@ -163,9 +167,12 @@ bool DynamicMemPoolBestFit::IsDivide(size_t tensor_size, size_t mem_buf_size) co 
void DynamicMemPoolBestFit::DivideMemBuf(size_t size, const DynamicMemBufPtr &mem_buf) { MS_EXCEPTION_IF_NULL(mem_buf); - auto mem_block = FindMemBlock(mem_buf->device_addr_); + const auto &mem_block = FindMemBlock(mem_buf->device_addr_); MS_EXCEPTION_IF_NULL(mem_block); // Divide new memory buf + if (mem_buf->size_ < size) { + MS_LOG(EXCEPTION) << "The size of membuf is less than size."; + } size_t newbuf_size = mem_buf->size_ - size; mem_buf->size_ = size; DeviceMemPtr newbuf_addr = AddressOffset(mem_buf->device_addr_, size); @@ -184,7 +191,8 @@ bool DynamicMemPoolBestFit::CmpMemBlock(const DeviceMemPtr &device_addr, const D DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); - auto iter = std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); + auto &&iter = + std::upper_bound(global_mem_block_list_.begin(), global_mem_block_list_.end(), device_addr, CmpMemBlock); if (iter != global_mem_block_list_.begin()) { return *(--iter); } @@ -194,7 +202,7 @@ DynamicMemBlockPtr DynamicMemPoolBestFit::FindMemBlock(const DeviceMemPtr &devic void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); std::lock_guard locker(mutex_); - auto mem_block = FindMemBlock(device_addr); + const auto &mem_block = FindMemBlock(device_addr); if (mem_block == nullptr) { // May be destroy the memory pool first, then destroy the address, so this is normal case. 
MS_LOG(DEBUG) << "Can't find the mem_block of the device address[" << device_addr << "]."; @@ -206,7 +214,7 @@ void DynamicMemPoolBestFit::FreeTensorMem(const DeviceMemPtr &device_addr) { void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(mem_block); MS_EXCEPTION_IF_NULL(device_addr); - auto iter = mem_block->block_all_mem_buf_map_.find(device_addr); + const auto &iter = mem_block->block_all_mem_buf_map_.find(device_addr); if (iter == mem_block->block_all_mem_buf_map_.end()) { MS_LOG(EXCEPTION) << "Can't find the device address[" << device_addr << "]."; } @@ -216,6 +224,9 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c MS_LOG(EXCEPTION) << "Find the mem_buf is not used, mem_buf_address[" << mem_buf->device_addr_ << "]."; } mem_buf->status_ = kMemBufIdle; + if (total_used_mem_statistics_ < mem_buf->size_) { + MS_LOG(EXCEPTION) << "The total used mem size is less than the size of membuf."; + } total_used_mem_statistics_ -= mem_buf->size_; // Combine backward(combine the next_mem_buf to mem_buf) auto next_iter = iter; @@ -254,7 +265,7 @@ void DynamicMemPoolBestFit::CombineMemBuf(const DynamicMemBlockPtr &mem_block, c void DynamicMemPoolBestFit::EraseIdleMemBuf(size_t size, const DeviceMemPtr &device_addr) { MS_EXCEPTION_IF_NULL(device_addr); - auto iter = global_idle_mem_buf_map_.equal_range(size); + auto &&iter = global_idle_mem_buf_map_.equal_range(size); while (iter.first != iter.second) { MS_EXCEPTION_IF_NULL(iter.first->second); // Remove map of the idle memory buf by size and device address @@ -272,7 +283,7 @@ void DynamicMemPoolBestFit::ReleaseDeviceRes() { MS_LOG(INFO) << "The dynamic memory pool total size is " << total_mem_statistics_ << ", total used size is " << total_used_mem_statistics_ << ", used peak size is " << used_mem_peak_statistics_ << "."; for (auto iter = global_mem_block_list_.begin(); iter != global_mem_block_list_.end(); 
++iter) { - auto device_addr = (*iter)->device_addr(); + auto &device_addr = (*iter)->device_addr_base_; if (device_addr != nullptr) { if (!FreeDeviceMem(device_addr)) { MS_LOG(EXCEPTION) << "Free device memory[" << device_addr << "] error."; diff --git a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h index 6141a9a2711..a90429f9f30 100644 --- a/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h +++ b/mindspore/ccsrc/backend/optimizer/mem_reuse/mem_dynamic_allocator.h @@ -65,10 +65,13 @@ class DynamicMemBlock { ~DynamicMemBlock() { block_all_mem_buf_map_.clear(); } const DeviceMemPtr &device_addr() const { return device_addr_base_; } size_t size() const { return mem_block_size_; } + + private: + friend class DynamicMemPoolBestFit; + // The map of all memory buf in this memory block by device address. DeviceAddrMapMemBuf block_all_mem_buf_map_; - private: DeviceMemPtr device_addr_base_{nullptr}; size_t mem_block_size_{0}; }; diff --git a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc index c4befc7e8f9..06b7edafb68 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/communication_op_fusion.cc @@ -391,7 +391,7 @@ bool CommunicationOpFusion::DoFusion(const FuncGraphPtr &func_graph, const Commu MS_EXCEPTION_IF_NULL(communication_op_node_item); tuple_getitem->set_abstract(communication_op_node_item->abstract()); if (kernel_graph->IsInternalOutput(communication_op_node_item, 0)) { - kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, offset); + kernel_graph->ReplaceInternalOutput(communication_op_node_item, new_communication_op, 0, LongToSize(offset)); } if (!manager->Replace(communication_op_node_item, tuple_getitem)) { MS_LOG(EXCEPTION) << "manager replace node failed"; 
diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc index 02316be0e11..b6b48703573 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/convert_const_input_to_attr.cc @@ -46,17 +46,26 @@ const AnfNodePtr ConvertConstInputToAttr::Process(const FuncGraphPtr &, const An return nullptr; } } + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + auto device = ms_context->get_param(MS_CTX_DEVICE_TARGET); if (AnfAlgo::GetCNodeName(cnode) == prim::kPrimGatherD->name()) { - auto ms_context = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(ms_context); - if (ms_context->get_param(MS_CTX_DEVICE_TARGET) != kGPUDevice) { + if (device != kGPUDevice) { return nullptr; } } - if (AnfAlgo::IsDynamicShape(cnode) && - DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) { - MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); - return nullptr; + if (AnfAlgo::IsDynamicShape(cnode)) { + if (device == kGPUDevice) { + if (DynamicShapeConstInputToAttrGPU.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttrGPU.end()) { + MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); + return nullptr; + } + } else { + if (DynamicShapeConstInputToAttr.find(AnfAlgo::GetCNodeName(cnode)) == DynamicShapeConstInputToAttr.end()) { + MS_LOG(INFO) << "current node is dynamic shape " << cnode->fullname_with_scope(); + return nullptr; + } + } } ConstInputToAttr(cnode, reg.GetConstInputAttrInfo()); diff --git a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc index 94ec9ed5ca0..c86db4644ce 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc 
+++ b/mindspore/ccsrc/backend/optimizer/pass/convert_tuple_input_to_dynamic_input.cc @@ -50,7 +50,7 @@ int64_t SplitTupleInputs(const FuncGraphPtr &graph, const AnfNodePtr &tuple_inpu } for (size_t index = 0; index < input_size; ++index) { auto dynamic_input_node = CreatTupleGetItemNode(graph, tuple_input, index); - plant_inputs->emplace_back(dynamic_input_node); + (void)plant_inputs->emplace_back(dynamic_input_node); } return input_size; } diff --git a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc index e8f311c04d3..4b04d4de543 100644 --- a/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc +++ b/mindspore/ccsrc/backend/optimizer/pass/optimize_dependence.cc @@ -191,6 +191,9 @@ const AnfNodePtr OptimizeDependence::Process(const FuncGraphPtr &func_graph, con std::vector new_inputs = cnode->inputs(); bool inputs_changed = false; for (auto index : candidate_inputs) { + if (index >= new_inputs.size()) { + MS_LOG(EXCEPTION) << "Index is out of the size of cnode inputs."; + } auto replace_node = GetConvertNode(func_graph, cnode, index); if (replace_node != nullptr) { new_inputs[index] = replace_node; diff --git a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc index 8f71663b0a4..25d38c9bd0d 100644 --- a/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc +++ b/mindspore/ccsrc/backend/session/anf_runtime_algorithm.cc @@ -620,7 +620,7 @@ std::vector AnfRuntimeAlgorithm::GetAllOutputFormats(const AnfNodeP << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -635,7 +635,7 @@ std::vector AnfRuntimeAlgorithm::GetAllInputFormats(const 
AnfNodePt << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -650,7 +650,7 @@ std::vector AnfRuntimeAlgorithm::GetAllInputDeviceTypes(const AnfNodePtr << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -665,7 +665,7 @@ std::vector AnfRuntimeAlgorithm::GetAllOutputDeviceTypes(const AnfNodePt << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -680,7 +680,7 @@ std::string AnfRuntimeAlgorithm::GetOriginDataFormat(const AnfNodePtr &node) { << "#node [" << node->DebugString() << "]" << " trace: " << trace::DumpSourceLines(node); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -699,7 +699,7 @@ std::string AnfRuntimeAlgorithm::GetOutputFormat(const AnfNodePtr &node, size_t if (!AnfAlgo::IsRealKernel(node)) { return AnfAlgo::GetPrevNodeOutputFormat(node, output_idx); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = 
kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -723,7 +723,7 @@ std::string AnfRuntimeAlgorithm::GetInputFormat(const AnfNodePtr &node, size_t i if (!IsRealKernel(node)) { return GetPrevNodeOutputFormat(node, input_idx); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -869,7 +869,7 @@ std::string AnfRuntimeAlgorithm::GetInputReshapeType(const AnfNodePtr &node, siz if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, input_idx); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -889,7 +889,7 @@ std::string AnfRuntimeAlgorithm::GetOutputReshapeType(const AnfNodePtr &node, si if (!IsRealKernel(node)) { return GetPrevNodeOutputReshapeType(node, output_idx); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -943,7 +943,7 @@ TypeId AnfRuntimeAlgorithm::GetOutputDeviceDataType(const AnfNodePtr &node, size if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, output_idx); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -966,7 +966,7 @@ TypeId AnfRuntimeAlgorithm::GetInputDeviceDataType(const AnfNodePtr &node, size_ if (!IsRealKernel(node)) { return GetPrevNodeOutputDeviceDataType(node, 0); } - auto kernel_info = 
static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -998,7 +998,7 @@ const DeviceAddress *AnfRuntimeAlgorithm::GetOutputAddr(const AnfNodePtr &node, << " trace: " << trace::DumpSourceLines(node); } } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetOutputAddr(output_idx); if (addr == nullptr) { @@ -1023,7 +1023,7 @@ DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableOutputAddr(const AnfNodePtr &nod } } // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(output_idx); if (addr == nullptr) { @@ -1046,7 +1046,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ return false; } // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->OutputAddrExist(output_idx); } @@ -1054,7 +1054,7 @@ bool AnfRuntimeAlgorithm::OutputAddrExist(const AnfNodePtr &node, size_t output_ bool AnfRuntimeAlgorithm::WorkspaceAddrExist(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); // Critical path performance optimization: `KernelInfo` is unique subclass of `KernelInfoDevice` - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->WorkspaceAddrExist(output_idx); } @@ -1074,7 +1074,7 @@ DeviceAddressPtr 
AnfRuntimeAlgorithm::GetPrevNodeMutableOutputAddr(const AnfNode // set output device addr of anf_node void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetOutputAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail." @@ -1085,7 +1085,7 @@ void AnfRuntimeAlgorithm::SetOutputAddr(const DeviceAddressPtr &addr, size_t out // set workspace device addr of anf_node void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t output_idx, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); if (!kernel_info->SetWorkspaceAddr(addr, output_idx)) { MS_LOG(EXCEPTION) << "Node " << node->DebugString() << "set adr" << output_idx << " fail。" @@ -1096,7 +1096,7 @@ void AnfRuntimeAlgorithm::SetWorkspaceAddr(const DeviceAddressPtr &addr, size_t // get workspace device addr of anf_node DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, size_t output_idx) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetWorkspaceAddr(output_idx); if (addr == nullptr) { @@ -1110,7 +1110,7 @@ DeviceAddress *AnfRuntimeAlgorithm::GetWorkspaceAddr(const AnfNodePtr &node, siz // get workspace device mutable addr of anf_node DeviceAddressPtr AnfRuntimeAlgorithm::GetMutableWorkspaceAddr(const AnfNodePtr &node, size_t index) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); 
MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableWorkspaceAddr(index); if (addr == nullptr) { @@ -1248,7 +1248,7 @@ void AnfRuntimeAlgorithm::CopyAbstract(const AnfNodePtr &from_node, AnfNode *to_ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -1259,7 +1259,7 @@ kernel::OpPattern AnfRuntimeAlgorithm::GetOpPattern(const AnfNodePtr &node) { // get KernelBuildType of node, such as ATT,RT,FWK and so on KernelType AnfRuntimeAlgorithm::GetKernelType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); // select_kernel_build_info() has checked whether return pointer is null auto build_info = kernel_info->select_kernel_build_info(); @@ -1287,7 +1287,7 @@ void AnfRuntimeAlgorithm::SetOutputDataDesc(const AnfNodePtr &node, const std::v std::vector AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); if (kernel_info == nullptr) { return {}; } @@ -1300,7 +1300,7 @@ std::vector AnfRuntimeAlgorithm::GetOutputDataDesc(const AnfNode kernel::Processor AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(build_info); @@ -1309,7 +1309,7 @@ kernel::Processor 
AnfRuntimeAlgorithm::GetProcessor(const AnfNodePtr &node) { kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->select_kernel_build_info(); if (build_info == nullptr) { @@ -1321,7 +1321,7 @@ kernel::FusionType AnfRuntimeAlgorithm::GetFusionType(const AnfNodePtr &node) { // set select kernel_build_info void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &select_kernel_build_info, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->set_select_kernel_build_info(select_kernel_build_info); } @@ -1329,7 +1329,7 @@ void AnfRuntimeAlgorithm::SetSelectKernelBuildInfo(const KernelBuildInfoPtr &sel // get select kernel_build_info KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->GetMutableSelectKernelBuildInfo(); } @@ -1337,7 +1337,7 @@ KernelBuildInfoPtr AnfRuntimeAlgorithm::GetSelectKernelBuildInfo(const AnfNodePt // get kernelMode KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->MutableKernelMod(); } @@ -1345,7 +1345,7 @@ KernelMod *AnfRuntimeAlgorithm::GetKernelMod(const AnfNodePtr &node) { // set kernel mod void AnfRuntimeAlgorithm::SetKernelMod(const KernelModPtr &kernel_mod, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); 
- auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_kernel_mod(kernel_mod); } @@ -1441,42 +1441,42 @@ bool AnfRuntimeAlgorithm::IsLabelIndexInNode(const AnfNodePtr &node, size_t labe void AnfRuntimeAlgorithm::SetStreamId(uint32_t stream_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_id(stream_id); } uint32_t AnfRuntimeAlgorithm::GetStreamId(const AnfNodePtr &node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_id(); } void AnfRuntimeAlgorithm::SetStreamDistinctionLabel(uint32_t stream_label, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_stream_distinction_label(stream_label); } uint32_t AnfRuntimeAlgorithm::GetStreamDistinctionLabel(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->stream_distinction_label(); } void AnfRuntimeAlgorithm::SetGraphId(uint32_t graph_id, AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_graph_id(graph_id); } uint32_t AnfRuntimeAlgorithm::GetGraphId(const AnfNode *node) { MS_EXCEPTION_IF_NULL(node); - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); 
MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->graph_id(); } @@ -1510,7 +1510,7 @@ bool AnfRuntimeAlgorithm::IsFeatureMapOutput(const AnfNodePtr &node) { if (IsPrimitiveCNode(node, prim::kPrimLoad)) { return IsFeatureMapOutput(node->cast()->input(1)); } - auto kernel_info = static_cast(node->kernel_info()); + auto kernel_info = dynamic_cast(node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); return kernel_info->is_feature_map(); } @@ -1575,16 +1575,15 @@ bool AnfRuntimeAlgorithm::IsInplaceNode(const mindspore::AnfNodePtr &kernel, con } bool AnfRuntimeAlgorithm::IsCommunicationOp(const AnfNodePtr &node) { + static const std::set kCommunicationOpNames = {kAllReduceOpName, kAllGatherOpName, kBroadcastOpName, + kReduceScatterOpName, kHcomSendOpName, kReceiveOpName, + kAllToAllVOpName}; MS_EXCEPTION_IF_NULL(node); if (!node->isa()) { return false; } auto kernel_name = AnfAlgo::GetCNodeName(node); - if (kernel_name == kAllReduceOpName || kernel_name == kAllGatherOpName || kernel_name == kBroadcastOpName || - kernel_name == kReduceScatterOpName || kernel_name == kHcomSendOpName || kernel_name == kReceiveOpName) { - return true; - } - return false; + return (kCommunicationOpNames.find(kernel_name) != kCommunicationOpNames.end()); } bool AnfRuntimeAlgorithm::IsFusedCommunicationOp(const AnfNodePtr &node) { @@ -2125,6 +2124,8 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapinput(i + 1); + MS_EXCEPTION_IF_NULL(cnode_input); MS_EXCEPTION_IF_NULL(real_input); if (depend_tensors != nullptr) { auto iter_tensor = depend_tensors->find(i); @@ -2133,24 +2134,29 @@ void AnfRuntimeAlgorithm::InferShape(const CNodePtr &node, std::mapdata_sync(); - real_input->abstract()->set_value(tensor_ptr); + auto real_abs = real_input->abstract(); + if (real_abs->isa()) { + real_input->abstract()->set_value(tensor_ptr); + } else if (real_abs->isa()) { + auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); + auto abstract_tuple = 
real_abs->cast(); + MS_EXCEPTION_IF_NULL(abstract_tuple); + auto tuple_elements = abstract_tuple->elements()[tuple_get_item_index]; + tuple_elements->set_value(tensor_ptr); + } } } - auto cnode_input = node->input(i + 1); - MS_EXCEPTION_IF_NULL(cnode_input); if (AnfAlgo::CheckPrimitiveType(cnode_input, prim::kPrimTupleGetItem)) { auto base_shape = real_input->Shape(); if (!base_shape->isa()) { MS_LOG(EXCEPTION) << "Node:" << node->DebugString() << " input is a tuple_get_item but real input node shape is not a TupleShape"; } - auto tuple_ptr = base_shape->cast(); - MS_EXCEPTION_IF_NULL(tuple_ptr); - auto tuple_get_item_index = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); - auto real_shape = tuple_ptr->shape().at(tuple_get_item_index); - auto abstract_tensor = cnode_input->abstract()->cast(); - MS_EXCEPTION_IF_NULL(abstract_tensor); - args_spec_list.emplace_back(std::make_shared(abstract_tensor->element(), real_shape)); + auto abs = real_input->abstract()->cast(); + MS_EXCEPTION_IF_NULL(abs); + auto tuple_get_item_indexk = AnfAlgo::GetTupleGetItemOutIndex(cnode_input->cast()); + auto abs_i = abs->elements()[tuple_get_item_indexk]; + args_spec_list.emplace_back(abs_i); } else if (cnode_input->isa() && AnfAlgo::GetCNodeName(cnode_input) == prim::kPrimReshape->name()) { args_spec_list.emplace_back(cnode_input->abstract()); } else { diff --git a/mindspore/ccsrc/backend/session/ascend_session.cc b/mindspore/ccsrc/backend/session/ascend_session.cc index eae542e2164..5504bd5537a 100644 --- a/mindspore/ccsrc/backend/session/ascend_session.cc +++ b/mindspore/ccsrc/backend/session/ascend_session.cc @@ -1173,8 +1173,12 @@ void AscendSession::DumpSetup(const std::shared_ptr &kernel_graph) void AscendSession::Dump(const std::shared_ptr &kernel_graph) const { MS_LOG(DEBUG) << "Start!"; MS_EXCEPTION_IF_NULL(kernel_graph); - E2eDump::DumpData(kernel_graph.get(), rank_id_); - MS_LOG(DEBUG) << "Finish!"; + bool finish = E2eDump::DumpData(kernel_graph.get(), rank_id_); + if 
(finish) { + MS_LOG(DEBUG) << "Finish!"; + } else { + MS_LOG(ERROR) << "Dump Data failed!"; + } } void AscendSession::DumpAllGraphs(const std::vector &all_graphs) { diff --git a/mindspore/ccsrc/backend/session/executor.cc b/mindspore/ccsrc/backend/session/executor.cc index 94ba3e605e8..ebe54e57dd6 100644 --- a/mindspore/ccsrc/backend/session/executor.cc +++ b/mindspore/ccsrc/backend/session/executor.cc @@ -380,8 +380,8 @@ void Executor::RunGraphAsync(const SessionPtr &session, const GraphId &graph_id, session->CreateOutputTensors(graph_id, inputs, outputs, &task->tensor_to_node_); // maintain a copy of output vector task->outputs_ = *outputs; - // sync run graph without output tensor(int dataset graph) - if (!TensorInVector(outputs) && !graph->HasPostGraph()) { + // sync run graph without output tensor(int dataset graph) or the graph require gil. + if ((!TensorInVector(outputs) && !graph->HasPostGraph()) || graph->is_need_gil()) { task->sync_run_ = true; RunTask(task, true, true); return; diff --git a/mindspore/ccsrc/backend/session/gpu_session.cc b/mindspore/ccsrc/backend/session/gpu_session.cc index abf3879ded2..6f4c60987c4 100644 --- a/mindspore/ccsrc/backend/session/gpu_session.cc +++ b/mindspore/ccsrc/backend/session/gpu_session.cc @@ -175,6 +175,7 @@ void GPUSession::Optimize(const std::shared_ptr &kernel_graph) { } void GPUSession::HardwareOptimize(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto pm = std::make_shared(); pm->AddPass(std::make_shared()); @@ -212,6 +213,7 @@ void GPUSession::RunOpOptimize(const std::shared_ptr &kernel_graph) } void GPUSession::RunOpHardwareOptimize(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); auto optimizer = std::make_shared(); auto pm = std::make_shared(); pm->AddPass(std::make_shared("reduce_precision")); @@ -334,6 +336,7 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, #endif auto pk_node = 
input_node->cast(); auto device_address = AnfAlgo::GetMutableOutputAddr(pk_node, 0); + MS_EXCEPTION_IF_NULL(device_address); auto tensor_address = std::dynamic_pointer_cast(tensor->device_address()); bool need_sync = false; if (ms_context->get_param(MS_CTX_ENABLE_PYNATIVE_INFER)) { @@ -354,7 +357,6 @@ void GPUSession::LoadInputData(const std::shared_ptr &kernel_graph, ms_context->get_param(MS_CTX_EXECUTION_MODE) == kPynativeMode) { tensor->set_device_address(device_address); } - MS_EXCEPTION_IF_NULL(device_address); auto size = UpdateGraphInputAbstract(input_node, tensor); if (!device_address->SyncHostToDevice(trans::GetRuntimePaddingShape(pk_node, 0), size, tensor->data_type(), tensor->data_c())) { @@ -381,7 +383,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull func_graph) { auto root_graph = ConstructKernelGraph(func_graph, &all_graphs); MS_EXCEPTION_IF_NULL(root_graph); if (all_graphs.size() != 1) { - MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule. graph num" << all_graphs.size(); + MS_LOG(EXCEPTION) << "Gpu backend does not support multi-graph schedule, graph num is " << all_graphs.size(); } // Insert maketuple graph output in case of multi-outputs. // The ConvertTupleOutputToMaketuple pass will insert TupleGetItem. 
@@ -391,6 +393,7 @@ GraphId GPUSession::CompileGraphImpl(NotNull func_graph) { } GraphId GPUSession::CompileGraphImpl(KernelGraphPtr graph) { + MS_EXCEPTION_IF_NULL(graph); // Prepare ms context info for dump .pb graph auto context_ptr = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context_ptr); @@ -627,6 +630,7 @@ void GPUSession::RunOpImpl(const GraphInfo &graph_info, OpRunInfo *op_run_info, EraseValueNodeTensor(tensors_mask, input_tensors); // wait for allreduce for (auto &tensor : *input_tensors) { + MS_EXCEPTION_IF_NULL(tensor); if (tensor->NeedWaitDevice()) { tensor->WaitDevice(); } diff --git a/mindspore/ccsrc/backend/session/kernel_graph.cc b/mindspore/ccsrc/backend/session/kernel_graph.cc index 77fbdddc7f1..ee2dcab041f 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.cc +++ b/mindspore/ccsrc/backend/session/kernel_graph.cc @@ -470,7 +470,7 @@ void KernelGraph::CreateKernelInfoFromNewParameter(const CNodePtr &cnode) { } } -void KernelGraph::ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const { +void KernelGraph::ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const { if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(cnode)) == kOpAssignKernelNameList.end()) { MS_LOG(EXCEPTION) << "Only supported to change the node [Assign , AssignSub, AssignAdd] node's input feature map " "flag but got the node :" @@ -482,7 +482,8 @@ void KernelGraph::ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const { return; } if (!AnfAlgo::IsFeatureMapOutput(input_node) && AnfAlgo::IsFeatureMapOutput(assign_value_node)) { - auto kernel_info = static_cast(input_node->kernel_info()); + auto kernel_info = dynamic_cast(input_node->kernel_info()); + MS_EXCEPTION_IF_NULL(kernel_info); kernel_info->set_feature_map_flag(true); } } @@ -493,7 +494,7 @@ void KernelGraph::SetKernelInfoForNode(const AnfNodePtr &node) const { node->set_kernel_info(kernel_info); if (node->isa()) { if (kOpAssignKernelNameList.find(AnfAlgo::GetCNodeName(node)) != kOpAssignKernelNameList.end()) { -
ResetAssignInputFeaatureMapFlag(node->cast()); + ResetAssignInputFeatureMapFlag(node->cast()); } #if defined(__APPLE__) std::vector feature_map_input_indexs; @@ -1347,6 +1348,9 @@ void KernelGraph::SetOptimizerFlag() { for (const auto &cnode : execution_order_) { MS_EXCEPTION_IF_NULL(cnode); auto node_name = AnfAlgo::GetCNodeName(cnode); + if (AnfAlgo::HasNodeAttr(kAttrAsync, cnode) && AnfAlgo::GetNodeAttr(cnode, kAttrAsync)) { + continue; + } if (kOptOperatorSet.find(node_name) != kOptOperatorSet.end()) { has_optimizer_ = true; } else if (node_name.find("Assign") == string::npos) { diff --git a/mindspore/ccsrc/backend/session/kernel_graph.h b/mindspore/ccsrc/backend/session/kernel_graph.h index 0bd1a75f8cd..bc9e2c4de0d 100644 --- a/mindspore/ccsrc/backend/session/kernel_graph.h +++ b/mindspore/ccsrc/backend/session/kernel_graph.h @@ -111,7 +111,7 @@ class KernelGraph : public FuncGraph { CNodePtr NewCNodeWithInfos(const std::vector &inputs, const CNodePtr &ori_cnode = nullptr); void CreateKernelInfoFromNewParameter(const CNodePtr &cnode); CNodePtr NewCNode(const CNodePtr &cnode); - void ResetAssignInputFeaatureMapFlag(const CNodePtr &cnode) const; + void ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const; ParameterPtr NewParameter(const ParameterPtr &parameter = nullptr); ParameterPtr NewParameter(const abstract::AbstractBasePtr &abstract); ValueNodePtr NewValueNode(const AbstractBasePtr &abstract, const ValuePtr &value); @@ -341,6 +341,10 @@ class KernelGraph : public FuncGraph { void set_is_all_nop_node(bool is_all_nop_node) { is_all_nop_node_ = is_all_nop_node; } std::map graph_output_map() { return graph_output_to_front_node_map_; } + // The interface to set/get the graph GIL flag. 
+ void set_is_need_gil(bool flag) { is_need_gil_ = flag; } + bool is_need_gil() { return is_need_gil_; } + private: // remove value node form graph bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node); @@ -446,6 +450,9 @@ class KernelGraph : public FuncGraph { // If all the nodes of graph is the nop node. bool is_all_nop_node_{false}; + + // Indicate whether the kernels in the graphs acquire Python GIL. + bool is_need_gil_{false}; }; } // namespace session using KernelGraphPtr = std::shared_ptr; diff --git a/mindspore/ccsrc/backend/session/session_basic.cc b/mindspore/ccsrc/backend/session/session_basic.cc index a470715accc..a204f11c6f4 100644 --- a/mindspore/ccsrc/backend/session/session_basic.cc +++ b/mindspore/ccsrc/backend/session/session_basic.cc @@ -182,7 +182,7 @@ BaseRef CreateNodeOutputTensor(const session::KernelWithIndex &node_output_pair, const std::vector &input_tensors, std::map *tensor_to_node) { auto &node = node_output_pair.first; - int output_index = SizeToInt(node_output_pair.second); + size_t output_index = node_output_pair.second; MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(graph); auto tensor_from_input = GetNodeOutputTensorFromInputs(node_output_pair, graph, input_tensors); @@ -435,6 +435,17 @@ void CheckInputTensorShape(const TensorPtr &tensor, const CNodePtr &kernel, size } } } + +void UpdateGraphAquireGilAttr(const NotNull &root_graph) { + for (const auto &cnode : root_graph->execution_order()) { + if (AnfAlgo::CheckPrimitiveType(cnode, prim::kPyFunc)) { + MS_LOG(INFO) << "The Graph require GIL. 
Graph id: " << root_graph->graph_id(); + root_graph->set_is_need_gil(true); + return; + } + } + return; +} } // namespace GraphId SessionBasic::graph_sum_ = 0; @@ -1103,6 +1114,7 @@ KernelGraphPtr SessionBasic::ConstructKernelGraph(const AnfNodePtrList &lst, con UnifyMindIR(graph); // Update Graph Dynamic Shape Attr UpdateGraphDynamicShapeAttr(NOT_NULL(graph)); + UpdateGraphAquireGilAttr(NOT_NULL(graph)); opt::BackendCommonOptimization(graph); graph->SetInputNodes(); SetInputNodeUsage(graph, manager); @@ -1566,8 +1578,8 @@ void SessionBasic::UpdateOutputs(const std::shared_ptr &kernel_grap if (AnfAlgo::IsDynamicShape(node)) { const auto &updated_shape = AnfAlgo::GetOutputInferShape(node, output_index); ShapeVector int_shape; - std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt); - tensor->set_shape(int_shape); + (void)std::transform(updated_shape.begin(), updated_shape.end(), std::back_inserter(int_shape), SizeToInt); + (void)tensor->set_shape(int_shape); } if (ms_context->get_param(MS_CTX_EXECUTION_MODE) != kPynativeMode) { tensor->data_sync(false); @@ -1596,8 +1608,18 @@ std::vector SessionBasic::GetInputNeedLockTensors(const Graph if (!graph->has_optimizer()) { return {}; } + auto input_nodes = graph->inputs(); + bool check_monad = false; + if (input_nodes.size() == inputs.size()) { + check_monad = true; + } std::vector result; - for (auto &tensor : inputs) { + for (size_t i = 0; i < inputs.size(); ++i) { + if (check_monad && HasAbstractMonad(input_nodes[i])) { + continue; + } + auto &tensor = inputs[i]; + MS_EXCEPTION_IF_NULL(tensor); if (!tensor->IsGraphOutput()) { result.emplace_back(tensor); } @@ -1868,8 +1890,7 @@ AnfNodePtr GetSupportedInternalNode(const AnfNodePtr &front_node) { constexpr auto kMixTarget = "MixTarget"; constexpr auto kNoTarget = "NoTarget"; -std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager, - const AnfNodePtr &partial_node) { +std::string 
SessionBasic::AddPartialParametersMap(const AnfNodePtr &partial_node) { MS_EXCEPTION_IF_NULL(partial_node); auto iter = partial_target_map_.find(partial_node); if (iter != partial_target_map_.end()) { @@ -1881,11 +1902,12 @@ std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &fro MS_EXCEPTION_IF_NULL(partial_graph); auto parameters = partial_graph->parameters(); auto partial_inputs = partial_cnode->inputs(); - if (parameters.size() + 2 != partial_inputs.size()) { + const size_t kNonParameterNum = 2; + if (parameters.size() + kNonParameterNum != partial_inputs.size()) { return kMixTarget; } for (size_t i = 0; i < parameters.size(); ++i) { - partial_parameters_map_[parameters[i]] = partial_inputs[2 + i]; + partial_parameters_map_[parameters[i]] = partial_inputs[kNonParameterNum + i]; } auto graph_nodes = TopoSort(partial_graph->get_return()); std::string graph_target = kNoTarget; @@ -1905,7 +1927,7 @@ std::string SessionBasic::AddPartialParametersMap(const FuncGraphManagerPtr &fro break; } } - (void)partial_target_map_.insert({partial_node, graph_target}); + (void)partial_target_map_.emplace(std::pair(partial_node, graph_target)); return graph_target; } @@ -1937,7 +1959,7 @@ void SessionBasic::HandleInternalOutput(const AnfNodePtr &input_front_node, cons auto users = ExtendNodeUsers(front_func_graph_manager, front_node); for (auto &user : users) { if (AnfAlgo::CheckPrimitiveType(user, prim::kPrimPartial) && kernel_target != kGPUDevice) { - auto partial_target = AddPartialParametersMap(front_func_graph_manager, user); + auto partial_target = AddPartialParametersMap(user); if (partial_target != kNoTarget && partial_target != kernel_target) { unique_target = false; } @@ -2098,9 +2120,6 @@ KernelGraphPtr SessionBasic::NewKernelGraph() { AnfNodePtr SessionBasic::FindPullNode(const AnfNodePtr &push_node, const std::vector &node_list) { MS_EXCEPTION_IF_NULL(push_node); for (auto &node : node_list) { - if (IsPrimitiveCNode(node, 
prim::kPrimUpdateState)) { - continue; - } if (node != nullptr && node->isa()) { for (auto input : node->cast()->inputs()) { if (push_node == AnfAlgo::VisitKernel(input, 0).first) { diff --git a/mindspore/ccsrc/backend/session/session_basic.h b/mindspore/ccsrc/backend/session/session_basic.h index e20cd762351..ef3b137626e 100644 --- a/mindspore/ccsrc/backend/session/session_basic.h +++ b/mindspore/ccsrc/backend/session/session_basic.h @@ -176,8 +176,7 @@ class SessionBasic : public std::enable_shared_from_this { void HandleInternalOutput(const AnfNodePtr &input_front_node, const AnfNodePtr &backend_node, const FuncGraphManagerPtr &front_func_graph_manager, const std::shared_ptr &backend_graph); - std::string AddPartialParametersMap(const FuncGraphManagerPtr &front_func_graph_manager, - const AnfNodePtr &partial_node); + std::string AddPartialParametersMap(const AnfNodePtr &partial_node); void GetParameterIndex(const KernelGraph *graph, const std::vector &inputs, std::map *parameter_index); void CreateOutputPlaceholder(const KernelGraphPtr &kernel_graph, const std::vector &input_tensors, diff --git a/mindspore/ccsrc/common/trans.cc b/mindspore/ccsrc/common/trans.cc index bed7d3ca87b..59b95d4a0a2 100644 --- a/mindspore/ccsrc/common/trans.cc +++ b/mindspore/ccsrc/common/trans.cc @@ -1013,7 +1013,7 @@ bool NchwTo4D(const FormatArgs &args, void *result) { for (size_t hi = 0; hi < h; hi++) { for (size_t wi = 0; wi < w; wi++) { auto src_idx = ni * c * h * w + ci * h * w + hi * w + wi; - auto dst_idx = 0; + size_t dst_idx = 0; if (args.device_format == kOpFormat_NHWC) { dst_idx = ni * h * w * c + hi * w * c + wi * c + ci; } else if (args.device_format == kOpFormat_HWCN) { @@ -1045,7 +1045,7 @@ bool ToNchw(const FormatArgs &args, void *result) { for (size_t hi = 0; hi < h; hi++) { for (size_t wi = 0; wi < w; wi++) { auto dst_idx = ni * c * h * w + ci * h * w + hi * w + wi; - auto src_idx = 0; + size_t src_idx = 0; if (args.device_format == kOpFormat_NHWC) { src_idx = ni * h 
* w * c + hi * w * c + wi * c + ci; } else if (args.device_format == kOpFormat_HWCN) { @@ -1801,7 +1801,7 @@ bool NchwFracZTransWithGroups(const FormatArgs &args, void *result, bool to_devi auto c_dim = args.host_shape[kC]; auto h_dim = args.host_shape[kH]; auto w_dim = args.host_shape[kW]; - size_t d_dim = 1; + const size_t d_dim = 1; size_t group_size = LongToSize(groups); auto cin_ori = c_dim; auto cout_ori = n_dim / group_size; diff --git a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc index d7ba761091d..d41370a996e 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc +++ b/mindspore/ccsrc/cxx_api/graph/acl/acl_graph_impl.cc @@ -17,6 +17,7 @@ #include "include/api/context.h" #include "cxx_api/model/acl/model_converter.h" #include "utils/log_adapter.h" +#include "mindspore/core/utils/convert_utils_base.h" namespace mindspore { API_FACTORY_REG(GraphCell::GraphImpl, Ascend310, AclGraphImpl); @@ -33,7 +34,7 @@ AclGraphImpl::~AclGraphImpl() { (void)FinalizeEnv(); } Status AclGraphImpl::Run(const std::vector &inputs, std::vector *outputs) { MS_EXCEPTION_IF_NULL(outputs); - Status ret = Load(device_id_); + Status ret = Load(IntToUint(device_id_)); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return ret; @@ -43,7 +44,7 @@ Status AclGraphImpl::Run(const std::vector &inputs, std::vector AclGraphImpl::GetInputs() { - Status ret = Load(device_id_); + Status ret = Load(IntToUint(device_id_)); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return {}; @@ -53,7 +54,7 @@ std::vector AclGraphImpl::GetInputs() { } std::vector AclGraphImpl::GetOutputs() { - Status ret = Load(device_id_); + Status ret = Load(IntToUint(device_id_)); if (ret != kSuccess) { MS_LOG(ERROR) << "Prepare model resource failed."; return {}; @@ -176,7 +177,7 @@ Status AclGraphImpl::Load(uint32_t device_id) { auto om_data = graph_data->GetOMData(); // init - device_id_ = device_id; + 
device_id_ = UintToInt(device_id); Status ret = InitEnv(); if (ret != kSuccess) { MS_LOG(ERROR) << "InitEnv failed."; diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc index 2f724fccd34..3f246dacf26 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc +++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.cc @@ -19,6 +19,7 @@ #include #include #include "utils/utils.h" +#include "mindspore/core/utils/convert_utils_base.h" namespace mindspore { static DataType TransToApiType(aclDataType data_type) { @@ -157,13 +158,14 @@ Status ModelProcess::InitInputsBuffer() { if (ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "Get input shape failed"; if (!is_run_on_device_) { - aclrtFree(data_mem_buffer); + (void)aclrtFree(data_mem_buffer); } return kMCDeviceError; } aclDataType data_type = aclmdlGetInputDataType(model_desc_, i); std::vector shape(dims.dims, dims.dims + dims.dimCount); - std::string input_name = aclmdlGetInputNameByIndex(model_desc_, i); + const char *input_name_char = aclmdlGetInputNameByIndex(model_desc_, i); + std::string input_name = (input_name_char != nullptr) ?
input_name_char : std::string(); if (input_name.empty()) { MS_LOG(WARNING) << "Get name of input " << i << " failed."; } @@ -175,7 +177,7 @@ Status ModelProcess::InitInputsBuffer() { return kSuccess; } -Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) { +Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const { MS_EXCEPTION_IF_NULL(data_mem_buffer); aclError ret; auto free_data_buffer = [this](void *dataMemBuffer) { @@ -246,7 +248,8 @@ Status ModelProcess::InitOutputsBuffer() { } aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i); std::vector shape(dims.dims, dims.dims + dims.dimCount); - std::string output_name = aclmdlGetOutputNameByIndex(model_desc_, i); + const char *output_name_char = aclmdlGetOutputNameByIndex(model_desc_, i); + std::string output_name = (output_name_char != nullptr) ? output_name_char : std::string(); if (output_name.empty()) { MS_LOG(WARNING) << "Get name of output " << i << " failed."; } @@ -344,7 +347,7 @@ Status ModelProcess::SetBatchSize(const std::vector &inputs) { } auto *p = reinterpret_cast(inputs[inputs.size() - 1].Data().get()); MS_EXCEPTION_IF_NULL(p); - auto dynamicBatchSize = p[0]; + size_t dynamicBatchSize = FloatToSize(p[0]); ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index); if (ret != ACL_ERROR_NONE) { MS_LOG(ERROR) << "get index failed"; @@ -442,7 +445,7 @@ Status ModelProcess::ResetOutputSize() { aclError ret; size_t output_size = aclmdlGetNumOutputs(model_desc_); for (size_t index = 0; index < output_size; index++) { - size_t dims = 1; + int64_t dims = 1; struct aclmdlIODims output_dims; ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims); if (ret != ACL_ERROR_NONE) { @@ -453,7 +456,7 @@ Status ModelProcess::ResetOutputSize() { dims *= output_dims.dims[i]; } output_type = aclmdlGetOutputDataType(model_desc_, index); - output_infos_[index].buffer_size = dims * 
aclDataTypeSize(output_type); + output_infos_[index].buffer_size = LongToSize(dims) * aclDataTypeSize(output_type); } return kSuccess; } diff --git a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h index 342170ecbdd..8da78cb1069 100644 --- a/mindspore/ccsrc/cxx_api/graph/acl/model_process.h +++ b/mindspore/ccsrc/cxx_api/graph/acl/model_process.h @@ -60,7 +60,7 @@ class ModelProcess { uint32_t model_id() const { return model_id_; } private: - Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset); + Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const; Status CheckAndInitInput(const std::vector &inputs); Status ConstructTensors(const std::vector &acl_tensor_list, std::vector *tensor_list); Status BuildOutputs(std::vector *outputs); diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc index 517bb9446cb..e25809bfe3f 100644 --- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc +++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.cc @@ -164,7 +164,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) { } // receive convert model result from child CreateBufferCall call = [&buffer_ret](size_t msg_len) -> uint8_t * { - buffer_ret.ResizeData(msg_len); + (void)buffer_ret.ResizeData(msg_len); return reinterpret_cast(buffer_ret.MutableData()); }; status = multi_process->ReceiveMsg(call); @@ -179,7 +179,7 @@ Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) { // receive original model from parent Buffer model; CreateBufferCall call = [&model](size_t msg_len) -> uint8_t * { - model.ResizeData(msg_len); + (void)model.ResizeData(msg_len); return reinterpret_cast(model.MutableData()); }; auto status = multi_process->ReceiveMsg(call); diff --git a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h 
b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h index f75d7a14054..e9652a10665 100644 --- a/mindspore/ccsrc/cxx_api/model/acl/model_converter.h +++ b/mindspore/ccsrc/cxx_api/model/acl/model_converter.h @@ -31,6 +31,7 @@ namespace mindspore { class ModelConverter { public: ModelConverter() : options_(nullptr) {} + ~ModelConverter() = default; Buffer LoadMindIR(const FuncGraphPtr &func_graph); @@ -40,9 +41,9 @@ class ModelConverter { transform::DfGraphPtr ConvertFuncGraphToAIR(const FuncGraphPtr &anf_graph); Buffer BuildAirModel(const transform::DfGraphPtr &graph, const std::map &init_options, const std::map &build_options); - AclModelOptions *options_; - Buffer LoadAscendIRInner(const Buffer &model_data); + + AclModelOptions *options_; }; } // namespace mindspore #endif // MINDSPORE_CCSRC_CXXAPI_SESSION_ACL_MODEL_CONVERTER_H diff --git a/mindspore/ccsrc/cxx_api/model/model.cc b/mindspore/ccsrc/cxx_api/model/model.cc index 699d68a1126..f6282fa5177 100644 --- a/mindspore/ccsrc/cxx_api/model/model.cc +++ b/mindspore/ccsrc/cxx_api/model/model.cc @@ -65,14 +65,14 @@ Status Model::Build(GraphCell graph_cell, const std::shared_ptr &model_ return impl_->Build(); } -Status Model::Build(const void *model_data, size_t data_size, ModelType model_type, - const std::shared_ptr &model_context, const Key &dec_key, const std::string &dec_mode) { +Status Model::Build(const void *, size_t, ModelType, const std::shared_ptr &, const Key &, + const std::string &) { MS_LOG(ERROR) << "Unsupported Feature."; return kMCFailed; } -Status Model::Build(const std::string &model_path, ModelType model_type, const std::shared_ptr &model_context, - const Key &dec_key, const std::string &dec_mode) { +Status Model::Build(const std::string &, ModelType, const std::shared_ptr &, const Key &, + const std::string &) { MS_LOG(ERROR) << "Unsupported Feature."; return kMCFailed; } diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc 
b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc index 60b6056dca4..909524e4004 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.cc @@ -25,14 +25,14 @@ namespace mindspore { namespace { -uint64_t kSharedMemorySize = 100ull << 20; // 100 MB +constexpr uint64_t kSharedMemorySize = 100ull << 20; // 100 MB } MultiProcess::MultiProcess() = default; MultiProcess::~MultiProcess() = default; -Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_process) { +Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process) { MS_EXCEPTION_IF_NULL(parent_process); MS_EXCEPTION_IF_NULL(child_process); Status ret; @@ -61,7 +61,8 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall } constexpr size_t kMsgStructNum = 2; shmat_data_addr_ = shmat_addr_ + sizeof(MessageFlag) * kMsgStructNum; - shmat_data_max_size_ = memory_size_ - (shmat_data_addr_ - shmat_addr_); + shmat_data_max_size_ = + memory_size_ - (reinterpret_cast(shmat_data_addr_) - reinterpret_cast(shmat_addr_)); MS_LOG_INFO << "Shm addr " << (uint64_t)shmat_addr_; if (pid == 0) { ChildProcess(child_process); @@ -85,7 +86,7 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_exited = true; break; } - sleep(1); + (void)sleep(1); } if (!child_exited) { MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed."; @@ -95,7 +96,7 @@ Status MultiProcess::MainProcess(ProcessFuncCall parent_process, ProcessFuncCall return ret; } -Status MultiProcess::ParentProcess(ProcessFuncCall parent_process) { +Status MultiProcess::ParentProcess(const ProcessFuncCall &parent_process) { auto parent_msg = reinterpret_cast(shmat_addr_); auto child_msg = reinterpret_cast(shmat_addr_ + sizeof(MessageFlag)); send_msg_ = parent_msg; @@ -112,12 +113,12 @@ 
Status MultiProcess::ParentProcess(ProcessFuncCall parent_process) { ret = kMEFailed; } stopped_ = true; - send_msg_->stop = true; + send_msg_->stop = 1; heartbeat_thread.join(); return ret; } -void MultiProcess::ChildProcess(ProcessFuncCall child_process) { +void MultiProcess::ChildProcess(const ProcessFuncCall &child_process) { auto parent_msg = reinterpret_cast(shmat_addr_); auto child_msg = reinterpret_cast(shmat_addr_ + sizeof(MessageFlag)); send_msg_ = child_msg; @@ -138,26 +139,30 @@ void MultiProcess::ChildProcess(ProcessFuncCall child_process) { } Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) { + MS_EXCEPTION_IF_NULL(buffer); MS_LOG_INFO << "Start to send message to peer process, msg len " << msg_len; send_msg_->msg_total_len = msg_len; uint64_t cur_offset = 0; while (msg_len > cur_offset) { uint64_t sub_msg_len = std::min(msg_len - cur_offset, shmat_data_max_size_); - + if (sub_msg_len == 0) { + MS_LOG(ERROR) << "Invalid message len " << sub_msg_len; + return kMEFailed; + } auto ret = memcpy_s(shmat_data_addr_, shmat_data_max_size_, static_cast(buffer) + cur_offset, sub_msg_len); if (ret != EOK) { - MS_LOG(INFO) << "memcpy_s failed, ret = " << ret; + MS_LOG(ERROR) << "memcpy_s failed, ret = " << ret; return kMEFailed; } cur_offset += sub_msg_len; send_msg_->msg_len = sub_msg_len; - send_msg_->read_finish_flag = false; - send_msg_->read_ready_flag = true; + send_msg_->read_finish_flag = 0; + send_msg_->read_ready_flag = 1; MS_LOG_INFO << "Send start " << cur_offset << ", msg len " << sub_msg_len << ", total len " << msg_len; while (!send_msg_->read_finish_flag && !peer_stopped_) { - usleep(1000); // 1ms + (void)usleep(1000); // 1ms } if (peer_stopped_) { if (!send_msg_->read_finish_flag) { @@ -171,14 +176,14 @@ Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) { return kSuccess; } -Status MultiProcess::ReceiveMsg(CreateBufferCall create_buffer_call) { +Status MultiProcess::ReceiveMsg(const CreateBufferCall 
&create_buffer_call) { uint64_t cur_offset = 0; uint8_t *msg_buffer = nullptr; uint64_t msg_len = 0; do { MS_LOG_INFO << "Receive start from " << cur_offset; while (!receive_msg_->read_ready_flag && !peer_stopped_) { - usleep(1000); // 1ms + (void)usleep(1000); // 1ms } if (peer_stopped_) { return kMEFailed; @@ -193,8 +198,8 @@ Status MultiProcess::ReceiveMsg(CreateBufferCall create_buffer_call) { return kMEFailed; } cur_offset += receive_msg_->msg_len; - receive_msg_->read_ready_flag = false; - receive_msg_->read_finish_flag = true; + receive_msg_->read_ready_flag = 0; + receive_msg_->read_finish_flag = 1; MS_LOG_INFO << "Receive end, current length " << cur_offset << ", total length " << msg_len << std::endl; } while (msg_len > cur_offset); return kSuccess; @@ -225,7 +230,7 @@ void MultiProcess::HeartbeatThreadFuncInner() { } } send_msg_->heartbeat += 1; - usleep(100000); // sleep 100 ms + (void)usleep(100000); // sleep 100 ms } } } // namespace mindspore diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h index 8958c13e625..e120fa021b3 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/multi_process.h @@ -39,9 +39,9 @@ class MultiProcess { MultiProcess(); ~MultiProcess(); - Status MainProcess(ProcessFuncCall parent_process, ProcessFuncCall child_process); + Status MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process); Status SendMsg(const void *buffer, uint64_t msg_len); - Status ReceiveMsg(CreateBufferCall create_buffer_call); + Status ReceiveMsg(const CreateBufferCall &create_buffer_call); private: uint8_t *shmat_addr_ = nullptr; @@ -56,8 +56,8 @@ class MultiProcess { static void HeartbeatThreadFunc(MultiProcess *multi_process); void HeartbeatThreadFuncInner(); - Status ParentProcess(ProcessFuncCall parent_process); - void 
ChildProcess(ProcessFuncCall child_process); + Status ParentProcess(const ProcessFuncCall &parent_process); + void ChildProcess(const ProcessFuncCall &child_process); }; } // namespace mindspore #endif // MINDSPORE_CCSRC_CXXAPI_MULTI_PROCESS_H diff --git a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h index 5200a2d26d6..e49d3167f21 100644 --- a/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h +++ b/mindspore/ccsrc/cxx_api/model/model_converter_utils/shared_memory.h @@ -26,9 +26,11 @@ class SharedMemory { Status Attach(); void Detach(); void Destroy(); - uint8_t *GetSharedMemoryAddr() { return shmat_addr_; } private: + friend class MultiProcess; + uint8_t *GetSharedMemoryAddr() { return shmat_addr_; } + int shm_id_ = -1; uint8_t *shmat_addr_ = nullptr; }; diff --git a/mindspore/ccsrc/cxx_api/types.cc b/mindspore/ccsrc/cxx_api/types.cc index 0f4a25dd2c2..5448de2d999 100644 --- a/mindspore/ccsrc/cxx_api/types.cc +++ b/mindspore/ccsrc/cxx_api/types.cc @@ -360,25 +360,25 @@ bool MSTensor::IsDevice() const { return impl_->IsDevice(); } -void MSTensor::SetShape(const std::vector &shape) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetShape(const std::vector &) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetDataType(enum DataType data_type) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetDataType(enum DataType) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetTensorName(const std::string &name) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetTensorName(const std::string &) { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetAllocator(std::shared_ptr allocator) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetAllocator(std::shared_ptr) { MS_LOG_EXCEPTION << "Invalid implement."; } std::shared_ptr MSTensor::allocator() const { MS_LOG_EXCEPTION << 
"Invalid implement."; } -void MSTensor::SetFormat(mindspore::Format format) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid implement."; } mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetData(void *data) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; } std::vector MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; } -void MSTensor::SetQuantParams(std::vector quant_params) { MS_LOG_EXCEPTION << "Invalid implement."; } +void MSTensor::SetQuantParams(std::vector) { MS_LOG_EXCEPTION << "Invalid implement."; } Buffer::Buffer() : impl_(std::make_shared()) {} Buffer::Buffer(const void *data, size_t data_len) : impl_(std::make_shared(data, data_len)) {} diff --git a/mindspore/ccsrc/debug/anf_ir_dump.cc b/mindspore/ccsrc/debug/anf_ir_dump.cc index 502af18e916..4248dc7f5f4 100644 --- a/mindspore/ccsrc/debug/anf_ir_dump.cc +++ b/mindspore/ccsrc/debug/anf_ir_dump.cc @@ -596,7 +596,8 @@ void DumpIR(const std::string &filename, const FuncGraphPtr &graph, bool dump_fu std::ofstream fout(realpath.value()); std::ostringstream buffer; if (!fout.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"; + MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } @@ -638,7 +639,8 @@ void DumpIRForRDR(const std::string &filename, const FuncGraphPtr &graph, bool d std::ofstream fout(realpath.value()); std::ostringstream buffer; if (!fout.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!"; + MS_LOG(ERROR) << "Open dump file '" << realpath.value() << "' failed!" 
+ << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } diff --git a/mindspore/ccsrc/debug/anf_ir_utils.cc b/mindspore/ccsrc/debug/anf_ir_utils.cc index 7130fbc7b83..aba493689ed 100644 --- a/mindspore/ccsrc/debug/anf_ir_utils.cc +++ b/mindspore/ccsrc/debug/anf_ir_utils.cc @@ -606,7 +606,8 @@ void AnfExporter::ExportFuncGraph(const std::string &filename, const FuncGraphPt std::ofstream ofs(filename); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; + MS_LOG(ERROR) << "Open file '" << filename << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } diff --git a/mindspore/ccsrc/debug/common.cc b/mindspore/ccsrc/debug/common.cc index 3758b8787a5..876eb32ecf9 100644 --- a/mindspore/ccsrc/debug/common.cc +++ b/mindspore/ccsrc/debug/common.cc @@ -28,7 +28,8 @@ namespace mindspore { std::optional Common::GetRealPath(const std::string &input_path) { if (input_path.length() >= PATH_MAX) { - MS_LOG(EXCEPTION) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX; + MS_LOG(ERROR) << "The length of path: " << input_path << " exceeds limit: " << PATH_MAX; + return std::nullopt; } auto path_split_pos = input_path.find_last_of('/'); if (path_split_pos == std::string::npos) { @@ -46,7 +47,8 @@ std::optional Common::GetRealPath(const std::string &input_path) { } #if defined(SYSTEM_ENV_POSIX) if (file_name.length() > NAME_MAX) { - MS_LOG(EXCEPTION) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX; + MS_LOG(ERROR) << "The length of file name : " << file_name.length() << " exceeds limit: " << NAME_MAX; + return std::nullopt; } if (realpath(common::SafeCStr(prefix_path), real_path) == nullptr) { MS_LOG(ERROR) << "The dir " << prefix_path << " does not exist."; @@ -63,7 +65,8 @@ std::optional Common::GetRealPath(const std::string &input_path) { // input_path is only file_name #if defined(SYSTEM_ENV_POSIX) if (input_path.length() > NAME_MAX) { - 
MS_LOG(EXCEPTION) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX; + MS_LOG(ERROR) << "The length of file name : " << input_path.length() << " exceeds limit: " << NAME_MAX; + return std::nullopt; } if (realpath(common::SafeCStr(input_path), real_path) == nullptr) { MS_LOG(INFO) << "The file " << input_path << " does not exist, it will be created."; @@ -145,8 +148,8 @@ std::optional Common::GetConfigFile(const std::string &env) { bool Common::IsStrLengthValid(const std::string &str, size_t length_limit, const std::string &error_message) { auto len_str = str.length(); if (len_str > length_limit) { - MS_LOG(WARNING) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit - << "."; + MS_LOG(ERROR) << error_message << "The length is " << str.length() << ", exceeding the limit of " << length_limit + << "."; return false; } return true; @@ -198,14 +201,16 @@ bool Common::IsPathValid(const std::string &path, size_t length_limit, const std return false; } - if (!std::all_of(path.begin(), path.end(), - [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '/'; })) { - MS_LOG(WARNING) << err_msg << "The path only supports alphabets, digit or {'-', '_', '/'}, but got:" << path << "."; + if (!std::all_of(path.begin(), path.end(), [](char c) { + return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.' 
|| c == '/'; + })) { + MS_LOG(ERROR) << err_msg << "The path only supports alphabets, digit or {'-', '_', '.', '/'}, but got:" << path + << "."; return false; } if (path[0] != '/') { - MS_LOG(WARNING) << err_msg << "The path only supports absolute path and should start with '/'."; + MS_LOG(ERROR) << err_msg << "The path only supports absolute path and should start with '/'."; return false; } @@ -229,11 +234,10 @@ bool Common::IsFilenameValid(const std::string &filename, size_t length_limit, c if (!IsStrLengthValid(filename, length_limit, err_msg)) { return false; } - - if (!std::all_of(filename.begin(), filename.end(), - [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; })) { - MS_LOG(WARNING) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename - << "."; + auto func = [](char c) { return ::isalpha(c) || ::isdigit(c) || c == '-' || c == '_' || c == '.'; }; + if (!std::all_of(filename.begin(), filename.end(), func)) { + MS_LOG(ERROR) << err_msg << "The filename only supports alphabets, digit or {'-', '_', '.'}, but got:" << filename + << "."; return false; } return true; @@ -274,7 +278,8 @@ bool Common::SaveStringToFile(const std::string filename, const std::string stri ofs.open(real_path.value()); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!"; + MS_LOG(ERROR) << "Open dump file '" << real_path.value() << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return false; } ofs << string_info << std::endl; @@ -300,16 +305,19 @@ struct GlogLogDirRegister { std::string log_dir_str = std::string(log_dir); auto real_log_dir_str = Common::GetRealPath(log_dir_str); - // While 'GLOG_logtostderr' = 0, logs output to files. - // 'GLOG_log_dir' must be specified as the path of log files. + // While 'GLOG_logtostderr' = 0, logs output to files. 'GLOG_log_dir' must be specified as the path of log files. 
+ // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed. if (logtostderr_str == "0" && real_log_dir_str.has_value()) { if (!Common::IsPathValid(real_log_dir_str.value(), MAX_DIRECTORY_LENGTH, "")) { - MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid"; + MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid"; + exit(EXIT_FAILURE); } else if (!Common::CreateNotExistDirs(real_log_dir_str.value())) { - MS_LOG(EXCEPTION) << "Create the path of log files, set by 'GLOG_log_dir', failed."; + MS_LOG(ERROR) << "Create the path of log files, which set by 'GLOG_log_dir', failed."; + exit(EXIT_FAILURE); } } else if (logtostderr_str == "0") { - MS_LOG(EXCEPTION) << "The path of log files, set by 'GLOG_log_dir', is invalid."; + MS_LOG(ERROR) << "The path of log files, which set by 'GLOG_log_dir', is invalid."; + exit(EXIT_FAILURE); } } } diff --git a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc index c62716b4a9d..59f3864dc4c 100644 --- a/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc +++ b/mindspore/ccsrc/debug/data_dump/dump_json_parser.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -60,8 +60,8 @@ std::string GetIfstreamString(const std::ifstream &ifstream) { } bool DumpJsonParser::IsDumpEnabled() { - auto config_path = std::getenv(kMindsporeDumpConfig); - if (config_path == nullptr) { + auto config_path = common::GetEnv(kMindsporeDumpConfig); + if (config_path.empty()) { return false; } MS_LOG(INFO) << "Dump config path is " << config_path; @@ -90,9 +90,14 @@ void DumpJsonParser::Parse() { MS_LOG(EXCEPTION) << "Get dump config file failed"; } - std::ifstream json_file(dump_config_file.value()); + auto dump_file_realpath = Common::GetRealPath(dump_config_file.value()); + if (!dump_file_realpath.has_value()) { + MS_LOG(EXCEPTION) << "Get real path failed in Parse."; + } + std::ifstream json_file(dump_file_realpath.value()); if (!json_file.is_open()) { - MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed."; + MS_LOG(EXCEPTION) << "Dump file:" << dump_config_file.value() << " open failed." + << " Errno:" << errno << " ErrInfo:" << strerror(errno); } nlohmann::json j; @@ -100,6 +105,7 @@ void DumpJsonParser::Parse() { json_file >> j; } catch (nlohmann::json::parse_error &e) { MS_LOG(ERROR) << "Dump json contents:" << GetIfstreamString(json_file); + json_file.close(); MS_LOG(EXCEPTION) << "Parse dump json failed, error:" << e.what(); } @@ -107,6 +113,7 @@ void DumpJsonParser::Parse() { std::stringstream ss; ss << j; std::string cfg = ss.str(); + json_file.close(); MS_LOG(INFO) << "Dump json:" << cfg; ParseE2eDumpSetting(j); @@ -128,13 +135,14 @@ void DumpJsonParser::CopyJsonToDir(uint32_t rank_id) { auto realpath = Common::GetRealPath(path_ + "/rank_" + std::to_string(rank_id) + "/.dump_metadata/data_dump.json"); if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed in CopyJsonDir."; + } else { + const std::string file_path = realpath.value(); + ChangeFileMode(file_path, S_IWUSR); + std::ofstream json_copy(file_path); + json_copy << json_file.rdbuf(); + json_copy.close(); + ChangeFileMode(file_path, 
S_IRUSR); } - const std::string file_path = realpath.value(); - ChangeFileMode(file_path, S_IWUSR); - std::ofstream json_copy(file_path); - json_copy << json_file.rdbuf(); - json_copy.close(); - ChangeFileMode(file_path, S_IRUSR); } } @@ -176,7 +184,7 @@ void DumpJsonParser::CopyMSCfgJsonToDir(uint32_t rank_id) { auto context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(context); ms_info["device_target"] = context->get_param(MS_CTX_DEVICE_TARGET); - ms_info["ms_version"] = "1.3.0"; + ms_info["ms_version"] = "1.4.0"; const std::string file_path = realpath.value(); ChangeFileMode(file_path, S_IWUSR); std::ofstream json_create(file_path); @@ -204,7 +212,8 @@ bool DumpJsonParser::DumpToFile(const std::string &filename, const void *data, s ChangeFileMode(file_path, S_IWUSR); std::ofstream fd(file_path, std::ios::out | std::ios::trunc | std::ios::binary); if (!fd.is_open()) { - MS_LOG(ERROR) << "Open file " << file_path << " failed."; + MS_LOG(ERROR) << "Open file " << file_path << " failed." 
+ << " Errno:" << errno << " ErrInfo:" << strerror(errno); return false; } std::string npy_header = GenerateNpyHeader(shape, type); diff --git a/mindspore/ccsrc/debug/debug_services.cc b/mindspore/ccsrc/debug/debug_services.cc index 5ed077f4ff1..1f543f86376 100644 --- a/mindspore/ccsrc/debug/debug_services.cc +++ b/mindspore/ccsrc/debug/debug_services.cc @@ -39,6 +39,10 @@ namespace mindspore { DebugServices::DebugServices() { tensor_loader_ = std::make_shared(); } DebugServices::DebugServices(const DebugServices &other) { + wp_id_cache = other.wp_id_cache; + net_name = other.net_name; + dump_dir = other.dump_dir; + is_sync_mode = other.is_sync_mode; tensor_loader_ = other.tensor_loader_; watchpoint_table = other.watchpoint_table; } @@ -313,14 +317,7 @@ void DebugServices::CheckWatchpoints(std::vector *const name, std:: MS_LOG(INFO) << "tensor list size: " << tensor_list_size; if (tensor_list_size == 0) return; // default value for number of threads - int max_thread_num = 32; - auto thread_num = getenv("MS_dbg_num_thread"); - if (thread_num != nullptr) { - max_thread_num = std::stoi(thread_num); - } - if (max_thread_num > tensor_list_size) { - max_thread_num = tensor_list_size; - } + const int max_thread_num = 32; MS_LOG(INFO) << "Number of threads used for checkwatchpoint: " << max_thread_num; int chunk_size = tensor_list_size / max_thread_num; int remainder = tensor_list_size % max_thread_num; @@ -355,8 +352,7 @@ void DebugServices::CheckWatchpoints(std::vector *const name, std:: tensor_future_vec[i].wait(); tensor_future_vec[i].get(); for (unsigned int j = 0; j < chunk_exec_orders[i].size(); j++) { - std::vector::iterator iter; - iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]); + std::vector::iterator iter = std::lower_bound(exec_order.begin(), exec_order.end(), chunk_exec_orders[i][j]); // if the execution order is repeated,inserts the new one before the others with same execution order. 
int position = iter - exec_order.begin(); exec_order.insert(iter, chunk_exec_orders[i][j]); @@ -399,7 +395,8 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string MS_LOG(INFO) << "Reading in file: " << file_path; infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in); if (!infile.is_open()) { - MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path; + MS_LOG(ERROR) << "Failed to open file (In ReadTensorFromNpy) " << file_path << " Errno:" << errno + << " ErrInfo:" << strerror(errno); return; } uint64_t file_size = infile.tellg(); @@ -409,11 +406,18 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string MS_LOG(ERROR) << "Failed to read file (In ReadTensorFromNpy) " << file_path; return; } - constexpr int header_len_offset = 8; + const int substr_len = 2; + const int header_len_offset = 8; + const int header_offset = 9; + const int type_offset = 10; uint16_t header_len = *reinterpret_cast(buffer->data() + header_len_offset); - std::string header(buffer->data() + header_len_offset + 1, header_len); - std::size_t type_i = header.find("descr") + 10; - *tensor_type = header.substr(type_i, 2); + std::string header(buffer->data() + header_offset, header_len); + std::size_t type_i = header.find("descr") + type_offset; + if (header.length() < type_i + substr_len) { + MS_LOG(ERROR) << "Cannot get tensor_type, header length is " << header.length(); + return; + } + *tensor_type = header.substr(type_i, substr_len); std::size_t shape_i_open = header.find("("); std::size_t shape_i_close = header.find(")"); std::string shape_str = header.substr(shape_i_open + 1, shape_i_close - shape_i_open - 1); @@ -426,7 +430,7 @@ void DebugServices::ReadTensorFromNpy(const std::string &file_name, std::string std::size_t word_size = std::stoul(std::string(1, (*tensor_type)[1])); std::size_t data_len = std::accumulate(shape->begin(), shape->end(), 1, std::multiplies()); std::size_t data_size = 
data_len * word_size; - infile.seekg(header_len + 10); + infile.seekg(header_len + type_offset); *data_buffer = new std::vector(data_size); if (data_buffer == nullptr || !infile.read((*data_buffer)->data(), data_size)) { MS_LOG(ERROR) << "Unable to get tensor data from npy"; @@ -479,25 +483,29 @@ void DebugServices::ConvertToHostFormat(const std::mapd_type == DT_REG) { - std::string candidate = dir->d_name; - for (const std::string &file_to_find : files_to_convert_in_dir) { - std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1); - if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) { - // we found a converted file for this op - std::string found_file = dump_key + "/" + candidate; - if (std::find(result_list->begin(), result_list->end(), found_file) == result_list->end()) { - result_list->push_back(found_file); - } + std::string abspath = RealPath(dump_key); + DIR *d_handle = opendir(abspath.c_str()); + if (d_handle == nullptr) { + MS_LOG(ERROR) << "Directory does not exit in ConvertToHostFormat."; + return; + } + struct dirent *dir = nullptr; + while ((dir = readdir(d_handle)) != NULL) { + if (dir->d_type == DT_REG) { + std::string candidate = dir->d_name; + for (const std::string &file_to_find : files_to_convert_in_dir) { + std::string file_n = file_to_find.substr(file_to_find.find_last_of("\\/") + 1); + if (candidate.find(file_n) != std::string::npos && candidate.rfind(file_format) != std::string::npos) { + // we found a converted file for this op + std::string found_file = dump_key + "/" + candidate; + if (std::find(result_list->begin(), result_list->end(), found_file) == result_list->end()) { + result_list->push_back(found_file); } } } } } + closedir(d_handle); } } } @@ -552,9 +560,12 @@ void DebugServices::ConvertReadTensors(std::vector backend_name, st std::to_string(root_graph_id[i]) + "/" + IterationString(iteration[i]); // search files in dir for the one that meets the filename 
prefix and read the file into memory - DIR *d; - d = opendir(specific_dump_dir.c_str()); - if (d != nullptr) { + std::string abspath = RealPath(specific_dump_dir); + DIR *d = opendir(abspath.c_str()); + if (d == nullptr) { + MS_LOG(ERROR) << "Directory does not exist in ConvertReadTensors."; + return; + } else { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -575,8 +586,8 @@ void DebugServices::ConvertReadTensors(std::vector backend_name, st } } } + closedir(d); } - closedir(d); } ConvertToHostFormat(dir_to_files_map, result_list); } @@ -590,9 +601,12 @@ void DebugServices::ConvertWatchPointNodes(const std::vector(node); dump_name = dump_name.substr(0, dump_name.rfind(".")); // search files in dir for the one that meets the filename prefix and read the file into memory - DIR *d; - d = opendir(specific_dump_dir.c_str()); - if (d != nullptr) { + std::string abspath = RealPath(specific_dump_dir); + DIR *d = opendir(abspath.c_str()); + if (d == nullptr) { + MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ConvertWatchPointNodes."; + return; + } else { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -613,8 +627,8 @@ void DebugServices::ConvertWatchPointNodes(const std::vector backend_name, std: std::vector shape; uint64_t data_size = 0; if (is_sync_mode) { - DIR *d; - d = opendir(specific_dump_dir.c_str()); + std::string abspath = RealPath(specific_dump_dir); + DIR *d = opendir(abspath.c_str()); bool found_file = false; std::vector matched_paths; - if (d != nullptr) { + if (d == nullptr) { + MS_LOG(ERROR) << "Directory " << specific_dump_dir << " does not exist!"; + } else { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -770,9 +786,8 @@ void DebugServices::ReadDumpedTensor(std::vector backend_name, std: matched_paths.push_back(full_path); found_file = true; } + closedir(d); } - } else 
{ - MS_LOG(INFO) << "Directory " << specific_dump_dir << " does not exist!"; } if (found_file) { @@ -786,7 +801,6 @@ void DebugServices::ReadDumpedTensor(std::vector backend_name, std: type_name, shape, buffer, result_list); MS_LOG(INFO) << "Target tensor has not been found."; } - closedir(d); } else { bool found = false; std::vector matched_paths; @@ -895,9 +909,11 @@ std::vector> DebugServices::ReadNeededDumpedTensors( } if (is_sync_mode) { // search files in dir for the one that meets the filename prefix and read the file into memory - DIR *d; - d = opendir(specific_dump_dir.c_str()); - if (d != nullptr) { + std::string abspath = RealPath(specific_dump_dir); + DIR *d = opendir(abspath.c_str()); + if (d == nullptr) { + MS_LOG(ERROR) << "Directory " << specific_dump_dir.c_str() << " does not exist in ReadNeededDumpedTensors."; + } else { struct dirent *dir = nullptr; while ((dir = readdir(d)) != NULL) { if (dir->d_type == DT_REG) { @@ -924,6 +940,7 @@ std::vector> DebugServices::ReadNeededDumpedTensors( } } } + closedir(d); } } else { GetTensorDataInfoAsync(proto_to_dump, specific_dump_dir, iteration, device_id, root_graph_id, *async_file_pool, @@ -985,7 +1002,7 @@ bool DebugServices::IsWatchPoint(const std::string &kernel_name, const CNodePtr } bool DebugServices::IsWatchPointNodeInput(const std::string &w_name, const CNodePtr &kernel) const { - if (kernel) { + if (kernel && w_name.length() > 0) { auto input_size = AnfAlgo::GetInputTensorNum(kernel); for (size_t j = 0; j < input_size; ++j) { auto input_kernel = kernel->input(j + 1); @@ -1095,8 +1112,11 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int MS_LOG(INFO) << "Processing bin file path " << overflow_bin_path; - DIR *d = opendir(overflow_bin_path.c_str()); - if (d != nullptr) { + std::string abspath = RealPath(overflow_bin_path); + DIR *d = opendir(abspath.c_str()); + if (d == nullptr) { + MS_LOG(ERROR) << "OverFlow bin directory does not exist!"; + } else { struct dirent 
*dir = nullptr; while ((dir = readdir(d)) != nullptr) { if (dir->d_type == DT_REG) { @@ -1108,8 +1128,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int std::ifstream infile; infile.open(file_path.c_str(), std::ios::ate | std::ios::binary | std::ios::in); if (!infile.is_open()) { - MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name; - MS_LOG(ERROR) << "Error: " << strerror(errno); + MS_LOG(ERROR) << "Failed to open overflow bin file " << file_name << " Errno:" << errno + << " ErrInfo:" << strerror(errno); continue; } @@ -1149,10 +1169,8 @@ bool DebugServices::CheckOpOverflow(std::string node_name_to_find, unsigned int infile.close(); } } - } else { - MS_LOG(INFO) << "OverFlow bin directory does not exist!"; + closedir(d); } - closedir(d); // find the op_names with an overflow hit for (auto &task_stream : task_stream_hit) { diff --git a/mindspore/ccsrc/debug/debugger/debug_grpc.proto b/mindspore/ccsrc/debug/debugger/debug_grpc.proto index e34dce3b2ed..2d3870cc6e4 100644 --- a/mindspore/ccsrc/debug/debugger/debug_grpc.proto +++ b/mindspore/ccsrc/debug/debugger/debug_grpc.proto @@ -27,6 +27,7 @@ service EventListener { rpc SendTensors (stream TensorProto) returns (EventReply) {}; rpc SendWatchpointHits (stream WatchpointHit) returns (EventReply) {}; rpc SendMultiGraphs (stream Chunk) returns (EventReply) {}; + rpc SendHeartbeat (Heartbeat) returns (EventReply) {}; } message Metadata { @@ -136,3 +137,8 @@ message WatchpointHit { int32 id = 3; int32 error_code = 4; } + +message Heartbeat { + string message = 1; + int32 period = 2; +} diff --git a/mindspore/ccsrc/debug/debugger/debugger.cc b/mindspore/ccsrc/debug/debugger/debugger.cc index 9b509f4e729..bddc3c5a2ce 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.cc +++ b/mindspore/ccsrc/debug/debugger/debugger.cc @@ -59,12 +59,14 @@ using debugger::WatchpointHit; namespace mindspore { static constexpr auto g_chunk_size = 1024 * 1024 * 3; +static constexpr int32_t 
heartbeat_period_second = 30; DebuggerPtr Debugger::debugger_ = nullptr; std::mutex Debugger::instance_lock_; Debugger::Debugger() : grpc_client_(nullptr), debug_services_(nullptr), + heartbeat_thread_(nullptr), device_id_(0), device_target_(""), num_step_(0), @@ -113,7 +115,7 @@ void Debugger::Init(const uint32_t device_id, const std::string device_target) { device_id_ = device_id; MS_LOG(INFO) << "Debugger got device_target: " << device_target; device_target_ = device_target; - version_ = "1.3.0"; + version_ = "1.4.0"; } bool IsTypeDebuggerSupported(TypeId type) { @@ -132,6 +134,7 @@ void Debugger::EnableDebugger() { partial_memory_ = false; grpc_client_ = nullptr; debug_services_ = nullptr; + heartbeat_thread_ = nullptr; // see if dump using debugger backend is enabled bool dump_enabled = CheckDebuggerDumpEnabled(); @@ -147,8 +150,22 @@ void Debugger::EnableDebugger() { } if (debugger_enabled_) { - std::string host = "localhost"; - + // configure grpc host + std::string env_host_str = common::GetEnv("MS_DEBUGGER_HOST"); + std::string host; + if (!env_host_str.empty()) { + if (CheckIp(env_host_str)) { + MS_LOG(INFO) << "Getenv MS_DEBUGGER_HOST: " << env_host_str; + host = env_host_str; + } else { + debugger_enabled_ = false; + MS_EXCEPTION(ValueError) << "Environment variable MS_DEBUGGER_HOST isn't a valid IP address. " + "Please set environment variable MS_DEBUGGER_HOST=x.x.x.x to a valid IP"; + } + } else { + MS_LOG(INFO) << "Environment variable MS_DEBUGGER_HOST doesn't exist. 
Using default debugger host: localhost"; + host = "localhost"; + } // configure grpc port std::string env_port_str = common::GetEnv("MS_DEBUGGER_PORT"); std::string port; @@ -170,6 +187,8 @@ void Debugger::EnableDebugger() { } // initialize grpc client grpc_client_ = std::make_unique(host, port); + // initialize sending heartbeat + heartbeat_thread_ = std::make_unique([&]() { SendHeartbeat(heartbeat_period_second); }); } debug_services_ = std::make_unique(); } @@ -561,6 +580,38 @@ GraphProto Debugger::GetGraphProto(const KernelGraphPtr &graph_ptr) const { ModelProto model = GetDebuggerFuncGraphProto(graph_ptr); return model.graph(); } + +void Debugger::SendHeartbeat(int32_t period) { + bool heartbeat_enabled_ = true; + int num_heartbeat_fail = 0; + const int max_num_heartbeat_fail = 5; + const int retry_period = 500; + + Heartbeat heartbeat; + heartbeat.set_message("Debugger is alive"); + heartbeat.set_period(heartbeat_period_second); + + bool run_ = CheckDebuggerEnabled() && heartbeat_enabled_; + while (run_) { + EventReply reply = grpc_client_->SendHeartbeat(heartbeat); + + if (reply.status() != reply.OK) { + MS_LOG(ERROR) << "Error: SendHeartbeat failed"; + num_heartbeat_fail++; + if (num_heartbeat_fail >= max_num_heartbeat_fail) { + MS_LOG(ERROR) << "Maximum number of failure for SendHeartbeat reached : exiting training session."; + Exit(); + run_ = false; + } else { + MS_LOG(ERROR) << "Number of consecutive SendHeartbeat fail:" << num_heartbeat_fail; + std::this_thread::sleep_for(std::chrono::milliseconds(retry_period)); + } + } else { + std::this_thread::sleep_for(std::chrono::milliseconds(period * 1000)); + } + } +} + void Debugger::SendGraphAndSuspend(const GraphProto &graph_proto) { if (SendMetadata(true)) { // send graph to Mindinsight server @@ -1120,6 +1171,17 @@ bool Debugger::CheckPort(const std::string &port) const { return true; } +bool Debugger::CheckIp(const std::string &host) const { + std::regex reg_ip( + 
"(25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])" + "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])" + "[.](25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])" + "[.](25[0-4]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])"); + std::smatch smat; + std::string host_str = host; + return std::regex_match(host_str, smat, reg_ip); +} + uint32_t Debugger::GetFirstRunGraphId() const { return rungraph_id_list_.front(); } void Debugger::LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index) { diff --git a/mindspore/ccsrc/debug/debugger/debugger.h b/mindspore/ccsrc/debug/debugger/debugger.h index 49e103ea082..9446f96b61d 100644 --- a/mindspore/ccsrc/debug/debugger/debugger.h +++ b/mindspore/ccsrc/debug/debugger/debugger.h @@ -195,6 +195,9 @@ class Debugger : public std::enable_shared_from_this { // serialize graph and get proto GraphProto GetGraphProto(const KernelGraphPtr &graph_ptr) const; + // send heartbeat message to UI once per 30 second by default + void SendHeartbeat(int32_t period); + // send graph and enter command wait loop void SendGraphAndSuspend(const GraphProto &graph_proto); @@ -235,12 +238,16 @@ class Debugger : public std::enable_shared_from_this { // Check if the port is valid bool CheckPort(const std::string &port) const; + // Check if the IP is valid + bool CheckIp(const std::string &host) const; + void LoadSingleAnfnode(const AnfNodePtr &anf_node, const size_t output_index); // class members std::unique_ptr grpc_client_; std::unique_ptr debug_services_; + std::unique_ptr heartbeat_thread_; KernelGraphPtr graph_ptr_; uint32_t device_id_; std::string device_target_; diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.cc b/mindspore/ccsrc/debug/debugger/grpc_client.cc index 9f1607bc5e5..d0bbc51c87a 100644 --- a/mindspore/ccsrc/debug/debugger/grpc_client.cc +++ b/mindspore/ccsrc/debug/debugger/grpc_client.cc @@ -24,6 +24,7 @@ using debugger::EventListener; using debugger::EventReply; using debugger::EventReply_Status_FAILED; using 
debugger::GraphProto; +using debugger::Heartbeat; using debugger::Metadata; using debugger::TensorProto; using debugger::WatchpointHit; @@ -185,4 +186,18 @@ EventReply GrpcClient::SendWatchpointHits(const std::list &watchp } return reply; } + +EventReply GrpcClient::SendHeartbeat(const Heartbeat &heartbeat) { + EventReply reply; + grpc::ClientContext context; + + grpc::Status status = stub_->SendHeartbeat(&context, heartbeat, &reply); + + if (!status.ok()) { + MS_LOG(ERROR) << "RPC failed: SendHeartbeat"; + MS_LOG(ERROR) << status.error_code() << ": " << status.error_message(); + reply.set_status(EventReply_Status_FAILED); + } + return reply; +} } // namespace mindspore diff --git a/mindspore/ccsrc/debug/debugger/grpc_client.h b/mindspore/ccsrc/debug/debugger/grpc_client.h index 34f3b4badb5..36479edba50 100644 --- a/mindspore/ccsrc/debug/debugger/grpc_client.h +++ b/mindspore/ccsrc/debug/debugger/grpc_client.h @@ -27,6 +27,7 @@ using debugger::Chunk; using debugger::EventListener; using debugger::EventReply; using debugger::GraphProto; +using debugger::Heartbeat; using debugger::Metadata; using debugger::TensorProto; using debugger::WatchpointHit; @@ -60,6 +61,8 @@ class GrpcClient { std::vector ChunkString(std::string str, int graph_size); + EventReply SendHeartbeat(const Heartbeat &heartbeat); + private: std::unique_ptr stub_; }; diff --git a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc index f4fd451bec5..eec6addc0fd 100644 --- a/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc +++ b/mindspore/ccsrc/debug/debugger/offline_debug/dbg_services.cc @@ -48,7 +48,7 @@ DbgServices::~DbgServices() { std::string DbgServices::GetVersion() { MS_LOG(INFO) << "get version is called"; - return "1.3.0"; + return "1.4.0"; } int32_t DbgServices::Initialize(std::string net_name, std::string dump_folder_path, bool is_sync_mode) { diff --git 
a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h index 3b02b06ead8..7edd0cf016c 100644 --- a/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h +++ b/mindspore/ccsrc/debug/debugger/offline_debug/offline_logger.h @@ -18,6 +18,8 @@ #include +#define PATH_MAX 4096 + #define MS_LOG(level) MS_LOG_##level #define MS_LOG_INFO static_cast(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::INFO) < std::cout @@ -28,8 +30,7 @@ #define MS_LOG_WARNING MS_LOG_INFO -#define MS_LOG_EXCEPTION \ - static_cast(0), !(DbgLogger::verbose) ? void(0) : DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout +#define MS_LOG_EXCEPTION static_cast(0), DbgLogger(DbgLoggerLvl::EXCEPTION) < std::cout enum DbgLoggerLvl : int { DEBUG = 0, INFO, WARNING, ERROR, EXCEPTION }; @@ -38,17 +39,20 @@ class DbgLogger { explicit DbgLogger(DbgLoggerLvl lvl) : lvl_(lvl) {} ~DbgLogger() = default; void operator<(std::ostream &os) const { - char *dbg_log_path = getenv("OFFLINE_DBG_LOG"); - if (dbg_log_path != NULL) { - FILE *fp; - fp = freopen(dbg_log_path, "a", stdout); + char *dbg_log_path = std::getenv("OFFLINE_DBG_LOG"); + if (dbg_log_path != nullptr) { + char abspath[PATH_MAX]; + if (sizeof(dbg_log_path) > PATH_MAX || NULL == realpath(dbg_log_path, abspath)) { + return; + } + FILE *fp = freopen(abspath, "a", stdout); if (fp == nullptr) { std::cout << "ERROR: DbgLogger could not redirect all stdout to a file"; } } os << std::endl; if (lvl_ == DbgLoggerLvl::EXCEPTION) { - throw; + throw lvl_; } } static bool verbose; diff --git a/mindspore/ccsrc/debug/debugger/proto_exporter.cc b/mindspore/ccsrc/debug/debugger/proto_exporter.cc index 90ba50569df..3db363edcab 100644 --- a/mindspore/ccsrc/debug/debugger/proto_exporter.cc +++ b/mindspore/ccsrc/debug/debugger/proto_exporter.cc @@ -573,7 +573,8 @@ void DumpIRProtoWithSrcInfo(const FuncGraphPtr &func_graph, const std::string &s // write to pb file std::ofstream 
ofs(realpath.value()); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!"; + MS_LOG(ERROR) << "Open file '" << realpath.value() << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } ofs << graph_proto; diff --git a/mindspore/ccsrc/debug/dump_proto.cc b/mindspore/ccsrc/debug/dump_proto.cc index 180f952dfd4..ec6a67ed872 100644 --- a/mindspore/ccsrc/debug/dump_proto.cc +++ b/mindspore/ccsrc/debug/dump_proto.cc @@ -555,7 +555,8 @@ void DumpIRProto(const FuncGraphPtr &func_graph, const std::string &suffix) { // write to pb file std::ofstream ofs(file_path); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << file_path << "' failed!"; + MS_LOG(ERROR) << "Open file '" << file_path << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } ofs << GetFuncGraphProtoString(func_graph); diff --git a/mindspore/ccsrc/debug/env_config_parser.cc b/mindspore/ccsrc/debug/env_config_parser.cc index 58f39ed9aad..3a43fcc1238 100644 --- a/mindspore/ccsrc/debug/env_config_parser.cc +++ b/mindspore/ccsrc/debug/env_config_parser.cc @@ -122,7 +122,8 @@ void EnvConfigParser::ParseFromFile() { std::ifstream json_file(config_file_); if (!json_file.is_open()) { MS_LOG(WARNING) << "Env config file:" << config_file_ << " open failed." - << " Please check the config file '" << config_file_ << "' set by 'env_config_path' in context."; + << " Please check the config file '" << config_file_ << "' set by 'env_config_path' in context." 
+ << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } diff --git a/mindspore/ccsrc/debug/tensor_data.h b/mindspore/ccsrc/debug/tensor_data.h index e6d5acd8218..0a85b10ede6 100644 --- a/mindspore/ccsrc/debug/tensor_data.h +++ b/mindspore/ccsrc/debug/tensor_data.h @@ -171,6 +171,9 @@ class TensorData { this->shape = obj.shape; this->iteration = obj.iteration; this->device_id = obj.device_id; + this->data_ptr = obj.data_ptr; + this->root_graph_id = obj.root_graph_id; + this->is_output = obj.is_output; #ifdef ONLINE_DBG_MODE this->tensor_ptr = obj.tensor_ptr; #endif @@ -194,39 +197,39 @@ class TensorData { void SetSlot(size_t slot) { this->slot = slot; } - char *GetDataPtr() { return data_ptr; } + char *GetDataPtr() const { return this->data_ptr; } void SetDataPtr(char *data_ptr) { this->data_ptr = data_ptr; } uint32_t GetNumElements() { return size / data_type_size; } - uint64_t GetByteSize() { return size; } + uint64_t GetByteSize() const { return this->size; } void SetByteSize(uint64_t size) { this->size = size; } - std::vector GetShape() { return shape; } + std::vector GetShape() const { return this->shape; } void SetShape(std::vector shape) { this->shape = shape; } - unsigned int GetIteration() { return iteration; } + unsigned int GetIteration() const { return this->iteration; } void SetIteration(unsigned int iteration) { this->iteration = iteration; } - unsigned int GetDeviceId() { return device_id; } + unsigned int GetDeviceId() const { return this->device_id; } void SetDeviceId(unsigned int device_id) { this->device_id = device_id; } - unsigned int GetRootGraphId() { return root_graph_id; } + unsigned int GetRootGraphId() const { return this->root_graph_id; } void SetRootGraphId(unsigned int root_graph_id) { this->root_graph_id = root_graph_id; } - DbgDataType GetType() { return data_type; } + DbgDataType GetType() const { return this->data_type; } void SetType(unsigned int type) { ConvertMsToDbgType(type); } void SetType(std::string type_name) { 
ConvertStringToDbgType(type_name); } - bool GetIsOutput() { return is_output; } + bool GetIsOutput() const { return this->is_output; } void SetIsOutput(bool is_output) { this->is_output = is_output; } diff --git a/mindspore/ccsrc/debug/trace.cc b/mindspore/ccsrc/debug/trace.cc index 1b0e9399448..72d2de8f796 100644 --- a/mindspore/ccsrc/debug/trace.cc +++ b/mindspore/ccsrc/debug/trace.cc @@ -138,7 +138,7 @@ class AnalyzeFailExporter : public AnfExporter { std::map *const apply_map) override; std::string GetNodeType(const AnfNodePtr &nd) override; AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd); - AnfNodeConfigPtr GetFordwardConfig(const AnfNodeConfigPtr &cfg); + AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg); void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment); void OutputStatementComment(std::ofstream &ofs, const CNodePtr &node); std::unordered_map CreateTaggedNodeMap( @@ -157,7 +157,7 @@ std::unordered_map AnalyzeFailExporter::CreateTagge MS_EXCEPTION_IF_NULL(node_config); // Record new config in set. 
- auto new_config = GetFordwardConfig(node_config); + auto new_config = GetForwardConfig(node_config); if (new_config != node_config) { MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString() << ", new_config: " << new_config->ToString(); @@ -218,7 +218,7 @@ AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) { return nullptr; } -AnfNodeConfigPtr AnalyzeFailExporter::GetFordwardConfig(const AnfNodeConfigPtr &cfg) { +AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) { MS_EXCEPTION_IF_NULL(cfg); MS_EXCEPTION_IF_NULL(engine_); AnfNodeConfigPtr cur_cfg = cfg; @@ -242,7 +242,7 @@ void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string try { FuncGraphPtr dummy_call_func_graph = nullptr; auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph); - cfg = GetFordwardConfig(cfg); + cfg = GetForwardConfig(cfg); cnode = dyn_cast(cfg->node()); } catch (const std::exception &e) { MS_LOG(INFO) << "Exception: " << e.what(); @@ -346,9 +346,16 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra MS_LOG(DEBUG) << "Node configs is empty"; return false; } - std::ofstream ofs(filename); + auto real_filepath = Common::GetRealPath(filename); + if (!real_filepath.has_value()) { + MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal."; + return false; + } + ChangeFileMode(real_filepath.value(), S_IWUSR); + std::ofstream ofs(real_filepath.value()); if (!ofs.is_open()) { - MS_LOG(ERROR) << "Open file '" << filename << "' failed!"; + MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!" 
+ << " Errno:" << errno << " ErrInfo:" << strerror(errno); return false; } @@ -389,6 +396,7 @@ bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const Tra << " internal frames).\n"; } ofs.close(); + ChangeFileMode(real_filepath.value(), S_IRUSR); return true; } diff --git a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc index db3a35087db..1d170e3873d 100644 --- a/mindspore/ccsrc/fl/server/consistent_hash_ring.cc +++ b/mindspore/ccsrc/fl/server/consistent_hash_ring.cc @@ -38,6 +38,8 @@ bool ConsistentHashRing::Erase(uint32_t rank) { for (auto iterator = ring_.begin(); iterator != ring_.end();) { if (iterator->second == rank) { (void)ring_.erase(iterator++); + } else { + iterator++; } } return true; diff --git a/mindspore/ccsrc/fl/server/distributed_count_service.cc b/mindspore/ccsrc/fl/server/distributed_count_service.cc index e3ceb8ae7a9..c28c76b856c 100644 --- a/mindspore/ccsrc/fl/server/distributed_count_service.cc +++ b/mindspore/ccsrc/fl/server/distributed_count_service.cc @@ -103,6 +103,7 @@ bool DistributedCountService::Count(const std::string &name, const std::string & return false; } + MS_ERROR_IF_NULL_W_RET_VAL(report_cnt_rsp_msg, false); CountResponse count_rsp; (void)count_rsp.ParseFromArray(report_cnt_rsp_msg->data(), SizeToInt(report_cnt_rsp_msg->size())); if (!count_rsp.result()) { diff --git a/mindspore/ccsrc/fl/server/executor.cc b/mindspore/ccsrc/fl/server/executor.cc index 460b8dba502..cf87a3513eb 100644 --- a/mindspore/ccsrc/fl/server/executor.cc +++ b/mindspore/ccsrc/fl/server/executor.cc @@ -231,6 +231,9 @@ bool Executor::IsWeightAggrDone(const std::vector ¶m_names) { std::unique_lock lock(mtx); auto ¶m_aggr = param_aggrs_[name]; MS_ERROR_IF_NULL_W_RET_VAL(param_aggr, false); + if (!param_aggr->requires_aggr()) { + continue; + } if (!param_aggr->IsAggregationDone()) { MS_LOG(DEBUG) << "Update model for " << name << " is not done yet."; return false; @@ -265,6 +268,8 @@ 
std::map Executor::GetModel() { return model; } +const std::vector &Executor::param_names() const { return param_names_; } + bool Executor::Unmask() { #ifdef ENABLE_ARMOUR auto model = GetModel(); @@ -274,7 +279,17 @@ bool Executor::Unmask() { #endif } -const std::vector &Executor::param_names() const { return param_names_; } +void Executor::set_unmasked(bool unmasked) { unmasked_ = unmasked; } + +bool Executor::unmasked() const { + std::string encrypt_type = ps::PSContext::instance()->encrypt_type(); + if (encrypt_type == ps::kPWEncryptType) { + return unmasked_.load(); + } else { + // If the algorithm of pairwise encrypt is not enabled, consider_ unmasked flag as true. + return true; + } +} std::string Executor::GetTrainableParamName(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); diff --git a/mindspore/ccsrc/fl/server/executor.h b/mindspore/ccsrc/fl/server/executor.h index 1ba82d9a852..bc0963cb519 100644 --- a/mindspore/ccsrc/fl/server/executor.h +++ b/mindspore/ccsrc/fl/server/executor.h @@ -93,10 +93,16 @@ class Executor { bool initialized() const; const std::vector ¶m_names() const; + + // The unmasking method for pairwise encrypt algorithm. bool Unmask(); + // The setter and getter for unmasked flag to judge whether the unmasking is completed. + void set_unmasked(bool unmasked); + bool unmasked() const; + private: - Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}) {} + Executor() : initialized_(false), aggregation_count_(0), param_names_({}), param_aggrs_({}), unmasked_(false) {} ~Executor() = default; Executor(const Executor &) = delete; Executor &operator=(const Executor &) = delete; @@ -123,9 +129,13 @@ class Executor { // Because ParameterAggregator is not threadsafe, we have to create mutex for each ParameterAggregator so we can // acquire lock before calling its method. 
std::map parameter_mutex_; + #ifdef ENABLE_ARMOUR armour::CipherUnmask cipher_unmask_; #endif + + // The flag represents the unmasking status. + std::atomic unmasked_; }; } // namespace server } // namespace fl diff --git a/mindspore/ccsrc/fl/server/iteration_timer.cc b/mindspore/ccsrc/fl/server/iteration_timer.cc index 27a98c4191a..780c2ff2f16 100644 --- a/mindspore/ccsrc/fl/server/iteration_timer.cc +++ b/mindspore/ccsrc/fl/server/iteration_timer.cc @@ -40,7 +40,9 @@ void IterationTimer::Start(const std::chrono::milliseconds &duration) { void IterationTimer::Stop() { running_ = false; - monitor_thread_.join(); + if (monitor_thread_.joinable()) { + monitor_thread_.join(); + } } void IterationTimer::SetTimeOutCallBack(const TimeOutCb &timeout_cb) { diff --git a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h index 90368f5c9f8..eb3b5fd3bb8 100644 --- a/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/dense_grad_accum_kernel.h @@ -60,6 +60,8 @@ class DenseGradAccumKernel : public AggregationKernel { MS_LOG(ERROR) << "The inputs number of DenseGradAccumKernel should be 2, but got " << inputs.size(); return false; } + MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false); + MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false); diff --git a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h index fa7b4abc172..b201fa83d92 100644 --- a/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/fed_avg_kernel.h @@ -97,6 +97,10 @@ class FedAvgKernel : public AggregationKernel { MS_LOG(ERROR) << "Federated average allreduce failed."; return; } + if (data_size_addr[0] == 0) { + MS_LOG(ERROR) << "After AllReduce, the data size is 0."; + return; + } 
LocalMetaStore::GetInstance().put_value(kCtxFedAvgTotalDataSize, data_size_addr[0]); for (size_t i = 0; i < weight_size / sizeof(T); i++) { weight_addr[i] /= data_size_addr[0]; @@ -115,6 +119,10 @@ class FedAvgKernel : public AggregationKernel { MS_LOG(ERROR) << "The inputs number of FedAvgKernel should be 4, but got " << inputs.size(); return false; } + MS_ERROR_IF_NULL_W_RET_VAL(inputs[0], false); + MS_ERROR_IF_NULL_W_RET_VAL(inputs[1], false); + MS_ERROR_IF_NULL_W_RET_VAL(inputs[2], false); + MS_ERROR_IF_NULL_W_RET_VAL(inputs[3], false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[0]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[1]->addr, false); MS_ERROR_IF_NULL_W_RET_VAL(inputs[2]->addr, false); diff --git a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h index f744df961f9..98c41ee2f49 100644 --- a/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h +++ b/mindspore/ccsrc/fl/server/kernel/optimizer_kernel.h @@ -76,7 +76,7 @@ class OptimizerKernel : public CPUKernel { } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); for (size_t output_index = 0; output_index < output_num; ++output_index) { - std::vector shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, output_index); + std::vector shape = AnfAlgo::GetOutputInferShape(kernel_node, output_index); size_t tensor_size = shape.empty() ? 
type_size : std::accumulate(shape.begin(), shape.end(), type_size, std::multiplies()); output_size_list_.emplace_back(tensor_size); diff --git a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc index 9cb6799489c..07ce238d926 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/pull_weight_kernel.cc @@ -90,7 +90,7 @@ void PullWeightKernel::PullWeight(const std::shared_ptr &fbb, for (size_t i = 0; i < weights_names_fbs->size(); i++) { weight_names.push_back(weights_names_fbs->Get(i)->str()); } - if (!executor_->IsWeightAggrDone(weight_names)) { + if (!executor_->IsWeightAggrDone(weight_names) || !executor_->unmasked()) { ++retry_count_; std::string reason = "The aggregation for the weights is not done yet."; BuildPullWeightRsp(fbb, schema::ResponseCode_SucNotReady, reason, current_iter, feature_maps); diff --git a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc index f93a6cbfd99..f851b8cf702 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/push_weight_kernel.cc @@ -123,7 +123,7 @@ std::map PushWeightKernel::ParseFeatureMap(const schema::R MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, {}); std::map upload_feature_map; auto fbs_feature_map = push_weight_req->feature_map(); - MS_ERROR_IF_NULL_W_RET_VAL(push_weight_req, upload_feature_map); + MS_ERROR_IF_NULL_W_RET_VAL(fbs_feature_map, upload_feature_map); for (size_t i = 0; i < fbs_feature_map->size(); i++) { std::string weight_full_name = fbs_feature_map->Get(i)->weight_fullname()->str(); float *weight_data = const_cast(fbs_feature_map->Get(i)->data()->data()); diff --git a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc index 
da1d4dc1f08..3cc0e91695c 100644 --- a/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc +++ b/mindspore/ccsrc/fl/server/kernel/round/reconstruct_secrets_kernel.cc @@ -35,9 +35,11 @@ void ReconstructSecretsKernel::InitKernel(size_t required_cnt) { return; } auto last_cnt_handler = [&](std::shared_ptr) { - MS_LOG(INFO) << "start FinishIteration"; - FinishIteration(); - MS_LOG(INFO) << "end FinishIteration"; + if (ps::PSContext::instance()->resetter_round() == ps::ResetterRound::kReconstructSeccrets) { + MS_LOG(INFO) << "start FinishIteration"; + FinishIteration(); + MS_LOG(INFO) << "end FinishIteration"; + } return; }; auto first_cnt_handler = [&](std::shared_ptr) { return; }; @@ -146,6 +148,7 @@ void ReconstructSecretsKernel::OnLastCountEvent(const std::shared_ptr ModelStore::AssignNewModelMemory() { MS_ERROR_IF_NULL_W_RET_VAL(weight_data, nullptr); MS_ERROR_IF_NULL_W_RET_VAL(weight.second, nullptr); MS_ERROR_IF_NULL_W_RET_VAL(weight.second->addr, nullptr); - if (weight_data == nullptr) { - MS_LOG(EXCEPTION) << "Assign memory for weight failed."; - return nullptr; - } auto src_data_size = weight_size; auto dst_data_size = weight_size; diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.cc b/mindspore/ccsrc/fl/server/parameter_aggregator.cc index cb93808ad24..9a5cf531821 100644 --- a/mindspore/ccsrc/fl/server/parameter_aggregator.cc +++ b/mindspore/ccsrc/fl/server/parameter_aggregator.cc @@ -174,8 +174,14 @@ bool ParameterAggregator::IsOptimizingDone() const { return optimizing_done_; } bool ParameterAggregator::IsPullingDone() const { return pulling_done_; } +bool ParameterAggregator::requires_aggr() const { return requires_aggr_; } + bool ParameterAggregator::InitAggregationKernels(const CNodePtr &cnode) { MS_EXCEPTION_IF_NULL(cnode); + if (!JudgeRequiresAggr(cnode)) { + MS_LOG(WARNING) << "Aggregation for weight for kernel " << AnfAlgo::GetCNodeName(cnode) << " is not required."; + } + std::vector aggr_kernel_names = 
SelectAggregationAlgorithm(cnode); for (const std::string &name : aggr_kernel_names) { auto aggr_kernel = kernel::AggregationKernelFactory::GetInstance().Create(name, cnode); @@ -333,13 +339,36 @@ std::vector ParameterAggregator::SelectAggregationAlgorithm(const C } else if (ps::PSContext::instance()->server_mode() == ps::kServerModePS) { (void)aggregation_algorithm.emplace_back("DenseGradAccum"); } else { - MS_LOG(ERROR) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode(); + MS_LOG(EXCEPTION) << "Server doesn't support mode " << ps::PSContext::instance()->server_mode(); + return aggregation_algorithm; } MS_LOG(INFO) << "Aggregation algorithm selection result: " << aggregation_algorithm; return aggregation_algorithm; } +bool ParameterAggregator::JudgeRequiresAggr(const CNodePtr &cnode) { + MS_EXCEPTION_IF_NULL(cnode); + std::string cnode_name = AnfAlgo::GetCNodeName(cnode); + if (kNameToIdxMap.count(cnode_name) == 0 || kNameToIdxMap.at(cnode_name).count("inputs") == 0 || + kNameToIdxMap.at(cnode_name).at("inputs").count("weight") == 0) { + MS_LOG(EXCEPTION) << "Can't find index info of weight for kernel " << cnode_name; + return false; + } + size_t cnode_weight_idx = kNameToIdxMap.at(cnode_name).at("inputs").at("weight"); + auto weight_node = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(cnode, cnode_weight_idx), 0).first; + MS_EXCEPTION_IF_NULL(weight_node); + + if (!weight_node->isa()) { + MS_LOG(EXCEPTION) << weight_node->fullname_with_scope() << " is not a parameter node."; + return false; + } + auto param_info = weight_node->cast()->param_info(); + MS_EXCEPTION_IF_NULL(param_info); + requires_aggr_ = param_info->requires_aggr(); + return requires_aggr_; +} + template bool ParameterAggregator::AssignMemory(std::shared_ptr server_kernel, const CNodePtr &cnode, const ReuseKernelNodeInfo &reuse_kernel_node_inputs_info, diff --git a/mindspore/ccsrc/fl/server/parameter_aggregator.h 
b/mindspore/ccsrc/fl/server/parameter_aggregator.h index f7f02f7ea07..4fc3fe60f0c 100644 --- a/mindspore/ccsrc/fl/server/parameter_aggregator.h +++ b/mindspore/ccsrc/fl/server/parameter_aggregator.h @@ -57,7 +57,8 @@ class ParameterAggregator { aggregation_done_(false), optimizing_done_(false), pulling_done_(true), - memory_register_(nullptr) {} + memory_register_(nullptr), + requires_aggr_(true) {} ~ParameterAggregator() = default; // Initialize ParameterAggregator with a cnode. This cnode is normally a optimizer kernel for now. @@ -94,6 +95,9 @@ class ParameterAggregator { bool IsOptimizingDone() const; bool IsPullingDone() const; + // Return whether this parameter requires aggragation. + bool requires_aggr() const; + private: // Initializing aggregation/optimizer kenerls based on the cnode. The reason of this is described in the file // kernel/kernel_factory.h. @@ -118,6 +122,9 @@ class ParameterAggregator { // configuration, etc. std::vector SelectAggregationAlgorithm(const CNodePtr &cnode); + // Judge whether the parameter needs to be aggregated. + bool JudgeRequiresAggr(const CNodePtr &cnode); + ServerMode server_mode_; size_t required_push_count_; size_t required_pull_count_; @@ -135,6 +142,9 @@ class ParameterAggregator { // Here stores multiple pairs of server kernels to parameters of their Launch function. std::vector, KernelParams>> aggregation_kernel_parameters_; std::vector, KernelParams>> optimizer_kernel_parameters_; + + // Whether this parameter needs to be aggregated. 
+ bool requires_aggr_; }; } // namespace server } // namespace fl diff --git a/mindspore/ccsrc/fl/server/round.cc b/mindspore/ccsrc/fl/server/round.cc index 2805d27a880..0b578814b29 100644 --- a/mindspore/ccsrc/fl/server/round.cc +++ b/mindspore/ccsrc/fl/server/round.cc @@ -169,12 +169,11 @@ bool Round::check_timeout() const { return check_timeout_; } size_t Round::time_window() const { return time_window_; } void Round::OnFirstCountEvent(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); MS_ERROR_IF_NULL_WO_RET_VAL(kernel_); - MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); MS_LOG(INFO) << "Round " << name_ << " first count event is triggered."; // The timer starts only after the first count event is triggered by DistributedCountService. if (check_timeout_) { + MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); iter_timer_->Start(std::chrono::milliseconds(time_window_)); } @@ -184,12 +183,11 @@ void Round::OnFirstCountEvent(const std::shared_ptr &m } void Round::OnLastCountEvent(const std::shared_ptr &message) { - MS_ERROR_IF_NULL_WO_RET_VAL(message); MS_ERROR_IF_NULL_WO_RET_VAL(kernel_); - MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); MS_LOG(INFO) << "Round " << name_ << " last count event is triggered."; // Same as the first count event, the timer must be stopped by DistributedCountService. 
if (check_timeout_) { + MS_ERROR_IF_NULL_WO_RET_VAL(iter_timer_); iter_timer_->Stop(); } diff --git a/mindspore/ccsrc/fl/server/server.h b/mindspore/ccsrc/fl/server/server.h index 8566d4f6f2d..bd0a3c6aa68 100644 --- a/mindspore/ccsrc/fl/server/server.h +++ b/mindspore/ccsrc/fl/server/server.h @@ -72,7 +72,15 @@ class Server { scheduler_ip_(""), scheduler_port_(0), server_num_(0), - worker_num_(0) {} + worker_num_(0), + fl_server_port_(0), + cipher_initial_client_cnt_(0), + cipher_exchange_secrets_cnt_(0), + cipher_share_secrets_cnt_(0), + cipher_get_clientlist_cnt_(0), + cipher_reconstruct_secrets_up_cnt_(0), + cipher_reconstruct_secrets_down_cnt_(0), + cipher_time_window_(0) {} ~Server() = default; Server(const Server &) = delete; Server &operator=(const Server &) = delete; diff --git a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc index 86d0bf78cc0..5f35bc96558 100644 --- a/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc +++ b/mindspore/ccsrc/frontend/optimizer/ad/dfunctor.cc @@ -909,9 +909,9 @@ CNodePtr GetPrimalUser(const CNodePtr &j_user, const std::map> FindPrimalJPair(const FuncGraphManagerPtr &manager, - const FuncGraphPtr &primal_graph) { - std::vector> primal_j_pair; +static std::unordered_map> FindPrimalJPair(const FuncGraphManagerPtr &manager, + const FuncGraphPtr &primal_graph) { + std::vector j_users; std::map> primal_map; const auto &node_user_map = manager->node_users(); // Search primal graph user cnodes. @@ -930,20 +930,22 @@ static std::vector> FindPrimalJPair(const FuncGrap primal_map[fg] = {cnode}; } else if (IsPrimitive(cnode->inputs().at(0), prim::kPrimJ)) { // To find J user. 
- auto j_user = GetJUser(node_user_map, cnode, index); - (void)primal_j_pair.emplace_back(std::pair(nullptr, j_user)); + j_users.emplace_back(GetJUser(node_user_map, cnode, index)); } } - for (auto &[primal_user, j_user] : primal_j_pair) { + std::unordered_map> primal_user_to_j_users; + for (const auto &j_user : j_users) { + MS_EXCEPTION_IF_NULL(j_user); auto primal = GetPrimalUser(j_user, primal_map); - if (primal != nullptr) { - MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString() - << " and J user is: " << j_user->DebugString(); - primal_user = primal; + if (primal == nullptr) { + continue; } + MS_LOG(DEBUG) << "Primal_J pair is found, where primal is: " << primal->DebugString() + << " and J user is: " << j_user->DebugString(); + primal_user_to_j_users[primal].emplace_back(j_user); } - return primal_j_pair; + return primal_user_to_j_users; } static void RemovePrimalUpdateStates(const FuncGraphManagerPtr &manager, const CNodePtr &primal_call) { @@ -1007,26 +1009,32 @@ void DFunctor::EliminatePrimalGraph() { // Find primal user and paired J user cnodes. auto manager = primal_graph_->manager(); MS_EXCEPTION_IF_NULL(manager); - auto prim_j_pair = FindPrimalJPair(manager, primal_graph_); - for (auto &[primal_user, j_user] : prim_j_pair) { - if (primal_user == nullptr || j_user == nullptr) { - // Skip if one of them not found. - return; + auto primal_user_to_j_users = FindPrimalJPair(manager, primal_graph_); + for (const auto &iter : primal_user_to_j_users) { + auto primal_user = iter.first; + auto &j_users = iter.second; + MS_EXCEPTION_IF_NULL(primal_user); + if (j_users.size() == 1) { + // If both inputs are same except monads, we copy primal monad args to k graph + // so that they can be combined in CSE (common subexpression elimination) pass. + // Only do this when the size of j_users is 1 in order to keep the execution order. 
+ const bool has_monad = CopyMonadArguments(primal_user, j_users[0]); + // Remove the UpdateState nodes after primal_user if need. + if (has_monad) { + RemovePrimalUpdateStates(manager, primal_user); + } + } else { + MS_LOG(INFO) << "There are multiple j users with the same primal user " << primal_user->DebugString(); } // Replace primal graph with k graph. auto k_vnode = NewValueNode(k_graph_); primal_user->set_input(0, k_vnode); - primal_user->set_abstract(j_user->abstract()); - - // If both inputs are same except monads, we copy primal monad args to k graph - // so that they can be combined in CSE (common subexpression elimination) pass. - const bool has_monad = CopyMonadArguments(primal_user, j_user); - // Remove the UpdateState nodes after primal_user if need. - if (has_monad) { - RemovePrimalUpdateStates(manager, primal_user); + if (j_users.empty()) { + MS_LOG(EXCEPTION) << "The J nodes for primal graph " << primal_graph_->ToString() + << " should be used by at least one other node."; } - + primal_user->set_abstract(j_users[0]->abstract()); // Insert tuple_getitem after primal user cnode. 
auto construct_wrapper = primal_user->func_graph(); auto tuple_getitem = NewValueNode(prim::kPrimTupleGetItem); diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.cc b/mindspore/ccsrc/frontend/optimizer/irpass.cc index 1dcc6593bc4..478afa46d86 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass.cc @@ -186,8 +186,19 @@ OptimizeIRPassLib::OptimizeIRPassLib() { MakeSubstitution(std::make_shared(), "specialize_transform", IsCNodeGraph); // UpdateState eliminate - updatestate_eliminater_ = - MakeSubstitution(std::make_shared(), "updatestate_eliminater", prim::kPrimUpdateState); + updatestate_only_used_node_eliminater_ = + MakeSubstitution(std::make_shared(), "updatestate_only_used_node_eliminater", + prim::kPrimUpdateState); + updatestate_pure_node_eliminater_ = MakeSubstitution(std::make_shared(), + "updatestate_pure_node_eliminater", prim::kPrimUpdateState); + updatestate_depend_eliminater_ = MakeSubstitution(std::make_shared(), + "updatestate_depend_eliminater", prim::kPrimUpdateState); + updatestate_assign_eliminater_ = MakeSubstitution(std::make_shared(), + "updatestate_assign_eliminater", prim::kPrimUpdateState); + updatestate_maketuple_eliminater_ = MakeSubstitution(std::make_shared(), + "updatestate_maketuple_eliminater", prim::kPrimUpdateState); + updatestate_loads_eliminater_ = MakeSubstitution(std::make_shared(), + "updatestate_loads_eliminater", prim::kPrimUpdateState); switch_call_monad_eliminater_ = MakeSubstitution(std::make_shared(), "switch_call_monad_eliminater", IsCNodeDup); @@ -261,13 +272,9 @@ OptimizeIRPassLib::OptimizeIRPassLib() { } ResolveIRPassLib::ResolveIRPassLib() { - resolver_resolve_and_getattr_ = - MakeSubstitution(std::make_shared(), "resolver_resolve_and_getattr", - {prim::kPrimGetAttr, prim::kPrimResolve}); - resolver_resolve_ = MakeSubstitution(std::make_shared(), "resolver_resolve", prim::kPrimResolve); - resolver_getattr_ = MakeSubstitution(std::make_shared(), 
"resolver_getattr", prim::kPrimGetAttr); - resolver_getattr_resolve_ = - MakeSubstitution(std::make_shared(), "resolver_getattr_resolve", prim::kPrimGetAttr); + // In resolver_getattr_resolve_, some patterns have priority over others. + resolver_getattr_resolve_ = MakeSubstitution(std::make_shared(), "getattr_resolve", + {prim::kPrimGetAttr, prim::kPrimResolve}, opt::CHECK_RENORM, true); } InferenceOptPrepareLib::InferenceOptPrepareLib() { diff --git a/mindspore/ccsrc/frontend/optimizer/irpass.h b/mindspore/ccsrc/frontend/optimizer/irpass.h index 5d0d2d36e89..6db60d397b2 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass.h @@ -108,7 +108,12 @@ class OptimizeIRPassLib { SubstitutionPtr specialize_transform_; // Auto-monad related eliminaters. - SubstitutionPtr updatestate_eliminater_; + SubstitutionPtr updatestate_only_used_node_eliminater_; + SubstitutionPtr updatestate_pure_node_eliminater_; + SubstitutionPtr updatestate_depend_eliminater_; + SubstitutionPtr updatestate_assign_eliminater_; + SubstitutionPtr updatestate_maketuple_eliminater_; + SubstitutionPtr updatestate_loads_eliminater_; SubstitutionPtr switch_call_monad_eliminater_; SubstitutionPtr stopgrad_eliminater_; SubstitutionPtr load_eliminater_; @@ -166,10 +171,6 @@ class ResolveIRPassLib { public: ResolveIRPassLib(); ~ResolveIRPassLib() = default; - - SubstitutionPtr resolver_resolve_and_getattr_; - SubstitutionPtr resolver_resolve_; - SubstitutionPtr resolver_getattr_; SubstitutionPtr resolver_getattr_resolve_; }; diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h index 68545b213b3..53aa13c93b0 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/symbol_resolver.h @@ -34,117 +34,17 @@ namespace mindspore { namespace opt { namespace irpass { -const char PARSE_SUPER_NAME[] = "namespace"; - -// 
{prim::kPrimResolve, Ns, Sym} -class ResolverResolve : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { - Reset(); - AnfVisitor::Match(prim::kPrimResolve, {IsVNode, IsVNode})(node); - if (sym_ != nullptr) { - return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node); - } - return nullptr; - } - - void Visit(const ValueNodePtr &vnode) override { - if (IsValueNode(vnode)) { - ns_ = GetValueNode(vnode); - } else if (ns_ != nullptr && IsValueNode(vnode)) { - sym_ = GetValueNode(vnode); - } - } - - void Reset() { - ns_ = nullptr; - sym_ = nullptr; - } - - private: - parse::NameSpacePtr ns_{nullptr}; - parse::SymbolPtr sym_{nullptr}; -}; - -// {prim::kPrimGetAttr, Ns, Str} -class ResolverGetAttr : public AnfVisitor { - public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { - Reset(); - AnfVisitor::Match(prim::kPrimGetAttr, {IsVNode, IsVNode})(node); - if (sym_ != nullptr) { - return parse::ResolveSymbol(optimizer->manager(), ns_, sym_, node); - } - return nullptr; - } - - void Visit(const AnfNodePtr &node) override { - if (IsValueNode(node)) { - ns_ = GetValueNode(node); - } else if (ns_ != nullptr && IsValueNode(node)) { - auto str = GetValue(GetValueNode(node)); - sym_ = std::make_shared(str); - } - } - - void Reset() { - ns_ = nullptr; - sym_ = nullptr; - } - - private: - parse::NameSpacePtr ns_{nullptr}; - parse::SymbolPtr sym_{nullptr}; -}; - -// {prim::kPrimGetAttr, {prim::kPrimResolve, ns_node, sym_node}, attr_node} +// Put GetAttr pattern and Resolve pattern together to ensure that GetAttr pattern always takes precedence over Resolve +// pattern. After matching GetAttr pattern, there may be new nodes that can match GetAttr pattern and Resolve pattern. +// The same is true for matching Resolve pattern. 
+// +// {prim::kPrimGetAttr, {prim::kPrimResolve, namespace, symbol}, attr} +// {prim::kPrimGetAttr, namespace, attr} +// {prim::kPrimGetAttr, bool, attr} +// {prim::kPrimResolve, namespace, symbol} class ResolverGetAttrResolve : public OptimizerCaller { public: - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { - PatternNode ns_node, sym_node, attr_node; - auto ResolveAttrLambda = [&node, &ns_node, &sym_node, &attr_node, &optimizer]() -> AnfNodePtr { - auto node_to_getattr = node->cast()->input(1); - std::string attr_as_string = GetValueNode(attr_node.GetNode(node))->value(); - - auto ns_ = GetValueNode(ns_node.GetNode(node)); - auto sym_ = GetValueNode(sym_node.GetNode(node)); - if (ns_->module() == parse::RESOLVE_NAMESPACE_NAME_CLASS_MEMBER && sym_->symbol() != PARSE_SUPER_NAME) { - // deal with the case of getting attr from a class member - // and avoid the case of getting attr from self (the result of ParseSuper) - auto result = parse::ResolveCellwithAttr(optimizer->manager(), ns_, sym_, node_to_getattr, attr_as_string); - return result; - } - return nullptr; - }; - MATCH_REPLACE_LAMBDA_IF( - node, PPrimitive(prim::kPrimGetAttr, PPrimitive(prim::kPrimResolve, ns_node, sym_node), attr_node), - ResolveAttrLambda, attr_node.CheckFunc(IsValueNode, node)); - - return nullptr; - } -}; - -class ResolverResolveAndGetAttr : public OptimizerCaller { - public: - ResolverResolveAndGetAttr() { - resolver_optimizers_ = {std::make_shared(), std::make_shared(), - std::make_shared()}; - } - virtual ~ResolverResolveAndGetAttr() = default; - - AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) override { - AnfNodePtr new_node; - for (const auto &resolver_opt : resolver_optimizers_) { - new_node = (*resolver_opt)(optimizer, node); - if (new_node != nullptr) { - return new_node; - } - } - return nullptr; - } - - private: - std::vector resolver_optimizers_{}; + AnfNodePtr operator()(const OptimizerPtr &optimizer, 
const AnfNodePtr &node) override; }; } // namespace irpass } // namespace opt diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc index a4d9137bc09..02eede35af8 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc +++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.cc @@ -22,6 +22,10 @@ #include #include "frontend/operator/ops.h" +#include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/optimizer_caller.h" +#include "frontend/optimizer/anf_visitor.h" +#include "ir/pattern_matcher.h" namespace mindspore::opt::irpass { namespace { @@ -81,35 +85,7 @@ bool OnlyUsedByTwoNode(const AnfNodePtr &be_used_node, const AnfNodePtr &first_n (first_user == second_node && second_user == first_node); } -// Eliminate useless node that only used by associated update_state. -// Convert: -// x1 = node(x, u) -// u1 = update_state(u, x1) # update_state is the only user of node -// user(u1) -// To: -// user(u) -AnfNodePtr EliminateUpdateStateOnlyUsedNode(const CNodePtr &update_state, const AnfNodePtr &node) { - if (!OnlyUsedByOneNode(node, update_state)) { - // Skip if UpdateState is not the only user of cnode. - return nullptr; - } - // Replace UpdateState with the input monad. - return update_state->input(kInputIndex); -} - -// Eliminate UpdateState that attaches a pure (no-side-effect) node. -// Convert: -// x = pure_node(args) # no side effect -// u1 = update_state(u, x) -// user(u1) -// To: -// x = pure_node(args) -// user(u) AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const AnfNodePtr &attach) { - if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem)) { - // Skip tuple_getitem. - return nullptr; - } auto cnode = dyn_cast(attach); if (cnode == nullptr) { // Skip value node or parameter. 
@@ -122,26 +98,11 @@ AnfNodePtr EliminateUpdateStateForPureNode(const CNodePtr &update_state, const A return nullptr; } } - // Skip Call/Switch/SwitchLayer. - auto first_input_node = cnode->input(kFirstInputIndex); - if (IsPrimitiveCNode(first_input_node, prim::kPrimCall) || IsPrimitiveCNode(first_input_node, prim::kPrimSwitch) || - IsPrimitiveCNode(first_input_node, prim::kPrimSwitchLayer)) { - return nullptr; - } // Remove UpdateState by replace it with its input monad. return update_state->input(kInputIndex); } -// Eliminate redundant UpdateState/Depend pair nodes caused by inline. -// Convert: -// x1 = Depend(x, u) -// u1 = UpdateState(u, x1) -// out = x_user(x1) -// u2 = u_user(u1) -// To: -// out = x_user(x) -// u2 = u_user(u) AnfNodePtr EliminateUpdateStateWithDepend(const CNodePtr &update_state, const CNodePtr &depend) { auto input_monad = depend->inputs().back(); if (!HasAbstractMonad(input_monad)) { @@ -638,28 +599,86 @@ AnfNodePtr EliminateUpdateStateBetweenAssignMakeTuple(const CNodePtr &update_sta } return nullptr; } - } // namespace -AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { +// Eliminate useless node that only used by associated update_state. +// {prim::kPrimUpdateState, u, {prim::kPrimLoad, m, u}} -> u +// {prim::kPrimUpdateState, u, {prim::kPrimPartial, m, u}} -> u +// Convert: +// x1 = node(x, u) +// u1 = update_state(u, x1) # update_state is the only user of x1. 
+// user(u1) +// To: +// user(u) +AnfNodePtr UpdatestateOnlyUsedNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { auto update_state_node = dyn_cast(node); if (update_state_node == nullptr || update_state_node->inputs().empty()) { MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); return nullptr; } auto &attach = update_state_node->input(kAttachIndex); + if (IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimLoad)) { + // Replace UpdateState with the input monad. + if (OnlyUsedByOneNode(attach, update_state_node)) { + return update_state_node->input(kInputIndex); + } + } + return nullptr; +} - // Handle UpdateState(u, Depend(...)). +// Eliminate UpdateState that attaches a pure (no-side-effect) node. +// Convert: +// x = pure_node(args) # no side effect +// u1 = update_state(u, x) +// user(u1) +// To: +// x = pure_node(args) +// user(u) +AnfNodePtr UpdatestatePureNodeEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + auto update_state_node = dyn_cast(node); + if (update_state_node == nullptr || update_state_node->inputs().empty()) { + MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); + return nullptr; + } + auto &attach = update_state_node->input(kAttachIndex); + if (IsPrimitiveCNode(attach, prim::kPrimTupleGetItem) || IsPrimitiveCNode(attach, prim::kPrimDepend) || + IsPrimitiveCNode(attach, prim::kPrimPartial) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) { + return nullptr; + } + return EliminateUpdateStateForPureNode(update_state_node, attach); +} + +// Eliminate redundant UpdateState/Depend pair nodes caused by inline. 
+// Convert: +// x1 = Depend(x, u) +// u1 = UpdateState(u, x1) +// out = x_user(x1) +// u2 = u_user(u1) +// To: +// out = x_user(x) +// u2 = u_user(u) +AnfNodePtr UpdatestateDependEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + auto update_state_node = dyn_cast(node); + if (update_state_node == nullptr || update_state_node->inputs().empty()) { + MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); + return nullptr; + } + auto &attach = update_state_node->input(kAttachIndex); if (IsPrimitiveCNode(attach, prim::kPrimDepend)) { return EliminateUpdateStateWithDepend(update_state_node, attach->cast()); } + return nullptr; +} - // Handle UpdateState(u, Partial(...)). - if (IsPrimitiveCNode(attach, prim::kPrimPartial)) { - return EliminateUpdateStateOnlyUsedNode(update_state_node, attach); +// Eliminate UpdateStates between Assign nodes. +// Eliminate UpdateStates between Assign and MakeTuple. +AnfNodePtr UpdatestateAssignEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + auto update_state_node = dyn_cast(node); + if (update_state_node == nullptr || update_state_node->inputs().empty()) { + MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); + return nullptr; } - - // Handle UpdateState(u, Assign(...)). + auto &attach = update_state_node->input(kAttachIndex); if (IsPrimitiveCNode(attach, prim::kPrimAssign)) { auto new_node = EliminateUpdateStateBetweenAssigns(update_state_node, attach); if (new_node != nullptr) { @@ -667,20 +686,15 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode } return EliminateUpdateStateBetweenMakeTupleAssign(update_state_node, attach); } + return nullptr; +} - // Handle UpdateState(u, Load(...)). 
- const bool attach_is_load = IsPrimitiveCNode(attach, prim::kPrimLoad); - if (attach_is_load) { - auto new_node = EliminateUpdateStateOnlyUsedNode(update_state_node, attach); - if (new_node != nullptr) { - return new_node; - } - } - - // Handle UpdateState(u, MakeTuple(...)). - const bool attach_is_tuple = IsPrimitiveCNode(attach, prim::kPrimMakeTuple); - if (attach_is_tuple) { - auto make_tuple = attach->cast(); +// Eliminate UpdateStates which the second input is MakeTuple. +AnfNodePtr UpdatestateMakeTupleEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + PatternNode u, attach; + auto MakeTupleLambda = [&node, &u, &attach]() -> AnfNodePtr { + auto update_state_node = node->cast(); + auto make_tuple = attach.GetNode(node)->cast(); auto new_node = EliminateMakeTupleWithDeadNode(update_state_node, make_tuple); if (new_node != nullptr) { return new_node; @@ -689,23 +703,31 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode if (new_node != nullptr) { return new_node; } - new_node = EliminateUpdateStateBetweenAssignMakeTuple(update_state_node, make_tuple); - if (new_node != nullptr) { - return new_node; - } + return EliminateUpdateStateBetweenAssignMakeTuple(update_state_node, make_tuple); + }; + + MATCH_REPLACE_LAMBDA_IF(node, PPrimitive(prim::kPrimUpdateState, u, attach), MakeTupleLambda, + IsPrimitiveCNode(attach.GetNode(node), prim::kPrimMakeTuple)); + return nullptr; +} + +// Eliminate UpdateStates for consecutive Loads. +AnfNodePtr UpdatestateLoadsEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { + auto update_state_node = dyn_cast(node); + if (update_state_node == nullptr || update_state_node->inputs().empty()) { + MS_LOG(WARNING) << "UpdatestateEliminater encounter invalid node: " << node->DebugString(); + return nullptr; } - // Merge UpdateStates for Loads. 
- if (attach_is_load || attach_is_tuple) { + auto &attach = update_state_node->input(kAttachIndex); + if (IsPrimitiveCNode(attach, prim::kPrimLoad) || IsPrimitiveCNode(attach, prim::kPrimMakeTuple)) { std::vector update_states; std::vector loads; GetLoadsFromUpdateState(update_state_node, &update_states, &loads); if (update_states.size() > 1 && loads.size() > 1) { return EliminateUpdateStateForLoads(update_state_node, update_states, loads); } - return nullptr; } - // Eliminate UpdateStates that attaches a no-side-effect node. - return EliminateUpdateStateForPureNode(update_state_node, attach); + return nullptr; } // Eliminate Monad parameter for switch call. @@ -725,7 +747,7 @@ AnfNodePtr UpdatestateEliminater::operator()(const OptimizerPtr &, const AnfNode // g2 = Partial(..., u) // s = switch(cond, g1, g2) // res = s() -AnfNodePtr EliminateMonadParameterForSwitchCall(const AnfNodePtr &node) { +AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { const CNodePtr &switch_call = dyn_cast(node); if (switch_call == nullptr) { return nullptr; @@ -777,8 +799,4 @@ AnfNodePtr EliminateMonadParameterForSwitchCall(const AnfNodePtr &node) { auto new_switch_call = fg->NewCNode({new_switch_cnode}); return new_switch_call; } - -AnfNodePtr SwitchCallMonadParameterEliminater::operator()(const OptimizerPtr &, const AnfNodePtr &node) { - return EliminateMonadParameterForSwitchCall(node); -} } // namespace mindspore::opt::irpass diff --git a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h index 1e61459cc72..60fe63e0d9d 100644 --- a/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h +++ b/mindspore/ccsrc/frontend/optimizer/irpass/updatestate_eliminate.h @@ -21,17 +21,44 @@ #include "frontend/optimizer/anf_visitor.h" namespace mindspore::opt::irpass { -// -// UpdatestateEliminater eliminates redundant UpdateState related nodes. 
-// -class UpdatestateEliminater : public AnfVisitor { +// Eliminate useless node that only used by associated update_state. +class UpdatestateOnlyUsedNodeEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; +}; + +// Eliminate UpdateStates that attaches a no-side-effect node. +class UpdatestatePureNodeEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; +}; + +// Eliminate redundant UpdateState/Depend pair nodes caused by inline. +class UpdatestateDependEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; +}; + +// Eliminate UpdateStates between Assign nodes. +// Eliminate UpdateStates between Assign and MakeTuple. +class UpdatestateAssignEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; +}; + +// Eliminate UpdateStates which the second input is MakeTuple. +class UpdatestateMakeTupleEliminater : public AnfVisitor { + public: + AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; +}; + +// Eliminate UpdateStates for consecutive Loads. +class UpdatestateLoadsEliminater : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; }; -// // SwitchCallMonadParameterEliminater eliminates Monad parameter in switch call. 
-// class SwitchCallMonadParameterEliminater : public AnfVisitor { public: AnfNodePtr operator()(const OptimizerPtr &, const AnfNodePtr &node) override; diff --git a/mindspore/ccsrc/frontend/optimizer/opt.cc b/mindspore/ccsrc/frontend/optimizer/opt.cc index 1b0bbf80415..30ec46304b3 100644 --- a/mindspore/ccsrc/frontend/optimizer/opt.cc +++ b/mindspore/ccsrc/frontend/optimizer/opt.cc @@ -30,13 +30,14 @@ namespace mindspore { /* namespace to support opt */ namespace opt { SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim, - const RenormAction &renorm_action) { + const RenormAction &renorm_action, bool has_priority_pattern) { auto fn = [prim](const AnfNodePtr &node) -> bool { return IsPrimitiveCNode(node, prim); }; - return std::make_shared(transform, name, fn, renorm_action); + return std::make_shared(transform, name, fn, renorm_action, has_priority_pattern); } SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const std::vector &prims, const RenormAction &renorm_action) { + const std::vector &prims, const RenormAction &renorm_action, + bool has_priority_pattern) { auto fn = [prims](const AnfNodePtr &node) -> bool { if (!node->isa()) { return false; @@ -59,12 +60,13 @@ SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std: return false; }; - return std::make_shared(transform, name, fn, renorm_action); + return std::make_shared(transform, name, fn, renorm_action, has_priority_pattern); } SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const PredicateFuncType &predicate, const RenormAction &renorm_action) { - return std::make_shared(transform, name, predicate, renorm_action); + const PredicateFuncType &predicate, const RenormAction &renorm_action, + bool has_priority_pattern) { + return std::make_shared(transform, name, predicate, renorm_action, has_priority_pattern); } AnfNodePtr 
Substitution::operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node) { @@ -126,16 +128,41 @@ static AnfNodePtr DoTransform(const OptimizerPtr &optimizer, const AnfNodePtr &n return nullptr; } -static void UpdateTransformingList(const OptimizerPtr &optimizer, const AnfNodePtr &node, std::deque *todo, - bool change, size_t seen) { +static void UpdateTransformingListForSubstitutions(const AnfNodePtr &node, std::deque *todo, bool change) { if (IsValueNode(node)) { (*todo).emplace_back(GetValueNode(node)->output()); } - if (node->isa()) { - auto &inputs = node->cast()->inputs(); - (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo)); + + if (change) { + (*todo).emplace_back(node); + } else { + if (node->isa()) { + auto &inputs = node->cast()->inputs(); + (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo)); + } + } +} + +static void UpdateTransformingListForIR(const AnfNodePtr &node, std::deque *todo, bool change, + const SubstitutionPtr &substitution) { + if (IsValueNode(node)) { + (*todo).emplace_back(GetValueNode(node)->output()); } + // If there is a priority pattern in substitution, don't transform the new node, + // otherwise some nodes may match the wrong patterns. 
+ if (change && substitution != nullptr && !substitution->has_priority_pattern_) { + (*todo).emplace_back(node); + } else { + if (node->isa()) { + auto &inputs = node->cast()->inputs(); + (void)std::copy(inputs.begin(), inputs.end(), std::back_inserter(*todo)); + } + } +} + +static void UpdateTransformingListWithUserNodes(const OptimizerPtr &optimizer, const AnfNodePtr &node, + std::deque *todo, bool change, size_t seen) { if (!change) { return; } @@ -185,11 +212,11 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con change = true; changes = true; node = res; - todo.emplace_back(res); break; } } - UpdateTransformingList(optimizer, node, &todo, change, seen); + UpdateTransformingListForSubstitutions(node, &todo, change); + UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen); } #ifdef ENABLE_PROFILE MsProfile::StatTime("opt.transforms." + optimizer->name(), GetTime() - start); @@ -197,7 +224,7 @@ bool SubstitutionList::ApplyIRToSubstitutions(const OptimizerPtr &optimizer, con return changes; } -bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &root_node, +bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph, const SubstitutionPtr &substitution) const { #ifdef ENABLE_PROFILE double start = GetTime(); @@ -205,7 +232,7 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons FuncGraphManagerPtr manager = optimizer->manager(); auto seen = NewSeenGeneration(); std::deque todo; - todo.emplace_back(root_node); + todo.emplace_back(func_graph->output()); bool changes = false; auto &all_nodes = manager->all_nodes(); @@ -225,7 +252,8 @@ bool SubstitutionList::ApplySubstitutionToIR(const OptimizerPtr &optimizer, cons changes = true; node = res; } - UpdateTransformingList(optimizer, node, &todo, change, seen); + UpdateTransformingListForIR(node, &todo, change, substitution); + 
UpdateTransformingListWithUserNodes(optimizer, node, &todo, change, seen); } #ifdef ENABLE_PROFILE @@ -268,7 +296,7 @@ bool SubstitutionList::ApplySubstitutionsToIR(const OptimizerPtr &optimizer, con loop = false; for (size_t i = 0; i < list_.size(); i++) { const auto &substitution = list_[i]; - bool change = ApplySubstitutionToIR(optimizer, func_graph->output(), substitution); + bool change = ApplySubstitutionToIR(optimizer, func_graph, substitution); changes = changes || change; loop = loop || change; diff --git a/mindspore/ccsrc/frontend/optimizer/opt.h b/mindspore/ccsrc/frontend/optimizer/opt.h index 74711b4583a..3370f1cebcd 100644 --- a/mindspore/ccsrc/frontend/optimizer/opt.h +++ b/mindspore/ccsrc/frontend/optimizer/opt.h @@ -17,6 +17,7 @@ #ifndef MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_ #define MINDSPORE_CCSRC_FRONTEND_OPTIMIZER_OPT_H_ +#include #include #include #include @@ -41,11 +42,18 @@ class Substitution { OptimizerCallerPtr transform_; std::string name_; PredicateFuncType predicate_{nullptr}; - // an enum to mark this Substitution relation to renormalize pass + // An enum to mark this Substitution relation to renormalize pass RenormAction renorm_action_; + // Determine whether it is a priority substitution, that is, some patterns need to be matched prior to others. 
+ bool has_priority_pattern_{false}; + Substitution(const OptimizerCallerPtr &transform, const std::string &name, const PredicateFuncType &predicate, - const RenormAction &renorm_action) - : transform_(transform), name_(name), predicate_(predicate), renorm_action_(renorm_action) {} + const RenormAction &renorm_action, bool has_priority_pattern) + : transform_(transform), + name_(name), + predicate_(predicate), + renorm_action_(renorm_action), + has_priority_pattern_(has_priority_pattern) {} ~Substitution() = default; AnfNodePtr operator()(const OptimizerPtr &optimizer, const AnfNodePtr &node); }; @@ -53,12 +61,13 @@ class Substitution { using SubstitutionPtr = std::shared_ptr; SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const PrimitivePtr &prim, - const RenormAction &action_renorm = CHECK_RENORM); + const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false); SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, const std::vector &prims, - const RenormAction &action_renorm = CHECK_RENORM); + const RenormAction &action_renorm = CHECK_RENORM, bool has_priority_pattern = false); SubstitutionPtr MakeSubstitution(const OptimizerCallerPtr &transform, const std::string &name, - const PredicateFuncType &predicate, const RenormAction &action_renorm = CHECK_RENORM); + const PredicateFuncType &predicate, const RenormAction &action_renorm = CHECK_RENORM, + bool has_priority_pattern = false); enum OptTraverseSubstitutionsMode { kOptTraverseFromIRToSubstitutions = 0, kOptTraverseFromSubstitutionsToIR }; @@ -73,15 +82,16 @@ class SubstitutionList { private: bool ApplyIRToSubstitutions(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const; - bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const AnfNodePtr &node, const SubstitutionPtr &sub) const; + bool ApplySubstitutionToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph, + 
const SubstitutionPtr &sub) const; bool ApplySubstitutionsToIR(const OptimizerPtr &optimizer, const FuncGraphPtr &func_graph) const; void DisplayStatusOfSubstitution(const std::unordered_map> &status, const OptimizerPtr &optimizer, size_t space) const; std::vector list_; // a flag to mark this list of Substitution can only be executed only once - bool is_once_; - bool global_sensitive_; + bool is_once_{false}; + bool global_sensitive_{false}; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc index f57913c4b9f..81d1168667a 100644 --- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.cc @@ -29,6 +29,7 @@ namespace mindspore { namespace parallel { + void GenerateStrategy(const std::shared_ptr &graph, const std::vector> &ops, const std::shared_ptr>> &eli_list, const std::vector> &input_tensor_names, @@ -37,6 +38,7 @@ void GenerateStrategy(const std::shared_ptr &graph, const std::vector> no_stra_op_list(new std::vector); for (size_t i = 0; i < eli_list->size(); i++) { no_stra_op_list->push_back(eli_list->at(i)[0]); @@ -488,6 +490,44 @@ Strategys MakeDataParallelStrategy(const std::shared_ptr &graph, return strategies; } +Strategys MakeFullBatchStrategy(const std::shared_ptr &graph, + const std::vector> &ops, const size_t iter_graph, + const size_t iter_ops) { + if (ops.empty()) { + MS_LOG(EXCEPTION) << "Failure: Operators is empty."; + } + if (iter_ops >= ops.size()) { + MS_LOG(EXCEPTION) << "Failure: Operators' elements out of range."; + } + + StrategyPtr origin_strategy = ops[iter_ops]->strategy(); + Strategys strategies; + for (size_t iter_op_inputs = 0; iter_op_inputs < ops[iter_ops]->inputs_tensor_info().size(); iter_op_inputs++) { + if (iter_op_inputs >= 
origin_strategy->GetInputDim().size()) { + MS_LOG(EXCEPTION) << "Failure: Strategy's InputDim out of range."; + } + Dimensions s; + size_t input_size = origin_strategy->GetInputDim()[iter_op_inputs].size(); + for (size_t dim = 0; dim < input_size; dim++) { + if (input_size >= 1 && input_size <= 4) { + s.push_back(1); + } else if (input_size == 0) { + s = {}; + } else { + MS_LOG(EXCEPTION) << ops[iter_ops]->name() << ": Tensor shape " << input_size << " is unexpected."; + } + } + strategies.push_back(s); + } + // Update the output strategy of Rec Graph + graph->nodes[iter_graph].tensor_parm.tensor_str.str_n = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_c = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_h = 1.0; + graph->nodes[iter_graph].tensor_parm.tensor_str.str_w = 1.0; + + return strategies; +} + void SetBackToRawStrategy(const std::shared_ptr &op) { StrategyPtr origin_strategy = op->strategy(); Strategys strategies; @@ -528,9 +568,14 @@ Strategys PrepareStrategy(const std::shared_ptr &graph, const std::vector return PrepareOneHot(graph, ops, iter_graph, iter_ops); } else if ((type == SOFTMAX) || (type == LAYER_NORM)) { return PrepareAxisRelatedStrategy(graph, ops, iter_graph, iter_ops); - } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "_VirtualDataset") || (type == "Dropout") || - (type == BATCH_MATMUL)) { + } else if ((type == SPARSE_SOFTMAX_CROSS_ENTROPY_WITH_LOGITS) || (type == "Dropout") || (type == BATCH_MATMUL)) { return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops); + } else if (type == "_VirtualDataset") { + if (ParallelContext::GetInstance()->full_batch()) { + return MakeFullBatchStrategy(graph, ops, iter_graph, iter_ops); + } else { + return MakeDataParallelStrategy(graph, ops, iter_graph, iter_ops); + } } else { return MakeRecSearchStrategy(graph, ops, iter_graph, iter_ops); } diff --git a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h 
b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h index cee86413c2c..cc7c86a2285 100644 --- a/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h +++ b/mindspore/ccsrc/frontend/parallel/auto_parallel/rec_core/rec_generate_strategy.h @@ -55,6 +55,9 @@ Strategys CheckDivisible(const std::vector> &ops, Strategys MakeDataParallelStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); +Strategys MakeFullBatchStrategy(const std::shared_ptr &graph, + const std::vector> &ops, const size_t iter_graph, + const size_t iter_ops); void SetBackToRawStrategy(const std::shared_ptr &op); Strategys PrepareStrategy(const std::shared_ptr &graph, const std::vector> &ops, const size_t iter_graph, const size_t iter_ops); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc index 113227e56e3..2658c3042a2 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.cc @@ -100,7 +100,7 @@ AnfNodePtr CreatInt64Imm(int64_t value) { return ValuePtrToAnfNodePtr(value_ptr); } -AnfNodePtr CreatTuple(const std::vector &tuple) { +AnfNodePtr CreateTuple(const std::vector &tuple) { std::vector value_list; std::transform(tuple.begin(), tuple.end(), std::back_inserter(value_list), [](const int64_t value) { return MakeValue(value); }); diff --git a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h index 55801c0af5f..12c0c6bc157 100644 --- a/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h +++ b/mindspore/ccsrc/frontend/parallel/graph_util/generate_graph.h @@ -41,7 +41,7 @@ AnfNodePtr CreatTypeInt(int64_t value); AnfNodePtr CreatInt64Imm(int64_t value); AnfNodePtr CreateInt32Tensor(int64_t value); AnfNodePtr ValuePtrToAnfNodePtr(const 
ValuePtr &value_ptr); -AnfNodePtr CreatTuple(const std::vector &tuple); +AnfNodePtr CreateTuple(const std::vector &tuple); std::string HashInstanceName(const std::string &name); class GenerateGraph { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc index 8fc52daed14..39d998aa2aa 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.cc @@ -148,6 +148,9 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { return FAILED; } + int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; + int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; + if (pad_mode_ == 0) { // 'pad' mode MS_LOG(ERROR) << name_ << ": The 'pad' mode do not support to split H or W"; return FAILED; @@ -160,8 +163,6 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { } if (kernel_size_[0] <= stride_[2] || kernel_size_[1] <= stride_[3]) { - int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; - int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; if (h_slice_shape % stride_[2] != 0 || w_slice_shape % stride_[3] != 0) { MS_LOG(ERROR) << name_ << ": The 'same' mode do not support to split H or W when kernel_size <= stride but slice shape " @@ -177,24 +178,18 @@ Status Conv2DInfo::CheckHWStrategy(int64_t h_strategy, int64_t w_strategy) { return FAILED; } - if (kernel_size_[0] <= stride_[2]) { - int64_t h_slice_shape = inputs_shape_[0][2] / h_strategy; - if (h_slice_shape % stride_[2] != 0) { - MS_LOG(ERROR) << name_ - << ": The 'valid' mode do not support to split H when kernel_size <= stride but slice shape is " - "not divisible by stride "; - return FAILED; - } + if (kernel_size_[0] <= stride_[2] && h_slice_shape % stride_[2] != 0) { + MS_LOG(ERROR) << name_ + << ": The 'valid' mode do not support to split H when kernel_size <= stride but slice shape is " + "not divisible by 
stride "; + return FAILED; } - if (kernel_size_[1] <= stride_[3]) { - int64_t w_slice_shape = inputs_shape_[0][3] / w_strategy; - if (w_slice_shape % stride_[3] != 0) { - MS_LOG(ERROR) << name_ - << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is " - "not divisible by stride "; - return FAILED; - } + if (kernel_size_[1] <= stride_[3] && w_slice_shape % stride_[3] != 0) { + MS_LOG(ERROR) << name_ + << ": The 'valid' mode do not support to split W when kernel_size <= stride but slice shape is " + "not divisible by stride "; + return FAILED; } } @@ -234,6 +229,7 @@ Status Conv2DInfo::CheckStrategyBase(const StrategyPtr &strategy) { new_out_channel_ = out_channel_ / weight_strategy[0]; } else { out_channel_shard_ = false; + new_out_channel_ = out_channel_; } return SUCCESS; @@ -527,7 +523,19 @@ void Conv2DInfo::InferOverlapShapes() { right_recv_shape[3] = overlap_right_size_; recv_shapes_.push_back(right_recv_shape); } - MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_; + + if (left_need_send_) { + Shape left_send_shape = input_slice_shape_; + left_send_shape[3] = left_rank_overlap_right_size_; + send_shapes_.push_back(left_send_shape); + } + + if (right_need_send_) { + Shape right_send_shape = input_slice_shape_; + right_send_shape[3] = right_rank_overlap_left_size_; + send_shapes_.push_back(right_send_shape); + } + MS_LOG(INFO) << name_ << ": the recv shapes is " << recv_shapes_ << ", the send shapes is " << send_shapes_; } void Conv2DInfo::InferStridedSliceAttrs() { @@ -536,9 +544,6 @@ void Conv2DInfo::InferStridedSliceAttrs() { left_strided_slice_end_ = input_slice_shape_; left_strided_slice_end_[3] = left_rank_overlap_right_size_; left_strided_slice_strides_ = {1, 1, 1, 1}; - Shape left_send_shape = input_slice_shape_; - left_send_shape[3] = left_rank_overlap_right_size_; - send_shapes_.push_back(left_send_shape); MS_LOG(INFO) << name_ << ": The left strided slice begin is " << left_strided_slice_begin_ 
<< ", end is " << left_strided_slice_end_; } @@ -548,9 +553,6 @@ void Conv2DInfo::InferStridedSliceAttrs() { right_strided_slice_begin_[3] = input_slice_shape_[3] - right_rank_overlap_left_size_; right_strided_slice_end_ = input_slice_shape_; right_strided_slice_strides_ = {1, 1, 1, 1}; - Shape right_send_shape = input_slice_shape_; - right_send_shape[3] = right_rank_overlap_left_size_; - send_shapes_.push_back(right_send_shape); MS_LOG(INFO) << name_ << ": The right strided slice begin is " << right_strided_slice_begin_ << ", end is " << right_strided_slice_end_; } @@ -566,7 +568,7 @@ void Conv2DInfo::InferNewOperatorAttrs() { InferStridedSliceAttrs(); } -OperatorAttrs Conv2DInfo::CreatNeighborExchangeAttrs(const CNodePtr &cnode) { +OperatorAttrs Conv2DInfo::CreateNeighborExchangeAttrs(const CNodePtr &cnode) { auto type = cnode->Type(); MS_EXCEPTION_IF_NULL(type); auto tensor_type = type->cast(); @@ -582,7 +584,7 @@ OperatorAttrs Conv2DInfo::CreatNeighborExchangeAttrs(const CNodePtr &cnode) { return attrs; } -OperatorAttrs Conv2DInfo::CreatConv2DAttrs() { +OperatorAttrs Conv2DInfo::CreateConv2DAttrs() { Attr out_channel = {OUT_CHANNEL, MakeValue(new_out_channel_)}; Attr kernel_size = {KERNEL_SIZE, MakeValue(kernel_size_)}; Attr mode = {MODE, MakeValue(mode_)}; @@ -592,65 +594,130 @@ OperatorAttrs Conv2DInfo::CreatConv2DAttrs() { Attr dilation = {DILATION, MakeValue(dilation_)}; Attr group = {GROUP, MakeValue(group_)}; Attr data_format = {DATA_FORMAT, MakeValue(format_)}; - OperatorAttrs attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format}; + + OperatorAttrs attrs; + if (name_.find(CONV2D_INFO) != std::string::npos) { + attrs = {out_channel, kernel_size, mode, pad_mode, pad, stride, dilation, group, data_format}; + } else { // Conv2DTranspose + attrs = {out_channel, kernel_size, pad_mode, pad, pad, mode, stride, dilation, group, data_format}; + } + return attrs; } +std::string Conv2DInfo::ReplaceNodeName() { + if 
(name_.find(CONV2D_INFO) != std::string::npos) { + return CONV2D; + } + + if (name_.find(CONV2D_BACK_PROP_INPUT_INFO) != std::string::npos) { + return CONV2D_BACK_PROP_INPUT; + } + + if (name_.find(CONV2D_TRANSPOSE_INFO) != std::string::npos) { + return CONV2D_TRANSPOSE; + } + + MS_LOG(EXCEPTION) << "Invalid name: " << name_; +} + +AnfNodePtr Conv2DInfo::GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode) { + auto conv2d_attrs = CreateConv2DAttrs(); + auto node_name = ReplaceNodeName(); + + // conv2d + if (name_.find(CONV2D_INFO) != std::string::npos) { + if (cnode->size() < 3) { + MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size(); + } + return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2)}); + } + + // conv2dtranspose + if (cnode->size() < 4) { + MS_LOG(EXCEPTION) << name_ << ": The size of cnode is invalid: " << cnode->size(); + } + return gen_g_.PushBack({gen_g_.NewOpInst(node_name, conv2d_attrs), new_input, cnode->input(2), cnode->input(3)}); +} + Status Conv2DInfo::ComputeReplaceGraph(const CNodePtr &cnode) { auto graph = cnode->func_graph(); MS_EXCEPTION_IF_NULL(graph); - GenerateGraph gen_g = GenerateGraph(attrs_); - if (gen_g.Init(cnode) != SUCCESS) { - MS_LOG(ERROR) << "GenerateGraph Init failed"; - return FAILED; + + if (gen_g_.Init(cnode) != SUCCESS) { + MS_LOG(EXCEPTION) << "GenerateGraph Init failed"; } + + if (!left_need_send_ && !right_need_send_) { + MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to send and right no need to send"; + } + + if (!left_need_recv_ && !right_need_recv_) { + MS_LOG(EXCEPTION) << name_ << ": Now do not support left no need to recv and right no need to recv"; + } + std::vector> input_nodes; std::vector make_tuple_a_inputs = {NewValueNode(prim::kPrimMakeTuple)}; if (left_need_send_) { - auto slice_left_begin = CreatTuple(left_strided_slice_begin_); - auto slice_left_end = CreatTuple(left_strided_slice_end_); - auto 
slice_left_strided = CreatTuple(left_strided_slice_strides_); - auto slice_left = gen_g.PushBack( - {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), slice_left_begin, slice_left_end, slice_left_strided}); + auto slice_left_begin = CreateTuple(left_strided_slice_begin_); + auto slice_left_end = CreateTuple(left_strided_slice_end_); + auto slice_left_strided = CreateTuple(left_strided_slice_strides_); + auto slice_left = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_left_begin, + slice_left_end, slice_left_strided}); make_tuple_a_inputs.push_back(slice_left); + input_nodes.push_back(std::make_pair(slice_left, 1)); } if (right_need_send_) { - auto slice_right_begin = CreatTuple(right_strided_slice_begin_); - auto slice_right_end = CreatTuple(right_strided_slice_end_); - auto slice_right_strided = CreatTuple(right_strided_slice_strides_); - auto slice_right = gen_g.PushBack( - {gen_g.NewOpInst(STRIDED_SLICE), cnode->input(1), slice_right_begin, slice_right_end, slice_right_strided}); + auto slice_right_begin = CreateTuple(right_strided_slice_begin_); + auto slice_right_end = CreateTuple(right_strided_slice_end_); + auto slice_right_strided = CreateTuple(right_strided_slice_strides_); + auto slice_right = gen_g_.PushBack({gen_g_.NewOpInst(STRIDED_SLICE), gen_g_.virtual_input_node(), slice_right_begin, + slice_right_end, slice_right_strided}); make_tuple_a_inputs.push_back(slice_right); + input_nodes.push_back(std::make_pair(slice_right, 1)); } + auto make_tuple_a = graph->NewCNode(make_tuple_a_inputs); - auto alltoall_attrs = CreatNeighborExchangeAttrs(cnode); - auto alltoall_v = gen_g.PushBack({gen_g.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a}); - std::vector make_tuple_inputs = {NewValueNode(prim::kPrimMakeTuple)}; + auto alltoall_attrs = CreateNeighborExchangeAttrs(cnode); + auto alltoall_v = gen_g_.PushBack({gen_g_.NewOpInst(NEIGHBOREXCHANGE, alltoall_attrs), make_tuple_a}); + + AnfNodePtr conv2d; + Attr 
concat_axis = {AXIS, MakeValue(-1)}; + OperatorAttrs concat_attrs = {concat_axis}; + if (left_need_recv_) { std::vector tuple_getitem_l_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, CreatInt64Imm(0)}; auto tuple_getitem_l = graph->NewCNode(tuple_getitem_l_inputs); - std::vector make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), cnode->input(1), - tuple_getitem_l}; + std::vector make_tuple_l_inputs = {NewValueNode(prim::kPrimMakeTuple), tuple_getitem_l, + cnode->input(1)}; auto make_tuple_l = graph->NewCNode(make_tuple_l_inputs); - auto concat_l = gen_g.PushBack({gen_g.NewOpInst(CONCAT), make_tuple_l}); - make_tuple_inputs.push_back(concat_l); + auto concat_l = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_l}); + + if (right_need_recv_) { + std::vector tuple_getitem_r_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, + CreatInt64Imm(1)}; + auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs); + std::vector make_tuple_r_inputs = {NewValueNode(prim::kPrimMakeTuple), concat_l, tuple_getitem_r}; + auto make_tuple_r = graph->NewCNode(make_tuple_r_inputs); + auto concat_r = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r}); + conv2d = GenerateConv2DNode(concat_r, cnode); + } else { + conv2d = GenerateConv2DNode(concat_l, cnode); + } + } else { // left no need recv, and right need recv + std::vector tuple_getitem_r_inputs_1 = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, + CreatInt64Imm(0)}; + auto tuple_getitem_r_1 = graph->NewCNode(tuple_getitem_r_inputs_1); + std::vector make_tuple_r_inputs_1 = {NewValueNode(prim::kPrimMakeTuple), gen_g_.virtual_input_node(), + tuple_getitem_r_1}; + auto make_tuple_r_1 = graph->NewCNode(make_tuple_r_inputs_1); + input_nodes.push_back(std::make_pair(make_tuple_r_1, 1)); + + auto concat_r_1 = gen_g_.PushBack({gen_g_.NewOpInst(CONCAT, concat_attrs), make_tuple_r_1}); + conv2d = GenerateConv2DNode(concat_r_1, cnode); } - if (right_need_recv_) { - 
std::vector tuple_getitem_r_inputs = {NewValueNode(prim::kPrimTupleGetItem), alltoall_v, - CreatInt64Imm(0)}; - auto tuple_getitem_r = graph->NewCNode(tuple_getitem_r_inputs); - make_tuple_inputs.push_back(tuple_getitem_r); - } else { - make_tuple_inputs.push_back(cnode->input(1)); - } - auto make_tuple = graph->NewCNode(make_tuple_inputs); - Attr concat_axis = {AXIS, MakeValue(-1)}; - OperatorAttrs concat_attrs = {concat_axis}; - std::vector concat_inputs = {gen_g.NewOpInst(CONCAT, concat_attrs), make_tuple}; - auto concat = graph->NewCNode(concat_inputs); - auto conv2d_attrs = CreatConv2DAttrs(); - auto conv2d = gen_g.PushBack({gen_g.NewOpInst(CONV2D, conv2d_attrs), concat, cnode->input(2)}); + replace_graph_ = std::make_shared>, AnfNodePtr>>( std::make_pair(input_nodes, conv2d)); return SUCCESS; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h index 1ae1e4a752a..3786dc5f826 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/conv2d_info.h @@ -23,6 +23,7 @@ #include #include "ir/value.h" +#include "frontend/parallel/graph_util/generate_graph.h" #include "frontend/parallel/auto_parallel/operator_costmodel.h" #include "frontend/parallel/ops_info/operator_info.h" #include "frontend/parallel/strategy.h" @@ -57,9 +58,11 @@ class Conv2DInfo : public OperatorInfo { void InferSendRecvFlag(); void InferOverlapShapes(); void InferStridedSliceAttrs(); + std::string ReplaceNodeName(); + AnfNodePtr GenerateConv2DNode(const AnfNodePtr &new_input, const CNodePtr &cnode); ReplaceGraphPtr replace_graph(const CNodePtr &cnode) override; - OperatorAttrs CreatNeighborExchangeAttrs(const CNodePtr &cnode); - OperatorAttrs CreatConv2DAttrs(); + OperatorAttrs CreateNeighborExchangeAttrs(const CNodePtr &cnode); + OperatorAttrs CreateConv2DAttrs(); Status ComputeReplaceGraph(const CNodePtr &cnode); int64_t out_channel_ = 1; @@ -106,6 +109,8 @@ 
class Conv2DInfo : public OperatorInfo { Shapes send_shapes_; Shapes recv_shapes_; + GenerateGraph gen_g_ = GenerateGraph(attrs_); + virtual Status CheckHWStrategy(int64_t h_strategy, int64_t w_strategy); virtual void InferNewPadList(); virtual int64_t ComputeOverlapLeftSizeByRankBias(int64_t rank_bias); diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc index 35cd2405c03..64a2a0b3b83 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.cc @@ -172,6 +172,22 @@ Status GatherDInfo::InferMirrorOps() { return SUCCESS; } +void GatherDInfo::ReComputeBatchSplitFlagList() { + if (InferAttrs() != SUCCESS) { + MS_LOG(EXCEPTION) << name_ << ": Infer attrs failed"; + } + + if (dim_ == 0) { + MS_LOG(EXCEPTION) + << name_ + << ": Can not generate batch data parallel strategy since the dim is 0, please set others strategy for it"; + } + + for (size_t i = 0; i < inputs_shape_.size(); ++i) { + split_flag_list_[i] = true; + } +} + Status GatherDInfo::SetCostUnderStrategy(const StrategyPtr &strategy) { return SetCostUnderStrategyBase(strategy); } std::vector GatherDInfo::GenerateOpStrategies(int64_t stage_id) { diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h index 8288fe11ae1..1d8a2fe24d2 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/gatherd_info.h @@ -40,6 +40,7 @@ class GatherDInfo : public OperatorInfo { Status InitForCostModel(const StrategyPtr &strategy) override; std::vector GenerateOpStrategies(int64_t) override; Status SetCostUnderStrategy(const StrategyPtr &) override; + void ReComputeBatchSplitFlagList() override; protected: Status GetAttrs() override; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h 
b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h index dd6a3237da5..c0c89beb245 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h +++ b/mindspore/ccsrc/frontend/parallel/ops_info/ops_utils.h @@ -283,6 +283,9 @@ constexpr char ARGMINWITHVALUE[] = "ArgMinWithValue"; constexpr char CONV2D[] = "Conv2D"; constexpr char CONV2D_BACK_PROP_INPUT[] = "Conv2DBackpropInput"; constexpr char CONV2D_TRANSPOSE[] = "Conv2DTranspose"; +constexpr char CONV2D_INFO[] = "Conv2DInfo"; +constexpr char CONV2D_BACK_PROP_INPUT_INFO[] = "Conv2DBackpropInputInfo"; +constexpr char CONV2D_TRANSPOSE_INFO[] = "Conv2DTransposeInfo"; constexpr char FUSE_BATCH_NORM[] = "FusedBatchNorm"; constexpr char FUSE_BATCH_NORM_EX[] = "FusedBatchNormEx"; constexpr char BATCH_NORM[] = "BatchNorm"; diff --git a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc index ae6411f8f35..712d44e509e 100644 --- a/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc +++ b/mindspore/ccsrc/frontend/parallel/ops_info/virtual_output_info.cc @@ -64,8 +64,14 @@ Status VirtualOutputInfo::GenerateStrategies(int64_t stage_id) { } for (auto &shape : inputs_shape_) { Shape temp; - temp.emplace_back(SizeToLong(total_dev_num)); - (void)temp.insert(temp.end(), shape.size() - 1, 1); + if (!shape.empty()) { + if (shape[0] % total_dev_num == 0) { + temp.emplace_back(SizeToLong(total_dev_num)); + } else { + temp.emplace_back(1); + } + (void)temp.insert(temp.end(), shape.size() - 1, 1); + } strategy.push_back(temp); } sp = std::make_shared(stage_id, strategy); diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.cc b/mindspore/ccsrc/frontend/parallel/step_parallel.cc index 357b115a871..043f8dd9833 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.cc +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.cc @@ -2038,7 +2038,12 @@ void SetVirtualDatasetStrategy(const CNodePtr &node) { if 
(shape_list[0][i].empty()) { MS_LOG(EXCEPTION) << "shape_list[ " << i << " ].size() is zero"; } - Dimensions input_strategy = {dev_num}; + Dimensions input_strategy; + if (!shape_list[0][i].empty() && shape_list[0][i][0] % dev_num == 0) { + input_strategy.push_back(dev_num); + } else if (!shape_list[0][i].empty()) { + input_strategy.push_back(1); + } for (size_t j = 1; j < shape_list[0][i].size(); j++) { input_strategy.push_back(1); } @@ -3222,12 +3227,9 @@ void MarkForwardCNode(const FuncGraphPtr &root) { } } -Status ParallelInit() { - MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); +CommInfo GetCommInfo() { int64_t device_num = ParallelContext::GetInstance()->device_num(); int64_t global_rank = ParallelContext::GetInstance()->global_rank(); - int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); - std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode(); auto ms_context = MsContext::GetInstance(); MS_EXCEPTION_IF_NULL(ms_context); std::string backend = ms_context->get_param(MS_CTX_DEVICE_TARGET); @@ -3240,15 +3242,8 @@ Status ParallelInit() { world_group = NCCL_WORLD_GROUP; communication_backend = NCCL_BACKEND; } else { - MS_LOG(ERROR) << "Invalid communication backend: " << backend; - return FAILED; + MS_LOG(EXCEPTION) << "Invalid communication backend: " << backend; } - - if (split_stage_num <= 0) { - MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number"; - return FAILED; - } - uint32_t world_rank_size = 0; if (!ParallelContext::GetInstance()->device_num_is_set()) { if (!CommManager::GetInstance().GetRankSize(world_group, &world_rank_size)) { @@ -3266,7 +3261,21 @@ Status ParallelInit() { global_rank = UintToInt(rank_id); MS_LOG(INFO) << "Get global rank from communication model, the global rank is " << global_rank; } + CommInfo comm_info{device_num, global_rank, world_group, communication_backend}; + return comm_info; +} +Status ParallelInit() { + 
MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); + int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); + std::string parallel_mode = ParallelContext::GetInstance()->parallel_mode(); + if (split_stage_num <= 0) { + MS_LOG(ERROR) << "Invalid stage num " << split_stage_num << ", expected a positive stage number"; + return FAILED; + } + auto comm_info = GetCommInfo(); + int64_t device_num = comm_info.device_num; + int64_t global_rank = comm_info.global_rank; if ((device_num <= 0) || (device_num > MAX_DEVICE_NUM)) { MS_LOG(ERROR) << "Invalid device num " << device_num; return FAILED; @@ -3293,13 +3302,14 @@ Status ParallelInit() { return FAILED; } - if (!InitDevice(device_num, global_rank, communication_backend, stages)) { + if (!InitDevice(device_num, global_rank, comm_info.communication_backend, stages)) { MS_LOG(ERROR) << "Init device failed"; return FAILED; } MS_LOG(INFO) << "The parallel context: dev num: " << device_num << ", global rank: " << global_rank - << ", backend: " << backend << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean() + << ", communication_backend: " << comm_info.communication_backend + << ", gradients_mean: " << ParallelContext::GetInstance()->gradients_mean() << ", gradient_fp32_sync: " << ParallelContext::GetInstance()->gradient_fp32_sync(); return SUCCESS; @@ -3714,7 +3724,13 @@ void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr bool IsInsertVirtualOutput(const FuncGraphPtr &root) { MS_EXCEPTION_IF_NULL(ParallelContext::GetInstance()); - return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty()); + auto comm_info = GetCommInfo(); + int32_t split_stage_num = ParallelContext::GetInstance()->pipeline_stage_split_num(); + int32_t per_stage_device_num = comm_info.device_num / split_stage_num; + int32_t current_stage = comm_info.global_rank / per_stage_device_num; + MS_LOG(INFO) << "The current stage is: " << 
current_stage; + return (!root->has_flag(TRAINING) && ParallelContext::GetInstance()->dataset_strategy().empty() && + current_stage == split_stage_num - 1); } bool StepParallel(const FuncGraphPtr &root, const opt::OptimizerPtr &optimizer) { diff --git a/mindspore/ccsrc/frontend/parallel/step_parallel.h b/mindspore/ccsrc/frontend/parallel/step_parallel.h index 71c69705080..996cc11ba33 100644 --- a/mindspore/ccsrc/frontend/parallel/step_parallel.h +++ b/mindspore/ccsrc/frontend/parallel/step_parallel.h @@ -47,6 +47,13 @@ struct LossNodeInfo { CNodePtr loss_node = nullptr; }; +struct CommInfo { + int64_t device_num = 1; + int64_t global_rank = 0; + std::string world_group; + std::string communication_backend; +}; + struct ParameterSliceInfo { Shape slice_shape; RankList group_ranks; @@ -178,6 +185,8 @@ void InsertVirtualOutput(const FuncGraphPtr &root, const std::vector std::string MirrorOpName(); +CommInfo GetCommInfo(); + void ReorderForPipelineSplit(const FuncGraphPtr &root, const FuncGraphManagerPtr &manager, int64_t pipeline_stages); } // namespace parallel } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt index 6107952a89a..454d33ebb6d 100644 --- a/mindspore/ccsrc/minddata/dataset/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/CMakeLists.txt @@ -93,6 +93,7 @@ add_dependencies(engine-gnn core) add_dependencies(engine core) add_dependencies(callback core) add_dependencies(audio-kernels core) +add_dependencies(audio-ir core) add_dependencies(audio-ir-kernels core) add_dependencies(text core) add_dependencies(text-kernels core) @@ -156,6 +157,7 @@ set(submodules $ $ $ + $ $ $ $ diff --git a/mindspore/ccsrc/minddata/dataset/api/audio.cc b/mindspore/ccsrc/minddata/dataset/api/audio.cc index 5a9a6498abd..eb4f8c20c1d 100644 --- a/mindspore/ccsrc/minddata/dataset/api/audio.cc +++ b/mindspore/ccsrc/minddata/dataset/api/audio.cc @@ -16,12 +16,56 @@ #include 
"minddata/dataset/include/dataset/audio.h" +#include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h" +#include "minddata/dataset/audio/ir/kernels/angle_ir.h" #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" namespace mindspore { namespace dataset { namespace audio { +// AllpassBiquad Transform Operation. +struct AllpassBiquad::Data { + Data(int32_t sample_rate, float central_freq, float Q) + : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} + int32_t sample_rate_; + float central_freq_; + float Q_; +}; + +AllpassBiquad::AllpassBiquad(int32_t sample_rate, float central_freq, float Q) + : data_(std::make_shared(sample_rate, central_freq, Q)) {} + +std::shared_ptr AllpassBiquad::Parse() { + return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_); +} + +// AmplitudeToDB Operation. +struct AmplitudeToDB::Data { + Data(ScaleType stype, float ref_value, float amin, float top_db) + : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {} + ScaleType stype_; + float ref_value_; + float amin_; + float top_db_; +}; + +AmplitudeToDB::AmplitudeToDB(ScaleType stype, float ref_value, float amin, float top_db) + : data_(std::make_shared(stype, ref_value, amin, top_db)) {} + +std::shared_ptr AmplitudeToDB::Parse() { + return std::make_shared(data_->stype_, data_->ref_value_, data_->amin_, data_->top_db_); +} + +// Angle Transform Operation. +Angle::Angle() {} + +std::shared_ptr Angle::Parse() { return std::make_shared(); } // BandBiquad Transform Operation. 
struct BandBiquad::Data { Data(int32_t sample_rate, float central_freq, float Q, bool noise) @@ -38,6 +82,74 @@ BandBiquad::BandBiquad(int32_t sample_rate, float central_freq, float Q, bool no std::shared_ptr BandBiquad::Parse() { return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_, data_->noise_); } + +// BandpassBiquad Transform Operation. +struct BandpassBiquad::Data { + Data(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) + : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q), const_skirt_gain_(const_skirt_gain) {} + int32_t sample_rate_; + float central_freq_; + float Q_; + bool const_skirt_gain_; +}; + +BandpassBiquad::BandpassBiquad(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) + : data_(std::make_shared(sample_rate, central_freq, Q, const_skirt_gain)) {} + +std::shared_ptr BandpassBiquad::Parse() { + return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_, + data_->const_skirt_gain_); +} + +// BandrejectBiquad Transform Operation. +struct BandrejectBiquad::Data { + Data(int32_t sample_rate, float central_freq, float Q) + : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} + int32_t sample_rate_; + float central_freq_; + float Q_; +}; + +BandrejectBiquad::BandrejectBiquad(int32_t sample_rate, float central_freq, float Q) + : data_(std::make_shared(sample_rate, central_freq, Q)) {} + +std::shared_ptr BandrejectBiquad::Parse() { + return std::make_shared(data_->sample_rate_, data_->central_freq_, data_->Q_); +} + +// BassBiquad Transform Operation. 
+struct BassBiquad::Data { + Data(int32_t sample_rate, float gain, float central_freq, float Q) + : sample_rate_(sample_rate), gain_(gain), central_freq_(central_freq), Q_(Q) {} + int32_t sample_rate_; + float gain_; + float central_freq_; + float Q_; +}; + +BassBiquad::BassBiquad(int32_t sample_rate, float gain, float central_freq, float Q) + : data_(std::make_shared(sample_rate, gain, central_freq, Q)) {} + +std::shared_ptr BassBiquad::Parse() { + return std::make_shared(data_->sample_rate_, data_->gain_, data_->central_freq_, data_->Q_); +} + +// TimeStretch Operation. +struct TimeStretch::Data { + explicit Data(float hop_length, int n_freq, float fixed_rate) + : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {} + float hop_length_; + int n_freq_; + float fixed_rate_; +}; + +TimeStretch::TimeStretch(float hop_length, int n_freq, float fixed_rate) + : data_(std::make_shared(hop_length, n_freq, fixed_rate)) {} + +std::shared_ptr TimeStretch::Parse() { + return std::make_shared(data_->hop_length_, data_->n_freq_, data_->fixed_rate_); +} + } // namespace audio } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index bb1d65bc2ca..cf4898f9766 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -85,7 +85,7 @@ // IR leaf nodes #include "minddata/dataset/engine/ir/datasetops/source/album_node.h" #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h" // IR leaf nodes disabled for android #ifndef ENABLE_ANDROID @@ -95,6 +95,7 @@ #include "minddata/dataset/engine/ir/datasetops/source/clue_node.h" #include "minddata/dataset/engine/ir/datasetops/source/coco_node.h" #include 
"minddata/dataset/engine/ir/datasetops/source/csv_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/flickr_node.h" #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" @@ -928,6 +929,32 @@ CSVDataset::CSVDataset(const std::vector> &dataset_files, char ir_node_ = std::static_pointer_cast(ds); } +FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + bool decode, const std::shared_ptr &sampler, + const std::shared_ptr &cache) { + auto sampler_obj = sampler ? sampler->Parse() : nullptr; + auto ds = + std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + +FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + bool decode, const Sampler *sampler, const std::shared_ptr &cache) { + auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; + auto ds = + std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + +FlickrDataset::FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + bool decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache) { + auto sampler_obj = sampler.get().Parse(); + auto ds = + std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), decode, sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} + ImageFolderDataset::ImageFolderDataset(const std::vector &dataset_dir, bool decode, const std::shared_ptr &sampler, const std::set> &extensions, @@ -1110,29 +1137,27 @@ MnistDataset::MnistDataset(const std::vector &dataset_dir, const std::vect ir_node_ = std::static_pointer_cast(ds); } - -LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, +CmuArcticDataset::CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, const Sampler *sampler, +CmuArcticDataset::CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, const Sampler *sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -LibriSpeechDataset::LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, +CmuArcticDataset::CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } - #ifndef ENABLE_ANDROID TextFileDataset::TextFileDataset(const std::vector> &dataset_files, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc index e0c85d69f60..8f4c63469cb 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/audio/kernels/ir/bindings.cc @@ -17,12 +17,58 @@ #include "minddata/dataset/api/python/pybind_conversion.h" #include "minddata/dataset/api/python/pybind_register.h" +#include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h" +#include "minddata/dataset/audio/ir/kernels/angle_ir.h" #include "minddata/dataset/audio/ir/kernels/band_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h" +#include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" 
+#include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" #include "minddata/dataset/include/dataset/transforms.h" namespace mindspore { namespace dataset { +PYBIND_REGISTER( + AllpassBiquadOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "AllpassBiquadOperation") + .def(py::init([](int32_t sample_rate, float central_freq, float Q) { + auto allpass_biquad = std::make_shared(sample_rate, central_freq, Q); + THROW_IF_ERROR(allpass_biquad->ValidateParams()); + return allpass_biquad; + })); + })); + +PYBIND_REGISTER( + AmplitudeToDBOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "AmplitudeToDBOperation") + .def(py::init([](ScaleType stype, float ref_value, float amin, float top_db) { + auto amplitude_to_db = std::make_shared(stype, ref_value, amin, top_db); + THROW_IF_ERROR(amplitude_to_db->ValidateParams()); + return amplitude_to_db; + })); + })); + +PYBIND_REGISTER(ScaleType, 0, ([](const py::module *m) { + (void)py::enum_(*m, "ScaleType", py::arithmetic()) + .value("DE_SCALETYPE_MAGNITUDE", ScaleType::kMagnitude) + .value("DE_SCALETYPE_POWER", ScaleType::kPower) + .export_values(); + })); + +PYBIND_REGISTER(AngleOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "AngleOperation") + .def(py::init([]() { + auto angle = std::make_shared(); + THROW_IF_ERROR(angle->ValidateParams()); + return angle; + })); + })); + PYBIND_REGISTER( BandBiquadOperation, 1, ([](const py::module *m) { (void)py::class_>( @@ -34,5 +80,49 @@ PYBIND_REGISTER( })); })); +PYBIND_REGISTER( + BandpassBiquadOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "BandpassBiquadOperation") + .def(py::init([](int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) { + auto bandpass_biquad = + std::make_shared(sample_rate, central_freq, Q, const_skirt_gain); + THROW_IF_ERROR(bandpass_biquad->ValidateParams()); + return bandpass_biquad; + })); + })); + +PYBIND_REGISTER(BandrejectBiquadOperation, 1, ([](const 
py::module *m) { + (void)py::class_>(*m, "BandrejectBiquadOperation") + .def(py::init([](int32_t sample_rate, float central_freq, float Q) { + auto bandreject_biquad = + std::make_shared(sample_rate, central_freq, Q); + THROW_IF_ERROR(bandreject_biquad->ValidateParams()); + return bandreject_biquad; + })); + })); + +PYBIND_REGISTER( + BassBiquadOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "BassBiquadOperation") + .def(py::init([](int32_t sample_rate, float gain, float central_freq, float Q) { + auto bass_biquad = std::make_shared(sample_rate, gain, central_freq, Q); + THROW_IF_ERROR(bass_biquad->ValidateParams()); + return bass_biquad; + })); + })); + +PYBIND_REGISTER( + TimeStretchOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "TimeStretchOperation") + .def(py::init([](float hop_length, int n_freq, float fixed_rate) { + auto timestretch = std::make_shared(hop_length, n_freq, fixed_rate); + THROW_IF_ERROR(timestretch->ValidateParams()); + return timestretch; + })); + })); } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc index 7ef87c941d5..cc486b10336 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/bindings.cc @@ -70,11 +70,23 @@ PYBIND_REGISTER(DatasetNode, 1, ([](const py::module *m) { return zip; }, py::arg("datasets")) - .def("to_json", [](std::shared_ptr self, const std::string &json_filepath) { - nlohmann::json args; - auto serdas = std::make_shared(); - THROW_IF_ERROR(serdas->SaveToJSON(self, json_filepath, &args)); - return args.dump(); + .def("to_json", + [](std::shared_ptr self, const std::string &json_filepath) { + nlohmann::json args; + 
THROW_IF_ERROR(Serdes::SaveToJSON(self, json_filepath, &args)); + return args.dump(); + }) + .def_static("from_json_file", + [](const std::string &json_filepath) { + std::shared_ptr output; + THROW_IF_ERROR(Serdes::Deserialize(json_filepath, &output)); + return output; + }) + .def_static("from_json_string", [](const std::string &json_string) { + std::shared_ptr output; + nlohmann::json json_obj = nlohmann::json::parse(json_string); + THROW_IF_ERROR(Serdes::ConstructPipeline(json_obj, &output)); + return output; }); })); diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc index 73422631b43..a6265bcf592 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/engine/ir/datasetops/source/bindings.cc @@ -34,7 +34,7 @@ #include "minddata/dataset/engine/ir/datasetops/source/generator_node.h" #include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h" #include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h" -#include "minddata/dataset/engine/ir/datasetops/source/libri_speech_node.h" +#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h" #include "minddata/dataset/engine/ir/datasetops/source/random_node.h" #include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h" @@ -211,13 +211,13 @@ PYBIND_REGISTER(MnistNode, 2, ([](const py::module *m) { -PYBIND_REGISTER(LibriSpeechNode, 2, ([](const py::module *m) { - (void)py::class_>(*m, "LibriSpeechNode", - "to create an LibriSpeechNode") +PYBIND_REGISTER(CmuArcticNode, 2, ([](const py::module *m) { + (void)py::class_>(*m, "CmuArcticNode", + "to create an CmuArcticNode") .def(py::init([](std::string dataset_dir, std::string usage, py::handle sampler) { - auto librispeech = 
std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); - THROW_IF_ERROR(librispeech->ValidateParams()); - return librispeech; + auto cmuarctic = std::make_shared(dataset_dir, usage, toSamplerObj(sampler), nullptr); + THROW_IF_ERROR(cmuarctic->ValidateParams()); + return cmuarctic; })); })); diff --git a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc index 50c427633fd..524b1cd432d 100644 --- a/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc +++ b/mindspore/ccsrc/minddata/dataset/api/python/bindings/dataset/kernels/ir/image/bindings.cc @@ -18,6 +18,7 @@ #include "minddata/dataset/api/python/pybind_register.h" #include "minddata/dataset/include/dataset/transforms.h" +#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" #include "minddata/dataset/kernels/ir/vision/center_crop_ir.h" @@ -67,6 +68,17 @@ namespace mindspore { namespace dataset { +PYBIND_REGISTER( + AdjustGammaOperation, 1, ([](const py::module *m) { + (void)py::class_>( + *m, "AdjustGammaOperation") + .def(py::init([](float gamma, float gain) { + auto ajust_gamma = std::make_shared(gamma, gain); + THROW_IF_ERROR(ajust_gamma->ValidateParams()); + return ajust_gamma; + })); + })); + PYBIND_REGISTER( AutoContrastOperation, 1, ([](const py::module *m) { (void)py::class_>( diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index c451cd19cc5..e933e5fb44b 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -21,6 +21,7 @@ #endif #include "minddata/dataset/include/dataset/transforms.h" +#include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" #include 
"minddata/dataset/kernels/ir/vision/affine_ir.h" #include "minddata/dataset/kernels/ir/vision/auto_contrast_ir.h" #include "minddata/dataset/kernels/ir/vision/bounding_box_augment_ir.h" @@ -118,6 +119,19 @@ std::shared_ptr Affine::Parse() { } #ifndef ENABLE_ANDROID +// AdjustGamma Transform Operation. +struct AdjustGamma::Data { + Data(float gamma, float gain) : gamma_(gamma), gain_(gain) {} + float gamma_; + float gain_; +}; + +AdjustGamma::AdjustGamma(float gamma, float gain) : data_(std::make_shared(gamma, gain)) {} + +std::shared_ptr AdjustGamma::Parse() { + return std::make_shared(data_->gamma_, data_->gain_); +} + // AutoContrast Transform Operation. struct AutoContrast::Data { Data(float cutoff, const std::vector &ignore) : cutoff_(cutoff), ignore_(ignore) {} diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt index ceebec399c9..f6f6040e52a 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/CMakeLists.txt @@ -2,3 +2,5 @@ add_subdirectory(kernels) file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc") set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) + +add_library(audio-ir OBJECT validators.cc) diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt index a2bd0355c0f..0547fd3850b 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/CMakeLists.txt @@ -2,5 +2,12 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(audio-ir-kernels OBJECT + allpass_biquad_ir.cc + amplitude_to_db_ir.cc + angle_ir.cc 
band_biquad_ir.cc + bandpass_biquad_ir.cc + bandreject_biquad_ir.cc + bass_biquad_ir.cc + time_stretch_ir.cc ) diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc index b760aae4844..35cf10b83c6 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.cc @@ -16,20 +16,20 @@ #include "minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/validators.h" #include "minddata/dataset/audio/kernels/allpass_biquad_op.h" +#include "minddata/dataset/audio/ir/validators.h" + namespace mindspore { namespace dataset { namespace audio { - // AllpassBiquadOperation AllpassBiquadOperation::AllpassBiquadOperation(int32_t sample_rate, float central_freq, float Q) : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} Status AllpassBiquadOperation::ValidateParams() { - RETURN_IF_NOT_OK(ValidateScalarNotZero("AllpassBiquad", "sample_rate", sample_rate_)); - RETURN_IF_NOT_OK(ValidateScalarNotZero("AllpassBiquad", "central_freq", central_freq_)); + RETURN_IF_NOT_OK(CheckScalarNotZero("AllpassBiquad", "sample_rate", sample_rate_)); + RETURN_IF_NOT_OK(CheckScalarNotZero("AllpassBiquad", "central_freq", central_freq_)); RETURN_IF_NOT_OK(ValidateScalar("AllpassBiquad", "Q", Q_, {0, 1.0}, true, false)); return Status::OK(); } @@ -38,7 +38,6 @@ std::shared_ptr AllpassBiquadOperation::Build() { std::shared_ptr tensor_op = std::make_shared(sample_rate_, central_freq_, Q_); return tensor_op; } - Status AllpassBiquadOperation::to_json(nlohmann::json *out_json) { nlohmann::json args; args["sample_rate"] = sample_rate_; diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h index 398287db244..c8d2be832bf 100644 --- 
a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/allpass_biquad_ir.h @@ -20,7 +20,6 @@ #include #include #include - #include "include/api/status.h" #include "minddata/dataset/include/dataset/constants.h" #include "minddata/dataset/include/dataset/transforms.h" @@ -28,8 +27,9 @@ namespace mindspore { namespace dataset { -namespace audio { +namespace audio { +// Char arrays storing name of corresponding classes (in alphabetical order) constexpr char kAllpassBiquadOperation[] = "AllpassBiquad"; class AllpassBiquadOperation : public TensorOperation { @@ -52,6 +52,7 @@ class AllpassBiquadOperation : public TensorOperation { float Q_; }; } // namespace audio + } // namespace dataset } // namespace mindspore #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_ALLPASS_BIQUAD_IR_H_ diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc index 61313e7fac1..80412b1c437 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.cc @@ -15,15 +15,15 @@ */ #include "minddata/dataset/audio/ir/kernels/amplitude_to_db_ir.h" +#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h" #include "minddata/dataset/audio/ir/validators.h" -#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h" namespace mindspore { namespace dataset { namespace audio { -// AmplitudeToDBOperation +// AmplitudeToDB AmplitudeToDBOperation::AmplitudeToDBOperation(ScaleType stype, float ref_value, float amin, float top_db) : stype_(stype), ref_value_(ref_value), amin_(amin), top_db_(top_db) {} @@ -32,9 +32,9 @@ AmplitudeToDBOperation::~AmplitudeToDBOperation() = default; std::string AmplitudeToDBOperation::Name() const { return kAmplitudeToDBOperation; } Status AmplitudeToDBOperation::ValidateParams() { - 
RETURN_IF_NOT_OK(ValidateFloatScalarNonNegative("AmplitudeToDB", "top_db", top_db_)); - RETURN_IF_NOT_OK(ValidateFloatScalarPositive("AmplitudeToDB", "amin", amin_)); - RETURN_IF_NOT_OK(ValidateFloatScalarPositive("AmplitudeToDB", "ref_value", ref_value_)); + RETURN_IF_NOT_OK(CheckFloatScalarNonNegative("AmplitudeToDB", "top_db", top_db_)); + RETURN_IF_NOT_OK(CheckFloatScalarPositive("AmplitudeToDB", "amin", amin_)); + RETURN_IF_NOT_OK(CheckFloatScalarPositive("AmplitudeToDB", "ref_value", ref_value_)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc index 131a440e279..53b1850e976 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.cc @@ -16,12 +16,13 @@ #include "minddata/dataset/audio/ir/kernels/angle_ir.h" +// Kernel Audio headers #include "minddata/dataset/audio/kernels/angle_op.h" namespace mindspore { namespace dataset { -namespace audio { +namespace audio { // AngleOperation AngleOperation::AngleOperation() {} diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h index 0c35ba075b0..e0f1ce2ff80 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/angle_ir.h @@ -29,8 +29,9 @@ namespace mindspore { namespace dataset { -namespace audio { +namespace audio { +// Char arrays storing name of corresponding classes constexpr char kAngleOperation[] = "Angle"; class AngleOperation : public TensorOperation { diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc index 062cfd2a43a..a335f6500fd 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc +++ 
b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.cc @@ -16,13 +16,13 @@ #include "minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h" -#include "minddata/dataset/audio/ir/validators.h" #include "minddata/dataset/audio/kernels/bandpass_biquad_op.h" +#include "minddata/dataset/audio/ir/validators.h" + namespace mindspore { namespace dataset { namespace audio { - // BandpassBiquadOperation BandpassBiquadOperation::BandpassBiquadOperation(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) @@ -30,10 +30,9 @@ BandpassBiquadOperation::BandpassBiquadOperation(int32_t sample_rate, float cent Status BandpassBiquadOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateScalar("BandpassBiquad", "Q", Q_, {0, 1.0}, true, false)); - RETURN_IF_NOT_OK(ValidateScalarNotZero("BandpassBiquad", "sample_rate", sample_rate_)); + RETURN_IF_NOT_OK(CheckScalarNotZero("BandpassBiquad", "sample_rate", sample_rate_)); return Status::OK(); } - std::shared_ptr BandpassBiquadOperation::Build() { std::shared_ptr tensor_op = std::make_shared(sample_rate_, central_freq_, Q_, const_skirt_gain_); diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h index 309d0453833..23cb220e9f1 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandpass_biquad_ir.h @@ -21,7 +21,6 @@ #include #include #include - #include "include/api/status.h" #include "minddata/dataset/include/dataset/constants.h" #include "minddata/dataset/include/dataset/transforms.h" @@ -29,8 +28,9 @@ namespace mindspore { namespace dataset { -namespace audio { +namespace audio { +// Char arrays storing name of corresponding classes (in alphabetical order) constexpr char kBandpassBiquadOperation[] = "BandpassBiquad"; class BandpassBiquadOperation : public TensorOperation { diff --git 
a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc index f66c65030da..0688cb6b4d6 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.cc @@ -15,21 +15,19 @@ */ #include "minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h" - -#include "minddata/dataset/audio/ir/validators.h" #include "minddata/dataset/audio/kernels/bandreject_biquad_op.h" +#include "minddata/dataset/audio/ir/validators.h" namespace mindspore { namespace dataset { namespace audio { - // BandrejectBiquadOperation BandrejectBiquadOperation::BandrejectBiquadOperation(int32_t sample_rate, float central_freq, float Q) : sample_rate_(sample_rate), central_freq_(central_freq), Q_(Q) {} Status BandrejectBiquadOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateScalar("BandrejectBiquad", "Q", Q_, {0, 1.0}, true, false)); - RETURN_IF_NOT_OK(ValidateScalarNotZero("BandrejectBiquad", "sample_rate", sample_rate_)); + RETURN_IF_NOT_OK(CheckScalarNotZero("BandrejectBiquad", "sample_rate", sample_rate_)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h index 28b75c60739..9a38185c4b8 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bandreject_biquad_ir.h @@ -16,12 +16,10 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_BANDREJECT_BIQUAD_IR_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_KERNELS_BANDREJECT_BIQUAD_IR_H_ - #include #include #include #include - #include "include/api/status.h" #include "minddata/dataset/include/dataset/constants.h" #include "minddata/dataset/include/dataset/transforms.h" @@ -29,8 +27,10 @@ namespace mindspore { namespace dataset 
{ + namespace audio { +// Char arrays storing name of corresponding classes (in alphabetical order) constexpr char kBandrejectBiquadOperation[] = "BandrejectBiquad"; class BandrejectBiquadOperation : public TensorOperation { diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc index 83766e50a6a..f2f22aff0be 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.cc @@ -16,9 +16,10 @@ #include "minddata/dataset/audio/ir/kernels/bass_biquad_ir.h" -#include "minddata/dataset/audio/ir/validators.h" #include "minddata/dataset/audio/kernels/bass_biquad_op.h" +#include "minddata/dataset/audio/ir/validators.h" + namespace mindspore { namespace dataset { namespace audio { @@ -29,7 +30,7 @@ BassBiquadOperation::BassBiquadOperation(int32_t sample_rate, float gain, float Status BassBiquadOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateScalar("BassBiquad", "Q", Q_, {0, 1.0}, true, false)); - RETURN_IF_NOT_OK(ValidateScalarNotZero("BassBiquad", "sample_rate", sample_rate_)); + RETURN_IF_NOT_OK(CheckScalarNotZero("BassBiquad", "sample_rate", sample_rate_)); return Status::OK(); } @@ -37,7 +38,6 @@ std::shared_ptr BassBiquadOperation::Build() { std::shared_ptr tensor_op = std::make_shared(sample_rate_, gain_, central_freq_, Q_); return tensor_op; } - Status BassBiquadOperation::to_json(nlohmann::json *out_json) { nlohmann::json args; args["sample_rate"] = sample_rate_; diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h index 725000591a7..1fdd38b8a90 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/bass_biquad_ir.h @@ -31,6 +31,7 @@ namespace mindspore { namespace dataset { namespace audio { +// Char 
arrays storing name of corresponding classes (in alphabetical order) constexpr char kBassBiquadOperation[] = "BassBiquad"; class BassBiquadOperation : public TensorOperation { diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc index 4a94c4c6693..a78c4523705 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/kernels/time_stretch_ir.cc @@ -14,15 +14,14 @@ * limitations under the License. */ #include "minddata/dataset/audio/ir/kernels/time_stretch_ir.h" - -#include "minddata/dataset/audio/ir/validators.h" #include "minddata/dataset/audio/kernels/time_stretch_op.h" +#include "minddata/dataset/audio/ir/validators.h" namespace mindspore { namespace dataset { namespace audio { -// TimeStretchOperation +// TimeStretch TimeStretchOperation::TimeStretchOperation(float hop_length, int n_freq, float fixed_rate) : hop_length_(hop_length), n_freq_(n_freq), fixed_rate_(fixed_rate) {} @@ -32,10 +31,10 @@ std::string TimeStretchOperation::Name() const { return kTimeStretchOperation; } Status TimeStretchOperation::ValidateParams() { // param check - RETURN_IF_NOT_OK(ValidateFloatScalarPositive("TimeStretch", "hop_length", hop_length_)); - RETURN_IF_NOT_OK(ValidateIntScalarPositive("TimeStretch", "n_freq", n_freq_)); - RETURN_IF_NOT_OK(ValidateFloatScalarNotNan("TimeStretch", "fixed_rate", fixed_rate_)); - RETURN_IF_NOT_OK(ValidateFloatScalarPositive("TimeStretch", "fixed_rate", fixed_rate_)); + RETURN_IF_NOT_OK(CheckFloatScalarPositive("TimeStretch", "hop_length", hop_length_)); + RETURN_IF_NOT_OK(CheckIntScalarPositive("TimeStretch", "n_freq", n_freq_)); + RETURN_IF_NOT_OK(CheckFloatScalarNotNan("TimeStretch", "fixed_rate", fixed_rate_)); + RETURN_IF_NOT_OK(CheckFloatScalarPositive("TimeStretch", "fixed_rate", fixed_rate_)); return Status::OK(); } diff --git 
a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc index e3f8c127b54..7700298c1a7 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.cc @@ -17,20 +17,82 @@ namespace mindspore { namespace dataset { - -Status ValidateIntScalarNonNegative(const std::string &op_name, const std::string &scalar_name, int32_t scalar) { - RETURN_IF_NOT_OK(ValidateScalar(op_name, scalar_name, scalar, {0}, false)); +/* ####################################### Validator Functions ############################################ */ +Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar) { + RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, true)); return Status::OK(); } -Status ValidateFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar) { +Status CheckFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar) { if (std::isnan(scalar)) { - std::string err_msg = op_name + ": " + scalar_name + " should be specified, got: Nan"; + std::string err_msg = op_name + ":" + scalar_name + " should be specified, got: Nan."; MS_LOG(ERROR) << err_msg; return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); } return Status::OK(); } +Status CheckFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar) { + RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, false)); + return Status::OK(); +} + +Status CheckIntScalarPositive(const std::string &op_name, const std::string &scalar_name, int32_t scalar) { + RETURN_IF_NOT_OK(CheckScalar(op_name, scalar_name, scalar, {0}, true)); + return Status::OK(); +} + +Status CheckStringScalarInList(const std::string &op_name, const std::string &scalar_name, const std::string &scalar, + const std::vector &str_vec) { + auto ret = 
std::find(str_vec.begin(), str_vec.end(), scalar); + if (ret == str_vec.end()) { + std::string interval_description = "["; + for (int m = 0; m < str_vec.size(); m++) { + std::string word = str_vec[m]; + interval_description = interval_description + word; + if (m != str_vec.size() - 1) interval_description = interval_description + ", "; + } + interval_description = interval_description + "]"; + + std::string err_msg = op_name + ": " + scalar_name + " must be one of " + interval_description + ", got: " + scalar; + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); + } + return Status::OK(); +} + +template +Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const T scalar, + const std::vector &range, bool left_open_interval, bool right_open_interval) { + if (range.empty() || range.size() > 2) { + std::string err_msg = "Range check expecting size 1 or 2, but got: " + std::to_string(range.size()); + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); + } + if ((left_open_interval && scalar <= range[0]) || (!left_open_interval && scalar < range[0])) { + std::string interval_description = left_open_interval ? " greater than " : " greater than or equal to "; + std::string err_msg = op_name + ":" + scalar_name + " must be" + interval_description + std::to_string(range[0]) + + ", got: " + std::to_string(scalar); + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); + } + if (range.size() == 2) { + if ((right_open_interval && scalar >= range[1]) || (!right_open_interval && scalar > range[1])) { + std::string left_bracket = left_open_interval ? "(" : "["; + std::string right_bracket = right_open_interval ? 
")" : "]"; + std::string err_msg = op_name + ":" + scalar_name + " is out of range " + left_bracket + + std::to_string(range[0]) + ", " + std::to_string(range[1]) + right_bracket + + ", got: " + std::to_string(scalar); + MS_LOG(ERROR) << err_msg; + return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); + } + } + return Status::OK(); +} +template Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const float scalar, + const std::vector &range, bool left_open_interval, bool right_open_interval); + +template Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const int32_t scalar, + const std::vector &range, bool left_open_interval, bool right_open_interval); } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h index 837c3f0a0f4..7cfa0bfa0be 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h +++ b/mindspore/ccsrc/minddata/dataset/audio/ir/validators.h @@ -18,11 +18,25 @@ #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_IR_VALIDATORS_H_ #include +#include #include "minddata/dataset/kernels/ir/validators.h" +#include "minddata/dataset/core/tensor.h" +#include "minddata/dataset/kernels/ir/tensor_operation.h" +#include "minddata/dataset/util/status.h" + namespace mindspore { namespace dataset { +// Helper function to non-nan float scalar +Status CheckFloatScalarNotNan(const std::string &op_name, const std::string &scalar_name, float scalar); + +// Helper function to positive float scalar +Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar); + +// Helper function to positive int scalar +Status CheckIntScalarPositive(const std::string &op_name, const std::string &scalar_name, int32_t scalar); + template // Helper function to check scalar is not equal to zero Status CheckScalarNotZero(const std::string &op_name, const 
std::string &scalar_name, const T scalar) { @@ -34,6 +48,20 @@ Status CheckScalarNotZero(const std::string &op_name, const std::string &scalar_ return Status::OK(); } +// Helper function to positive float scalar +Status CheckFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar); + +// Helper function to non-negative float scalar +Status CheckFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar); + +// Helper function to check string scalar +Status CheckStringScalarInList(const std::string &op_name, const std::string &scalar_name, const std::string &scalar, + const std::vector &str_vec); + +// Helper function to validate scalar +template +Status CheckScalar(const std::string &op_name, const std::string &scalar_name, const T scalar, + const std::vector &range, bool left_open_interval = false, bool right_open_interval = false); } // namespace dataset } // namespace mindspore #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ADUIO_IR_VALIDATORS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt index f78a30fd232..c6517814031 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/CMakeLists.txt @@ -2,6 +2,13 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) add_library(audio-kernels OBJECT + allpass_biquad_op.cc + amplitude_to_db_op.cc + angle_op.cc + audio_utils.cc band_biquad_op.cc + bandpass_biquad_op.cc + bandreject_biquad_op.cc + bass_biquad_op.cc + time_stretch_op.cc ) - diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc index b1b4625e066..da2f88964af 100644 --- 
a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.cc @@ -20,15 +20,14 @@ namespace mindspore { namespace dataset { - Status AllpassBiquadOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); TensorShape input_shape = input->shape(); - CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "AllpassBiquad: input tensor is not in shape of <..., time>."); - CHECK_FAIL_RETURN_UNEXPECTED( - input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) || - input->type() == DataType(DataType::DE_FLOAT64), - "AllpassBiquad: input tensor type should be float, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "AllpassBiquad: input dimension should be greater than 0."); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) || + input->type() == DataType(DataType::DE_FLOAT16) || + input->type() == DataType(DataType::DE_FLOAT64), + "AllpassBiquad: input type should be float, but got " + input->type().ToString()); double w0 = 2 * PI * central_freq_ / sample_rate_; double alpha = sin(w0) / 2 / Q_; double b0 = 1 - alpha; @@ -37,16 +36,15 @@ Status AllpassBiquadOp::Compute(const std::shared_ptr &input, std::share double a0 = b2; double a1 = -2 * cos(w0); double a2 = 1 - alpha; - if (input->type() == DataType(DataType::DE_FLOAT32)) { + if (input->type() == DataType(DataType::DE_FLOAT32)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else if (input->type() == DataType(DataType::DE_FLOAT64)) { + else if (input->type() == DataType(DataType::DE_FLOAT64)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else { + else return Biquad(input, output, static_cast(b0), static_cast(b1), 
static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h index 26c7b729f0a..d4e7e17b95a 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/allpass_biquad_op.h @@ -26,7 +26,6 @@ namespace mindspore { namespace dataset { - class AllpassBiquadOp : public TensorOp { public: AllpassBiquadOp(int32_t sample_rate, float central_freq, float Q) diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc index 8a202f497c4..dbebec42d39 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.cc @@ -13,8 +13,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h" +#include +#include "minddata/dataset/audio/kernels/amplitude_to_db_op.h" #include "minddata/dataset/audio/kernels/audio_utils.h" #include "minddata/dataset/kernels/data/data_utils.h" #include "minddata/dataset/util/status.h" @@ -25,7 +26,7 @@ namespace dataset { Status AmplitudeToDBOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); if (input->shape().Rank() < 2) { - std::string err_msg = "AmplitudeToDB: input tensor is not in shape of <..., freq, time>."; + std::string err_msg = "AmplitudeToDB: input tensor shape should be <..., freq, time>"; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -39,12 +40,12 @@ Status AmplitudeToDBOp::Compute(const std::shared_ptr &input, std::share // typecast CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING, - "AmplitudeToDB: input tensor type should be float, but got: string."); + "AmplitudeToDB: input type should be float, but got string."); if (input->type() != DataType::DE_FLOAT64) { - CHECK_FAIL_RETURN_UNEXPECTED( - TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)), - "AmplitudeToDB: input tensor type should be float, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32)), + "AmplitudeToDB: input type should be float, but got " + input->type().ToString()); return AmplitudeToDB(input_tensor, output, multiplier, amin, db_multiplier, top_db); + } else { input_tensor = input; return AmplitudeToDB(input_tensor, output, multiplier, amin, db_multiplier, top_db); diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h index 9aa2672878e..bd84e888f9e 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/amplitude_to_db_op.h @@ -29,7 
+29,6 @@ namespace mindspore { namespace dataset { - class AmplitudeToDBOp : public TensorOp { public: AmplitudeToDBOp(ScaleType stype, float ref_value, float amin, float top_db) diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc index 9dc313f606a..54827c934ee 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.cc @@ -25,10 +25,8 @@ namespace dataset { Status AngleOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); // if If the last dimension is not 2, then it's not a complex number - CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-1] == 2, "Angle: input tensor is not in shape of <..., complex=2>."); - CHECK_FAIL_RETURN_UNEXPECTED( - input->type().IsNumeric(), - "Angle: input tensor type should be int, float or double, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(input->shape()[-1] == 2, "Angle: The input is not several legal complex numbers"); + CHECK_FAIL_RETURN_UNEXPECTED(input->type().IsNumeric(), "Angle: The input type should be numbers"); if (input->type() == DataType(DataType::DE_FLOAT64)) { return Angle(input, output); } else { diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h index 501981b2138..aff0ab44a4d 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/angle_op.h @@ -26,7 +26,6 @@ namespace mindspore { namespace dataset { - class AngleOp : public TensorOp { public: // Convert complex numbers to angles diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc index d225eabd48b..701a4ca6dde 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc +++ 
b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.cc @@ -16,27 +16,62 @@ #include "minddata/dataset/audio/kernels/audio_utils.h" -#include - -#include "mindspore/core/base/float16.h" -#include "minddata/dataset/core/type_id.h" -#include "minddata/dataset/kernels/data/data_utils.h" -#include "minddata/dataset/util/random.h" -#include "minddata/dataset/util/status.h" - namespace mindspore { namespace dataset { -/// \brief Generate linearly spaced vector. +template +Status AmplitudeToDB(const std::shared_ptr &input, std::shared_ptr *output, T multiplier, T amin, + T db_multiplier, T top_db) { + TensorShape input_shape = input->shape(); + TensorShape to_shape = input_shape.Rank() == 2 + ? TensorShape({1, 1, input_shape[-2], input_shape[-1]}) + : TensorShape({input->Size() / (input_shape[-3] * input_shape[-2] * input_shape[-1]), + input_shape[-3], input_shape[-2], input_shape[-1]}); + RETURN_IF_NOT_OK(input->Reshape(to_shape)); + + std::vector max_val; + int step = to_shape[-3] * input_shape[-2] * input_shape[-1]; + int cnt = 0; + T temp_max = std::numeric_limits::lowest(); + for (auto itr = input->begin(); itr != input->end(); itr++) { + // do clamp + *itr = *itr < amin ? 
log10(amin) * multiplier : log10(*itr) * multiplier; + *itr -= multiplier * db_multiplier; + // calculate max by axis + cnt++; + if ((*itr) > temp_max) temp_max = *itr; + if (cnt % step == 0) { + max_val.push_back(temp_max); + temp_max = std::numeric_limits::lowest(); + } + } + + if (!std::isnan(top_db)) { + int ind = 0; + for (auto itr = input->begin(); itr != input->end(); itr++, ind++) { + float lower_bound = max_val[ind / step] - top_db; + *itr = std::max((*itr), static_cast(lower_bound)); + } + } + RETURN_IF_NOT_OK(input->Reshape(input_shape)); + *output = input; + return Status::OK(); +} +template Status AmplitudeToDB(const std::shared_ptr &input, std::shared_ptr *output, + float multiplier, float amin, float db_multiplier, float top_db); +template Status AmplitudeToDB(const std::shared_ptr &input, std::shared_ptr *output, + double multiplier, double amin, double db_multiplier, double top_db); + +/// \brief Generate linearly spaced vector /// \param[in] start - Value of the startpoint. /// \param[in] end - Value of the endpoint. /// \param[in] n - N points in the output tensor. /// \param[out] output - Tensor has n points with linearly space. The spacing between the points is (end-start)/(n-1). -/// \return Status return code. +/// \return Status return code template -Status Linspace(std::shared_ptr *output, T start, T end, int n) { +Status Linespace(std::shared_ptr *output, T start, T end, int n) { if (start > end) { - std::string err = "Linspace: input param end must be greater than start."; + std::string err = "Linespace: input param end must be greater than start."; RETURN_STATUS_UNEXPECTED(err); } n = std::isnan(n) ? 100 : n; @@ -54,10 +89,10 @@ Status Linspace(std::shared_ptr *output, T start, T end, int n) { return Status::OK(); } -/// \brief Calculate complex tensor angle. +/// \brief Calculate complex tensor angle /// \param[in] input - Input tensor, must be complex, . /// \param[out] output - Complex tensor angle. -/// \return Status return code. 
+/// \return Status return code template Status ComplexAngle(const std::shared_ptr &input, std::shared_ptr *output) { // check complex @@ -86,10 +121,10 @@ Status ComplexAngle(const std::shared_ptr &input, std::shared_ptr. /// \param[out] output - Complex tensor abs. -/// \return Status return code. +/// \return Status return code template Status ComplexAbs(const std::shared_ptr &input, std::shared_ptr *output) { // check complex @@ -115,17 +150,17 @@ Status ComplexAbs(const std::shared_ptr &input, std::shared_ptr return Status::OK(); } -/// \brief Reconstruct complex tensor from norm and angle. +/// \brief Reconstruct complex tensor from norm and angle /// \param[in] abs - The absolute value of the complex tensor. /// \param[in] angle - The angle of the complex tensor. /// \param[out] output - Complex tensor, . -/// \return Status return code. +/// \return Status return code template Status Polar(const std::shared_ptr &abs, const std::shared_ptr &angle, std::shared_ptr *output) { // check shape if (abs->shape() != angle->shape()) { - std::string err_msg = "Polar: input tensor shape of abs and angle must be the same."; + std::string err_msg = "Polar: input shape of abs and angle must be same."; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -148,12 +183,12 @@ Status Polar(const std::shared_ptr &abs, const std::shared_ptr & return Status::OK(); } -/// \brief Pad complex tensor. +/// \brief Pad complex tensor /// \param[in] input - The complex tensor. /// \param[in] length - The length of padding. /// \param[in] dim - The dim index for padding. /// \param[out] output - Complex tensor, . -/// \return Status return code. 
+/// \return Status return code template Status PadComplexTensor(const std::shared_ptr &input, std::shared_ptr *output, int length, int dim) { TensorShape input_shape = input->shape(); @@ -181,13 +216,13 @@ Status PadComplexTensor(const std::shared_ptr &input, std::shared_ptr Status Phase(const std::shared_ptr &angle_0, const std::shared_ptr &angle_1, const std::shared_ptr &phase_advance, const std::shared_ptr &phase_time0, @@ -232,12 +267,12 @@ Status Phase(const std::shared_ptr &angle_0, const std::shared_ptr Status Mag(const std::shared_ptr &abs_0, const std::shared_ptr &abs_1, std::shared_ptr *output, const std::vector &alphas) { @@ -332,178 +367,19 @@ Status TimeStretch(std::shared_ptr input, std::shared_ptr *outpu std::shared_ptr phase_advance; switch (input->type().value()) { case DataType::DE_FLOAT32: - RETURN_IF_NOT_OK(Linspace(&phase_advance, 0, PI * hop_length, n_freq)); + RETURN_IF_NOT_OK(Linespace(&phase_advance, 0, PI * hop_length, n_freq)); RETURN_IF_NOT_OK(TimeStretch(input, output, rate, phase_advance)); break; case DataType::DE_FLOAT64: - RETURN_IF_NOT_OK(Linspace(&phase_advance, 0, PI * hop_length, n_freq)); + RETURN_IF_NOT_OK(Linespace(&phase_advance, 0, PI * hop_length, n_freq)); RETURN_IF_NOT_OK(TimeStretch(input, output, rate, phase_advance)); break; default: - RETURN_STATUS_UNEXPECTED("TimeStretch: input tensor type should be float or double, but got: " + - input->type().ToString()); + RETURN_STATUS_UNEXPECTED( + "TimeStretch: unsupported type, currently supported types include " + "[float, double]."); } return Status::OK(); } - -Status RandomMaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_param, - double mask_value, int axis, std::mt19937 rnd) { - std::uniform_int_distribution mask_width_value(0, mask_param); - TensorShape input_shape = input->shape(); - int64_t mask_dim_size = axis == 1 ? 
input_shape[-2] : input_shape[-1]; - int64_t mask_width = mask_width_value(rnd); - std::uniform_int_distribution min_freq_value(0, mask_dim_size - mask_width); - int64_t mask_start = min_freq_value(rnd); - - return MaskAlongAxis(input, output, mask_width, mask_start, mask_value, axis); -} - -Status MaskAlongAxis(const std::shared_ptr &input, std::shared_ptr *output, int64_t mask_width, - int64_t mask_start, double mask_value, int axis) { - if (axis != 2 && axis != 1) { - RETURN_STATUS_UNEXPECTED("MaskAlongAxis: only support Time and Frequency masking, axis should be 1 or 2."); - } - TensorShape input_shape = input->shape(); - // squeeze input - TensorShape squeeze_shape = TensorShape({-1, input_shape[-2], input_shape[-1]}); - input->Reshape(squeeze_shape); - - int check_dim_ind = (axis == 1) ? -2 : -1; - CHECK_FAIL_RETURN_UNEXPECTED(0 <= mask_start && mask_start <= input_shape[check_dim_ind], - "MaskAlongAxis: mask_start should be less than the length of chosen dimension."); - CHECK_FAIL_RETURN_UNEXPECTED(mask_start + mask_width <= input_shape[check_dim_ind], - "MaskAlongAxis: the sum of mask_start and mask_width is out of bounds."); - - int64_t cell_size = input->type().SizeInBytes(); - - if (axis == 1) { - // freq - for (int ind = 0; ind < input->Size() / input_shape[-2] * mask_width; ind++) { - int block_num = ind / (mask_width * input_shape[-1]); - auto start_pos = ind % (mask_width * input_shape[-1]) + mask_start * input_shape[-1] + - input_shape[-1] * input_shape[-2] * block_num; - auto start_mem_pos = const_cast(input->GetBuffer() + start_pos * cell_size); - if (input->type() != DataType::DE_FLOAT64) { - // tensor float 32 - auto mask_val = static_cast(mask_value); - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0, - "MaskAlongAxis: mask failed, memory copy error."); - } else { - // tensor float 64 - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0, - "MaskAlongAxis: mask 
failed, memory copy error."); - } - } - } else { - // time - for (int ind = 0; ind < input->Size() / input_shape[-1] * mask_width; ind++) { - int row_num = ind / mask_width; - auto start_pos = ind % mask_width + mask_start + input_shape[-1] * row_num; - auto start_mem_pos = const_cast(input->GetBuffer() + start_pos * cell_size); - if (input->type() != DataType::DE_FLOAT64) { - // tensor float 32 - auto mask_val = static_cast(mask_value); - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_val, cell_size) == 0, - "MaskAlongAxis: mask failed, memory copy error."); - } else { - // tensor float 64 - CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(start_mem_pos, cell_size, &mask_value, cell_size) == 0, - "MaskAlongAxis: mask failed, memory copy error."); - } - } - } - // unsqueeze input - input->Reshape(input_shape); - *output = input; - return Status::OK(); -} - -template -Status Norm(const std::shared_ptr &input, std::shared_ptr *output, float power) { - // calcutate total complex num - int32_t dim = input->shape().Size(); - int32_t total_num = 1; - for (int32_t i = 0; i < (dim - 1); i++) { - total_num *= (input->shape()[i]); - } - - // calculate the output dimension - auto input_size = input->shape().AsVector(); - int32_t dim_back = input_size.back(); - CHECK_FAIL_RETURN_UNEXPECTED( - dim_back == 2, "ComplexNorm: expect complex input of shape <..., 2>, but got: " + std::to_string(dim_back)); - input_size.pop_back(); - int32_t complex_num = input_size.back(); - int32_t iter_num = total_num / complex_num; - // TensorShape out_put_shape{} - input_size.pop_back(); - input_size.emplace_back(2); - TensorShape out_shape = TensorShape(input_size); - RETURN_IF_NOT_OK(Tensor::CreateEmpty(out_shape, input->type(), output)); - - // slice input into real tensor and imaginary tensor - std::shared_ptr re_tensor; - std::shared_ptr im_tensor; - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({total_num, 1}), input->type(), &re_tensor)); - 
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({total_num, 1}), input->type(), &im_tensor)); - std::vector slice_re = {}; - std::vector slice_im = {}; - for (int32_t i = 0; i < (dim - 1); i++) { - slice_re.emplace_back(SliceOption(true)); - slice_im.emplace_back(SliceOption(true)); - } - slice_re.emplace_back(SliceOption(std::vector{0})); - slice_im.emplace_back(SliceOption(std::vector{1})); - RETURN_IF_NOT_OK(input->Slice(&re_tensor, slice_re)); - RETURN_IF_NOT_OK(input->Slice(&im_tensor, slice_im)); - - // calculate norm, using: .pow(2.).sum(-1).pow(0.5 * power) - auto itr_out = (*output)->begin(); - auto itr_re = re_tensor->begin(); - auto itr_im = im_tensor->begin(); - for (int32_t i = 0; i < iter_num; i++) { - double re = 0.0; - double im = 0.0; - for (int32_t j = complex_num * i; j < complex_num * (i + 1); j++) { - double a = static_cast(*itr_re); - double b = static_cast(*itr_im); - re = re + (pow(a, 2) - pow(b, 2)); - im = im + (2 * a * b); - ++itr_re; - ++itr_im; - } - std::complex comp(re, im); - comp = std::pow(comp, (0.5 * power)); - *itr_out = static_cast(comp.real()); - ++itr_out; - *itr_out = static_cast(comp.imag()); - ++itr_out; - } - RETURN_IF_NOT_OK((*output)->Reshape(out_shape)); - return Status::OK(); -} - -Status ComplexNorm(const std::shared_ptr &input, std::shared_ptr *output, float power) { - try { - if (input->type().value() >= DataType::DE_INT8 && input->type().value() <= DataType::DE_FLOAT16) { - // convert the data type to float - std::shared_ptr input_tensor; - RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_FLOAT32), &input_tensor)); - RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32))); - - Norm(input_tensor, output, power); - } else if (input->type().value() == DataType::DE_FLOAT32) { - Norm(input, output, power); - } else if (input->type().value() == DataType::DE_FLOAT64) { - Norm(input, output, power); - } else { - RETURN_STATUS_UNEXPECTED("ComplexNorm: input tensor type 
should be int, float or double, but got: " + - input->type().ToString()); - } - return Status::OK(); - } catch (std::runtime_error &e) { - RETURN_STATUS_UNEXPECTED("ComplexNorm: " + std::string(e.what())); - } -} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h index 23e1e518219..d66340fbf76 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/audio_utils.h @@ -17,8 +17,11 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_ +#include #include +#include #include +#include #include #include "minddata/dataset/core/tensor.h" @@ -28,6 +31,42 @@ constexpr double PI = 3.141592653589793; namespace mindspore { namespace dataset { +/// \brief Turn a tensor from the power/amplitude scale to the decibel scale. 
+/// \param input/output: Tensor of shape <...,freq,time> +/// \param multiplier: power - 10, amplitude - 20 +/// \param amin: lower bound +/// \param db_multiplier: multiplier for decibels +/// \param top_db: the lower bound for decibels cut-off +/// \return Status code +template +Status AmplitudeToDB(const std::shared_ptr &input, std::shared_ptr *output, T multiplier, T amin, + T db_multiplier, T top_db); + +/// \brief Calculate the angles of the complex numbers +/// \param input/output: Tensor of shape <...,time> +template +Status Angle(const std::shared_ptr &input, std::shared_ptr *output) { + TensorShape shape = input->shape(); + std::vector output_shape = shape.AsVector(); + output_shape.pop_back(); + std::shared_ptr output_tensor; + std::vector out; + T o; + T x; + T y; + for (auto itr = input->begin(); itr != input->end(); itr++) { + x = static_cast(*itr); + itr++; + y = static_cast(*itr); + o = std::atan2(y, x); + out.emplace_back(o); + } + // Generate multidimensional results corresponding to input + Tensor::CreateFromVector(out, TensorShape{output_shape}, &output_tensor); + *output = output_tensor; + return Status::OK(); +} + /// \brief Perform a biquad filter of input tensor. /// \param input/output: Tensor of shape <...,time> /// \param a0: denominator coefficient of current output y[n], typically 1 @@ -138,6 +177,15 @@ Status LFilter(const std::shared_ptr &input, std::shared_ptr *ou return Status::OK(); } +/// \brief Stretch STFT in time at a given rate, without changing the pitch. +/// \param[in] input - Tensor of shape <...,freq,time>. +/// \param[in] rate - Stretch factor. +/// \param[in] phase_advance - Expected phase advance in each bin. +/// \param[out] output - Tensor after stretch in time domain. 
+/// \return Status return code +Status TimeStretch(std::shared_ptr input, std::shared_ptr *output, float rate, float hop_length, + float n_freq); + } // namespace dataset } // namespace mindspore #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_AUDIO_UTILS_H_ diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc index ab0fa546f3a..475485f0e1a 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.cc @@ -24,12 +24,12 @@ namespace dataset { Status BandpassBiquadOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); TensorShape input_shape = input->shape(); - CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandpassBiquad: input tensor is not in shape of <..., time>."); + CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandpassBiquad: inpute dimension should be greater than 0."); // check input type, it should be DE_FLOAT32 or DE_FLOAT16 or DE_FLOAT64 - CHECK_FAIL_RETURN_UNEXPECTED( - input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) || - input->type() == DataType(DataType::DE_FLOAT64), - "BandpassBiquad: input tensor type should be float, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) || + input->type() == DataType(DataType::DE_FLOAT16) || + input->type() == DataType(DataType::DE_FLOAT64), + "BandpassBiquad: input type should be float, but got " + input->type().ToString()); float w0 = 2 * PI * central_freq_ / sample_rate_; float alpha = sin(w0) / 2 / Q_; float temp; @@ -46,16 +46,15 @@ Status BandpassBiquadOp::Compute(const std::shared_ptr &input, std::shar float a1 = (-2) * cos(w0); float a2 = 1 - alpha; - if (input->type() == DataType(DataType::DE_FLOAT32)) { + if (input->type() == 
DataType(DataType::DE_FLOAT32)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else if (input->type() == DataType(DataType::DE_FLOAT64)) { + else if (input->type() == DataType(DataType::DE_FLOAT64)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else { + else return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h index dead035fbc4..0fb21441425 100755 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandpass_biquad_op.h @@ -17,8 +17,8 @@ #define MINDSPORE_CCSRC_MINDDATA_DATASET_AUDIO_KERNELS_BANDPASS_BIQUAD_OP_H_ #include -#include #include +#include #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/tensor_op.h" @@ -26,7 +26,6 @@ namespace mindspore { namespace dataset { - class BandpassBiquadOp : public TensorOp { public: BandpassBiquadOp(int32_t sample_rate, float central_freq, float Q, bool const_skirt_gain) diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc index 0e9244af2b1..d321cbf6d52 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.cc @@ -20,17 +20,15 @@ namespace mindspore { namespace dataset { - Status BandrejectBiquadOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); // check input type and input shape TensorShape input_shape = input->shape(); - 
CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, - "BandrejectBiquad: input tensor is not in shape of <..., time>."); - CHECK_FAIL_RETURN_UNEXPECTED( - input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) || - input->type() == DataType(DataType::DE_FLOAT64), - "BandrejectBiquad: input tensor type should be float, but got: " + input->type().ToString()); + CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BandrejectBiquad: input dimension should be greater than 0."); + CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) || + input->type() == DataType(DataType::DE_FLOAT16) || + input->type() == DataType(DataType::DE_FLOAT64), + "BandrejectBiquad: input type should be float, but got " + input->type().ToString()); double w0 = 2 * PI * central_freq_ / sample_rate_; double alpha = sin(w0) / 2 / Q_; double b0 = 1; @@ -39,16 +37,15 @@ Status BandrejectBiquadOp::Compute(const std::shared_ptr &input, std::sh double a0 = 1 + alpha; double a1 = b1; double a2 = 1 - alpha; - if (input->type() == DataType(DataType::DE_FLOAT32)) { + if (input->type() == DataType(DataType::DE_FLOAT32)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else if (input->type() == DataType(DataType::DE_FLOAT64)) { + else if (input->type() == DataType(DataType::DE_FLOAT64)) return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } else { + else return Biquad(input, output, static_cast(b0), static_cast(b1), static_cast(b2), static_cast(a0), static_cast(a1), static_cast(a2)); - } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h index e59d0cf3220..3b42a6ccb82 100644 --- 
a/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bandreject_biquad_op.h @@ -26,7 +26,6 @@ namespace mindspore { namespace dataset { - class BandrejectBiquadOp : public TensorOp { public: BandrejectBiquadOp(int32_t sample_rate, float central_freq, float Q) diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc index d05a7ff2471..71799b17852 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.cc @@ -24,12 +24,12 @@ namespace dataset { Status BassBiquadOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); TensorShape input_shape = input->shape(); - CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BassBiquad: input tensor is not in shape of <..., time>."); + CHECK_FAIL_RETURN_UNEXPECTED(input_shape.Size() > 0, "BassBiquad: input dimension should be greater than 0."); // check input type, it should be DE_FLOAT32 or DE_FLOAT16 or DE_FLOAT64 CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType(DataType::DE_FLOAT32) || input->type() == DataType(DataType::DE_FLOAT16) || input->type() == DataType(DataType::DE_FLOAT64), - "BassBiquad: input tensor type should be float, but got: " + input->type().ToString()); + "BassBiquad: input type should be float, but got " + input->type().ToString()); double w0 = 2 * PI * central_freq_ / sample_rate_; double alpha = sin(w0) / 2 / Q_; @@ -45,18 +45,17 @@ Status BassBiquadOp::Compute(const std::shared_ptr &input, std::shared_p double a0 = (A + 1) + temp2 + temp1; double a1 = -2 * ((A - 1) + temp3); double a2 = (A + 1) + temp2 - temp1; - if (input->type() == DataType(DataType::DE_FLOAT32)) { + if (input->type() == DataType(DataType::DE_FLOAT32)) return Biquad(input, output, static_cast(b0 / a0), static_cast(b1 / a0), static_cast(b2 / a0), 
static_cast(1.0), static_cast(a1 / a0), static_cast(a2 / a0)); - } else if (input->type() == DataType(DataType::DE_FLOAT64)) { + else if (input->type() == DataType(DataType::DE_FLOAT64)) return Biquad(input, output, static_cast(b0 / a0), static_cast(b1 / a0), static_cast(b2 / a0), static_cast(1.0), static_cast(a1 / a0), static_cast(a2 / a0)); - } else { + else return Biquad(input, output, static_cast(b0 / a0), static_cast(b1 / a0), static_cast(b2 / a0), static_cast(1.0), static_cast(a1 / a0), static_cast(a2 / a0)); - } } } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h index 68552c1bb80..2aa31f2428c 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/bass_biquad_op.h @@ -27,7 +27,6 @@ namespace mindspore { namespace dataset { - class BassBiquadOp : public TensorOp { public: BassBiquadOp(int32_t sample_rate, float gain, float central_freq, float Q) @@ -36,7 +35,7 @@ class BassBiquadOp : public TensorOp { ~BassBiquadOp() override = default; void Print(std::ostream &out) const override { - out << Name() << ": sample_rate: " << sample_rate_ << ", gain: " << gain_ << ", central_freq: " << central_freq_ + out << Name() << ": sample_rate: " << sample_rate_ << ", gain:" << gain_ << ", central_freq: " << central_freq_ << ", Q: " << Q_ << std::endl; } diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc index 05a14891b00..0f990348ff7 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.cc @@ -33,8 +33,15 @@ Status TimeStretchOp::Compute(const std::shared_ptr &input, std::shared_ IO_CHECK(input, output); // check shape - if (input->shape().Rank() < 3 || !input->IsComplex()) 
{ - std::string err_msg = "TimeStretch: input tensor is not in shape of <..., freq, num_frame, complex=2>."; + if (input->shape().Rank() < 3) { + std::string err_msg = "TimeStretch: input tensor shape is not <..., freq, num_frame, complex=2>."; + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + + // check complex + if (!input->IsComplex()) { + std::string err_msg = "TimeStretch: input tensor is not in shape of <..., 2>."; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } @@ -44,7 +51,7 @@ Status TimeStretchOp::Compute(const std::shared_ptr &input, std::shared_ float hop_length = std::isnan(hop_length_) ? (n_freq_ - 1) : hop_length_; // typecast CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING, - "TimeStretch: input tensor type should be int, float or double, but got: string."); + "TimeStretch: input tensor type should be [int, float, double], but got string."); if (input->type() != DataType::DE_FLOAT64) { RETURN_IF_NOT_OK(TypeCast(input, &input_tensor, DataType(DataType::DE_FLOAT32))); } else { diff --git a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h index 5a7b1dd9d59..d1a119bf8c0 100644 --- a/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h +++ b/mindspore/ccsrc/minddata/dataset/audio/kernels/time_stretch_op.h @@ -27,7 +27,6 @@ namespace mindspore { namespace dataset { - class TimeStretchOp : public TensorOp { public: /// Default value @@ -44,6 +43,9 @@ class TimeStretchOp : public TensorOp { std::string Name() const override { return kTimeStretchOp; } + /// \param[in] inputs + /// \param[out] outputs + /// \return Status code Status OutputShape(const std::vector &inputs, std::vector &outputs) override; private: diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc index 85e1177906a..8838c06efd2 100644 --- 
a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc +++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.cc @@ -61,27 +61,27 @@ Status PyDSCallback::ExecutePyfunc(py::function f, const CallbackParam &cb_param } return Status::OK(); } -void PyDSCallback::setBegin(py::function f) { +void PyDSCallback::setBegin(const py::function &f) { begin_func_ = f; begin_needed_ = true; } -void PyDSCallback::setEnd(py::function f) { +void PyDSCallback::setEnd(const py::function &f) { end_func_ = f; end_needed_ = true; } -void PyDSCallback::setEpochBegin(py::function f) { +void PyDSCallback::setEpochBegin(const py::function &f) { epoch_begin_func_ = f; epoch_begin_needed_ = true; } -void PyDSCallback::setEpochEnd(py::function f) { +void PyDSCallback::setEpochEnd(const py::function &f) { epoch_end_func_ = f; epoch_end_needed_ = true; } -void PyDSCallback::setStepBegin(py::function f) { +void PyDSCallback::setStepBegin(const py::function &f) { step_begin_func_ = f; step_begin_needed_ = true; } -void PyDSCallback::setStepEnd(py::function f) { +void PyDSCallback::setStepEnd(const py::function &f) { step_end_func_ = f; step_end_needed_ = true; } diff --git a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h index dcc57415014..d3782d51542 100644 --- a/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h +++ b/mindspore/ccsrc/minddata/dataset/callback/py_ds_callback.h @@ -44,12 +44,12 @@ class PyDSCallback : public DSCallback { ~PyDSCallback() = default; - void setBegin(py::function f); - void setEnd(py::function f); - void setEpochBegin(py::function f); - void setEpochEnd(py::function f); - void setStepBegin(py::function f); - void setStepEnd(py::function f); + void setBegin(const py::function &f); + void setEnd(const py::function &f); + void setEpochBegin(const py::function &f); + void setEpochEnd(const py::function &f); + void setStepBegin(const py::function &f); + void setStepEnd(const 
py::function &f); /// \brief actual callback function for begin, needs to be overridden in the derived class /// \param cb_param, callback parameter passed in from DatasetOp when calling the callback diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc index 0f33d499155..052a585eb1d 100644 --- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc @@ -40,12 +40,21 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt return (*out)->MatInit((*out)->GetMutableBuffer(), (*out)->shape_, (*out)->type_, &(*out)->mat_); } -Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) { +Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) { TensorPtr out_tensor; cv::Mat mat_local = mat; // if the input Mat's memory is not continuous, copy it to one block of memory - if (!mat.isContinuous()) mat_local = mat.clone(); - TensorShape shape(mat.size, mat_local.type()); + if (!mat.isContinuous()) { + mat_local = mat.clone(); + } + TensorShape shape({}); + if (mat.dims == 2 && rank == 2) { + shape = TensorShape({mat.rows, mat.cols}); + } else if (mat.dims == 2 && rank == 3) { + shape = TensorShape({mat.rows, mat.cols, mat.channels()}); + } else { + RETURN_STATUS_UNEXPECTED("Error in creating CVTensor: Invalid input rank or cv::mat dimension."); + } DataType type = DataType::FromCVType(mat_local.type()); RETURN_IF_NOT_OK(CreateFromMemory(shape, type, mat_local.data, &out_tensor)); *out = AsCVTensor(out_tensor); @@ -55,14 +64,13 @@ Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) { std::pair, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) { std::array size = {1, 1}; if (shape.Rank() <= 2 || (shape.Rank() == 3 && shape[2] <= CV_CN_MAX)) { - uint8_t ch = 1; + uint16_t ch = 1; if (shape.Rank() == 3) { - ch = static_cast(shape[2]); + ch = 
static_cast(shape[2]); } if (shape.Rank() > 0) size[0] = static_cast(shape[0]); if (shape.Rank() > 1) size[1] = static_cast(shape[1]); if (type.AsCVType() == kCVInvalidType) return std::make_pair(size, -1); - int cv_type = CV_MAKETYPE(type.AsCVType(), ch); return std::make_pair(size, cv_type); } diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h index 1c10a7066f6..80b125997f6 100644 --- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.h @@ -53,9 +53,10 @@ class CVTensor : public Tensor { /// Create CV tensor from cv::Mat /// \note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it. /// \param mat [in] cv::Mat to be copied into the new tensor. + /// \param shape [in] the rank of output CVTensor. /// \param out [out] Generated tensor /// \return Status code - static Status CreateFromMat(const cv::Mat &mat, CVTensorPtr *out); + static Status CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out); ~CVTensor() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/core/data_type.cc b/mindspore/ccsrc/minddata/dataset/core/data_type.cc index 0e03a7d3270..71c519c2387 100644 --- a/mindspore/ccsrc/minddata/dataset/core/data_type.cc +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.cc @@ -61,7 +61,7 @@ uint8_t DataType::AsCVType() const { } return res; -} // namespace dataset +} DataType DataType::FromCVType(int cv_type) { auto depth = static_cast(cv_type) & static_cast(CV_MAT_DEPTH_MASK); diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc index ee5768917f7..957e4c763cd 100644 --- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc @@ -76,7 +76,7 @@ size_t DETensor::DataSize() const { } #endif EXCEPTION_IF_NULL(tensor_impl_); - return 
static_cast(tensor_impl_->SizeInBytes()); + return static_cast(tensor_impl_->SizeInBytes()); } const std::vector &DETensor::Shape() const { return shape_; } diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc index c03c78ad8d2..315ce87ed84 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -263,10 +263,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr * } std::ifstream fs; fs.open(path, std::ios::binary | std::ios::in); - CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + path); + CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Failed to open file: " + path); int64_t num_bytes = fs.seekg(0, std::ios::end).tellg(); CHECK_FAIL_RETURN_UNEXPECTED(num_bytes <= kDeMaxDim, "Invalid file to allocate tensor memory, check path: " + path); - CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file, check path: " + path); + CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path); RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out)); int64_t written_bytes = fs.read(reinterpret_cast((*out)->GetMutableBuffer()), num_bytes).gcount(); CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), @@ -508,7 +508,9 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector &index, offset RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx)); offset_t length_temp = 0; RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp)); - if (length != nullptr) *length = length_temp; + if (length != nullptr) { + *length = length_temp; + } return Status::OK(); } else { std::string err = "data type not compatible"; @@ -626,15 +628,97 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { Status Tensor::to_json(nlohmann::json *out_json) { nlohmann::json args; - args["shape"] = 
shape_.ToString(); + args["shape"] = shape_.AsVector(); args["type"] = type_.ToString(); - std::stringstream ss; - this->PrintData(ss); - args["data"] = ss.str(); + if (type_ == DataType::DE_BOOL) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_INT8) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_INT16) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_INT32) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_INT64) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_UINT8) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_UINT16) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_UINT32) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_UINT64) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_FLOAT32) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_FLOAT64) { + RETURN_IF_NOT_OK(to_json_convert(&args)); + } else if (type_ == DataType::DE_STRING) { + std::vector data_out; + for (auto it = this->begin(); it != this->end(); it++) { + data_out.emplace_back(*it); + } + args["data"] = data_out; + } else { + return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor"); + } *out_json = args; return Status::OK(); } +template +Status Tensor::to_json_convert(nlohmann::json *args) { + std::vector data_out; + for (auto it = this->begin(); it != this->end(); it++) { + data_out.emplace_back(*it); + } + (*args)["data"] = data_out; + return Status::OK(); +} + +Status Tensor::from_json(nlohmann::json op_params, std::shared_ptr *tensor) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shape") != op_params.end(), "Failed to find shape"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("type") != op_params.end(), "Failed to find type"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data") != op_params.end(), "Failed to find data"); + std::string type = op_params["type"]; + std::vector list = op_params["shape"]; + TensorShape shape = TensorShape(list); + if (type == "bool") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "int8") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "int16") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "int32") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "int64") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "uint8") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "uint16") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "uint32") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "uint64") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "float32") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "float64") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else if (type == "string") { + RETURN_IF_NOT_OK(from_json_convert(op_params["data"], shape, tensor)); + } else { + return Status(StatusCode::kMDUnexpectedError, "Type is not supported for tensor"); + } + return Status::OK(); +} + +template +Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr *tensor) { + std::vector data = json_data; + RETURN_IF_NOT_OK(CreateFromVector(data, shape, tensor)); + return Status::OK(); +} + template Status Tensor::GetItemAt(T *o, const std::vector &index) const { if (data_ == nullptr) { diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h 
b/mindspore/ccsrc/minddata/dataset/core/tensor.h index 50ed6f6a2e7..cc011232fde 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -68,7 +68,7 @@ class Tensor { Tensor(const Tensor &other) = delete; Tensor &operator=(const Tensor &other) = delete; - /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead + /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead. /// \note The shape and type information should be known and valid /// \note The constructor does not allocate data /// \param shape TensorShape @@ -219,6 +219,14 @@ class Tensor { Status to_json(nlohmann::json *out_json); + template + Status to_json_convert(nlohmann::json *args); + + static Status from_json(nlohmann::json op_params, std::shared_ptr *tensor); + + template + static Status from_json_convert(nlohmann::json json_data, TensorShape shape, std::shared_ptr *tensor); + /// Get item located at `index`, caller needs to provide the type. /// \tparam T /// \param[in] index vector @@ -306,6 +314,13 @@ class Tensor { /// \return bool - true if tensor is not empty bool HasData() const { return data_ != nullptr; } + /// Check if tensor is complex + /// \return bool - true if tensor is complex + bool IsComplex() const { + // check the last dim all be 2 + return shape_[-1] == 2; + } + /// Reshape the tensor. 
The given shape should have the same number of elements in the Tensor /// \param shape virtual Status Reshape(const TensorShape &shape); diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc index e1ce544f08c..428192bd785 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_grpc_client.cc @@ -73,7 +73,7 @@ Status CacheClientGreeter::DoServiceStop() { void *tag; while (cq_.Next(&tag, &success)) { auto r = reinterpret_cast(tag); - req_.erase(r->seqNo_); + (void)req_.erase(r->seqNo_); } } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc index 33cfa15d334..c99ffdaf733 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc @@ -251,7 +251,7 @@ Status SaveToDisk::Save() { auto mr_writer = std::make_unique(); std::vector blob_fields; if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) { - RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter."); + RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message."); } std::unordered_map column_name_id_map; diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index 136c331db31..849b903cdb8 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -408,7 +408,7 @@ void DataSchema::Print(std::ostream &out) const { // Adds a column descriptor to the schema Status DataSchema::AddColumn(const ColDescriptor &cd) { // Sanity check there's not a duplicate name before adding the column - for (int32_t i = 0; i 
< col_descs_.size(); ++i) { + for (auto i = 0; i < col_descs_.size(); ++i) { if (col_descs_[i].name() == cd.name()) { std::ostringstream ss; ss << "column name '" << cd.name() << "' already exists in schema."; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc index a2ec25124d9..1d45a0437fc 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/rename_op.cc @@ -15,6 +15,7 @@ */ #include "minddata/dataset/engine/datasetops/rename_op.h" +#include #include #include @@ -52,6 +53,7 @@ Status RenameOp::ComputeColMap() { std::unordered_map new_col_name_id_map = {}; // parameter for input check size_t found = 0; + std::set new_col_name; // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map // by doing it this way we recreate a new ColNameIdMap and allow for switching @@ -67,12 +69,27 @@ Status RenameOp::ComputeColMap() { found += 1; int index = std::distance(in_columns_.begin(), it); MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << "."; - + if (new_col_name.find(out_columns_[index]) != new_col_name.end()) { + std::string err_msg( + "rename operation does not support rename one column name into another already exist column name, existed" + " column name is: " + + out_columns_[index] + "."); + RETURN_STATUS_UNEXPECTED(err_msg); + } new_col_name_id_map[out_columns_[index]] = id; + new_col_name.insert(out_columns_[index]); } else { // not found + if (new_col_name.find(name) != new_col_name.end()) { + std::string err_msg( + "rename operation does not support rename one column name into another already exist column name, existed" + " column name is: " + + name + "."); + RETURN_STATUS_UNEXPECTED(err_msg); + } MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id."; new_col_name_id_map[name] = id; + 
new_col_name.insert(name); } } // only checks number of renamed columns have been found, this input check doesn't check everything diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt index 7b882e83558..767cff8c4d2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/CMakeLists.txt @@ -16,6 +16,7 @@ set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES album_op.cc mappable_leaf_op.cc nonmappable_leaf_op.cc + flickr_op.cc ) set(DATASET_ENGINE_DATASETOPS_SOURCE_SRC_FILES diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index d11a5a7eb8f..3c8af4dd067 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -118,7 +118,7 @@ bool AlbumOp::CheckImageType(const std::string &file_name, bool *valid) { return true; } -Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorRow *row) { TensorPtr image; std::ifstream fs; fs.open(image_file_path, std::ios::binary | std::ios::in); @@ -168,7 +168,7 @@ Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col return Status::OK(); } -Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { std::vector data = json_obj; MS_LOG(INFO) << "String array label found: " << data << "."; @@ -178,7 +178,7 @@ Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t c return Status::OK(); } -Status 
AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { std::string data = json_obj; // now we iterate over the elements in json @@ -189,7 +189,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_nu return Status::OK(); } -Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr label; // consider templating this function to handle all ints if (data_schema_->column(col_num).type() == DataType::DE_INT64) { @@ -218,7 +218,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_ return Status::OK(); } -Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr float_array; // consider templating this function to handle all ints if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { @@ -247,7 +247,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t co return Status::OK(); } -Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) { if (data_schema_->column(col_num).type() == DataType::DE_STRING) { TensorPtr id; RETURN_IF_NOT_OK(Tensor::CreateScalar(file, &id)); @@ -263,7 +263,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorRo return Status::OK(); } -Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { // hack to get the file name without extension, the 1 is to get rid of the 
backslash character TensorPtr empty_tensor; RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor)); @@ -275,7 +275,7 @@ Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorRow *row) { // So we actually have to check what type we want to fill the tensor with. // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to // only be float32, seems like a weird limitation to impose -Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr float_tensor; if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { double data = json_obj; @@ -291,7 +291,7 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num } // Loads a tensor with int value, we have to cast the value to type specified in the schema. -Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row) { +Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr int_tensor; if (data_schema_->column(col_num).type() == DataType::DE_INT64) { int64_t data = json_obj; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h index 8c8b3e9fd72..f069c7bdbcf 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.h @@ -88,62 +88,62 @@ class AlbumOp : public MappableLeafOp { /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row); + Status LoadImageTensor(const std::string &image_file, int32_t col_num, 
TensorRow *row); /// \brief Load vector of ints to tensor, append tensor to tensor row /// \param[in] json_obj Json object containing multi-dimensional label /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load vector of floatss to tensor, append tensor to tensor row /// \param[in] json_obj Json object containing array data /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load string array into a tensor, append tensor to tensor row /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load string into a tensor, append tensor to tensor row /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load float value to tensor row /// \param[in] json_obj Json object containing float /// \param[in] col_num Column num in schema /// 
\param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load int value to tensor row /// \param[in] json_obj Json object containing int /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row); + Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row); /// \brief Load empty tensor to tensor row /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadEmptyTensor(uint32_t col_num, TensorRow *row); + Status LoadEmptyTensor(int32_t col_num, TensorRow *row); /// \brief Load id from file name to tensor row /// \param[in] file The file name to get ID from /// \param[in] col_num Column num in schema /// \param[in, out] row Tensor row to push to /// \return Status The status code returned - Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row); + Status LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row); /// \brief Load a tensor row according to a json file /// \param[in] row_id_type row_id - id for this tensor row diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 38dd454328e..6b865917ed4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -368,7 +368,7 @@ Status CifarOp::CountTotalRows(const std::string &dir, const std::string &usage, Status CifarOp::ComputeColMap() { // set the column name map (base class 
field) if (column_name_id_map_.empty()) { - for (uint32_t i = 0; i < data_schema_->NumColumns(); ++i) { + for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { column_name_id_map_[data_schema_->column(i).name()] = i; } } else { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc new file mode 100644 index 00000000000..ee7f872b590 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.cc @@ -0,0 +1,254 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h" + +#include +#include +#include +#include "utils/ms_utils.h" +#include "minddata/dataset/core/config_manager.h" +#include "minddata/dataset/core/tensor_shape.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/db_connector.h" +#include "minddata/dataset/engine/execution_tree.h" + +namespace mindspore { +namespace dataset { + +const size_t kWavHandSize=44; +const size_t kReadbufferSize=20480; +const std::string dataDirectory = "wav"; +const std::string labelDirectory = "etc"; +const std::string labelFileName = "txt.done.data"; + +const std::string pre="cmu_us_"; +const std::string suf="_arctic"; + +CmuArcticOp::CmuArcticOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, + std::unique_ptr data_schema, std::shared_ptr sampler) + : MappableLeafOp(num_workers, queue_size, std::move(sampler)), + usage_(usage), + folder_path_(folder_path), + data_schema_(std::move(data_schema)) { + io_block_queues_.Init(num_workers, queue_size); +} + +Status CmuArcticOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { + CmuArcticLabelTuple audio_tuple = audio_label_tuple_[row_id]; + std::shared_ptr waveform, rate, utterance, utterance_id; + RETURN_IF_NOT_OK(Tensor::CreateFromTensor(audio_tuple.waveform, &waveform)); + RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.sample_rate, &rate)); + RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance, &utterance)); + RETURN_IF_NOT_OK(Tensor::CreateScalar(audio_tuple.utterance_id, &utterance_id)); + (*trow) = TensorRow(row_id, {std::move(waveform), std::move(rate), std::move(utterance), std::move(utterance_id)}); + trow->setPath({audio_names_[row_id].first}); + return Status::OK(); +} + +void CmuArcticOp::Print(std::ostream &out, bool show_all) const { + if (!show_all) { + // Call the super class for displaying any common 1-liner info + 
ParallelOp::Print(out, show_all); + // Then show any custom derived-internal 1-liner info for this op + out << "\n"; + } + else { + // Call the super class for displaying any common detailed info + ParallelOp::Print(out, show_all); + // Then show any custom derived-internal stuff + out << "\nNumber of rows:" << num_rows_ << "\nCmuArctic Directory: " << folder_path_ << "\n\n"; + } +} + +// Derived from RandomAccessOp +Status CmuArcticOp::GetClassIds(std::map> *cls_ids) const { + if (cls_ids == nullptr || !cls_ids->empty() || audio_label_tuple_.empty()) { + if (audio_label_tuple_.empty()) { + RETURN_STATUS_UNEXPECTED("No audio found in dataset, please check if Op read audios successfully or not."); + } + else { + RETURN_STATUS_UNEXPECTED( + "Map for storaging audio-index pair is nullptr or has been set in other place," + "it must be empty before using GetClassIds."); + } + } + for (size_t i = 0; i < audio_label_tuple_.size(); ++i) { + (*cls_ids)[audio_label_tuple_[i].utterance_id].push_back(i);// + } + for (auto &pair : (*cls_ids)) { + pair.second.shrink_to_fit(); + } + return Status::OK(); +} + + +Status CmuArcticOp::CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count) { + *count = 0; + const int64_t num_samples = 0; + const int64_t start_index = 0; + auto sampler = std::make_shared(start_index, num_samples); + auto schema = std::make_unique(); + + RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1))); + TensorShape scalar_rate = TensorShape::CreateScalar(); + RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, + &scalar_rate))); + TensorShape scalar_utterance = TensorShape::CreateScalar(); + RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, + &scalar_utterance))); + TensorShape scalar_utterance_id = TensorShape::CreateScalar(); + 
RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("utterance_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, + &scalar_utterance_id))); + std::shared_ptr cfg = GlobalContext::config_manager(); + + int32_t num_workers = cfg->num_parallel_workers(); + int32_t op_connect_size = cfg->op_connector_size(); + auto op = std::make_shared(usage, num_workers, dir, op_connect_size, std::move(schema), + std::move(sampler)); + RETURN_IF_NOT_OK(op->WalkAllFiles()); + *count = op->audio_names_.size(); + return Status::OK(); +} + +Status CmuArcticOp::ComputeColMap() { + // set the column name map (base class field) + if (column_name_id_map_.empty()) { + for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { + column_name_id_map_[data_schema_->column(i).name()] = i; + } + } + else { + MS_LOG(WARNING) << "Column name map is already set!"; + } + return Status::OK(); +} + +Status CmuArcticOp::ReadLabel() { + char buffer[1024]; + for (std::string u:label_files_) { + std::ifstream in(u); + while (!in.eof()) { + in.getline(buffer, 1024); + if (buffer[0] != '(') + break; + int32_t blank[3] = {0}; + int32_t cur = 0; + for (int32_t i = 0; cur < 2 && i < 1024; i++) { + if (buffer[i] == '"') + blank[cur++] = i; + } + if (cur != 2) + RETURN_STATUS_UNEXPECTED("Label file error!"); + buffer[blank[0] - 1] = 0; + buffer[blank[1]] = 0; + label_pairs_.push_back({std::string(buffer + 2), std::string(buffer + blank[0] + 1)}); + } + } + if (audio_names_.size() != label_pairs_.size()) + RETURN_STATUS_UNEXPECTED("The number of files is different from the number of labels!"); + std::sort(audio_names_.begin(), audio_names_.end()); + std::sort(label_pairs_.begin(), label_pairs_.end()); + return Status::OK(); +} + +Status CmuArcticOp::ReadAudio() { + char header[kWavHandSize]; + short buff[kReadbufferSize]; + const double mx = 32768.0; + std::vector tempArr; + for (uint32_t i = 0; i < audio_names_.size(); i++) { + if (audio_names_[i].first != label_pairs_[i].first + ".wav") { + 
RETURN_STATUS_UNEXPECTED("An error occurred between the label and the file content!"); + } + tempArr.clear(); + auto item = audio_names_[i]; + const char *dir = item.second.data(); + FILE *fp = fopen(dir, "rb"); + if (fp == NULL) { + MS_LOG(WARNING) << "File missing . dir:" << dir; + continue; + } + uint32_t s = fread(header, 1, kWavHandSize, fp); + if (s != kWavHandSize) + RETURN_STATUS_UNEXPECTED("Audio header error!"); + uint32_t rate = *(uint32_t * )(header + 0x18); + uint32_t frame = *(uint32_t * )(header + 0x28) / 2; + uint32_t surplus = frame; + while (surplus) { + uint32_t len = fread(buff, 2, kReadbufferSize, fp); + for (uint32_t i = 0; i < len; i++) { + tempArr.push_back(buff[i] / mx); + } + surplus -= len; + } + fclose(fp); + std::shared_ptr audio; + RETURN_IF_NOT_OK(Tensor::CreateFromVector(tempArr, &audio)); + audio_label_tuple_.push_back({audio, rate, label_pairs_[i].second, label_pairs_[i].first}); + } + num_rows_ = audio_names_.size(); + return Status::OK(); +} + +Status CmuArcticOp::WalkAllFiles() { + Path dir(folder_path_); + Path fullDir = (dir + pre + usage_ + suf) / dataDirectory; + Path label = (dir + pre + usage_ + suf) / labelDirectory / labelFileName; + label_files_.push_back(label.toString()); + auto dirIt = Path::DirIterator::OpenDirectory(&fullDir); + if (dirIt != nullptr) { + while (dirIt->hasNext()) { + Path file = dirIt->next(); + std::string fileName = file.toString(); + auto pos = fileName.find_last_of('.'); + std::string ext = fileName.substr(pos); + if (ext == ".wav") { + audio_names_.push_back({file.Basename(), file.toString()}); + } + else { + MS_LOG(WARNING) << "File name format error :" << file.toString() << "."; + } + } + } + else { + MS_LOG(WARNING) << "Unable to open directory " << fullDir.toString() << "."; + } + return Status::OK(); +} + +Status CmuArcticOp::LaunchThreadsAndInitOp() { + if (tree_ == nullptr) { + RETURN_STATUS_UNEXPECTED("Pipeline init failed, Execution tree not set."); + } + 
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks())); + RETURN_IF_NOT_OK( + tree_->LaunchWorkers(num_workers_, std::bind(&CmuArcticOp::WorkerEntry, this, std::placeholders::_1), "", + id())); + TaskManager::FindMe()->Post(); + RETURN_IF_NOT_OK(this->WalkAllFiles()); + RETURN_IF_NOT_OK(this->ReadLabel()); + RETURN_IF_NOT_OK(this->ReadAudio()); + RETURN_IF_NOT_OK(this->InitSampler()); // handle shake with sampler + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h new file mode 100644 index 00000000000..bb7ceff5a5a --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cmu_arctic_op.h @@ -0,0 +1,126 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_ + +#include +#include +#include +#include +#include +#include + +#include "minddata/dataset/core/tensor.h" + +#include "minddata/dataset/engine/data_schema.h" +#include "minddata/dataset/engine/datasetops/parallel_op.h" +#include "minddata/dataset/engine/datasetops/source/mappable_leaf_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/queue.h" +#include "minddata/dataset/util/status.h" +#include "minddata/dataset/util/wait_post.h" + +namespace mindspore { +namespace dataset { + + + + + +struct CmuArcticLabelTuple{ + std::shared_ptr waveform; + uint32_t sample_rate; + std::string utterance; + std::string utterance_id; +}; + + +class CmuArcticOp : public MappableLeafOp { + public: + // Constructor + // @param const std::string &usage - Usage of this dataset, can be 'train', 'test' ,'valid'or 'all' + // @param int32_t num_workers - number of workers reading audios in parallel + // @param std::string folder_path - dir directory of mnist + // @param int32_t queue_size - connector queue size + // @param std::unique_ptr data_schema - the schema of the mnist dataset + // @param td::unique_ptr sampler - sampler tells CmuArcticOp what to read + CmuArcticOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size, + std::unique_ptr data_schema, std::shared_ptr sampler); + + // Destructor. 
+ ~CmuArcticOp() = default; + + // Method derived from RandomAccess Op, enable Sampler to get all ids for each class + // @param (std::map> * map - key label, val all ids for this class + // @return Status The status code returned + Status GetClassIds(std::map> *cls_ids) const ; + + // A print method typically used for debugging + // @param out + // @param show_all + void Print(std::ostream &out, bool show_all) const override; + + // Function to count the number of samples in the MNIST dataset + // @param dir path to the MNIST directory + // @param count output arg that will hold the minimum of the actual dataset size and numSamples + // @return + + static Status CountTotalRows(const std::string &dir, const std::string &usage, int64_t *count); + + // Op name getter + // @return Name of the current Op + std::string Name() const override { return "CmuArcticOp"; } + + private: + // Load a tensor row according to a pair + // @param row_id_type row_id - id for this tensor row + // @param ImageLabelPair pair - + // @param TensorRow row - audio & label read into this tensor row + // @return Status The status code returned + Status LoadTensorRow(row_id_type row_id, TensorRow *row) override; + + Status ReadAudio(); + + Status ReadLabel(); + + // Read all files in the directory + // @return Status The status code returned + Status WalkAllFiles(); + + // Called first when function is called + // @return Status The status code returned + Status LaunchThreadsAndInitOp() override; + + // Private function for computing the assignment of the column name map. 
+ // @return - Status + Status ComputeColMap() override; + + + std::string folder_path_; // directory of audio folder + const std::string usage_; + std::unique_ptr data_schema_; + std::vector audio_label_tuple_; + std::vector> audio_names_; + std::vector> label_pairs_; + std::vector label_files_; +}; + + + +} // namespace dataset +} // namespace mindspore +#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_DATASETOPS_SOURCE_CMUARCTIC_OP_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc index 6a3c17f39c2..65735a488f9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc @@ -17,10 +17,10 @@ #include #include +#include #include #include -#include "debug/common.h" #include "minddata/dataset/core/config_manager.h" #include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" @@ -94,13 +94,7 @@ void FlickrOp::Print(std::ostream &out, bool show_all) const { } Status FlickrOp::ParseFlickrData() { - auto real_file_path = Common::GetRealPath(file_path_); - if (!real_file_path.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << file_path_; - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + file_path_); - } - - std::ifstream file_handle(real_file_path.value()); + std::ifstream file_handle(file_path_); if (!file_handle.is_open()) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open Flickr annotation file: " + file_path_); } @@ -135,11 +129,7 @@ Status FlickrOp::ParseFlickrData() { } bool valid = false; - Status type_check = CheckImageType(image_file_path, &valid); - if (type_check.IsError()) { - file_handle.close(); - RETURN_IF_NOT_OK(type_check); - } + RETURN_IF_NOT_OK(CheckImageType(image_file_path, &valid)); if (!valid) { continue; } @@ -163,16 +153,10 @@ Status 
FlickrOp::ParseFlickrData() { // Optimization: Could take in a tensor // This function does not return status because we want to just skip bad input, not crash Status FlickrOp::CheckImageType(const std::string &file_name, bool *valid) { - auto real_file_name = Common::GetRealPath(file_name); - if (!real_file_name.has_value()) { - MS_LOG(ERROR) << "Get real path failed, path=" << file_name; - RETURN_STATUS_UNEXPECTED("Get real path failed, path=" + file_name); - } - std::ifstream file_handle; constexpr int read_num = 3; *valid = false; - file_handle.open(real_file_name.value(), std::ios::binary | std::ios::in); + file_handle.open(file_name, std::ios::binary | std::ios::in); if (!file_handle.is_open()) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open image file: " + file_name); } @@ -240,7 +224,7 @@ Status FlickrOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->Column(i).Name()] = i; + column_name_id_map_[data_schema_->column(i).name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index 48b8597be9b..91d7c14566c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -223,7 +223,7 @@ Status MindRecordOp::GetRowFromReader(TensorRow *fetched_row, uint64_t row_id, i Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector &columns_blob, const mindrecord::json &columns_json, const mindrecord::TaskType task_type) { - for (uint32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) { + for (int32_t i_col = 0; i_col < columns_to_load_.size(); i_col++) { auto column_name = columns_to_load_[i_col]; // 
Initialize column parameters diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc index e818089636d..55e13659c67 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.cc @@ -31,10 +31,18 @@ Status DatasetCacheImpl::Build() { CacheClient::Builder builder; builder.SetSessionId(session_id_).SetCacheMemSz(cache_mem_sz_).SetSpill(spill_); - if (hostname_) builder.SetHostname(hostname_.value()); - if (port_) builder.SetPort(port_.value()); - if (num_connections_) builder.SetNumConnections(num_connections_.value()); - if (prefetch_sz_) builder.SetPrefetchSize(prefetch_sz_.value()); + if (hostname_) { + (void)builder.SetHostname(hostname_.value()); + } + if (port_) { + (void)builder.SetPort(port_.value()); + } + if (num_connections_) { + (void)builder.SetNumConnections(num_connections_.value()); + } + if (prefetch_sz_) { + (void)builder.SetPrefetchSize(prefetch_sz_.value()); + } return builder.Build(&cache_client_); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h index bb3752d0505..a591484cc4b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.h @@ -79,14 +79,15 @@ constexpr char kCelebANode[] = "CelebADataset"; constexpr char kCifar100Node[] = "Cifar100Dataset"; constexpr char kCifar10Node[] = "Cifar10Dataset"; constexpr char kCLUENode[] = "CLUEDataset"; +constexpr char kCmuArcticNode[] = "CmuArcticDataset"; constexpr char kCocoNode[] = "CocoDataset"; constexpr char kCSVNode[] = "CSVDataset"; +constexpr char kFlickrNode[] = "FlickrDataset"; constexpr char kGeneratorNode[] = "GeneratorDataset"; constexpr char kImageFolderNode[] = "ImageFolderDataset"; constexpr 
char kManifestNode[] = "ManifestDataset"; constexpr char kMindDataNode[] = "MindDataDataset"; constexpr char kMnistNode[] = "MnistDataset"; -constexpr char kLibriSpeechNode[] = "LibriSpeechDataset"; constexpr char kRandomNode[] = "RandomDataset"; constexpr char kTextFileNode[] = "TextFileDataset"; constexpr char kTFRecordNode[] = "TFRecordDataset"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc index 883f1673ac5..e41b475c694 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.cc @@ -71,13 +71,13 @@ Status EpochCtrlNode::ValidateParams() { } // Visitor accepting method for IRNodePass -Status EpochCtrlNode::Accept(IRNodePass *p, bool *const modified) { +Status EpochCtrlNode::Accept(IRNodePass *const p, bool *const modified) { // Downcast shared pointer then call visitor return p->Visit(shared_from_base(), modified); } // Visitor accepting method for IRNodePass -Status EpochCtrlNode::AcceptAfter(IRNodePass *p, bool *const modified) { +Status EpochCtrlNode::AcceptAfter(IRNodePass *const p, bool *const modified) { // Downcast shared pointer then call visitor return p->VisitAfter(shared_from_base(), modified); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h index 709f92afa43..867a3010674 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/epoch_ctrl_node.h @@ -67,13 +67,13 @@ class EpochCtrlNode : public RepeatNode { /// \param[in] p The node to visit /// \param[out] modified Indicator if the node was modified /// \return Status of the node visit - Status Accept(IRNodePass *p, bool *const modified) override; + Status Accept(IRNodePass *const p, bool 
*const modified) override; /// \brief Base-class override for accepting IRNodePass visitor /// \param[in] p The node to visit /// \param[out] modified Indicator if the node was modified /// \return Status of the node visit - Status AcceptAfter(IRNodePass *p, bool *const modified) override; + Status AcceptAfter(IRNodePass *const p, bool *const modified) override; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt index d33d89ffde9..4ca3d503641 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/CMakeLists.txt @@ -10,6 +10,7 @@ set(DATASET_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES clue_node.cc coco_node.cc csv_node.cc + flickr_node.cc image_folder_node.cc manifest_node.cc minddata_node.cc diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc index 2617c11fa03..54d191be18a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc @@ -83,7 +83,7 @@ Status AlbumNode::Build(std::vector> *const node_ops) } // Get the shard id of node -Status AlbumNode::GetShardId(int32_t *shard_id) { +Status AlbumNode::GetShardId(int32_t *const shard_id) { *shard_id = sampler_->ShardId(); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h index dc19c2c1ca5..23cd4519995 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.h @@ -59,7 +59,7 @@ class AlbumNode : public MappableSourceNode { /// 
\brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *shard_id) override; + Status GetShardId(int32_t *const shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc new file mode 100644 index 00000000000..f86485a0168 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.cc @@ -0,0 +1,107 @@ +#include "minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h" + +#include +#include +#include +#include + +#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h" + +#include "minddata/dataset/util/status.h" +namespace mindspore { +namespace dataset { + +CmuArcticNode::CmuArcticNode(std::string dataset_dir, std::string usage, std::shared_ptr sampler, + std::shared_ptr cache) + : MappableSourceNode(std::move(cache)), dataset_dir_(dataset_dir), usage_(usage), sampler_(sampler) {} + +void CmuArcticNode::Print(std::ostream &out) const { out << Name(); } + +std::shared_ptr CmuArcticNode::Copy() { + std::shared_ptr sampler = (sampler_ == nullptr) ? 
nullptr : sampler_->SamplerCopy(); + auto node = std::make_shared(dataset_dir_, usage_, sampler, cache_); + return node; +} + +Status CmuArcticNode::ValidateParams() { + RETURN_IF_NOT_OK(DatasetNode::ValidateParams()); + RETURN_IF_NOT_OK(ValidateDatasetDirParam("CmuArcticNode", dataset_dir_)); + RETURN_IF_NOT_OK(ValidateDatasetSampler("CmuArcticNode", sampler_)); + RETURN_IF_NOT_OK(ValidateStringValue("CmuArcticNode", usage_, {"aew", "ahw", "aup", "awb", "axb", "bdl", "clb", "eey", "fem", "gka", "jmk", "ksp", "ljm", "lnh", "rms", "rxr", "slp" , "slt"})); + return Status::OK(); +} + +Status CmuArcticNode::Build(std::vector> *const node_ops) { + // Do internal Schema generation. + auto schema = std::make_unique(); + + + RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("waveform", DataType(DataType::DE_FLOAT64), TensorImpl::kCv, 1))); + TensorShape scalar_rate = TensorShape::CreateScalar(); + RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("sample_rate", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar_rate))); + TensorShape scalar_utterance = TensorShape::CreateScalar(); + RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("utterance", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar_utterance))); + TensorShape scalar_utterance_id = TensorShape::CreateScalar(); + RETURN_IF_NOT_OK( + schema->AddColumn(ColDescriptor("utterance_id", DataType(DataType::DE_STRING), TensorImpl::kFlexible, 0, &scalar_utterance_id))); + + + + std::shared_ptr sampler_rt = nullptr; + RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); + + auto op = std::make_shared(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema),std::move(sampler_rt)); + op->set_total_repeats(GetTotalRepeats()); + op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch()); + node_ops->push_back(op); + + return Status::OK(); +} + +// Get the shard id of node +Status CmuArcticNode::GetShardId(int32_t *shard_id) { + *shard_id = sampler_->ShardId(); + return 
Status::OK(); +} + + +// Get Dataset size +Status CmuArcticNode::GetDatasetSize(const std::shared_ptr &size_getter, bool estimate,int64_t *dataset_size) { + if (dataset_size_ > 0) { + *dataset_size = dataset_size_; + return Status::OK(); + } + int64_t num_rows, sample_size; + RETURN_IF_NOT_OK(CmuArcticOp::CountTotalRows(dataset_dir_, usage_, &num_rows)); + std::shared_ptr sampler_rt = nullptr; + RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt)); + sample_size = sampler_rt->CalculateNumSamples(num_rows); + if (sample_size == -1) { + RETURN_IF_NOT_OK(size_getter->DryRun(shared_from_this(), &sample_size)); + } + *dataset_size = sample_size; + dataset_size_ = *dataset_size; + return Status::OK(); +} + + +Status CmuArcticNode::to_json(nlohmann::json *out_json) { + nlohmann::json args, sampler_args; + RETURN_IF_NOT_OK(sampler_->to_json(&sampler_args)); + args["sampler"] = sampler_args; + args["num_parallel_workers"] = num_workers_; + args["dataset_dir"] = dataset_dir_; + args["usage"] = usage_; + if (cache_ != nullptr) { + nlohmann::json cache_args; + RETURN_IF_NOT_OK(cache_->to_json(&cache_args)); + args["cache"] = cache_args; + } + *out_json = args; + return Status::OK(); +} + +} // namespace dataset +} // namespace mindspor \ No newline at end of file diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h new file mode 100644 index 00000000000..6b79dd07a31 --- /dev/null +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/cmu_arctic_node.h @@ -0,0 +1,76 @@ +#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CUMARCTIC_NODE_H_ +#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CUMARCTIC_NODE_H_ + +#include +#include +#include + +#include "minddata/dataset/engine/ir/datasetops/dataset_node.h" +namespace mindspore { +namespace dataset { +class CmuArcticNode:public MappableSourceNode { +public: + 
+CmuArcticNode(std::string dataset_dir, std::string usage, std::shared_ptr sampler, std::shared_ptr cache); + +~ CmuArcticNode() = default; + +/// \brief Node name getter +/// \return Name of the current node +std::string Name() const override { return "kCmuArcticNode"; } + +/// \brief Print the description +/// \param out - The output stream to write output to +void Print(std::ostream &out) const override; + +/// \brief Copy the node to a new object +/// \return A shared pointer to the new copy +std::shared_ptr Copy() override; + +/// \brief a base class override function to create the required runtime dataset op objects for this class +/// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create +/// \return Status Status::OK() if build successfully +Status Build(std::vector> *const node_ops) override; + +/// \brief Parameters validation +/// \return Status Status::OK() if all the parameters are valid +Status ValidateParams() override; + +/// \brief Get the shard id of node (the shard id reported by this node's sampler) +/// \return Status Status::OK() if get shard id successfully +Status GetShardId(int32_t *shard_id) override; + +/// \brief Base-class override for GetDatasetSize +/// \param[in] size_getter Shared pointer to DatasetSizeGetter +/// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting +/// dataset size at the expense of accuracy. 
+/// \param[out] dataset_size the size of the dataset +/// \return Status of the function +Status GetDatasetSize(const std::shared_ptr &size_getter, bool estimate, + int64_t *dataset_size) override; + +/// \brief Getter functions +const std::string &DatasetDir() const { return dataset_dir_; } +const std::string &Usage() const { return usage_; } + +/// \brief Get the arguments of node +/// \param[out] out_json JSON string of all attributes +/// \return Status of the function +Status to_json(nlohmann::json *out_json) override; + +/// \brief Sampler getter +/// \return SamplerObj of the current node +std::shared_ptr Sampler() override { return sampler_; } + + +void SetSampler(std::shared_ptr sampler) override { sampler_ = sampler; } + +private: +std::string dataset_dir_; +std::string usage_; +std::shared_ptr sampler_; +}; + +} // namespace dataset +} // namespace mindspore +#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_CUMARCTIC_NODE_H_ diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc index e3fa2eca3aa..b14a803ae12 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.cc @@ -70,7 +70,7 @@ Status MnistNode::Build(std::vector> *const node_ops) } // Get the shard id of node -Status MnistNode::GetShardId(int32_t *shard_id) { +Status MnistNode::GetShardId(int32_t *const shard_id) { *shard_id = sampler_->ShardId(); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h index 6c1c37a91d1..183ef75cea5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/mnist_node.h @@ -58,7 +58,7 @@ class MnistNode : public 
MappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *shard_id) override; + Status GetShardId(int32_t *const shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc index d92b9f5bd1a..e1183c49389 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc @@ -118,7 +118,7 @@ Status RandomNode::Build(std::vector> *const node_ops } // Get the shard id of node -Status RandomNode::GetShardId(int32_t *shard_id) { +Status RandomNode::GetShardId(int32_t *const shard_id) { // RandomDataset doesn't support multiple shards *shard_id = 0; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h index 0758fd2bd91..f099910e677 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.h @@ -80,7 +80,7 @@ class RandomNode : public NonMappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *shard_id) override; + Status GetShardId(int32_t *const shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index 3a5e3e97e9f..b9bf8fec4d9 100644 --- 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -156,7 +156,7 @@ Status TFRecordNode::Build(std::vector> *const node_o } // Get the shard id of node -Status TFRecordNode::GetShardId(int32_t *shard_id) { +Status TFRecordNode::GetShardId(int32_t *const shard_id) { *shard_id = shard_id_; return Status::OK(); @@ -259,7 +259,7 @@ Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) { } // Visitor accepting method for IRNodePass -Status TFRecordNode::AcceptAfter(IRNodePass *p, bool *const modified) { +Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) { // Downcast shared pointer then call visitor return p->VisitAfter(shared_from_base(), modified); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h index c56f205b580..9a9ccfc0266 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.h @@ -95,7 +95,7 @@ class TFRecordNode : public NonMappableSourceNode { /// \brief Get the shard id of node /// \return Status Status::OK() if get shard id successfully - Status GetShardId(int32_t *shard_id) override; + Status GetShardId(int32_t *const shard_id) override; /// \brief Base-class override for GetDatasetSize /// \param[in] size_getter Shared pointer to DatasetSizeGetter @@ -152,7 +152,7 @@ class TFRecordNode : public NonMappableSourceNode { /// \param[in] p The node to visit /// \param[out] modified Indicator if the node was modified /// \return Status of the node visit - Status AcceptAfter(IRNodePass *p, bool *const modified) override; + Status AcceptAfter(IRNodePass *const p, bool *const modified) override; private: std::vector dataset_files_; diff --git 
a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc index 9fe9eab9b93..2d0bcc6d38d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/transfer_node.cc @@ -117,6 +117,9 @@ Status TransferNode::AcceptAfter(IRNodePass *const p, bool *const modified) { Status TransferNode::to_json(nlohmann::json *out_json) { nlohmann::json args; + args["queue_name"] = queue_name_; + args["device_type"] = device_type_; + args["device_id"] = device_id_; args["send_epoch_end"] = send_epoch_end_; args["total_batch"] = total_batch_; args["create_data_info_queue"] = create_data_info_queue_; diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc index 8eb9b5599fa..778c1262b5d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc @@ -192,7 +192,7 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const mo } // Adds an operator to the cached operator stack save area -void RepeatPass::AddToCachedNodeStack(std::shared_ptr node) { cached_node_stacks_.push(node); } +void RepeatPass::AddToCachedNodeStack(const std::shared_ptr &node) { cached_node_stacks_.push(node); } // Pops an operator from the cached operator stack save area std::shared_ptr RepeatPass::PopFromCachedNodeStack() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h index 6c9f257bd02..708b04ba9c5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.h @@ -112,7 +112,7 @@ class RepeatPass : public IRNodePass { /// \brief Adds an operator to the cached stack save area /// \param node - The 
dataset node to add to cached stack /// \return Status The status code returned - void AddToCachedNodeStack(std::shared_ptr node); + void AddToCachedNodeStack(const std::shared_ptr &node); /// \brief Pops an operator from the cached stack save area /// \return shared_ptr to the popped dataset node diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc index b575ce8b27e..5d53483d75c 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc @@ -84,7 +84,7 @@ Status Serdes::Deserialize(std::string json_filepath, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Fail to find children"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("children") != json_obj.end(), "Failed to find children"); std::shared_ptr child_ds; if (json_obj["children"].size() == 0) { @@ -98,7 +98,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr> datasets; for (auto child_json_obj : json_obj["children"]) { RETURN_IF_NOT_OK(ConstructPipeline(child_json_obj, &child_ds)); @@ -112,7 +112,7 @@ Status Serdes::ConstructPipeline(nlohmann::json json_obj, std::shared_ptr child_ds, nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Fail to find op_type"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("op_type") != json_obj.end(), "Failed to find op_type"); std::string op_type = json_obj["op_type"]; if (child_ds == nullptr) { // if dataset doesn't have any child, then create a source dataset IR. 
e.g., ImageFolderNode, CocoNode @@ -125,11 +125,11 @@ Status Serdes::CreateNode(std::shared_ptr child_ds, nlohmann::json } Status Serdes::CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension"); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; @@ -143,9 +143,9 @@ Status Serdes::CreateCelebADatasetNode(nlohmann::json json_obj, std::shared_ptr< } Status Serdes::CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; @@ -157,9 +157,9 @@ Status Serdes::CreateCifar10DatasetNode(nlohmann::json json_obj, std::shared_ptr } Status Serdes::CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; @@ -171,13 +171,13 @@ Status Serdes::CreateCifar100DatasetNode(nlohmann::json json_obj, std::shared_pt } Status Serdes::CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != 
json_obj.end(), "Fail to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); std::vector dataset_files = json_obj["dataset_dir"]; std::string task = json_obj["task"]; std::string usage = json_obj["usage"]; @@ -192,11 +192,11 @@ Status Serdes::CreateCLUEDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Fail to find annotation_file"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("annotation_file") != json_obj.end(), "Failed to find annotation_file"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); std::string dataset_dir = json_obj["dataset_dir"]; std::string annotation_file = json_obj["annotation_file"]; std::string task = json_obj["task"]; @@ -211,13 +211,13 @@ Status Serdes::CreateCocoDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Fail to find field_delim"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Fail to find column_names"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("field_delim") != json_obj.end(), "Failed to find field_delim"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("column_names") != json_obj.end(), "Failed to find column_names"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); std::vector dataset_files = 
json_obj["dataset_files"]; std::string field_delim = json_obj["field_delim"]; std::vector> column_defaults = {}; @@ -234,11 +234,11 @@ Status Serdes::CreateCSVDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Fail to find extension"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("extensions") != json_obj.end(), "Failed to find extension"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); std::string dataset_dir = json_obj["dataset_dir"]; bool decode = json_obj["decode"]; std::shared_ptr sampler; @@ -260,11 +260,11 @@ Status Serdes::CreateImageFolderDatasetNode(nlohmann::json json_obj, std::shared } Status Serdes::CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Fail to find dataset_file"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find 
class_indexing"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_file") != json_obj.end(), "Failed to find dataset_file"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); std::string dataset_file = json_obj["dataset_file"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; @@ -284,9 +284,9 @@ Status Serdes::CreateManifestDatasetNode(nlohmann::json json_obj, std::shared_pt } Status Serdes::CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); std::string dataset_dir = json_obj["dataset_dir"]; std::string usage = json_obj["usage"]; std::shared_ptr sampler; @@ -298,11 +298,11 @@ Status Serdes::CreateMnistDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find 
num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); std::vector dataset_files = json_obj["dataset_files"]; int64_t num_samples = json_obj["num_samples"]; ShuffleMode shuffle = static_cast(json_obj["shuffle"]); @@ -315,14 +315,14 @@ Status Serdes::CreateTextFileDatasetNode(nlohmann::json json_obj, std::shared_pt } Status Serdes::CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Fail to find dataset_files"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Fail to find schema"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Fail to find columns_list"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); - 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Fail to find shard_equal_rows"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_files") != json_obj.end(), "Failed to find dataset_files"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("schema") != json_obj.end(), "Failed to find schema"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns_list") != json_obj.end(), "Failed to find columns_list"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_equal_rows") != json_obj.end(), "Failed to find shard_equal_rows"); std::vector dataset_files = json_obj["dataset_files"]; std::string schema = json_obj["schema"]; std::vector columns_list = json_obj["columns_list"]; @@ -339,12 +339,12 @@ Status Serdes::CreateTFRecordDatasetNode(nlohmann::json json_obj, std::shared_pt } Status Serdes::CreateVOCDatasetNode(nlohmann::json json_obj, std::shared_ptr *ds) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != json_obj.end(), "Fail to find dataset_dir"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Fail to find task"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Fail to find usage"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Fail to find class_indexing"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Fail to find decode"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Fail to find sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("dataset_dir") != 
json_obj.end(), "Failed to find dataset_dir"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("task") != json_obj.end(), "Failed to find task"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("usage") != json_obj.end(), "Failed to find usage"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("class_indexing") != json_obj.end(), "Failed to find class_indexing"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("decode") != json_obj.end(), "Failed to find decode"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler") != json_obj.end(), "Failed to find sampler"); std::string dataset_dir = json_obj["dataset_dir"]; std::string task = json_obj["task"]; std::string usage = json_obj["usage"]; @@ -398,8 +398,8 @@ Status Serdes::CreateDatasetNode(nlohmann::json json_obj, std::string op_type, s Status Serdes::CreateBatchOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Fail to find batch_size"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Fail to find drop_remainder"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("batch_size") != json_obj.end(), "Failed to find batch_size"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("drop_remainder") != json_obj.end(), "Failed to find drop_remainder"); int32_t batch_size = json_obj["batch_size"]; bool drop_remainder = json_obj["drop_remainder"]; *result = std::make_shared(ds, batch_size, drop_remainder); @@ -408,22 +408,25 @@ Status Serdes::CreateBatchOperationNode(std::shared_ptr ds, nlohman Status Serdes::CreateMapOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Fail to find output_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != 
json_obj.end(), "Fail to find project_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Fail to find operations"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_parallel_workers") != json_obj.end(), + "Failed to find num_parallel_workers"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("project_columns") != json_obj.end(), "Failed to find project_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("operations") != json_obj.end(), "Failed to find operations"); std::vector input_columns = json_obj["input_columns"]; std::vector output_columns = json_obj["output_columns"]; std::vector project_columns = json_obj["project_columns"]; std::vector> operations; RETURN_IF_NOT_OK(ConstructTensorOps(json_obj["operations"], &operations)); *result = std::make_shared(ds, operations, input_columns, output_columns, project_columns); + (*result)->SetNumWorkers(json_obj["num_parallel_workers"]); return Status::OK(); } Status Serdes::CreateProjectOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Fail to find columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("columns") != json_obj.end(), "Failed to find columns"); std::vector columns = json_obj["columns"]; *result = std::make_shared(ds, columns); return Status::OK(); @@ -431,8 +434,8 @@ Status Serdes::CreateProjectOperationNode(std::shared_ptr ds, nlohm Status Serdes::CreateRenameOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Fail to find input_columns"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != 
json_obj.end(), "Fail to find output_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("input_columns") != json_obj.end(), "Failed to find input_columns"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("output_columns") != json_obj.end(), "Failed to find output_columns"); std::vector input_columns = json_obj["input_columns"]; std::vector output_columns = json_obj["output_columns"]; *result = std::make_shared(ds, input_columns, output_columns); @@ -441,7 +444,7 @@ Status Serdes::CreateRenameOperationNode(std::shared_ptr ds, nlohma Status Serdes::CreateRepeatOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); @@ -449,9 +452,9 @@ Status Serdes::CreateRepeatOperationNode(std::shared_ptr ds, nlohma Status Serdes::CreateShuffleOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Fail to find buffer_size"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("buffer_size") != json_obj.end(), "Failed to find buffer_size"); CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("reshuffle_each_epoch") != json_obj.end(), - "Fail to find reshuffle_each_epoch"); + "Failed to find reshuffle_each_epoch"); int32_t buffer_size = json_obj["buffer_size"]; bool reset_every_epoch = json_obj["reshuffle_each_epoch"]; *result = std::make_shared(ds, buffer_size, reset_every_epoch); @@ -460,15 +463,35 @@ Status Serdes::CreateShuffleOperationNode(std::shared_ptr ds, nlohm Status Serdes::CreateSkipOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + 
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); } +Status Serdes::CreateTransferOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result) { + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("queue_name") != json_obj.end(), "Failed to find queue_name"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_type") != json_obj.end(), "Failed to find device_type"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("device_id") != json_obj.end(), "Failed to find device_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("send_epoch_end") != json_obj.end(), "Failed to find send_epoch_end"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("total_batch") != json_obj.end(), "Failed to find total_batch"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("create_data_info_queue") != json_obj.end(), + "Failed to find create_data_info_queue"); + std::string queue_name = json_obj["queue_name"]; + std::string device_type = json_obj["device_type"]; + int32_t device_id = json_obj["device_id"]; + bool send_epoch_end = json_obj["send_epoch_end"]; + int32_t total_batch = json_obj["total_batch"]; + bool create_data_info_queue = json_obj["create_data_info_queue"]; + *result = std::make_shared(ds, queue_name, device_type, device_id, send_epoch_end, total_batch, + create_data_info_queue); + return Status::OK(); +} + Status Serdes::CreateTakeOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Fail to find count"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("count") != json_obj.end(), "Failed to find count"); int32_t count = json_obj["count"]; *result = std::make_shared(ds, count); return Status::OK(); @@ -490,6 +513,8 @@ Status Serdes::CreateDatasetOperationNode(std::shared_ptr ds, nlohm 
RETURN_IF_NOT_OK(CreateShuffleOperationNode(ds, json_obj, result)); } else if (op_type == kSkipNode) { RETURN_IF_NOT_OK(CreateSkipOperationNode(ds, json_obj, result)); + } else if (op_type == kTransferNode) { + RETURN_IF_NOT_OK(CreateTransferOperationNode(ds, json_obj, result)); } else if (op_type == kTakeNode) { RETURN_IF_NOT_OK(CreateTakeOperationNode(ds, json_obj, result)); } else { @@ -500,12 +525,12 @@ Status Serdes::CreateDatasetOperationNode(std::shared_ptr ds, nlohm Status Serdes::ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Fail to find num_shards"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Fail to find shard_id"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Fail to find seed"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Fail to find offset"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Fail to find even_dist"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_shards") != json_obj.end(), "Failed to find num_shards"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shard_id") != json_obj.end(), "Failed to find shard_id"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("seed") != json_obj.end(), "Failed to find seed"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("offset") != json_obj.end(), "Failed to find offset"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("even_dist") != json_obj.end(), "Failed to find even_dist"); int64_t num_shards = json_obj["num_shards"]; int64_t shard_id = json_obj["shard_id"]; bool shuffle = json_obj["shuffle"]; @@ -522,8 +547,8 @@ Status 
Serdes::ConstructDistributedSampler(nlohmann::json json_obj, int64_t num_ } Status Serdes::ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Fail to find num_val"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Fail to find shuffle"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_val") != json_obj.end(), "Failed to find num_val"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("shuffle") != json_obj.end(), "Failed to find shuffle"); int64_t num_val = json_obj["num_val"]; bool shuffle = json_obj["shuffle"]; *sampler = std::make_shared(num_val, shuffle, num_samples); @@ -536,7 +561,7 @@ Status Serdes::ConstructPKSampler(nlohmann::json json_obj, int64_t num_samples, Status Serdes::ConstructRandomSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); bool replacement = json_obj["replacement"]; *sampler = std::make_shared(replacement, num_samples); if (json_obj.find("child_sampler") != json_obj.end()) { @@ -548,7 +573,7 @@ Status Serdes::ConstructRandomSampler(nlohmann::json json_obj, int64_t num_sampl Status Serdes::ConstructSequentialSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Fail to find start_index"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("start_index") != json_obj.end(), "Failed to find start_index"); int64_t start_index = json_obj["start_index"]; *sampler = std::make_shared(start_index, num_samples); if (json_obj.find("child_sampler") != json_obj.end()) { @@ -560,7 +585,7 @@ Status Serdes::ConstructSequentialSampler(nlohmann::json json_obj, int64_t 
num_s Status Serdes::ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Fail to find indices"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("indices") != json_obj.end(), "Failed to find indices"); std::vector indices = json_obj["indices"]; *sampler = std::make_shared(indices, num_samples); if (json_obj.find("child_sampler") != json_obj.end()) { @@ -572,8 +597,8 @@ Status Serdes::ConstructSubsetRandomSampler(nlohmann::json json_obj, int64_t num Status Serdes::ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t num_samples, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Fail to find replacement"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Fail to find weights"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("replacement") != json_obj.end(), "Failed to find replacement"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("weights") != json_obj.end(), "Failed to find weights"); bool replacement = json_obj["replacement"]; std::vector weights = json_obj["weights"]; *sampler = std::make_shared(weights, num_samples, replacement); @@ -585,8 +610,8 @@ Status Serdes::ConstructWeightedRandomSampler(nlohmann::json json_obj, int64_t n } Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Fail to find num_samples"); - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Fail to find sampler_name"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Failed to find sampler_name"); int64_t num_samples = json_obj["num_samples"]; std::string sampler_name = json_obj["sampler_name"]; if 
(sampler_name == "DistributedSampler") { @@ -609,7 +634,7 @@ Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr parent_sampler, std::shared_ptr *sampler) { - CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("child_sampler") != json_obj.end(), "Fail to find child_sampler"); + CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("child_sampler") != json_obj.end(), "Failed to find child_sampler"); for (nlohmann::json child : json_obj["child_sampler"]) { std::shared_ptr child_sampler; RETURN_IF_NOT_OK(ConstructSampler(child, &child_sampler)); @@ -619,8 +644,8 @@ Status Serdes::ChildSamplerFromJson(nlohmann::json json_obj, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Fail to find transform"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transform") != op_params.end(), "Failed to find transform"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); std::vector> transforms; std::vector json_operations = {}; json_operations.push_back(op_params["transform"]); @@ -633,14 +658,14 @@ Status Serdes::BoundingBoxAugmentFromJson(nlohmann::json op_params, std::shared_ } Status Serdes::RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Fail to find policy"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("policy") != op_params.end(), "Failed to find policy"); nlohmann::json policy_json = op_params["policy"]; std::vector, double>>> policy; std::vector, double>> policy_items; for (nlohmann::json item : policy_json) { for (nlohmann::json item_pair : item) { - CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Fail to find prob"); - CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Fail to find tensor_op"); + 
CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("prob") != item_pair.end(), "Failed to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(item_pair.find("tensor_op") != item_pair.end(), "Failed to find tensor_op"); std::vector> operations; std::pair, double> policy_pair; std::shared_ptr operation; @@ -659,8 +684,8 @@ Status Serdes::RandomSelectSubpolicyFromJson(nlohmann::json op_params, std::shar } Status Serdes::UniformAugFromJson(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Fail to find transforms"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Fail to find num_ops"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("transforms") != op_params.end(), "Failed to find transforms"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_ops") != op_params.end(), "Failed to find num_ops"); std::vector> transforms = {}; RETURN_IF_NOT_OK(ConstructTensorOps(op_params["transforms"], &transforms)); int32_t num_ops = op_params["num_ops"]; @@ -671,12 +696,14 @@ Status Serdes::UniformAugFromJson(nlohmann::json op_params, std::shared_ptr> *result) { std::vector> output; for (auto op : operations) { - CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_name") != op.end(), "Fail to find tensor_op_name"); - CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_params") != op.end(), "Fail to find tensor_op_params"); + CHECK_FAIL_RETURN_UNEXPECTED(op.find("is_python_front_end_op") == op.end(), + "python operation is not yet supported"); + CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_name") != op.end(), "Failed to find tensor_op_name"); + CHECK_FAIL_RETURN_UNEXPECTED(op.find("tensor_op_params") != op.end(), "Failed to find tensor_op_params"); std::string op_name = op["tensor_op_name"]; nlohmann::json op_params = op["tensor_op_params"]; std::shared_ptr operation = nullptr; - CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Fail to find " + op_name); + 
CHECK_FAIL_RETURN_UNEXPECTED(func_ptr_.find(op_name) != func_ptr_.end(), "Failed to find " + op_name); RETURN_IF_NOT_OK(func_ptr_[op_name](op_params, &operation)); output.push_back(operation); } @@ -691,6 +718,7 @@ Serdes::InitializeFuncPtr() { ops_ptr[vision::kAutoContrastOperation] = &(vision::AutoContrastOperation::from_json); ops_ptr[vision::kBoundingBoxAugmentOperation] = &(BoundingBoxAugmentFromJson); ops_ptr[vision::kCenterCropOperation] = &(vision::CenterCropOperation::from_json); + ops_ptr[vision::kCropOperation] = &(vision::CropOperation::from_json); ops_ptr[vision::kCutMixBatchOperation] = &(vision::CutMixBatchOperation::from_json); ops_ptr[vision::kCutOutOperation] = &(vision::CutOutOperation::from_json); ops_ptr[vision::kDecodeOperation] = &(vision::DecodeOperation::from_json); @@ -730,12 +758,20 @@ Serdes::InitializeFuncPtr() { ops_ptr[vision::kResizeWithBBoxOperation] = &(vision::ResizeWithBBoxOperation::from_json); ops_ptr[vision::kRgbaToBgrOperation] = &(vision::RgbaToBgrOperation::from_json); ops_ptr[vision::kRgbaToRgbOperation] = &(vision::RgbaToRgbOperation::from_json); + ops_ptr[vision::kRgbToBgrOperation] = &(vision::RgbToBgrOperation::from_json); + ops_ptr[vision::kRgbToGrayOperation] = &(vision::RgbToGrayOperation::from_json); ops_ptr[vision::kRotateOperation] = &(vision::RotateOperation::from_json); + ops_ptr[vision::kSlicePatchesOperation] = &(vision::SlicePatchesOperation::from_json); ops_ptr[vision::kSoftDvppDecodeRandomCropResizeJpegOperation] = &(vision::SoftDvppDecodeRandomCropResizeJpegOperation::from_json); ops_ptr[vision::kSoftDvppDecodeResizeJpegOperation] = &(vision::SoftDvppDecodeResizeJpegOperation::from_json); ops_ptr[vision::kSwapRedBlueOperation] = &(vision::SwapRedBlueOperation::from_json); ops_ptr[vision::kUniformAugOperation] = &(UniformAugFromJson); + ops_ptr[vision::kVerticalFlipOperation] = &(vision::VerticalFlipOperation::from_json); + ops_ptr[transforms::kFillOperation] = &(transforms::FillOperation::from_json); + 
ops_ptr[transforms::kOneHotOperation] = &(transforms::OneHotOperation::from_json); + ops_ptr[transforms::kTypeCastOperation] = &(transforms::TypeCastOperation::from_json); + ops_ptr[text::kToNumberOperation] = &(text::ToNumberOperation::from_json); return ops_ptr; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.h b/mindspore/ccsrc/minddata/dataset/engine/serdes.h index ee7e43a7097..962b622c66d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/serdes.h +++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.h @@ -39,6 +39,7 @@ #include "minddata/dataset/engine/ir/datasetops/repeat_node.h" #include "minddata/dataset/engine/ir/datasetops/shuffle_node.h" #include "minddata/dataset/engine/ir/datasetops/skip_node.h" +#include "minddata/dataset/engine/ir/datasetops/transfer_node.h" #include "minddata/dataset/engine/ir/datasetops/take_node.h" #include "minddata/dataset/engine/ir/datasetops/zip_node.h" @@ -115,8 +116,10 @@ #include "minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.h" #include "minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.h" #include "minddata/dataset/kernels/ir/vision/rgba_to_rgb_ir.h" +#include "minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h" #include "minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h" #include "minddata/dataset/kernels/ir/vision/rotate_ir.h" +#include "minddata/dataset/kernels/ir/vision/slice_patches_ir.h" #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.h" #include "minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.h" #include "minddata/dataset/kernels/ir/vision/swap_red_blue_ir.h" @@ -142,7 +145,7 @@ class Serdes { /// \param[in] filename The file name. 
If specified, save the generated JSON string into the file /// \param[out] out_json The result json string /// \return Status The status code returned - Status SaveToJSON(std::shared_ptr node, const std::string &filename, nlohmann::json *out_json); + static Status SaveToJSON(std::shared_ptr node, const std::string &filename, nlohmann::json *out_json); /// \brief function to de-serialize JSON file to IR tree /// \param[in] json_filepath input path of json file @@ -150,19 +153,19 @@ class Serdes { /// \return Status The status code returned static Status Deserialize(std::string json_filepath, std::shared_ptr *ds); - protected: - /// \brief Helper function to save JSON to a file - /// \param[in] json_string The JSON string to be saved to the file - /// \param[in] file_name The file name - /// \return Status The status code returned - Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name); - /// \brief Helper function to construct IR tree, separate zip and other operations /// \param[in] json_obj The JSON object to be deserialized /// \param[out] ds Shared pointer of a DatasetNode object containing the deserialized IR tree /// \return Status The status code returned static Status ConstructPipeline(nlohmann::json json_obj, std::shared_ptr *ds); + protected: + /// \brief Helper function to save JSON to a file + /// \param[in] json_string The JSON string to be saved to the file + /// \param[in] file_name The file name + /// \return Status The status code returned + static Status SaveJSONToFile(nlohmann::json json_string, const std::string &file_name); + /// \brief Function to determine type of the node - dataset node if no dataset exists or operation node /// \param[in] child_ds children datasets that is already created /// \param[in] json_obj json object to read out type of the node @@ -234,6 +237,8 @@ class Serdes { std::shared_ptr *result); static Status CreateSkipOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr 
*result); + static Status CreateTransferOperationNode(std::shared_ptr ds, nlohmann::json json_obj, + std::shared_ptr *result); static Status CreateTakeOperationNode(std::shared_ptr ds, nlohmann::json json_obj, std::shared_ptr *result); diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h index f3cd204996b..e3286e968ab 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/audio.h @@ -17,10 +17,12 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_ +#include #include #include #include #include + #include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "minddata/dataset/include/dataset/constants.h" @@ -33,6 +35,20 @@ class TensorOperation; // Transform operations for performing computer audio. namespace audio { +/// \brief Compute the angle of complex tensor input. +class Angle final : public TensorTransform { + public: + /// \brief Constructor. + Angle(); + /// \brief Destructor. + ~Angle() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; +}; + /// \brief Design two-pole band filter. class BandBiquad final : public TensorTransform { public: @@ -56,6 +72,144 @@ class BandBiquad final : public TensorTransform { std::shared_ptr data_; }; +/// \brief Design two-pole allpass filter. Similar to SoX implementation. +class AllpassBiquad final : public TensorTransform { + public: + /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz). + /// \param[in] central_freq Central frequency (in Hz). + /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707). 
+ explicit AllpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707); + + /// \brief Destructor. + ~AllpassBiquad() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief AmplitudeToDB TensorTransform. +/// \notes Turn a tensor from the power/amplitude scale to the decibel scale. +class AmplitudeToDB final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] stype ['kPower', 'kMagnitude'] + /// \param[in] ref_value Calculate db_multiplier + /// \param[in] amin Clamp the input waveform + /// \param[in] top_db Decibels cut-off value + explicit AmplitudeToDB(ScaleType stype = ScaleType::kPower, float ref_value = 1.0, float amin = 1e-10, + float top_db = 80.0); + + /// \brief Destructor. + ~AmplitudeToDB() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Design two-pole band-pass filter. +class BandpassBiquad final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz). + /// \param[in] central_freq Central frequency (in Hz). + /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor (Default: 0.707). + /// \param[in] const_skirt_gain, If ``True``, uses a constant skirt gain (peak gain = Q). If ``False``, uses a + /// constant 0dB peak gain. (Default: False). + explicit BandpassBiquad(int32_t sample_rate, float central_freq, float Q = 0.707, bool const_skirt_gain = false); + + /// \brief Destructor. 
+ ~BandpassBiquad() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Design two-pole band-reject filter. Similar to SoX implementation. +class BandrejectBiquad final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz). + /// \param[in] central_freq Central frequency (in Hz). + /// \param[in] Q Quality factor, https://en.wikipedia.org/wiki/Q_factor (Default: 0.707). + explicit BandrejectBiquad(int32_t sample_rate, float central_freq, float Q = 0.707); + + /// \brief Destructor. + ~BandrejectBiquad() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Design a bass tone-control effect. +class BassBiquad final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] sample_rate Sampling rate of the waveform, e.g. 44100 (Hz). + /// \param[in] gain Desired gain at the boost (or attenuation) in dB. + /// \param[in] central_freq Central frequency (in Hz). + /// \param[in] Q https://en.wikipedia.org/wiki/Q_factor (Default: 0.707). + explicit BassBiquad(int32_t sample_rate, float gain, float central_freq = 100, float Q = 0.707); + + /// \brief Destructor. + ~BassBiquad() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. 
+ std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief TimeStretch TensorTransform +/// \notes Stretch STFT in time at a given rate, without changing the pitch. +class TimeStretch final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] hop_length Length of hop between STFT windows. Default: None. + /// \param[in] n_freq Number of filter banks from STFT. Default: 201. + /// \param[in] fixed_rate Rate to speed up or slow down the input in time. Default: None. + explicit TimeStretch(float hop_length = std::numeric_limits::quiet_NaN(), int n_freq = 201, + float fixed_rate = std::numeric_limits::quiet_NaN()); + + /// \brief Destructor. + ~TimeStretch() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; } // namespace audio } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h index 851ca5637e2..7af6fb81267 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h @@ -49,6 +49,12 @@ enum class ShuffleMode { kInfile = 3 ///< Shuffle data within each file. }; +/// \brief Possible scale for input audio. +enum class ScaleType { + kMagnitude = 0, ///< Audio scale is magnitude. + kPower = 1, ///< Audio scale is power. +}; + /// \brief The method of padding. enum class BorderType { kConstant = 0, ///< Fill the border with constant values. 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h index 53e47112da7..90017f22968 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h @@ -1091,6 +1091,64 @@ inline std::shared_ptr CSV(const std::vector &dataset_f cache); } +class FlickrDataset : public Dataset { + public: + explicit FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, bool decode, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, bool decode, + const Sampler *sampler, const std::shared_ptr &cache); + explicit FlickrDataset(const std::vector &dataset_dir, const std::vector &annotation_file, bool decode, + const std::reference_wrapper sampler, const std::shared_ptr &cache); + ~FlickrDataset() = default; +}; + +/// \brief Function to create a FlickrDataset +/// \notes The generated dataset has two columns ["image", "annotation"] +/// \param[in] dataset_dir The dataset dir to be read +/// \param[in] annotation_file The annotation file to be read +/// \param[in] decode Decode the images after reading (default=false). +/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the current FlickrDataset +inline std::shared_ptr Flickr( + const std::string &dataset_dir, const std::string &annotation_file, bool decode = false, + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler, + cache); +} + +/// \brief Function to create a FlickrDataset +/// \notes The generated dataset has two columns ["image", "annotation"] +/// \param[in] dataset_dir The dataset dir to be read +/// \param[in] annotation_file The annotation file to be read +/// \param[in] decode Decode the images after reading. +/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// \return Shared pointer to the current FlickrDataset +inline std::shared_ptr Flickr(const std::string &dataset_dir, const std::string &annotation_file, + bool decode, const Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler, + cache); +} + +/// \brief Function to create a FlickrDataset +/// \notes The generated dataset has two columns ["image", "annotation"] +/// \param[in] dataset_dir The dataset dir to be read +/// \param[in] annotation_file The annotation file to be read +/// \param[in] decode Decode the images after reading. +/// \param[in] sampler Sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the current FlickrDataset +inline std::shared_ptr Flickr(const std::string &dataset_dir, const std::string &annotation_file, + bool decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), decode, sampler, + cache); +} + class ImageFolderDataset : public Dataset { public: explicit ImageFolderDataset(const std::vector &dataset_dir, bool decode, @@ -1487,6 +1545,60 @@ inline std::shared_ptr operator+(const std::shared_ptr & return std::make_shared(std::vector({datasets1, datasets2})); } +class CmuArcticDataset : public Dataset { + public: + explicit CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, const Sampler *sampler, + const std::shared_ptr &cache); + explicit CmuArcticDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache); + ~CmuArcticDataset() = default; +}; + +/// \brief Function to create a CmuArcticDataset. +/// \note The generated dataset has three columns ["audio", "samplerate", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CMU Arctic, can be "training", "validation", "testing" or "all" (default = "all"). +/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CmuArcticDataset. 
+inline std::shared_ptr CmuArctic(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + + +/// \brief Function to create a CmuArcticDataset. +/// \note The generated dataset has three columns ["audio", "samplerate", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CMU Arctic, can be "training", "validation", "testing" or "all". +/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CmuArcticDataset. +inline std::shared_ptr CmuArctic(const std::string &dataset_dir, const std::string &usage, + const Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + + +/// \brief Function to create a CmuArcticDataset. +/// \note The generated dataset has three columns ["audio", "samplerate", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CMU Arctic, can be "training", "validation", "testing" or "all". +/// \param[in] sampler Sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CmuArcticDataset. 
+inline std::shared_ptr CmuArctic(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + + class RandomDataDataset : public Dataset { public: RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, @@ -1519,60 +1631,6 @@ std::shared_ptr RandomData(const int32_t &total_rows = 0, con return ds; } -class LibriSpeechDataset : public Dataset { - public: - explicit LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, - const std::shared_ptr &sampler, const std::shared_ptr &cache); - explicit LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, const Sampler *sampler, - const std::shared_ptr &cache); - explicit LibriSpeechDataset(const std::vector &dataset_dir, const std::vector &usage, - const std::reference_wrapper sampler, const std::shared_ptr &cache); - ~LibriSpeechDataset() = default; -}; - -/// \brief Function to create a LibriSpeechDataset. -/// \note The generated dataset has two columns ["audio", "samplerate", "label"]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all"). -/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). -/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). -/// \return Shared pointer to the LibriSpeechDataset. 
-inline std::shared_ptr LibriSpeech(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr) { - return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); -} - - -/// \brief Function to create a LibriSpeechDataset. -/// \note The generated dataset has two columns ["audio", "samplerate", "label"]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all"). -/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). -/// \return Shared pointer to the LibriSpeechDataset. -inline std::shared_ptr LibriSpeech(const std::string &dataset_dir, const std::string &usage, - const Sampler *sampler, - const std::shared_ptr &cache = nullptr) { - return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); -} - - -/// \brief Function to create a LibriSpeechDataset. -/// \note The generated dataset has two columns ["audio", "samplerate", "label"]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage Part of dataset of LibriSpeech, can be "training", "validation", "testing" or "all" (default = "all"). -/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). -/// \return Shared pointer to the LibriSpeechDataset. 
-inline std::shared_ptr LibriSpeech(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr) { - return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); -} - - class TextFileDataset : public Dataset { public: explicit TextFileDataset(const std::vector> &dataset_files, int64_t num_samples, diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h index 5eb5ca2eec0..8bb241fdb3a 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h @@ -37,11 +37,12 @@ class Sampler : std::enable_shared_from_this { friend class CLUEDataset; friend class CocoDataset; friend class CSVDataset; + friend class FlickrDataset; friend class ImageFolderDataset; friend class ManifestDataset; - friend class LibriSpeechDataset; friend class MindDataDataset; friend class MnistDataset; + friend class CmuArcticDataset; friend class RandomDataDataset; friend class TextFileDataset; friend class TFRecordDataset; diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h index 0af4b98efbf..c16b6e9e22b 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h @@ -57,7 +57,31 @@ class AutoContrast final : public TensorTransform { std::shared_ptr data_; }; -/// \brief Apply a given image transform on a random selection of bounding box regions of a given image. +/// \brief AdjustGamma TensorTransform. +/// \notes Apply gamma correction on input image. +class AdjustGamma final : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] gamma Non negative real number, which makes the output image pixel value + /// exponential in relation to the input image pixel value. 
+ /// \param[in] gain The constant multiplier. + explicit AdjustGamma(float gamma, float gain = 1); + + /// \brief Destructor. + ~AdjustGamma() = default; + + protected: + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief BoundingBoxAugment TensorTransform. +/// \note Apply a given image transform on a random selection of bounding box regions of a given image. class BoundingBoxAugment final : public TensorTransform { public: /// \brief Constructor. diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt index 8ef4bf82d87..47c4c2c8f71 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/CMakeLists.txt @@ -6,6 +6,7 @@ if(ENABLE_ACL) add_subdirectory(dvpp) endif() add_library(kernels-image OBJECT + adjust_gamma_op.cc affine_op.cc auto_contrast_op.cc bounding_box.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc index 338d257d547..3698482fe56 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/adjust_gamma_op.cc @@ -15,21 +15,21 @@ */ #include "minddata/dataset/kernels/image/adjust_gamma_op.h" - +#include #include "minddata/dataset/kernels/data/data_utils.h" #include "minddata/dataset/kernels/image/image_utils.h" namespace mindspore { namespace dataset { -constexpr float AdjustGammaOp::kGain = 1.0; +const float AdjustGammaOp::kGain = 1.0; Status AdjustGammaOp::Compute(const std::shared_ptr &input, std::shared_ptr *output) { IO_CHECK(input, output); // typecast CHECK_FAIL_RETURN_UNEXPECTED(input->type() != DataType::DE_STRING, - "AdjustGamma: 
input tensor type should be int, float or double, but got: string."); + "AdjustGamma: input tensor type should be [int, float, double], but got string."); if (input->type().IsFloat()) { std::shared_ptr input_tensor; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc index 389452da4e3..46939c4aa32 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/crop_op.cc @@ -41,9 +41,15 @@ Status CropOp::OutputShape(const std::vector &inputs, std::vector &inputs, std::vect outputs.clear(); TensorShape in = inputs[0]; TensorShape out = TensorShape{in[2], in[0], in[1]}; - if (inputs[0].Rank() == 3) outputs.emplace_back(out); - if (!outputs.empty()) return Status::OK(); + if (inputs[0].Rank() == 3) { + (void)outputs.emplace_back(out); + } + if (!outputs.empty()) { + return Status::OK(); + } return Status( StatusCode::kMDUnexpectedError, "HWC2CHW: invalid input shape, expected 3D input, but got input dimension is:" + std::to_string(inputs[0].Rank())); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc index 0478382e27c..1c3b7e35b1a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.cc @@ -189,7 +189,7 @@ Status DecodeCv(const std::shared_ptr &input, std::shared_ptr *o } cv::cvtColor(img_mat, img_mat, static_cast(cv::COLOR_BGR2RGB)); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, 3, &output_cv)); *output = std::static_pointer_cast(output_cv); return Status::OK(); } catch (const cv::Exception &e) { @@ -600,7 +600,7 @@ Status CropAndResize(const std::shared_ptr &input, std::shared_ptr input_image; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, &input_image)); 
+ RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_roi, input_cv->Rank(), &input_image)); LiteMat imIn, imOut; std::shared_ptr output_tensor; TensorShape new_shape = TensorShape({target_height, target_width, 3}); @@ -676,7 +676,7 @@ Status Rotate(const std::shared_ptr &input, std::shared_ptr *out // use memcpy and don't compute the new shape since openCV has a rounding problem cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation), cv::BORDER_CONSTANT, fill_color); - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &output_cv)); RETURN_UNEXPECTED_IF_NULL(output_cv); } *output = std::static_pointer_cast(output_cv); @@ -872,6 +872,64 @@ Status AdjustContrast(const std::shared_ptr &input, std::shared_ptr &input, std::shared_ptr *output, const float &gamma, + const float &gain) { + try { + int num_channels = 1; + if (input->Rank() < 2) { + RETURN_STATUS_UNEXPECTED("AdjustGamma: image shape is not <...,H,W,C> or ."); + } + if (input->Rank() > 2) { + num_channels = input->shape()[-1]; + } + if (num_channels != 1 && num_channels != 3) { + RETURN_STATUS_UNEXPECTED("AdjustGamma: channel of input image should be 1 or 3."); + } + if (input->type().IsFloat()) { + for (auto itr = input->begin(); itr != input->end(); itr++) { + *itr = pow((*itr) * gain, gamma); + *itr = std::min(std::max((*itr), 0.0f), 1.0f); + } + *output = input; + + } else { + std::shared_ptr input_cv = CVTensor::AsCVTensor(input); + if (!input_cv->mat().data) { + RETURN_STATUS_UNEXPECTED("AdjustGamma: load image failed."); + } + cv::Mat input_img = input_cv->mat(); + std::shared_ptr output_cv; + RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv)); + uchar LUT[256] = {}; + for (int i = 0; i < 256; i++) { + float f = i / 255.0; + f = pow(f, gamma); + LUT[i] = static_cast(floor(std::min(f * (255.0 + 1 - 1e-3) * gain, 255.0))); + } + if 
(input_img.channels() == 1) { + cv::MatIterator_ it = input_img.begin(); + cv::MatIterator_ it_end = input_img.end(); + for (; it != it_end; ++it) { + *it = LUT[(*it)]; + } + } else { + cv::MatIterator_ it = input_img.begin(); + cv::MatIterator_ it_end = input_img.end(); + for (; it != it_end; ++it) { + (*it)[0] = LUT[(*it)[0]]; + (*it)[1] = LUT[(*it)[1]]; + (*it)[2] = LUT[(*it)[2]]; + } + } + output_cv->mat() = input_img * 1; + *output = std::static_pointer_cast(output_cv); + } + } catch (const cv::Exception &e) { + RETURN_STATUS_UNEXPECTED("AdjustGamma: " + std::string(e.what())); + } + return Status::OK(); +} + Status AutoContrast(const std::shared_ptr &input, std::shared_ptr *output, const float &cutoff, const std::vector &ignore) { try { @@ -941,7 +999,7 @@ Status AutoContrast(const std::shared_ptr &input, std::shared_ptrmat().type()); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv)); (*output) = std::static_pointer_cast(output_cv); RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape())); } catch (const cv::Exception &e) { @@ -1042,7 +1100,7 @@ Status Equalize(const std::shared_ptr &input, std::shared_ptr *o cv::Mat result; cv::merge(image_result, result); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv)); (*output) = std::static_pointer_cast(output_cv); RETURN_IF_NOT_OK((*output)->Reshape(input_cv->shape())); } catch (const cv::Exception &e) { @@ -1138,7 +1196,7 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type); } std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, input_cv->Rank(), &output_cv)); // pad the 
dimension if shape information is only 2 dimensional, this is grayscale int num_channels = input_cv->shape()[CHANNEL_INDEX]; if (input_cv->Rank() == DEFAULT_IMAGE_RANK && num_channels == MIN_IMAGE_CHANNELS && @@ -1283,7 +1341,7 @@ Status GaussianBlur(const std::shared_ptr &input, std::shared_ptrmat(), output_cv_mat, cv::Size(kernel_x, kernel_y), static_cast(sigma_x), static_cast(sigma_y)); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_cv_mat, input_cv->Rank(), &output_cv)); (*output) = std::static_pointer_cast(output_cv); return Status::OK(); } catch (const cv::Exception &e) { @@ -1356,8 +1414,9 @@ Status SlicePatches(const std::shared_ptr &input, std::vector patch_cv; - cv::Rect patch(j * patch_w, i * patch_h, patch_w, patch_h); - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_img(patch), &patch_cv)); + cv::Rect rect(j * patch_w, i * patch_h, patch_w, patch_h); + cv::Mat patch(out_img(rect)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(patch, input_cv->Rank(), &patch_cv)); (*output).push_back(std::static_pointer_cast(patch_cv)); } } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h index a26671db498..6886f274bbd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/image_utils.h @@ -234,6 +234,16 @@ Status AdjustContrast(const std::shared_ptr &input, std::shared_ptr &input, std::shared_ptr *output, const float &cutoff, const std::vector &ignore); +/// \brief Returns image with gamma correction. +/// \param[in] input: Tensor of shape // in RGB/Grayscale and any OpenCV compatible type, +/// see CVTensor. +/// \param[in] gamma: Non negative real number, same as gamma in the equation. gamma larger than 1 make the shadows +/// darker, while gamma smaller than 1 make dark regions lighter. 
+/// \param[in] gain: The constant multiplier. +/// \param[out] output: Adjusted image of same shape and type. +Status AdjustGamma(const std::shared_ptr &input, std::shared_ptr *output, const float &gamma, + const float &gain); + /// \brief Returns image with adjusted saturation. /// \param input: Tensor of shape in RGB order and any OpenCv compatible type, see CVTensor. /// \param alpha: Alpha value to adjust saturation by. Should be a positive number. diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc index d10828c579c..3e1c6f6fe49 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc @@ -1015,7 +1015,7 @@ std::vector> GetDefaultBoxes(BoxesConfig config) { } scales.push_back(1.0f); std::vector> default_boxes; - for (int i = 0; i < config.feature_size.size(); i++) { + for (auto i = 0; i < config.feature_size.size(); i++) { float sk1 = scales[i]; float sk2 = scales[i + 1]; float sk3 = sqrt(sk1 * sk2); @@ -1069,10 +1069,10 @@ void ConvertBoxes(std::vector> &boxes, const std::vector ApplyNms(const std::vector> &all_boxes, std::vector &all_scores, float thres, int max_boxes) { - int boxes_num = all_boxes.size(); + size_t boxes_num = all_boxes.size(); std::vector areas(boxes_num); std::vector order(boxes_num); - for (int i = 0; i < boxes_num; i++) { + for (auto i = 0; i < boxes_num; i++) { if (all_boxes[i].size() < 4) { return {}; } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc index 1099941bffb..2ec3fb0fed3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc @@ -410,7 +410,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const 
LiteMat &M, int int *a = &_a[0], *b = a + dst.width_; const int SCALE = 1 << 10; const int B_SIZE = 64; - int16_t WH[B_SIZE * B_SIZE * 2]; + int16_t *WH = new int16_t[B_SIZE * B_SIZE * 2]; int16_t A_Ptr[B_SIZE * B_SIZE]; int r_delta = SCALE / kTabSz / 2; int x, y, x1, y1; @@ -449,7 +449,7 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int Remap(src, lite_part, _HW, _matA, borderType, borderValue); } } - + delete[] WH; delete[] _a; return true; } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc index de4c4ab5c07..9757ee1c5a3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/posterize_op.cc @@ -46,7 +46,8 @@ Status PosterizeOp::Compute(const std::shared_ptr &input, std::shared_pt input->type().ToString()); cv::LUT(in_image, lut_vector, output_img); std::shared_ptr result_tensor; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &result_tensor)); + + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, input_cv->Rank(), &result_tensor)); *output = std::static_pointer_cast(result_tensor); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc index 5d1088a80bf..3a7bb7610be 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_color_op.cc @@ -46,7 +46,7 @@ Status RandomColorOp::Compute(const std::shared_ptr &in, std::shared_ptr cv::Mat cv_out; cv::merge(temp, 3, cv_out); std::shared_ptr cvt_out; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, &cvt_out)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(cv_out, cvt_in->Rank(), &cvt_out)); if (abs(t - 0.0) < eps) { // return grayscale *out = std::static_pointer_cast(cvt_out); diff --git 
a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc index b8fc8ef866d..33d209f37a5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_and_resize_op.cc @@ -61,9 +61,15 @@ Status RandomCropAndResizeOp::OutputShape(const std::vector &inputs RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); TensorShape out = TensorShape{target_height_, target_width_}; - if (inputs[0].Rank() == 2) outputs.emplace_back(out); - if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); - if (!outputs.empty()) return Status::OK(); + if (inputs[0].Rank() == 2) { + (void)outputs.emplace_back(out); + } + if (inputs[0].Rank() == 3) { + (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); + } + if (!outputs.empty()) { + return Status::OK(); + } return Status(StatusCode::kMDUnexpectedError, "RandomCropAndResize: invalid input shape"); } Status RandomCropAndResizeOp::GetCropBox(int h_in, int w_in, int *x, int *y, int *crop_height, int *crop_width) { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc index e69fc2ab8b7..561e28b0262 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/random_crop_op.cc @@ -143,9 +143,15 @@ Status RandomCropOp::OutputShape(const std::vector &inputs, std::ve RETURN_IF_NOT_OK(TensorOp::OutputShape(inputs, outputs)); outputs.clear(); TensorShape out = TensorShape{crop_height_, crop_width_}; - if (inputs[0].Rank() == 2) outputs.emplace_back(out); - if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); - if (!outputs.empty()) return Status::OK(); + if (inputs[0].Rank() == 2) { + (void)outputs.emplace_back(out); + } + if 
(inputs[0].Rank() == 3) { + (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); + } + if (!outputs.empty()) { + return Status::OK(); + } return Status(StatusCode::kMDUnexpectedError, "RandomCrop: invalid input shape, expected 2D or 3D input, but got input dimension is:" + std::to_string(inputs[0].Rank())); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc index 62614b89c10..9e06072fc23 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_op.cc @@ -61,9 +61,15 @@ Status ResizeOp::OutputShape(const std::vector &inputs, std::vector outputW = size2_; } TensorShape out = TensorShape{outputH, outputW}; - if (inputs[0].Rank() == 2) outputs.emplace_back(out); - if (inputs[0].Rank() == 3) outputs.emplace_back(out.AppendDim(inputs[0][2])); - if (!outputs.empty()) return Status::OK(); + if (inputs[0].Rank() == 2) { + (void)outputs.emplace_back(out); + } + if (inputs[0].Rank() == 3) { + (void)outputs.emplace_back(out.AppendDim(inputs[0][2])); + } + if (!outputs.empty()) { + return Status::OK(); + } return Status(StatusCode::kMDUnexpectedError, "Resize: invalid input wrong shape."); } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc index 8dd690d2c25..b24359089ac 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc @@ -63,7 +63,7 @@ Status SharpnessOp::Compute(const std::shared_ptr &input, std::shared_pt cv::addWeighted(input_img, alpha_, result, 1.0 - alpha_, 0.0, result); std::shared_ptr output_cv; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, input_cv->Rank(), &output_cv)); RETURN_UNEXPECTED_IF_NULL(output_cv); *output = 
std::static_pointer_cast(output_cv); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc index b54d15dd0cf..237dc590dcc 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc @@ -74,7 +74,8 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr cv_tensor = nullptr; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor)); + + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor)); *output = std::static_pointer_cast(cv_tensor); } catch (const cv::Exception &e) { std::string error = "SoftDvppDecodeRandomCropResizeJpeg:" + std::string(e.what()); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc index 0a8687d352c..211d706bf51 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.cc @@ -66,7 +66,8 @@ Status SoftDvppDecodeResizeJpegOp::Compute(const std::shared_ptr &input, error_info += std::to_string(ret) + ", please check the log information for more details."; CHECK_FAIL_RETURN_UNEXPECTED(ret == 0, error_info); std::shared_ptr cv_tensor = nullptr; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, &cv_tensor)); + + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_rgb_img, 3, &cv_tensor)); *output = std::static_pointer_cast(cv_tensor); } catch (const cv::Exception &e) { std::string error = "SoftDvppDecodeResizeJpeg:" + std::string(e.what()); diff --git 
a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc index e8ee2c85cb6..a8762e1af8a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.cc @@ -41,7 +41,7 @@ Status SolarizeOp::Compute(const std::shared_ptr &input, std::shared_ptr std::shared_ptr mask_mat_tensor; std::shared_ptr output_cv_tensor; - RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_cv->mat(), &mask_mat_tensor)); + RETURN_IF_NOT_OK(CVTensor::CreateFromMat(input_img, input_cv->Rank(), &mask_mat_tensor)); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv_tensor)); RETURN_UNEXPECTED_IF_NULL(mask_mat_tensor); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h index 435876ad947..8fa83efa91c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.h @@ -49,8 +49,8 @@ class UniformAugOp : public TensorOp { std::string Name() const override { return kUniformAugOp; } private: - int32_t num_ops_; std::vector> tensor_op_list_; + int32_t num_ops_; std::mt19937 rnd_; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc index 26542868c9a..ffb398c61ac 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc @@ -135,6 +135,13 @@ Status FillOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +Status FillOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + std::shared_ptr fill_value; + RETURN_IF_NOT_OK(Tensor::from_json(op_params, &fill_value)); + *operation = std::make_shared(fill_value); + 
return Status::OK(); +} + // MaskOperation MaskOperation::MaskOperation(RelationalOp op, const std::shared_ptr &constant, DataType dtype) : op_(op), constant_(constant), dtype_(dtype) {} @@ -173,6 +180,13 @@ Status OneHotOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +Status OneHotOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_classes") != op_params.end(), "Failed tofind num_classes"); + int32_t num_classes = op_params["num_classes"]; + *operation = std::make_shared(num_classes); + return Status::OK(); +} + #ifndef ENABLE_ANDROID // PadEndOperation PadEndOperation::PadEndOperation(const TensorShape &pad_shape, const std::shared_ptr &pad_value) @@ -273,6 +287,13 @@ Status TypeCastOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +Status TypeCastOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed tofind data_type"); + std::string data_type = op_params["data_type"]; + *operation = std::make_shared(data_type); + return Status::OK(); +} + #ifndef ENABLE_ANDROID // UniqueOperation Status UniqueOperation::ValidateParams() { return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h index f0c060529e8..f4be1173d6a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.h @@ -27,6 +27,10 @@ namespace mindspore { namespace dataset { + +// Transform operations for performing data transformation. 
+namespace transforms { + // Char arrays storing name of corresponding classes (in alphabetical order) constexpr char kComposeOperation[] = "Compose"; constexpr char kConcatenateOperation[] = "Concatenate"; @@ -42,9 +46,6 @@ constexpr char kRandomChoiceOperation[] = "RandomChoice"; constexpr char kTypeCastOperation[] = "TypeCast"; constexpr char kUniqueOperation[] = "Unique"; constexpr char kPluginOperation[] = "Plugin"; - -// Transform operations for performing data transformation. -namespace transforms { /* ####################################### Derived TensorOperation classes ################################# */ class ComposeOperation : public TensorOperation { @@ -109,6 +110,8 @@ class FillOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: std::shared_ptr fill_value_; }; @@ -145,6 +148,8 @@ class OneHotOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: int32_t num_classes_; }; @@ -248,6 +253,8 @@ class TypeCastOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: DataType data_type_; }; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc index 668337777c2..0bc911024f9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.cc @@ -38,6 +38,11 @@ Status ValidateFloatScalarPositive(const std::string &op_name, const std::string return Status::OK(); } +Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar) { + RETURN_IF_NOT_OK(ValidateScalar(op_name, scalar_name, scalar, {0}, 
false)); + return Status::OK(); +} + Status ValidateVectorFillvalue(const std::string &op_name, const std::vector &fill_value) { if (fill_value.empty() || (fill_value.size() != 1 && fill_value.size() != 3)) { std::string err_msg = diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h index d420377bb0e..72bbaf570e3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/validators.h @@ -36,6 +36,9 @@ Status ValidateIntScalarPositive(const std::string &op_name, const std::string & // Helper function to positive float scalar Status ValidateFloatScalarPositive(const std::string &op_name, const std::string &scalar_name, float scalar); +// Helper function to non-negative float scalar +Status ValidateFloatScalarNonNegative(const std::string &op_name, const std::string &scalar_name, float scalar); + // Helper function to validate scalar template Status ValidateScalar(const std::string &op_name, const std::string &scalar_name, const T scalar, diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt index d46a9bfe52b..7a241b89ed3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/CMakeLists.txt @@ -2,6 +2,7 @@ file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc" set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD) set(DATASET_KERNELS_IR_VISION_SRC_FILES + adjust_gamma_ir.cc affine_ir.cc auto_contrast_ir.cc bounding_box_augment_ir.cc diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc index 52c75289141..8b81888f965 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/adjust_gamma_ir.cc @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include + #include "minddata/dataset/kernels/ir/vision/adjust_gamma_ir.h" #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc index 30fc14dce81..cc05c637bb3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/affine_ir.cc @@ -82,12 +82,12 @@ Status AffineOperation::to_json(nlohmann::json *out_json) { } Status AffineOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); float_t degrees = op_params["degrees"]; std::vector translation = op_params["translate"]; float scale = op_params["scale"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc index 8cf5bcb36cc..93c7cdfd589 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/auto_contrast_ir.cc @@ -68,8 +68,8 @@ Status AutoContrastOperation::to_json(nlohmann::json *out_json) { } Status AutoContrastOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Fail to find cutoff"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Fail to find ignore"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("cutoff") != op_params.end(), "Failed to find cutoff"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ignore") != op_params.end(), "Failed to find ignore"); float cutoff = op_params["cutoff"]; std::vector ignore = op_params["ignore"]; *operation = std::make_shared(cutoff, ignore); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc index 00b4d72cb3e..174c1bf9dbd 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/center_crop_ir.cc @@ -55,7 +55,7 @@ Status CenterCropOperation::to_json(nlohmann::json *out_json) { } Status CenterCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find 
size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc index e46d6682383..db5ad3478cf 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.cc @@ -63,6 +63,21 @@ std::shared_ptr CropOperation::Build() { std::shared_ptr tensor_op = std::make_shared(y, x, height, width); return tensor_op; } + +Status CropOperation::to_json(nlohmann::json *out_json) { + (*out_json)["coordinates"] = coordinates_; + (*out_json)["size"] = size_; + return Status::OK(); +} + +Status CropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("coordinates") != op_params.end(), "Failed to find coordinates"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + std::vector coordinates = op_params["coordinates"]; + std::vector size = op_params["size"]; + *operation = std::make_shared(coordinates, size); + return Status::OK(); +} } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h index 21388f9f301..170323c0c9c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/crop_ir.h @@ -47,6 +47,10 @@ class CropOperation : public TensorOperation { std::string Name() const override; + Status to_json(nlohmann::json *out_json) override; + + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: std::vector coordinates_; std::vector size_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc 
b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc index a4adfa0d8bd..49df9682d66 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutmix_batch_ir.cc @@ -57,9 +57,9 @@ Status CutMixBatchOperation::to_json(nlohmann::json *out_json) { Status CutMixBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("image_batch_format") != op_params.end(), - "Fail to find image_batch_format"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); + "Failed to find image_batch_format"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); ImageBatchFormat image_batch = static_cast(op_params["image_batch_format"]); float alpha = op_params["alpha"]; float prob = op_params["prob"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc index 1b8944fc8bd..50ba03f1d88 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/cutout_ir.cc @@ -53,8 +53,8 @@ Status CutOutOperation::to_json(nlohmann::json *out_json) { } Status CutOutOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Fail to find length"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Fail to find num_patches"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("length") != op_params.end(), "Failed to find length"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_patches") != op_params.end(), "Failed to find num_patches"); int32_t length = op_params["length"]; int32_t num_patches = op_params["num_patches"]; *operation = std::make_shared(length, num_patches); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc index d4c478cf3d2..cbc457ed167 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/decode_ir.cc @@ -40,7 +40,7 @@ Status DecodeOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } Status DecodeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Fail to find rgb"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rgb") != op_params.end(), "Failed to find rgb"); bool rgb = op_params["rgb"]; *operation = std::make_shared(rgb); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc index b45d8c7d473..88eaaed382b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/gaussian_blur_ir.cc @@ -65,8 +65,8 @@ Status GaussianBlurOperation::to_json(nlohmann::json *out_json) { } Status GaussianBlurOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Fail to find kernel_size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Fail to find sigma"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("kernel_size") != op_params.end(), "Failed to find kernel_size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("sigma") != op_params.end(), "Failed to find sigma"); std::vector 
kernel_size = op_params["kernel_size"]; std::vector sigma = op_params["sigma"]; *operation = std::make_shared(kernel_size, sigma); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc index 56e8e72878b..fb23c57d20c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/mixup_batch_ir.cc @@ -47,7 +47,7 @@ Status MixUpBatchOperation::to_json(nlohmann::json *out_json) { } Status MixUpBatchOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Fail to find alpha"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("alpha") != op_params.end(), "Failed to find alpha"); float alpha = op_params["alpha"]; *operation = std::make_shared(alpha); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc index 8095036afb0..7e9b62f0799 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/normalize_pad_ir.cc @@ -64,9 +64,9 @@ Status NormalizePadOperation::to_json(nlohmann::json *out_json) { } Status NormalizePadOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Fail to find mean"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Fail to find std"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Fail to find dtype"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("mean") != op_params.end(), "Failed to find mean"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("std") != op_params.end(), "Failed to find std"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("dtype") != op_params.end(), "Failed to find dtype"); std::vector mean = op_params["mean"]; std::vector std = op_params["std"]; std::string dtype = op_params["dtype"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc index 5cf7a2ff386..3e5499b41db 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/pad_ir.cc @@ -99,9 +99,9 @@ Status PadOperation::to_json(nlohmann::json *out_json) { } Status PadOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); std::vector padding = op_params["padding"]; std::vector fill_value = op_params["fill_value"]; BorderType padding_mode = static_cast(op_params["padding_mode"]); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc index c77707ff7f5..2c4fc91eedb 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_affine_ir.cc @@ -24,7 +24,6 @@ namespace mindspore { namespace dataset { namespace vision { - constexpr size_t dimension_zero = 0; constexpr size_t dimension_one = 1; 
constexpr size_t dimension_two = 2; @@ -157,12 +156,12 @@ Status RandomAffineOperation::to_json(nlohmann::json *out_json) { } Status RandomAffineOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Fail to find translate"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Fail to find shear"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("translate") != op_params.end(), "Failed to find translate"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shear") != op_params.end(), "Failed to find shear"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); std::vector degrees = op_params["degrees"]; std::vector translate_range = op_params["translate"]; std::vector scale_range = op_params["scale"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc index 53d99f00034..f8e38289b92 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_adjust_ir.cc @@ -26,7 +26,6 
@@ namespace mindspore { namespace dataset { namespace vision { - constexpr size_t dimension_zero = 0; constexpr size_t dimension_one = 1; constexpr size_t size_two = 2; @@ -96,10 +95,10 @@ Status RandomColorAdjustOperation::to_json(nlohmann::json *out_json) { } Status RandomColorAdjustOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Fail to find brightness"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Fail to find contrast"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Fail to find saturation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Fail to find hue"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("brightness") != op_params.end(), "Failed to find brightness"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("contrast") != op_params.end(), "Failed to find contrast"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("saturation") != op_params.end(), "Failed to find saturation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("hue") != op_params.end(), "Failed to find hue"); std::vector brightness = op_params["brightness"]; std::vector contrast = op_params["contrast"]; std::vector saturation = op_params["saturation"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc index d70e4715b22..384945c985a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_color_ir.cc @@ -64,7 +64,7 @@ Status RandomColorOperation::to_json(nlohmann::json *out_json) { } Status RandomColorOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); std::vector degrees = op_params["degrees"]; CHECK_FAIL_RETURN_UNEXPECTED(degrees.size() == 2, "The number of degrees should be 2"); float t_lb = degrees[0]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc index d2008c0018b..e9d2337662f 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_decode_resize_ir.cc @@ -79,11 +79,11 @@ Status RandomCropDecodeResizeOperation::to_json(nlohmann::json *out_json) { Status RandomCropDecodeResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio 
= op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc index 3dc38d3eec7..19611028949 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_ir.cc @@ -119,11 +119,11 @@ Status RandomCropOperation::to_json(nlohmann::json *out_json) { } Status RandomCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); std::vector size = op_params["size"]; std::vector padding = op_params["padding"]; bool pad_if_needed = op_params["pad_if_needed"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc index c264f011fcc..2329dffae52 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_crop_with_bbox_ir.cc @@ -120,11 +120,11 @@ Status RandomCropWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status RandomCropWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Fail to find padding"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Fail to find pad_if_needed"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Fail to find padding_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding") != op_params.end(), "Failed to find padding"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("pad_if_needed") != op_params.end(), "Failed to find pad_if_needed"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("padding_mode") != op_params.end(), "Failed to find padding_mode"); std::vector size = op_params["size"]; std::vector padding = op_params["padding"]; bool pad_if_needed = op_params["pad_if_needed"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc index e6aa5e199de..5654905da25 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_ir.cc @@ -50,7 +50,7 @@ Status 
RandomHorizontalFlipOperation::to_json(nlohmann::json *out_json) { } Status RandomHorizontalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc index aec39374744..703f737a218 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_horizontal_flip_with_bbox_ir.cc @@ -53,7 +53,7 @@ Status RandomHorizontalFlipWithBBoxOperation::to_json(nlohmann::json *out_json) Status RandomHorizontalFlipWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc index 174ad1fa8fe..cf95b7affd2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_posterize_ir.cc @@ -81,7 +81,7 @@ Status RandomPosterizeOperation::to_json(nlohmann::json *out_json) { } Status RandomPosterizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), 
"Fail to find bits"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("bits") != op_params.end(), "Failed to find bits"); std::vector bit_range = op_params["bits"]; *operation = std::make_shared(bit_range); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc index 80e6d79a913..c4542b534ce 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_ir.cc @@ -64,7 +64,7 @@ Status RandomResizeOperation::to_json(nlohmann::json *out_json) { } Status RandomResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc index 4dfeddb5a00..46e6b568335 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resize_with_bbox_ir.cc @@ -65,7 +65,7 @@ Status RandomResizeWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status RandomResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc index c2f04243e47..535537851d0 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_ir.cc @@ -90,11 +90,11 @@ Status RandomResizedCropOperation::to_json(nlohmann::json *out_json) { } Status RandomResizedCropOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc index 252e29015e5..e33d4dfc02c 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_resized_crop_with_bbox_ir.cc @@ -86,11 +86,11 @@ Status RandomResizedCropWithBBoxOperation::to_json(nlohmann::json *out_json) { Status RandomResizedCropWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc index 91b95ac68f7..4926d3ab574 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_rotation_ir.cc @@ -119,11 +119,11 @@ Status RandomRotationOperation::to_json(nlohmann::json *out_json) 
{ } Status RandomRotationOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Failed to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); std::vector degrees = op_params["degrees"]; InterpolationMode resample = static_cast(op_params["resample"]); bool expand = op_params["expand"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc index a2729d9e7d7..82c88eea9e1 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_sharpness_ir.cc @@ -66,7 +66,7 @@ Status RandomSharpnessOperation::to_json(nlohmann::json *out_json) { } Status RandomSharpnessOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != op_params.end(), "Fail to find degrees"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degrees") != 
op_params.end(), "Failed to find degrees"); std::vector degrees = op_params["degrees"]; *operation = std::make_shared(degrees); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc index 988c6da07ff..fecdb96acac 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_solarize_ir.cc @@ -47,7 +47,7 @@ Status RandomSolarizeOperation::ValidateParams() { MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } - for (int32_t i = 0; i < threshold_.size(); ++i) { + for (size_t i = 0; i < threshold_.size(); ++i) { if (threshold_[i] < 0 || threshold_[i] > kThresholdMax) { std::string err_msg = "RandomSolarize: threshold has to be between 0 and 255, got:" + std::to_string(threshold_[i]); @@ -74,7 +74,7 @@ Status RandomSolarizeOperation::to_json(nlohmann::json *out_json) { } Status RandomSolarizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Fail to find threshold"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("threshold") != op_params.end(), "Failed to find threshold"); std::vector threshold = op_params["threshold"]; *operation = std::make_shared(threshold); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc index 389daf2fd4a..c0442ffb217 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_ir.cc @@ -51,7 +51,7 @@ Status RandomVerticalFlipOperation::to_json(nlohmann::json *out_json) { } Status RandomVerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { 
- CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc index 2b3fa07bd0d..5c94515b518 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/random_vertical_flip_with_bbox_ir.cc @@ -54,7 +54,7 @@ Status RandomVerticalFlipWithBBoxOperation::to_json(nlohmann::json *out_json) { Status RandomVerticalFlipWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Fail to find prob"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("prob") != op_params.end(), "Failed to find prob"); float prob = op_params["prob"]; *operation = std::make_shared(prob); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc index 9c0024943b2..7e61d6212b9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rescale_ir.cc @@ -57,8 +57,8 @@ Status RescaleOperation::to_json(nlohmann::json *out_json) { } Status RescaleOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Fail to find rescale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Fail to find shift"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("rescale") != op_params.end(), "Failed to find rescale"); + 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("shift") != op_params.end(), "Failed to find shift"); float rescale = op_params["rescale"]; float shift = op_params["shift"]; *operation = std::make_shared(rescale, shift); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc index 8aeee7f82cd..50d328745bb 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_ir.cc @@ -64,8 +64,8 @@ Status ResizeOperation::to_json(nlohmann::json *out_json) { } Status ResizeOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); std::vector size = op_params["size"]; InterpolationMode interpolation = static_cast(op_params["interpolation"]); *operation = std::make_shared(size, interpolation); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc index 5c22e1894d3..48bf6cf8721 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_preserve_ar_ir.cc @@ -48,9 +48,9 @@ Status ResizePreserveAROperation::to_json(nlohmann::json *out_json) { } Status ResizePreserveAROperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Fail to find height"); - 
CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Fail to find width"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Fail to find img_orientation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("height") != op_params.end(), "Failed to find height"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("width") != op_params.end(), "Failed to find width"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("img_orientation") != op_params.end(), "Failed to find img_orientation"); int32_t height = op_params["height"]; int32_t width = op_params["width"]; int32_t img_orientation = op_params["img_orientation"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc index 2ed1877a027..05503c348e3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/resize_with_bbox_ir.cc @@ -65,8 +65,8 @@ Status ResizeWithBBoxOperation::to_json(nlohmann::json *out_json) { } Status ResizeWithBBoxOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Fail to find interpolation"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("interpolation") != op_params.end(), "Failed to find interpolation"); std::vector size = op_params["size"]; InterpolationMode interpolation = static_cast(op_params["interpolation"]); *operation = std::make_shared(size, interpolation); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc index 8c14f5d88c7..4f23dcffb07 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.cc @@ -37,6 +37,11 @@ Status RgbToBgrOperation::ValidateParams() { return Status::OK(); } std::shared_ptr RgbToBgrOperation::Build() { return std::make_shared(); } +Status RgbToBgrOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + *operation = std::make_shared(); + return Status::OK(); +} + } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h index 339e68a4d7d..82aac13c06a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_bgr_ir.h @@ -46,6 +46,8 @@ class RgbToBgrOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; + + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc index c1c1e19c228..b041ecbc902 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.cc @@ -34,6 +34,12 @@ std::string RgbToGrayOperation::Name() const { return kRgbToGrayOperation; } Status RgbToGrayOperation::ValidateParams() { return Status::OK(); } std::shared_ptr RgbToGrayOperation::Build() { return std::make_shared(); } + +Status RgbToGrayOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + *operation = std::make_shared(); + return Status::OK(); +} + } // namespace vision } // namespace dataset } // namespace mindspore diff --git 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h index f1a0135923e..45c6630073a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgb_to_gray_ir.h @@ -46,6 +46,8 @@ class RgbToGrayOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; + + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc index 394e3c7efd0..1e402873a4f 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rgba_to_bgr_ir.cc @@ -25,7 +25,6 @@ namespace mindspore { namespace dataset { - namespace vision { #ifndef ENABLE_ANDROID // RgbaToBgrOperation. 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc index 24a6ccf4c46..ff0a3d548e9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/rotate_ir.cc @@ -85,11 +85,11 @@ Status RotateOperation::to_json(nlohmann::json *out_json) { Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { #ifndef ENABLE_ANDROID - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Fail to find degree"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Fail to find resample"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Fail to find expand"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Fail to find center"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Fail to find fill_value"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("degree") != op_params.end(), "Failed to find degree"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("resample") != op_params.end(), "Failed to find resample"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("expand") != op_params.end(), "Failed to find expand"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("center") != op_params.end(), "Failed to find center"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); float degrees = op_params["degree"]; InterpolationMode resample = static_cast(op_params["resample"]); bool expand = op_params["expand"]; @@ -97,7 +97,7 @@ Status RotateOperation::from_json(nlohmann::json op_params, std::shared_ptr fill_value = op_params["fill_value"]; *operation = std::make_shared(degrees, resample, expand, center, fill_value); #else - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Fail to find 
angle_id"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("angle_id") != op_params.end(), "Failed to find angle_id"); uint64_t angle_id = op_params["angle_id"]; std::shared_ptr rotate_operation = std::make_shared(); rotate_operation.get()->setAngle(angle_id); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc index c8fefe54389..0edaa28ba53 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.cc @@ -57,6 +57,18 @@ Status SlicePatchesOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +Status SlicePatchesOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_height") != op_params.end(), "Failed to find num_height"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("num_width") != op_params.end(), "Failed to find num_width"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("slice_mode") != op_params.end(), "Failed to find slice_mode"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("fill_value") != op_params.end(), "Failed to find fill_value"); + int32_t num_height = op_params["num_height"]; + int32_t num_width = op_params["num_width"]; + SliceMode slice_mode = static_cast(op_params["slice_mode"]); + uint8_t fill_value = op_params["fill_value"]; + *operation = std::make_shared(num_height, num_width, slice_mode, fill_value); + return Status::OK(); +} } // namespace vision } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h index e65954d3d85..b7b00d86b2d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/slice_patches_ir.h @@ -48,6 +48,8 @@ 
class SlicePatchesOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: int32_t num_height_; int32_t num_width_; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc index 80e130de420..c939aa426d9 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_random_crop_resize_jpeg_ir.cc @@ -44,7 +44,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeRandomCropResizeJpeg", size_)); constexpr int32_t value_one = 1; constexpr int32_t value_two = 2; - for (int32_t i = 0; i < size_.size(); i++) { + for (size_t i = 0; i < size_.size(); i++) { if (size_[i] % value_two == value_one) { std::string err_msg = "SoftDvppDecodeRandomCropResizeJpeg: size[" + std::to_string(i) + "] must be even values, got: " + std::to_string(size_[i]); @@ -96,10 +96,10 @@ Status SoftDvppDecodeRandomCropResizeJpegOperation::to_json(nlohmann::json *out_ Status SoftDvppDecodeRandomCropResizeJpegOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != op_params.end(), "Fail to find scale"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Fail to find ratio"); - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Fail to find max_attempts"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("scale") != 
op_params.end(), "Failed to find scale"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("ratio") != op_params.end(), "Failed to find ratio"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("max_attempts") != op_params.end(), "Failed to find max_attempts"); std::vector size = op_params["size"]; std::vector scale = op_params["scale"]; std::vector ratio = op_params["ratio"]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc index c00b0d6ddd1..fc1b320438a 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/softdvpp_decode_resize_jpeg_ir.cc @@ -38,7 +38,7 @@ Status SoftDvppDecodeResizeJpegOperation::ValidateParams() { RETURN_IF_NOT_OK(ValidateVectorSize("SoftDvppDecodeResizeJpeg", size_)); constexpr int32_t value_one = 1; constexpr int32_t value_two = 2; - for (int32_t i = 0; i < size_.size(); i++) { + for (size_t i = 0; i < size_.size(); i++) { if (size_[i] % value_two == value_one) { std::string err_msg = "SoftDvppDecodeResizeJpeg: size[" + std::to_string(i) + "] must be even values, got: " + std::to_string(size_[i]); @@ -74,7 +74,7 @@ Status SoftDvppDecodeResizeJpegOperation::to_json(nlohmann::json *out_json) { Status SoftDvppDecodeResizeJpegOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { - CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Fail to find size"); + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("size") != op_params.end(), "Failed to find size"); std::vector size = op_params["size"]; *operation = std::make_shared(size); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc index f12774aadd1..42989e66b42 100644 --- 
a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.cc @@ -39,6 +39,12 @@ std::shared_ptr VerticalFlipOperation::Build() { std::shared_ptr tensor_op = std::make_shared(); return tensor_op; } + +Status VerticalFlipOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + *operation = std::make_shared(); + return Status::OK(); +} + #endif } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h index 35ecf11b683..2c518effba7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/vision/vertical_flip_ir.h @@ -43,6 +43,8 @@ class VerticalFlipOperation : public TensorOperation { Status ValidateParams() override; std::string Name() const override; + + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h index d00a5914820..8c4308d41f6 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/tensor_op.h @@ -53,6 +53,7 @@ namespace dataset { constexpr char kTensorOp[] = "TensorOp"; // image +constexpr char kAdjustGammaOp[] = "AdjustGammaOp"; constexpr char kAffineOp[] = "AffineOp"; constexpr char kAutoContrastOp[] = "AutoContrastOp"; constexpr char kBoundingBoxAugmentOp[] = "BoundingBoxAugmentOp"; @@ -137,7 +138,14 @@ constexpr char kRandomSelectSubpolicyOp[] = "RandomSelectSubpolicyOp"; constexpr char kSentencepieceTokenizerOp[] = "SentencepieceTokenizerOp"; // audio +constexpr char kAllpassBiquadOp[] = "AllpassBiquadOp"; +constexpr char kAmplitudeToDBOp[] = "AmplitudeToDBOp"; +constexpr char kAngleOp[] = "AngleOp"; constexpr char 
kBandBiquadOp[] = "BandBiquadOp"; +constexpr char kBandpassBiquadOp[] = "BandpassBiquadOp"; +constexpr char kBandrejectBiquadOp[] = "BandrejectBiquadOp"; +constexpr char kBassBiquadOp[] = "BassBiquadOp"; +constexpr char kTimeStretchOp[] = "TimeStretchOp"; // data constexpr char kConcatenateOp[] = "ConcatenateOp"; diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc index 64c7dacb188..028111bfea2 100644 --- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc @@ -396,6 +396,13 @@ Status ToNumberOperation::to_json(nlohmann::json *out_json) { return Status::OK(); } +Status ToNumberOperation::from_json(nlohmann::json op_params, std::shared_ptr *operation) { + CHECK_FAIL_RETURN_UNEXPECTED(op_params.find("data_type") != op_params.end(), "Failed to find data_type"); + std::string data_type = op_params["data_type"]; + *operation = std::make_shared(data_type); + return Status::OK(); +} + // TruncateSequencePairOperation TruncateSequencePairOperation::TruncateSequencePairOperation(int32_t max_length) : max_length_(max_length) {} diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h index 8b2cee15618..43dbe213584 100644 --- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h +++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.h @@ -288,6 +288,8 @@ class ToNumberOperation : public TensorOperation { Status to_json(nlohmann::json *out_json) override; + static Status from_json(nlohmann::json op_params, std::shared_ptr *operation); + private: DataType data_type_; }; diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc index 21e223be24e..59a68116912 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc +++ 
b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc @@ -223,7 +223,7 @@ MSRStatus ShardIndexGenerator::CreateShardNameTable(sqlite3 *db, const std::stri sql = "INSERT INTO SHARD_NAME (NAME) VALUES (:SHARD_NAME);"; sqlite3_stmt *stmt = nullptr; if (sqlite3_prepare_v2(db, common::SafeCStr(sql), -1, &stmt, 0) != SQLITE_OK) { - if (stmt) { + if (stmt != nullptr) { (void)sqlite3_finalize(stmt); } MS_LOG(ERROR) << "SQL error: could not prepare statement, sql: " << sql; diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index aff17e3efc5..f182d503b1e 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -877,7 +877,9 @@ std::pair> ShardReader::GetLabels(int page_id, int sqlite3_free(errmsg); } std::vector ret; - for (unsigned int i = 0; i < labels_ptr->size(); ++i) ret.emplace_back(json{}); + for (unsigned int i = 0; i < labels_ptr->size(); ++i) { + (void)ret.emplace_back(json{}); + } for (unsigned int i = 0; i < labels_ptr->size(); ++i) { json construct_json; for (unsigned int j = 0; j < columns.size(); ++j) { diff --git a/mindspore/ccsrc/pipeline/jit/action.cc b/mindspore/ccsrc/pipeline/jit/action.cc index 6460b9786dc..df4f77a8f18 100644 --- a/mindspore/ccsrc/pipeline/jit/action.cc +++ b/mindspore/ccsrc/pipeline/jit/action.cc @@ -121,6 +121,28 @@ using CompileGraphs = compile::CompileGraphs; using abstract::AnalysisResult; using mindspore::abstract::AnalysisContextPtr; +inline bool ResetCNodeFromLoad(const AnfNodePtr &node) { + if (node->isa() && node->cast()->get_load_flag()) { + // Process partial("DeadNode",args) when the graph is loaded. 
+ auto operatorPtr = node->cast()->input(0); + // Set abstract of switch(c,f,t) to null + auto prim = GetValueNode(operatorPtr); + if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) { + node->set_abstract(nullptr); + return true; + } + // Set abstract of switch(c,f,t)() to null + prim = GetCNodePrimitive(operatorPtr); + if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) { + node->set_abstract(nullptr); + return true; + } + // Previous inferred value + return true; + } + return false; +} + abstract::AnalysisResult AbstractAnalyze(const ResourcePtr &res, const FuncGraphPtr &func_graph, const abstract::AbstractBasePtrList &args_spec, bool clear) { MS_LOG(DEBUG) << "AbstractAnalyze start"; @@ -133,10 +155,19 @@ abstract::AnalysisResult AbstractAnalyze(const ResourcePtr &res, const FuncGraph for (auto &node : manager->all_nodes()) { MS_EXCEPTION_IF_NULL(node); const AbstractBasePtr &prev_inferred = node->abstract(); - // Keep previous inferred value for CNode if is loaded from MindIR. - if (node->isa() && node->cast()->get_load_flag()) { + + // AbstractFunction has context,but contexts in cache have been cleaned. + if (prev_inferred != nullptr && prev_inferred->isa()) { + node->set_abstract(nullptr); + MS_LOG(DEBUG) << "Abstract of node " << node->ToString() << " is set to nullptr"; continue; } + + // Handle previous inferred value for CNode if is loaded from MindIR + if (res->is_load() && ResetCNodeFromLoad(node)) { + continue; + } + // Keep previous inferred value for ValueNode if the inferred value is not AbstractFunction. 
if (!node->isa() || (prev_inferred != nullptr && prev_inferred->isa())) { node->set_abstract(nullptr); @@ -200,6 +231,7 @@ const FuncGraphPtr GetLoadedGraph(const ResourcePtr &res) { if (graph->has_attr("is_load")) { loaded_graph = graph; loaded_graph_num += 1; + res->set_is_load(true); } } if (loaded_graph_num == 0) { @@ -218,6 +250,8 @@ void CheckRootInputShapeAndType(const ResourcePtr &res, const FuncGraphPtr &load FuncGraphPtr root_graph = *(manager->roots().begin()); auto root_inputs = root_graph->get_inputs(); auto loaded_inputs = loaded_graph->get_inputs(); + MS_LOG(DEBUG) << "root_graph: " << root_graph->ToString(); + MS_LOG(DEBUG) << "loaded_graph: " << loaded_graph->ToString(); size_t root_inputs_num = root_inputs.size(); size_t loaded_inputs_num = loaded_inputs.size(); @@ -229,10 +263,18 @@ void CheckRootInputShapeAndType(const ResourcePtr &res, const FuncGraphPtr &load auto root_input = root_inputs[index]; auto loaded_input = loaded_inputs[index]; + MS_LOG(DEBUG) << "root_input[" << index << "]: " << root_input->DebugString(1); + MS_LOG(DEBUG) << "loaded_input[" << index << "]: " << loaded_input->DebugString(1); + MS_LOG(DEBUG) << "root_input abstract[" << index + << "]: " << (root_input->abstract() ? root_input->abstract()->ToString() : "NULL"); + MS_LOG(DEBUG) << "loaded_input abstract [" << index + << "]: " << (loaded_input->abstract() ? loaded_input->abstract()->ToString() : "NULL"); + auto root_shape = root_input->Shape() == nullptr ? nullptr : dyn_cast(root_input->Shape()); auto loaded_shape = loaded_input->Shape() == nullptr ? nullptr : dyn_cast(loaded_input->Shape()); auto root_type = root_input->Type() == nullptr ? nullptr : dyn_cast(root_input->Type()); auto loaded_type = loaded_input->Type() == nullptr ? 
nullptr : dyn_cast(loaded_input->Type()); + MS_EXCEPTION_IF_NULL(root_shape); MS_EXCEPTION_IF_NULL(loaded_shape); MS_EXCEPTION_IF_NULL(root_type); @@ -454,6 +496,7 @@ bool AbstractSpecializeAction(const ResourcePtr &res) { } // Analyze AnalysisResult result = AbstractAnalyze(res, func_graph, args_spec); + // The top graph may be replaced by infer, update the top graph when the infer is done parse::Parser::UpdateTopFuncGraph(result.context->func_graph()); diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc index 1ba42b20733..9e9110f2fbf 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/function_block.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.cc @@ -20,6 +20,7 @@ #include #include +#include #include "pybind11/pybind11.h" #include "pipeline/jit/parse/resolve.h" @@ -329,10 +330,10 @@ bool FunctionBlock::CollectRemovablePhi(const ParameterPtr &phi) { // A block should be marked matured if its predecessor blocks have been processed void FunctionBlock::Mature() { - const auto &graphParamVec = func_graph_->parameters(); - for (auto &paramItr : graphParamVec) { - MS_EXCEPTION_IF_NULL(paramItr); - auto param = paramItr->cast(); + const auto &graph_params = func_graph_->parameters(); + for (auto &param_itr : graph_params) { + MS_EXCEPTION_IF_NULL(param_itr); + auto param = param_itr->cast(); if (phi_nodes_.find(param) != phi_nodes_.cend()) { SetPhiArgument(param); } @@ -356,7 +357,7 @@ CNodePtr FunctionBlock::ForceToWhileCond(const AnfNodePtr &cond) { } // Perform a jump from this block to target block -void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const AnfNodePtr &node) { +void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const std::vector &args) { MS_EXCEPTION_IF_NULL(target_block); if (func_graph_->get_return() != nullptr) { MS_LOG(EXCEPTION) << "Failure: have return node! 
NodeInfo: " @@ -364,9 +365,7 @@ void FunctionBlock::Jump(const FunctionBlockPtr &target_block, const AnfNodePtr } std::vector input_nodes; input_nodes.emplace_back(NewValueNode(target_block->func_graph())); - if (node != nullptr) { - input_nodes.emplace_back(node); - } + (void)std::copy(args.begin(), args.end(), std::back_inserter(input_nodes)); CNodePtr jump = func_graph_->NewCNodeInOrder(input_nodes); jumps_[target_block.get()] = jump; diff --git a/mindspore/ccsrc/pipeline/jit/parse/function_block.h b/mindspore/ccsrc/pipeline/jit/parse/function_block.h index b9a26193ceb..ff45747c828 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/function_block.h +++ b/mindspore/ccsrc/pipeline/jit/parse/function_block.h @@ -57,7 +57,7 @@ class FunctionBlock : public std::enable_shared_from_this { void Mature(); CNodePtr ForceToBoolNode(const AnfNodePtr &cond); CNodePtr ForceToWhileCond(const AnfNodePtr &cond); - void Jump(const FunctionBlockPtr &block, const AnfNodePtr &node); + void Jump(const FunctionBlockPtr &block, const std::vector &args); AnfNodePtr SearchReplaceNode(const std::string &var, const ParameterPtr &phi); void ConditionalJump(AnfNodePtr condNode, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock, bool unroll_loop = true); diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.cc b/mindspore/ccsrc/pipeline/jit/parse/parse.cc index e70ff90493a..37d4cede426 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/parse.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.cc @@ -130,6 +130,8 @@ void Parser::BuildMethodMap() { expr_method_map_["UnaryOp"] = &Parser::ParseUnaryOp; expr_method_map_["Dict"] = &Parser::ParseDict; expr_method_map_["Ellipsis"] = &Parser::ParseEllipsis; + expr_method_map_["ListComp"] = &Parser::ParseListComp; + expr_method_map_["GeneratorExp"] = &Parser::ParseListComp; // We treat 'GeneratorExp' the same as 'ListComp'. 
} void Parser::UpdateTopFuncGraph(const FuncGraphPtr &func_graph) { top_func_graph_ = FuncGraphWeakPtr(func_graph); } @@ -156,8 +158,8 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr &as } py::object node = ast->GetAstNode(); py::list ret = ast->CallParserObjMethod(PYTHON_PARSE_GET_LOCATION, node); - constexpr auto kMinListSize = 2; - if (ret.size() < kMinListSize) { + constexpr auto min_list_size = 2; + if (ret.size() < min_list_size) { MS_LOG(EXCEPTION) << "list size:" << ret.size() << " is less than 2."; } py::str desc = @@ -169,18 +171,15 @@ void CheckFuncReturn(const FuncGraphPtr &fn, const std::shared_ptr &as FuncGraphPtr Parser::ParseFuncGraph() { // Get ast FunctionDef node py::object node = ast_->GetAstNode(); - FunctionBlockPtr pFnBlock = ParseFunction(node); + FunctionBlockPtr fn_block = ParseFunction(node); if (errcode() != PARSE_SUCCESS) { MS_LOG(ERROR) << "Parse function error, code is " << errcode(); return nullptr; } - RemoveUnnecessaryPhis(); - - MS_EXCEPTION_IF_NULL(pFnBlock); - CheckFuncReturn(pFnBlock->func_graph(), ast_); - - return pFnBlock->func_graph(); + MS_EXCEPTION_IF_NULL(fn_block); + CheckFuncReturn(fn_block->func_graph(), ast_); + return fn_block->func_graph(); } void Parser::GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &fn_node) { @@ -261,14 +260,14 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo // The node created in the parsefunction context, will inherit the scope created using scope_guard ScopeGuard scope_guard(scope); TraceGuard trace_guard(data_converter::GetObjKey(ast_->obj())[0], GetLocation(node)); - FunctionBlockPtr pFunBlock = MakeFunctionBlock(*this); + FunctionBlockPtr func_block = MakeFunctionBlock(*this); if (block != nullptr) { - pFunBlock->AddPrevBlock(block); + func_block->AddPrevBlock(block); } else { - func_graph_ = pFunBlock->func_graph(); + func_graph_ = func_block->func_graph(); } - pFunBlock->Mature(); - auto current_fg = 
pFunBlock->func_graph(); + func_block->Mature(); + auto current_fg = func_block->func_graph(); auto function_name = py::cast(python_adapter::GetPyObjAttr(node, "name")); MS_LOG(DEBUG) << "The function name is " << function_name; current_fg->debug_info()->set_name(function_name); @@ -286,27 +285,27 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo MS_LOG(ERROR) << "Set flags failed"; return nullptr; } - GenerateArgsNodeForFunction(pFunBlock, node); + GenerateArgsNodeForFunction(func_block, node); // When parsing the top graph of construct, save the top graph if (GetTopFuncGraph() == nullptr) { - UpdateTopFuncGraph(pFunBlock->func_graph()); + UpdateTopFuncGraph(func_block->func_graph()); } // Save the function node to block - pFunBlock->WriteVariable(function_name, NewValueNode(current_fg)); + func_block->WriteVariable(function_name, NewValueNode(current_fg)); py::object funcObj = python_adapter::GetPyObjAttr(node, "body"); - (void)ParseStatements(pFunBlock, funcObj); + (void)ParseStatements(func_block, funcObj); // Add unused variables as isolate nodes. - for (auto &func_block : func_block_list_) { - MS_EXCEPTION_IF_NULL(func_block); - if (func_block->func_graph()->get_return() != nullptr) { + for (auto &func_block_item : func_block_list_) { + MS_EXCEPTION_IF_NULL(func_block_item); + if (func_block_item->func_graph()->get_return() != nullptr) { // Find unused variables. - func_block->FindIsolatedNodes(); + func_block_item->FindIsolatedNodes(); // Attach all isolated nodes. 
- func_block->AttachIsolatedNodesBeforeReturn(); + func_block_item->AttachIsolatedNodesBeforeReturn(); } } @@ -315,8 +314,8 @@ FunctionBlockPtr Parser::ParseFunction(const py::object &node, const FunctionBlo py::str desc = python_adapter::CallPyModFn(ast_->module(), PYTHON_MOD_GET_OBJECT_DESCRIPTION, node, ret[0], ret[1]); MS_EXCEPTION(TypeError) << "Missing return statement in " << desc.cast() << "."; } - GenerateArgsDefaultValueForFunction(pFunBlock, node); - return pFunBlock; + GenerateArgsDefaultValueForFunction(func_block, node); + return func_block; } FunctionBlockPtr Parser::ParseStatements(FunctionBlockPtr block, const py::object &nodes) { @@ -461,14 +460,14 @@ FunctionBlockPtr Parser::ParseReturn(const FunctionBlockPtr &block, const py::ob MS_LOG(DEBUG) << "Process ast return"; MS_EXCEPTION_IF_NULL(block); // Create return valuenode - AnfNodePtr pReturnValueNode = NewValueNode(prim::kPrimReturn); + AnfNodePtr return_value_node = NewValueNode(prim::kPrimReturn); // Parse the return Statements value py::object value = python_adapter::GetPyObjAttr(node, "value"); - AnfNodePtr pReturnStatementNode = ParseExprNode(block, value); + AnfNodePtr return_expr_node = ParseExprNode(block, value); // Create the cnode auto block_fg = block->func_graph(); - CNodePtr pReturnCNode = block_fg->NewCNodeInOrder({pReturnValueNode, pReturnStatementNode}); - block_fg->set_return(pReturnCNode); + CNodePtr return_node = block_fg->NewCNodeInOrder({return_value_node, return_expr_node}); + block_fg->set_return(return_node); return block; } @@ -583,6 +582,7 @@ AnfNodePtr Parser::ParseNameConstant(const FunctionBlockPtr &, const py::object errcode_ = PARSE_NODE_TYPE_UNKNOWN; MS_LOG(EXCEPTION) << "Unsupported NameConstant type: " << (std::string)py::str(obj); } + AnfNodePtr Parser::GenerateMakeTuple(const FunctionBlockPtr &block, const std::vector &element_nodes) { MS_EXCEPTION_IF_NULL(block); AnfNodePtr make_tuple_op = block->MakeResolveOperation(NAMED_PRIMITIVE_MAKETUPLE); @@ -1117,18 
+1117,18 @@ FunctionBlockPtr Parser::ParseIf(const FunctionBlockPtr &block, const py::object py::object bodyNode = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr true_end = ParseStatements(true_block, bodyNode); - // If the return_ is set ,it has its own continuation block + // If the return_ is set, it has its own continuation block if (true_end->func_graph()->get_return() == nullptr) { - true_end->Jump(after_block, nullptr); + true_end->Jump(after_block, {}); } // Process the orelse branch py::object orelseNode = python_adapter::GetPyObjAttr(node, "orelse"); FunctionBlockPtr false_end = ParseStatements(false_block, orelseNode); - // If the return_ is set ,it has its own continuation block + // If the return_ is set, it has its own continuation block if (false_end->func_graph()->get_return() == nullptr) { - false_end->Jump(after_block, nullptr); + false_end->Jump(after_block, {}); } block->ConditionalJump(bool_node, true_block, false_block); @@ -1158,7 +1158,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj body_block->AddPrevBlock(header_block); after_block->AddPrevBlock(header_block); - block->Jump(header_block, nullptr); + block->Jump(header_block, {}); py::object test_node = python_adapter::GetPyObjAttr(node, "test"); AnfNodePtr condition_node = ParseExprNode(header_block, test_node); @@ -1171,7 +1171,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body = ParseStatements(body_block, body_node); if (after_body->func_graph()->get_return() == nullptr) { - after_body->Jump(header_block, nullptr); + after_body->Jump(header_block, {}); } header_block->Mature(); @@ -1179,7 +1179,7 @@ FunctionBlockPtr Parser::ParseWhile(const FunctionBlockPtr &block, const py::obj auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. 
- after_block->Jump(end_block, nullptr); + after_block->Jump(end_block, {}); end_block->Mature(); return end_block; } @@ -1200,16 +1200,17 @@ CNodePtr Parser::GenerateCondInFor(const ParameterPtr &iter_param, const Functio return header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param}); } -FunctionBlockPtr Parser::GenerateBlockInFor(const TraceInfoPtr &trace_info) { +FunctionBlockPtr Parser::GenerateBlock(const TraceInfoPtr &trace_info) { TraceGuard trace_guard(trace_info); - FunctionBlockPtr body_block = MakeFunctionBlock(*this); - return body_block; + FunctionBlockPtr block = MakeFunctionBlock(*this); + MS_EXCEPTION_IF_NULL(block); + return block; } int64_t Parser::GetForTransToWhileLoop() { // int64 support 63bits positive num mostly. - constexpr auto kMaxNumLength = 10; - if (max_for_loop_count_str_.size() > kMaxNumLength || max_for_loop_count_str_.empty()) { + constexpr auto max_num_length = 10; + if (max_for_loop_count_str_.size() > max_num_length || max_for_loop_count_str_.empty()) { return MAX_FOR_LOOP_COUNT; } if (std::any_of(max_for_loop_count_str_.begin(), max_for_loop_count_str_.end(), @@ -1222,6 +1223,7 @@ int64_t Parser::GetForTransToWhileLoop() { ss >> loop_count; return loop_count; } + // A for loop will generate 3 functions :the test, the body, and the continuation // for x in xs: // body @@ -1260,10 +1262,10 @@ FunctionBlockPtr Parser::ParseFor(const FunctionBlockPtr &block, const py::objec } FunctionBlockPtr true_end = ParseForIter(true_block, node); - true_end->Jump(after_block, nullptr); + true_end->Jump(after_block, {}); FunctionBlockPtr false_end = ParseForLoop(false_block, node); - false_end->Jump(after_block, nullptr); + false_end->Jump(after_block, {}); block->ConditionalJump(bool_node, true_block, false_block); after_block->Mature(); @@ -1288,14 +1290,13 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o // Generate the iterator apply CNodePtr iter_apply = GenerateIteratorInFor(block, node, 
op_iter); MS_EXCEPTION_IF_NULL(iter_apply); - FunctionBlockPtr header_block = - GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr header_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(header_block); // Generate the hasnext apply which is a condition ParameterPtr iter_param = header_block->func_graph()->add_parameter(); CNodePtr cond_apply = GenerateCondInFor(iter_param, header_block, op_hasnext); // Generate the body of the for statement - FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(body_block); body_block->AddPrevBlock(header_block); // Generate the iterator next apply @@ -1323,7 +1324,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o MS_EXCEPTION_IF_NULL(after_block); after_block->AddPrevBlock(header_block); - block->Jump(header_block, iter_apply); + block->Jump(header_block, {iter_apply}); body_block->Mature(); header_block->ConditionalJump(cond_apply, body_block, after_block); @@ -1332,7 +1333,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); if (after_body_block->func_graph()->get_return() == nullptr) { - after_body_block->Jump(header_block, iter2_app); + after_body_block->Jump(header_block, {iter2_app}); } header_block->Mature(); @@ -1340,7 +1341,7 @@ FunctionBlockPtr Parser::ParseForIter(const FunctionBlockPtr &block, const py::o auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. 
- after_block->Jump(end_block, nullptr); + after_block->Jump(end_block, {}); end_block->Mature(); return end_block; } @@ -1377,8 +1378,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr len_iter = block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, scalar_len}); - FunctionBlockPtr header_block = - GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr header_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(header_block); // Create loop variable 'i' ParameterPtr loop_var = header_block->func_graph()->add_parameter(); @@ -1388,7 +1388,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr cond_node = header_block->func_graph()->NewCNodeInOrder({less_node, loop_var, len_iter}); // Generate the body of the for statement - FunctionBlockPtr body_block = GenerateBlockInFor(std::make_shared(block->func_graph()->debug_info())); + FunctionBlockPtr body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); MS_EXCEPTION_IF_NULL(body_block); body_block->AddPrevBlock(header_block); // Create 'x = xs[i]' @@ -1419,7 +1419,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o CNodePtr zero_tensor = block->func_graph()->NewCNodeInOrder({scalar_to_tensor_node, NewValueNode(static_cast(0))}); - block->Jump(header_block, zero_tensor); + block->Jump(header_block, {zero_tensor}); body_block->Mature(); header_block->ConditionalJump(cond_node, body_block, after_block, false); @@ -1429,7 +1429,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o py::object body_node = python_adapter::GetPyObjAttr(node, "body"); FunctionBlockPtr after_body_block = ParseStatements(body_block, body_node); if (after_body_block->func_graph()->get_return() == nullptr) { - after_body_block->Jump(header_block, loop_var_inc); + 
after_body_block->Jump(header_block, {loop_var_inc}); } header_block->Mature(); @@ -1437,7 +1437,7 @@ FunctionBlockPtr Parser::ParseForLoop(const FunctionBlockPtr &block, const py::o auto &end_block = loop_context.EndBlock(); if (end_block) { // end_block exists if we encounter 'break' in loop body. - after_block->Jump(end_block, nullptr); + after_block->Jump(end_block, {}); end_block->Mature(); return end_block; } @@ -1489,6 +1489,155 @@ AnfNodePtr Parser::ParseIfExp(const FunctionBlockPtr &block, const py::object &n return switch_app_call; } +FunctionBlockPtr Parser::ParseListCompIter(const FunctionBlockPtr &block, const py::object &node, + const py::object &generator_node) { + // Create a header block. + FunctionBlockPtr top_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + // Handle iter attribute. + py::object iter_node = python_adapter::GetPyObjAttr(generator_node, "iter"); + AnfNodePtr iter_anf_node = ParseExprNode(block, iter_node); + AnfNodePtr op_iter = top_block->MakeResolveOperation(NAMED_PRIMITIVE_ITER); + CNodePtr iter_apply = top_block->func_graph()->NewCNodeInOrder({op_iter, iter_anf_node}); + + // Create header graph. + FunctionBlockPtr list_header_block = + GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + list_header_block->AddPrevBlock(top_block); + + // Create hasNext apply. + AnfNodePtr op_hasnext = top_block->MakeResolveOperation(NAMED_PRIMITIVE_HASNEXT); + ParameterPtr iter_param = list_header_block->func_graph()->add_parameter(); + constexpr auto iter_param_name = "iter"; + iter_param->set_name(iter_param_name); + iter_param->debug_info()->set_name(iter_param_name); + CNodePtr cond_apply = list_header_block->func_graph()->NewCNodeInOrder({op_hasnext, iter_param}); + + // Call the header graph with iter. 
+ ParameterPtr list_param = list_header_block->func_graph()->add_parameter(); + constexpr auto list_param_name = "list"; + list_param->set_name(list_param_name); + list_param->debug_info()->set_name(list_param_name); + auto empty_list = std::vector(); + AnfNodePtr empty_list_node = NewValueNode(std::make_shared(empty_list)); + top_block->Jump(list_header_block, {iter_apply, empty_list_node}); + + // Create body graph. + FunctionBlockPtr list_body_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + list_body_block->AddPrevBlock(list_header_block); + AnfNodePtr op_next = top_block->MakeResolveOperation(NAMED_PRIMITIVE_NEXT); + CNodePtr next_apply = list_body_block->func_graph()->NewCNodeInOrder({op_next, iter_param}); + AnfNodePtr op_getitem = top_block->MakeResolveOperation(NAMED_PRIMITIVE_GETITEM); + CNodePtr item_apply = + list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast(0))}); + CNodePtr new_iter = + list_body_block->func_graph()->NewCNodeInOrder({op_getitem, next_apply, NewValueNode(static_cast(1))}); + + // Save the `target` in a variable. + py::object gen_target_node = python_adapter::GetPyObjAttr(generator_node, "target"); + WriteAssignVars(list_body_block, gen_target_node, item_apply); + + auto ifs_new_list = ParseListCompIfs(list_body_block, list_param, node, generator_node); + list_body_block->Jump(list_header_block, {new_iter, ifs_new_list}); + + // Create after graph. + FunctionBlockPtr list_after_block = GenerateBlock(std::make_shared(block->func_graph()->debug_info())); + list_after_block->AddPrevBlock(list_header_block); + // Return the list in after graph. + list_after_block->func_graph()->set_output(list_param); + + // Run the branches. 
+ list_header_block->ConditionalJump(cond_apply, list_body_block, list_after_block); + + top_block->Mature(); + list_header_block->Mature(); + list_body_block->Mature(); + list_after_block->Mature(); + return top_block; +} + +AnfNodePtr Parser::ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param, + const py::object &node, const py::object &generator_node) { + // Handle ifs attribute. + py::list ifs_node = python_adapter::GetPyObjAttr(generator_node, "ifs"); + AnfNodePtr ifs_bool_node; + if (ifs_node.empty()) { + ifs_bool_node = NewValueNode(true); + } else { + ifs_bool_node = ProcessBoolOpValueList(list_body_block, ifs_node, AST_SUB_TYPE_AND); + } + + // Create if-true graph. + FunctionBlockPtr if_true_block = + GenerateBlock(std::make_shared(list_body_block->func_graph()->debug_info())); + if_true_block->AddPrevBlock(list_body_block); + // Handle elt attribute in body block. + py::object elt_obj = python_adapter::GetPyObjAttr(node, "elt"); + AnfNodePtr elt_node = ParseExprNode(list_body_block, elt_obj); + // Append the element. + auto list_append_op = prim::kPrimListAppend; + auto new_list = list_body_block->func_graph()->NewCNodeInOrder({NewValueNode(list_append_op), list_param, elt_node}); + // Return new list in true branch graph. + if_true_block->func_graph()->set_output(new_list); + + // Create if-false graph. + FunctionBlockPtr if_false_block = + GenerateBlock(std::make_shared(list_body_block->func_graph()->debug_info())); + if_false_block->AddPrevBlock(list_body_block); + // Return original list in false branch graph. + if_false_block->func_graph()->set_output(list_param); + + // We don't want to create a header graph, where to get and wrap the result of Switch(). + // So just call ConditionalJump() to set Switch() as output, and reset it later, as tricky. + list_body_block->ConditionalJump(ifs_bool_node, if_true_block, if_false_block); + // Output is Switch() result, i.e. updated list. 
+ auto switch_apply_node = list_body_block->func_graph()->output(); + auto ifs_new_list = switch_apply_node; + // Since we call ConditionalJump() above, to reset the Return as null before call Jump(). + list_body_block->func_graph()->set_return(nullptr); + if_true_block->Mature(); + if_false_block->Mature(); + return ifs_new_list; +} + +// A ListComp contains: `elt` and `generators`. +// `generators` contains: `target`, `iter` and `ifs`. +// For example: +// [x * x for x in range(0, 10) if x % 2 == 0] +// It is compiled to be following statement: +// list = [] +// for x in range(0, 10): +// if x % 2 == 0: +// list.append(x * x) +// return list +AnfNodePtr Parser::ParseListComp(const FunctionBlockPtr &block, const py::object &node) { + MS_LOG(DEBUG) << "Process ast ListComp"; + MS_EXCEPTION_IF_NULL(block); + + // Handle generators attribute. + py::list generators_node = python_adapter::GetPyObjAttr(node, "generators"); + if (generators_node.size() != 1) { + MS_EXCEPTION(TypeError) << "The `generators` supports one `comprehension` in ListComp/GeneratorExp, but got " + << generators_node.size() << " comprehensions."; + } + py::object generator_node = generators_node[0]; + auto generator_node_type = ast_->GetNodeType(generator_node); + auto generator_node_name = generator_node_type->node_name(); + constexpr auto comprehension_name = "comprehension"; + if (generator_node_name != comprehension_name) { + MS_LOG(EXCEPTION) << "Generator node name should be " << comprehension_name << ", but got " << generator_node_name; + } + + // Parse ListComp's `iter` and add `elt` in it. + auto top_block = ParseListCompIter(block, node, generator_node); + + // Call the top graph and return the list. 
+ auto call_function_anf_node = NewValueNode(top_block->func_graph()); + std::vector func_call_nodes; + func_call_nodes.push_back(call_function_anf_node); + AnfNodePtr output = block->func_graph()->NewCNodeInOrder(func_call_nodes); + return output; +} + void Parser::HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node) { MS_EXCEPTION_IF_NULL(block); MS_EXCEPTION_IF_NULL(assigned_node); @@ -1644,7 +1793,7 @@ FunctionBlockPtr Parser::ParseBreak(const FunctionBlockPtr &block, const py::obj loop.end = MakeFunctionBlock(*this); } // Jump to the end_block. - block->Jump(loop.end, nullptr); + block->Jump(loop.end, {}); return block; } @@ -1655,7 +1804,11 @@ FunctionBlockPtr Parser::ParseContinue(const FunctionBlockPtr &block, const py:: } // Jump to the header of the loop with iterator called. Loop &loop = loops_.top(); - block->Jump(loop.header, loop.iterator); + std::vector args; + if (loop.iterator != nullptr) { + args.emplace_back(loop.iterator); + } + block->Jump(loop.header, args); return block; } diff --git a/mindspore/ccsrc/pipeline/jit/parse/parse.h b/mindspore/ccsrc/pipeline/jit/parse/parse.h index a62090e1e6e..06a2dde140c 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/parse.h +++ b/mindspore/ccsrc/pipeline/jit/parse/parse.h @@ -38,19 +38,19 @@ namespace parse { // Parse status define enum ParseStatusCode : int64_t { PARSE_SUCCESS = 0, - PARSE_FUNCTION_IS_NULL, // python function is null - PARSE_PARAMETER_INVALID, // parameter is invalid - PARSE_NO_RETURN, // function no return node - PARSE_NODE_TYPE_NO_MATCH, // ast node type is error - PARSE_NODE_TYPE_UNKNOWN, // node type is unknown - PARSE_NODE_METHOD_UNSUPPORTED, // no method to parse the node - PARSE_DONT_RESOLVE_SYMBOL, // can't resolve the string - PARSE_NOT_SUPPORTED_COMPARE_EXPR, // the comparison is not supported + PARSE_FUNCTION_IS_NULL, // Python function is null + PARSE_PARAMETER_INVALID, // Parameter is invalid + PARSE_NO_RETURN, // Function no 
return node + PARSE_NODE_TYPE_NO_MATCH, // Ast node type is error + PARSE_NODE_TYPE_UNKNOWN, // Node type is unknown + PARSE_NODE_METHOD_UNSUPPORTED, // No method to parse the node + PARSE_DONT_RESOLVE_SYMBOL, // Can't resolve the string + PARSE_NOT_SUPPORTED_COMPARE_EXPR, // The comparison is not supported PARSE_FAILURE = 0xFF }; -// max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it -// will be sunk(i.e. not unrolled) +// Max loop count of for statement, when loop count is less then this value, the for loop will be unrolled, otherwise it +// will be sunk(i.e. not unrolled) // NOTE: Since when the for loop was unrolled, it depends backend operators `tuple_getitem` and `scalar_add` which were // not implemented, so here set MAX_FOR_LOOP_COUNT to int64_t max limit to override default value `600`. This will make // the for loop will always be unrolled, but don't worry about the memory were exhausted, an exception will be raised @@ -97,7 +97,7 @@ class Parser { FuncGraphPtr func_graph() const { return func_graph_; } ParseStatusCode errcode() const { return errcode_; } std::shared_ptr ast() const { return ast_; } - // get location info from the ast node + // Get location info from the ast node LocationPtr GetLocation(const py::object &node) const; static void InitParserEnvironment(const py::object &obj); static void CleanParserResource(); @@ -105,114 +105,118 @@ class Parser { static void UpdateTopFuncGraph(const FuncGraphPtr &func_graph); private: - // process the stmt node method list + // Process the stmt node method list FunctionBlockPtr ParseReturn(const FunctionBlockPtr &block, const py::object &node); - // parse expression + // Parse expression FunctionBlockPtr ParseExpr(const FunctionBlockPtr &block, const py::object &node); - // process a if statement + // Process a if statement FunctionBlockPtr ParseIf(const FunctionBlockPtr &block, const py::object &node); - // process a while statement + // 
Process a while statement FunctionBlockPtr ParseWhile(const FunctionBlockPtr &block, const py::object &node); - // process a for statement + // Process a for statement FunctionBlockPtr ParseFor(const FunctionBlockPtr &block, const py::object &node); FunctionBlockPtr ParseForIter(const FunctionBlockPtr &block, const py::object &node); FunctionBlockPtr ParseForLoop(const FunctionBlockPtr &block, const py::object &node); - // process a function def statement + // Process a function def statement FunctionBlockPtr ParseFunctionDef(const FunctionBlockPtr &block, const py::object &node); - // process a augment assign + // Process a augment assign FunctionBlockPtr ParseAugAssign(const FunctionBlockPtr &block, const py::object &node); - // process a global declaration + // Process a global declaration FunctionBlockPtr ParseGlobal(const FunctionBlockPtr &block, const py::object &node); - // process assign statement + // Process assign statement FunctionBlockPtr ParseAssign(const FunctionBlockPtr &block, const py::object &node); - // process break statement + // Process break statement FunctionBlockPtr ParseBreak(const FunctionBlockPtr &block, const py::object &node); - // process continue statement + // Process continue statement FunctionBlockPtr ParseContinue(const FunctionBlockPtr &block, const py::object &node); - // process pass statement + // Process pass statement FunctionBlockPtr ParsePass(const FunctionBlockPtr &block, const py::object &node); - // process the expr and slice node method list + + // Process the expr and slice node method list AnfNodePtr ParseBinOp(const FunctionBlockPtr &block, const py::object &node); - // process a variable name + // Process a variable name AnfNodePtr ParseName(const FunctionBlockPtr &block, const py::object &node); - // process NoneType + // Process NoneType AnfNodePtr ParseNone(const FunctionBlockPtr &block, const py::object &node); - // process Ellipsis + // Process Ellipsis AnfNodePtr ParseEllipsis(const FunctionBlockPtr &block, 
const py::object &node); - // process a integer or float number + // Process a integer or float number AnfNodePtr ParseNum(const FunctionBlockPtr &block, const py::object &node); - // process a string variable + // Process a string variable AnfNodePtr ParseStr(const FunctionBlockPtr &block, const py::object &node); - // process a Constant + // Process a Constant AnfNodePtr ParseConstant(const FunctionBlockPtr &block, const py::object &node); - // process a name + // Process a name AnfNodePtr ParseNameConstant(const FunctionBlockPtr &block, const py::object &node); - // process a function call + // Process a function call AnfNodePtr ParseCall(const FunctionBlockPtr &block, const py::object &node); - // process function 'super' + // Process function 'super' AnfNodePtr ParseSuper(const FunctionBlockPtr &block, const py::list &args); - // process the if expression + // Process the if expression AnfNodePtr ParseIfExp(const FunctionBlockPtr &block, const py::object &node); - // process class type define + // Process class type define AnfNodePtr ParseAttribute(const FunctionBlockPtr &block, const py::object &node); - // process a compare expression + // Process a compare expression AnfNodePtr ParseCompare(const FunctionBlockPtr &block, const py::object &node); - // process a bool operation + // Process a bool operation AnfNodePtr ParseBoolOp(const FunctionBlockPtr &block, const py::object &node); - // process a lambda operation + // Process a lambda operation AnfNodePtr ParseLambda(const FunctionBlockPtr &block, const py::object &node); - // process a tuple + // Process a tuple AnfNodePtr ParseTuple(const FunctionBlockPtr &block, const py::object &node); - // process a tuple + // Process a tuple AnfNodePtr ParseList(const FunctionBlockPtr &block, const py::object &node); - // process a tuple + // Process a tuple AnfNodePtr ParseSubscript(const FunctionBlockPtr &block, const py::object &node); - // process a slice + // Process a slice AnfNodePtr ParseSlice(const 
FunctionBlockPtr &block, const py::object &node); - - // process a extslice + // Process a extslice AnfNodePtr ParseExtSlice(const FunctionBlockPtr &block, const py::object &node); - - // process a tuple + // Process a tuple AnfNodePtr ParseIndex(const FunctionBlockPtr &block, const py::object &node); - - // process a unaryop + // Process a unaryop AnfNodePtr ParseUnaryOp(const FunctionBlockPtr &block, const py::object &node); - - // process a dict ast node expression + // Process a dict ast node expression AnfNodePtr ParseDict(const FunctionBlockPtr &block, const py::object &node); - // generate argument nodes for ast function node + // Process ListComp expression + AnfNodePtr ParseListComp(const FunctionBlockPtr &block, const py::object &node); + FunctionBlockPtr ParseListCompIter(const FunctionBlockPtr &block, const py::object &node, + const py::object &generator_node); + AnfNodePtr ParseListCompIfs(const FunctionBlockPtr &list_body_block, const ParameterPtr &list_param, + const py::object &node, const py::object &generator_node); + + // Generate argument nodes for ast function node void GenerateArgsNodeForFunction(const FunctionBlockPtr &block, const py::object &function_node); - // generate argument default value for ast function node + // Generate argument default value for ast function node void GenerateArgsDefaultValueForFunction(const FunctionBlockPtr &block, const py::object &function_node); - // parse ast function node + // Parse ast function node FunctionBlockPtr ParseFunction(const py::object &function_node, const FunctionBlockPtr &block = nullptr); - // parse ast statements + // Parse ast statements FunctionBlockPtr ParseStatements(FunctionBlockPtr block, const py::object &stmt_node); - // parse one ast statement node + // Parse one ast statement node FunctionBlockPtr ParseStatement(const FunctionBlockPtr &block, const py::object &node); - // parse an ast expression node + // Parse an ast expression node AnfNodePtr ParseExprNode(const FunctionBlockPtr 
&block, const py::object &node); void MakeConditionBlocks(const FunctionBlockPtr &block, const FunctionBlockPtr &trueBlock, const FunctionBlockPtr &falseBlock); void RemoveUnnecessaryPhis(); - // write a new var + // Write a new var void WriteAssignVars(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &value_node); - // assign value to single variable name + // Assign value to single variable name void HandleAssignName(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // assign value to tuple + // Assign value to tuple void HandleAssignTuple(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // assign value to class member + // Assign value to class member void HandleAssignClassMember(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // assign value to subscript + // Assign value to subscript void HandleAssignSubscript(const FunctionBlockPtr &block, const py::object &targ, const AnfNodePtr &assigned_node); - // process a bool operation value list + // Process a bool operation value list AnfNodePtr ProcessBoolOpValueList(const FunctionBlockPtr &block, const py::list &value_list, AstSubType mode); CNodePtr GenerateIteratorInFor(const FunctionBlockPtr &block, const pybind11::object &node, @@ -221,7 +225,7 @@ class Parser { CNodePtr GenerateCondInFor(const ParameterPtr &iter_param, const FunctionBlockPtr &header_block, const AnfNodePtr &op_hasnext); - FunctionBlockPtr GenerateBlockInFor(const TraceInfoPtr &trace_info); + FunctionBlockPtr GenerateBlock(const TraceInfoPtr &trace_info); bool ParseKeywordsInCall(const FunctionBlockPtr &block, const py::object &node, std::vector *packed_arguments); @@ -249,27 +253,27 @@ class Parser { func_block_list_.push_back(block); return block; } - // return a make tuple for input elements list + // Return a make tuple for input elements list AnfNodePtr GenerateMakeTuple(const FunctionBlockPtr 
&block, const std::vector &element_nodes); int64_t GetForTransToWhileLoop(); - // shared_ptr will be hold by GraphManager, so just hold a weak ref here. + // The shared_ptr will be hold by GraphManager, so just hold a weak ref here. static FuncGraphWeakPtr top_func_graph_; // Python function id, used to indicate whether two CNodes come from the same Python function const std::shared_ptr &ast_; FuncGraphPtr func_graph_; - // error code setwhen parsing ast tree + // Error code setwhen parsing ast tree ParseStatusCode errcode_; - // hold all reference for FunctionBlock in this round of parsing, + // Hold all reference for FunctionBlock in this round of parsing, // so in FunctionBlock class we can use FunctionBlock* in member // pre_blocks_ and jumps_ to break reference cycle. std::vector func_block_list_; using pStmtFunc = FunctionBlockPtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node); using pExprFunc = AnfNodePtr (Parser::*)(const FunctionBlockPtr &block, const py::object &node); - // define the function map to parse ast Statement + // Define the function map to parse ast Statement std::map stmt_method_map_; - // define the function map to parse ast expression + // Define the function map to parse ast expression std::map expr_method_map_; // Save current loops to support 'continue', 'break' statement. std::stack loops_; @@ -350,10 +354,10 @@ class ParseAst { bool IsClassMember(const py::object &node); private: - // save obj,eg: class instance or function + // Save obj,eg: class instance or function py::object obj_; - // function or class method. + // Function or class method. 
py::function function_; py::object ast_tree_; @@ -369,7 +373,7 @@ class ParseAst { int64_t function_line_offset_; }; -// update the graph flags +// Update the graph flags bool UpdateFuncGraphFlags(const py::object &obj, const FuncGraphPtr &func_graph); AnfNodePtr GetMixedPrecisionCastHelp(const FuncGraphPtr &func_graph, const AnfNodePtr ¶m); diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc index 00c583467ce..cc636afbc9c 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/resolve.cc +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.cc @@ -28,6 +28,7 @@ #include "frontend/operator/ops.h" #include "frontend/optimizer/opt.h" #include "frontend/optimizer/irpass.h" +#include "frontend/optimizer/irpass/symbol_resolver.h" namespace mindspore { namespace parse { @@ -306,7 +307,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr } AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space, - const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr) { + const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr) { MS_EXCEPTION_IF_NULL(node); TraceGuard trace_guard(std::make_shared(node->debug_info())); if (node->func_graph() == nullptr || manager == nullptr) { @@ -319,14 +320,19 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa py::object obj = symbol_resolver.result(); if (!data_converter::IsCellInstance(obj)) { - return nullptr; + AnfNodePtr resolved_node = ResolveObjectAndAddToManager(manager, obj, node); + AnfNodePtrList inputs = {NewValueNode(prim::kPrimGetAttr), resolved_node, attr}; + AnfNodePtr res_node = node->func_graph()->NewCNode(inputs); + TraceManager::ClearParseOrResolveDebugInfo(); + return res_node; } const std::string fn = PYTHON_MOD_GET_MEMBER_NAMESPACE_SYMBOL; const std::string module = "mindspore._extends.parse.parser"; py::object namespace_obj = 
parse::python_adapter::GetPyFn(module, fn)(obj); auto new_namespace = std::make_shared(RESOLVE_NAMESPACE_NAME_CLASS_MEMBER, namespace_obj); - auto new_symbol = std::make_shared(attr); + std::string attr_as_string = GetValueNode(attr)->value(); + auto new_symbol = std::make_shared(attr_as_string); AnfNodePtrList inputs = {NewValueNode(prim::kPrimResolve), NewValueNode(new_namespace), NewValueNode(new_symbol)}; AnfNodePtr resolved_node = node->func_graph()->NewCNode(inputs); @@ -336,11 +342,11 @@ AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpa namespace { opt::OptPassGroupMap GetOptResolvePasses(const opt::irpass::ResolveIRPassLib &irpass) { + // For resolve and getattr primitive. opt::OptPassGroupMap map({ {"resolve", { - // For resolve and getattr primitive; - irpass.resolver_resolve_and_getattr_, + irpass.resolver_getattr_resolve_, }}, }); return map; diff --git a/mindspore/ccsrc/pipeline/jit/parse/resolve.h b/mindspore/ccsrc/pipeline/jit/parse/resolve.h index ad8bdc27454..bfc0e818b41 100644 --- a/mindspore/ccsrc/pipeline/jit/parse/resolve.h +++ b/mindspore/ccsrc/pipeline/jit/parse/resolve.h @@ -147,7 +147,7 @@ AnfNodePtr ResolveSymbol(const FuncGraphManagerPtr &manager, const NameSpacePtr // Resolve Cell with attr name. AnfNodePtr ResolveCellwithAttr(const FuncGraphManagerPtr &manager, const NameSpacePtr &name_space, - const SymbolPtr &symbol, const AnfNodePtr &node, const std::string &attr); + const SymbolPtr &symbol, const AnfNodePtr &node, const AnfNodePtr &attr); // Resolve one graph which normally is the root graph. FuncGraph shall be managed by res->manager(). 
bool ResolveFuncGraph(const FuncGraphPtr &func_graph, const pipeline::ResourceBasePtr &res, bool use_profile = true); diff --git a/mindspore/ccsrc/pipeline/jit/pass.cc b/mindspore/ccsrc/pipeline/jit/pass.cc index 2e6e4292809..43bde4e9cd5 100644 --- a/mindspore/ccsrc/pipeline/jit/pass.cc +++ b/mindspore/ccsrc/pipeline/jit/pass.cc @@ -239,7 +239,12 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) { // Safe inlining irpass.inline_, - irpass.updatestate_eliminater_, + irpass.updatestate_depend_eliminater_, + irpass.updatestate_assign_eliminater_, + irpass.updatestate_maketuple_eliminater_, + irpass.updatestate_only_used_node_eliminater_, + irpass.updatestate_loads_eliminater_, + irpass.updatestate_pure_node_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.partial_eliminate_, @@ -273,7 +278,12 @@ opt::OptPassConfig GetOptPassA1(const opt::irpass::OptimizeIRPassLib &irpass) { // Safe inlining irpass.inline_, - irpass.updatestate_eliminater_, + irpass.updatestate_depend_eliminater_, + irpass.updatestate_assign_eliminater_, + irpass.updatestate_maketuple_eliminater_, + irpass.updatestate_only_used_node_eliminater_, + irpass.updatestate_loads_eliminater_, + irpass.updatestate_pure_node_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.sparse_tensor_eliminate_, @@ -357,7 +367,12 @@ OptPassGroupMap GetOptPassesAfterCconv(const opt::irpass::OptimizeIRPassLib &irp opt::OptPassConfig c_1 = opt::OptPassConfig({ // Safe inlining, irpass.inline_, - irpass.updatestate_eliminater_, + irpass.updatestate_depend_eliminater_, + irpass.updatestate_assign_eliminater_, + irpass.updatestate_maketuple_eliminater_, + irpass.updatestate_only_used_node_eliminater_, + irpass.updatestate_loads_eliminater_, + irpass.updatestate_pure_node_eliminater_, irpass.load_eliminater_, irpass.switch_call_monad_eliminater_, irpass.stopgrad_eliminater_, @@ -394,7 +409,12 @@ OptPassGroupMap GetOptPassesB(const 
opt::irpass::OptimizeIRPassLib &irpass) { irpass.float_tuple_getitem_switch_, irpass.reset_defer_inline_, irpass.inline_, - irpass.updatestate_eliminater_, + irpass.updatestate_depend_eliminater_, + irpass.updatestate_assign_eliminater_, + irpass.updatestate_maketuple_eliminater_, + irpass.updatestate_only_used_node_eliminater_, + irpass.updatestate_loads_eliminater_, + irpass.updatestate_pure_node_eliminater_, irpass.load_eliminater_, irpass.stopgrad_eliminater_, irpass.special_op_eliminate_, @@ -663,10 +683,35 @@ bool AutoMonadElimOptPass(const FuncGraphPtr &func_graph) { res->set_manager(func_graph->manager()); // opt::irpass::OptimizeIRPassLib is not used here to avoid double free problems in external calls. - opt::SubstitutionPtr updatestate_eliminater = opt::MakeSubstitution( - std::make_shared(), "updatestate_eliminater", prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_depend_eliminater = + opt::MakeSubstitution(std::make_shared(), "updatestate_depend_eliminater", + prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_assign_eliminater = + opt::MakeSubstitution(std::make_shared(), "updatestate_assign_eliminater", + prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_maketuple_eliminater = + opt::MakeSubstitution(std::make_shared(), + "updatestate_maketuple_eliminater", prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_only_used_node_eliminater = + opt::MakeSubstitution(std::make_shared(), + "updatestate_only_used_node_eliminater", prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_loads_eliminater = + opt::MakeSubstitution(std::make_shared(), "updatestate_loads_eliminater", + prim::kPrimUpdateState); + opt::SubstitutionPtr updatestate_pure_node_eliminater = + opt::MakeSubstitution(std::make_shared(), + "updatestate_pure_node_eliminater", prim::kPrimUpdateState); + + opt::OptPassConfig updatestate_eliminater = opt::OptPassConfig({ + updatestate_depend_eliminater, + updatestate_assign_eliminater, + 
updatestate_maketuple_eliminater, + updatestate_only_used_node_eliminater, + updatestate_loads_eliminater, + updatestate_pure_node_eliminater, + }); opt::OptPassGroupMap elim_map({ - {"updatestate_eliminate", opt::OptPassConfig({updatestate_eliminater})}, + {"updatestate_eliminater", updatestate_eliminater}, {"auto_monad_eliminator", opt::OptPassConfig(opt::AutoMonadEliminator())}, }); diff --git a/mindspore/ccsrc/pipeline/jit/pipeline.cc b/mindspore/ccsrc/pipeline/jit/pipeline.cc index 87f194a304e..6d5cff6cc18 100644 --- a/mindspore/ccsrc/pipeline/jit/pipeline.cc +++ b/mindspore/ccsrc/pipeline/jit/pipeline.cc @@ -206,7 +206,8 @@ void CacheFuncGraph(const ResourcePtr &resource) { ChangeFileMode(realpath.value(), S_IRWXU); std::ofstream fout(realpath.value()); if (!fout.is_open()) { - MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!"; + MS_LOG(EXCEPTION) << "Open cache file '" << realpath.value() << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); } FuncGraphPtr fg = resource->func_graph(); mind_ir::ModelProto fg_model = GetBinaryProto(fg, true); @@ -707,6 +708,7 @@ bool ExecutorPy::CompileInner(const py::object &obj, const py::tuple &args, cons SaveCompiledGraph(phase_s); opt::python_pass::PyPassManager::GetInstance()->ClearPipelineRes(); + abstract::AnalysisContext::ClearContext(); // Reclaim all resource used by optimizer; ReclaimOptimizer(); resource->Clean(); @@ -1336,6 +1338,7 @@ void ClearResAtexit() { ReleaseGeTsd(); parse::python_adapter::ResetPythonScope(); abstract::AnalysisResultCacheMgr::GetInstance().Clear(); + abstract::AnalysisContext::ClearContext(); #ifdef ENABLE_DEBUGGER Debugger::GetInstance()->Reset(); #endif diff --git a/mindspore/ccsrc/pipeline/jit/resource.h b/mindspore/ccsrc/pipeline/jit/resource.h index 8981d825acf..f31bf37376c 100644 --- a/mindspore/ccsrc/pipeline/jit/resource.h +++ b/mindspore/ccsrc/pipeline/jit/resource.h @@ -79,6 +79,8 @@ class Resource : public ResourceBase { 
gpu_loopsink_flag_ = flag; gpu_loopsink_size_ = size; } + void set_is_load(bool flag) { is_load_ = flag; } + bool is_load() { return is_load_; } bool gpu_loopsink_flag() { return gpu_loopsink_flag_; } int64_t gpu_loopsink_size() { return gpu_loopsink_size_; } // Reclaim resource and clear the cache. @@ -93,6 +95,8 @@ class Resource : public ResourceBase { py::object input_; bool is_cleaned_; bool gpu_loopsink_flag_{false}; + // The func_graph_ is loaded from mindir + bool is_load_{false}; int64_t gpu_loopsink_size_{1}; }; diff --git a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc index 922a085a440..b13061ee351 100644 --- a/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc +++ b/mindspore/ccsrc/pipeline/jit/static_analysis/order_enforce.cc @@ -161,9 +161,17 @@ class OrderEnforcer { auto update_state = FindLastUpdateState(maketuple); if (update_state != nullptr) { std::unordered_set maketuple_users = GetSpecialOperatorRealUsers(maketuple); + std::unordered_set no_push_maketuple_users; + // Push and Pull at the end of the execution order, + // In order to ensure push and pull operator cut into the same graph, do not put push operator into updatestate + for (auto maketuple_user : maketuple_users) { + if (!IsPrimitiveCNode(maketuple_user, prim::kPrimPush)) { + no_push_maketuple_users.insert(maketuple_user); + } + } auto update_state_cnode = update_state->cast(); MS_EXCEPTION_IF_NULL(update_state_cnode); - AddInputEdges(update_state_cnode, maketuple_users); + AddInputEdges(update_state_cnode, no_push_maketuple_users); } } } @@ -207,7 +215,7 @@ class OrderEnforcer { if (!IsPrimitiveCNode(last_input, prim::kPrimUpdateState)) { return; } - const std::set special_operators = {prim::kPrimExpandDims}; + const std::set special_operators = {prim::kPrimExpandDims, prim::kPrimBatchNormGrad}; for (size_t i = 1; i < inputs.size(); ++i) { auto &input = inputs.at(i); if (!IsRef(input)) { diff 
--git a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc index c5e7dec5f37..8ffa2642bc6 100644 --- a/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc +++ b/mindspore/ccsrc/pipeline/pynative/pynative_execute.cc @@ -2309,6 +2309,7 @@ void GradExecutor::GradNetInner(py::object *ret, const prim::GradOperationPtr &g MS_LOG(DEBUG) << "Start update top cell info when run finish"; UpdateTopCellInfo(false, false, true); resource->Clean(); + abstract::AnalysisContext::ClearContext(); } std::vector GradExecutor::GetWeightsArgs(const py::object &weights, const FuncGraphPtr &df_builder) { diff --git a/mindspore/ccsrc/profiler/device/data_saver.cc b/mindspore/ccsrc/profiler/device/data_saver.cc index 17742d0a7e7..c1e6ef01783 100644 --- a/mindspore/ccsrc/profiler/device/data_saver.cc +++ b/mindspore/ccsrc/profiler/device/data_saver.cc @@ -17,7 +17,6 @@ #include #include #include "sys/stat.h" -#include "utils/log_adapter.h" #include "utils/ms_utils.h" #include "utils/ms_context.h" @@ -31,6 +30,10 @@ OpDetailInfo::OpDetailInfo(const std::shared_ptr op_info, float proporti auto op_type_end_iter = op_full_name_.rfind('-'); op_type_ = op_full_name_.substr(op_type_begin_iter, op_type_end_iter - op_type_begin_iter); op_name_ = op_full_name_.substr(op_type_begin_iter); + if (op_info->op_count == 0) { + MS_LOG(ERROR) << "The num of operations can not be 0."; + return; + } op_avg_time_ = op_info->op_host_cost_time / op_info->op_count; } @@ -39,6 +42,10 @@ void DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { float total_time_sum = GetTotalOpTime(op_info_maps); for (auto item : op_info_maps) { op_timestamps_map_[item.first] = item.second.start_duration; + if (total_time_sum == 0.0) { + MS_LOG(ERROR) << "The total operation times can not be 0."; + return; + } float proportion = item.second.op_host_cost_time / total_time_sum; auto op_info = std::make_shared(item.second); if (op_info == nullptr) { @@ -52,6 +59,10 @@ void 
DataSaver::ParseOpInfo(const OpInfoMap &op_info_maps) { // update average time of op type for (auto &op_type : op_type_infos_) { // device_infos: + if (op_type.second.count_ == 0) { + MS_LOG(ERROR) << "The num of operation type can not be 0."; + return; + } op_type.second.avg_time_ = op_type.second.total_time_ / op_type.second.count_; } MS_LOG(DEBUG) << "Get " << op_detail_infos_.size() << " operation items."; diff --git a/mindspore/ccsrc/profiler/device/data_saver.h b/mindspore/ccsrc/profiler/device/data_saver.h index 13c3ab80227..759a85b04de 100644 --- a/mindspore/ccsrc/profiler/device/data_saver.h +++ b/mindspore/ccsrc/profiler/device/data_saver.h @@ -23,6 +23,7 @@ #include #include #include "profiler/device/profiling.h" +#include "utils/log_adapter.h" namespace mindspore { namespace profiler { struct OpDetailInfo { @@ -73,6 +74,14 @@ struct OpType { std::string GetGpuHeader() const { return "op_type,type_occurrences,total_time(us),total_proportion,avg_time(us)"; } void OutputCpuOpTypeInfo(std::ostream &os) const { + if (step_ == 0) { + MS_LOG(ERROR) << "The run step can not be 0."; + return; + } + if (count_ == 0) { + MS_LOG(ERROR) << "The num of operation type can not be 0."; + return; + } os << op_type_ << ',' << count_ << ',' << count_ / step_ << ',' << total_time_ << ',' << total_time_ / count_ << ',' << proportion_ << std::endl; } diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc index b939ab36bee..3300b2e4925 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_data_saver.cc @@ -68,6 +68,10 @@ void GpuDataSaver::ParseEvent(const std::vector &events) { for (auto &device_infos : activity_infos_) { // device_infos: for (auto &activity_info : device_infos.second) { + if (activity_info.second.count_ == 0) { + MS_LOG(ERROR) << "The num of operations can not be 0."; + return; + } // activity_info: activity_info.second.avg_duration_ 
= activity_info.second.total_duration_ / activity_info.second.count_; } diff --git a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc index ffa1d513c1d..3553e18bf01 100644 --- a/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc +++ b/mindspore/ccsrc/profiler/device/gpu/gpu_profiling.cc @@ -339,6 +339,10 @@ void GPUProfiler::OpsParser() { std::sort(order_vec.begin(), order_vec.end(), cmp_func); for (auto iter = order_vec.begin(); iter != order_vec.end(); iter++) { + if (iter->second.op_count == 0) { + MS_LOG(ERROR) << "The num of operations can not be 0."; + return; + } MS_LOG(DEBUG) << "GPU_profiler" << "," << iter->first << "," << iter->second.op_count << "," << iter->second.op_kernel_count << "," << iter->second.op_kernel_api_count << "," diff --git a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc index fccb4ab4d1e..a8f376e97a5 100644 --- a/mindspore/ccsrc/ps/core/communicator/http_communicator.cc +++ b/mindspore/ccsrc/ps/core/communicator/http_communicator.cc @@ -42,9 +42,12 @@ bool HttpCommunicator::Start() { bool HttpCommunicator::Stop() { MS_EXCEPTION_IF_NULL(http_server_); - bool res = http_server_->Stop(); + if (!http_server_->Stop()) { + MS_LOG(ERROR) << "Stopping http server failed."; + return false; + } running_ = false; - return res; + return true; } void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const MessageCallback &cb) { @@ -60,6 +63,7 @@ void HttpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Me std::string url = "/"; url += msg_type; + MS_EXCEPTION_IF_NULL(http_server_); bool is_succeed = http_server_->RegisterRoute(url, &http_msg_callbacks_[msg_type]); if (!is_succeed) { MS_LOG(EXCEPTION) << "Http server register handler for url " << url << " failed."; diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc 
b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc index 468dcf9f3ce..e8378c4c74b 100644 --- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc +++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.cc @@ -57,7 +57,10 @@ bool TcpCommunicator::Start() { std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); server_node_->set_handler(tcp_msg_callback_); - server_node_->Start(); + if (!server_node_->Start()) { + MS_LOG(EXCEPTION) << "Starting server node failed."; + return false; + } running_ = true; running_thread_ = std::thread([&]() { while (running_) { @@ -69,8 +72,14 @@ bool TcpCommunicator::Start() { bool TcpCommunicator::Stop() { MS_EXCEPTION_IF_NULL(server_node_); - server_node_->Finish(); - server_node_->Stop(); + if (!server_node_->Finish()) { + MS_LOG(ERROR) << "Finishing server node failed."; + return false; + } + if (!server_node_->Stop()) { + MS_LOG(ERROR) << "Stopping server node failed."; + return false; + } running_ = false; return true; } @@ -81,6 +90,7 @@ void TcpCommunicator::RegisterMsgCallBack(const std::string &msg_type, const Mes } void TcpCommunicator::RegisterEventCallback(const core::ClusterEvent &event, const EventCallback &event_cb) { + MS_EXCEPTION_IF_NULL(server_node_); server_node_->RegisterEventCallback(event, event_cb); } diff --git a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h index 784397165b6..f6ef04fc5e2 100644 --- a/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h +++ b/mindspore/ccsrc/ps/core/communicator/tcp_communicator.h @@ -52,7 +52,6 @@ enum class TcpUserCommand { kPrepareForNextIter, kProceedToNextIter, kEndLastIter, - kStartFLJob, kUpdateModel, kGetModel @@ -102,6 +101,7 @@ class TcpCommunicator : public CommunicatorBase { std::shared_ptr> *output = nullptr) { const std::string &msg_str = pb_msg.SerializeAsString(); std::shared_ptr msg(new unsigned char[msg_str.size()]); + 
MS_ERROR_IF_NULL_W_RET_VAL(msg, false); size_t dest_size = msg_str.size(); size_t src_size = msg_str.size(); auto ret = memcpy_s(msg.get(), dest_size, msg_str.c_str(), src_size); diff --git a/mindspore/ccsrc/ps/optimizer_info.cc b/mindspore/ccsrc/ps/optimizer_info.cc index fc8ba289283..5bb8019cb52 100644 --- a/mindspore/ccsrc/ps/optimizer_info.cc +++ b/mindspore/ccsrc/ps/optimizer_info.cc @@ -23,7 +23,10 @@ namespace mindspore { namespace ps { -void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { workspaces_.push_back(workspace); } +void OptimizerInfo::AddWorkspace(const AddressPtr &workspace) { + MS_EXCEPTION_IF_NULL(workspace); + workspaces_.push_back(workspace); +} const std::vector &OptimizerInfo::inputs() const { return inputs_; } @@ -42,6 +45,7 @@ size_t OptimizerInfo::indices_index() { return 0; } template void OptimizerInfo::UpdateOptimInputValue(const std::string &optim_type, const std::string &input_name, void *data, const Lengths &lens) { + MS_EXCEPTION_IF_NULL(data); if (kOptimToOriginIdx.count(optim_type) == 0 || kOptimToPSSendIdx.count(optim_type) == 0) { MS_LOG(EXCEPTION) << "Optimizer type " << optim_type << " in not supported."; } @@ -96,8 +100,8 @@ void DenseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { void DenseOptimInfo::ComputeMean(const std::vector> &, size_t n, size_t, size_t) { if (n > 1) { + MS_EXCEPTION_IF_NULL(gradient()->addr); float *accum_grad_data = reinterpret_cast(gradient()->addr); - MS_EXCEPTION_IF_NULL(accum_grad_data); size_t size = gradient()->size / sizeof(float); for (size_t i = 0; i < size; i++) { accum_grad_data[i] /= n; @@ -116,8 +120,8 @@ void DenseOptimInfo::Reset() { void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { // Append grad data to the end + MS_EXCEPTION_IF_NULL(gradient()->addr); float *accum_grad_data = reinterpret_cast(gradient()->addr); - MS_EXCEPTION_IF_NULL(accum_grad_data); size_t grad_index = this->grad_index(); size_t grad_offset = 0; @@ 
-143,6 +147,7 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { gradient()->size += incr_grad_size; // Append indice data to the end + MS_EXCEPTION_IF_NULL(indices()->addr); int *accum_indices_data = reinterpret_cast(indices()->addr); MS_EXCEPTION_IF_NULL(accum_indices_data); @@ -153,10 +158,10 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { } void *incr_indice_data_temp = const_cast(values.data()) + indice_offset; - int *incr_indice_data = reinterpret_cast(incr_indice_data_temp); - MS_EXCEPTION_IF_NULL(incr_indice_data_temp); + int *incr_indice_data = reinterpret_cast(incr_indice_data_temp); MS_EXCEPTION_IF_NULL(incr_indice_data); + size_t incr_indice_size = lengths[indices_index]; size_t incr_indice_data_size = incr_indice_size * sizeof(int); dst_size = incr_indice_data_size; @@ -176,8 +181,9 @@ void SparseOptimInfo::Accumulate(const Values &values, const Lengths &lengths) { void SparseOptimInfo::ComputeMean(const std::vector> &shapes, size_t n, size_t server_num, size_t rank_id) { - MS_EXCEPTION_IF_NULL(gradient()); - MS_EXCEPTION_IF_NULL(indices()); + if (n == 0 || indices()->size == 0) { + MS_LOG(EXCEPTION) << "The size of shapes or indices are 0."; + } size_t indices_size = static_cast(indices()->size / sizeof(int)); size_t segment_size = gradient()->size / indices()->size; @@ -259,6 +265,11 @@ void SparseOptimInfo::Reset() { MomentumOptimInfo::MomentumOptimInfo(const AddressPtr &weight, const AddressPtr &accumulate, const AddressPtr &learning_rate, const AddressPtr &gradient, const AddressPtr &momentum) { + MS_EXCEPTION_IF_NULL(weight); + MS_EXCEPTION_IF_NULL(accumulate); + MS_EXCEPTION_IF_NULL(learning_rate); + MS_EXCEPTION_IF_NULL(gradient); + MS_EXCEPTION_IF_NULL(momentum); inputs_.push_back(weight); inputs_.push_back(accumulate); inputs_.push_back(learning_rate); @@ -275,12 +286,14 @@ const size_t SparseOptimInfo::indice_size() const { return indices_offset_; } const AddressPtr 
&MomentumOptimInfo::gradient() { size_t origin_grad_index = kMomentumOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &MomentumOptimInfo::indices() { size_t origin_grad_index = kMomentumOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } @@ -294,6 +307,17 @@ SparseAdamOptimInfo::SparseAdamOptimInfo(const AddressPtr &weight, const Address const AddressPtr &learning_rate, const AddressPtr &beta1, const AddressPtr &beta2, const AddressPtr &epsilon, const AddressPtr &grad, const AddressPtr &indices, bool sharded) { + MS_EXCEPTION_IF_NULL(weight); + MS_EXCEPTION_IF_NULL(m); + MS_EXCEPTION_IF_NULL(v); + MS_EXCEPTION_IF_NULL(beta1_power); + MS_EXCEPTION_IF_NULL(beta2_power); + MS_EXCEPTION_IF_NULL(learning_rate); + MS_EXCEPTION_IF_NULL(beta1); + MS_EXCEPTION_IF_NULL(beta2); + MS_EXCEPTION_IF_NULL(epsilon); + MS_EXCEPTION_IF_NULL(grad); + MS_EXCEPTION_IF_NULL(indices); inputs_.push_back(weight); inputs_.push_back(m); inputs_.push_back(v); @@ -322,12 +346,14 @@ void SparseAdamOptimInfo::Update(const Values &values, const Lengths &lens) { const AddressPtr &SparseAdamOptimInfo::gradient() { size_t origin_grad_index = kSparseAdamOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &SparseAdamOptimInfo::indices() { size_t origin_indices_index = kSparseAdamOriginIdx.at("indices"); EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]); return inputs_[origin_indices_index]; } @@ -345,6 +371,11 @@ size_t SparseAdamOptimInfo::indices_index() { SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const AddressPtr &accum, const AddressPtr &linear, const AddressPtr &grad, 
const AddressPtr &indices, bool sharded) { + MS_EXCEPTION_IF_NULL(weight); + MS_EXCEPTION_IF_NULL(accum); + MS_EXCEPTION_IF_NULL(linear); + MS_EXCEPTION_IF_NULL(grad); + MS_EXCEPTION_IF_NULL(indices); inputs_.push_back(weight); inputs_.push_back(accum); inputs_.push_back(linear); @@ -358,12 +389,14 @@ SparseFtrlOptimInfo::SparseFtrlOptimInfo(const AddressPtr &weight, const Address const AddressPtr &SparseFtrlOptimInfo::gradient() { size_t origin_grad_index = kSparseFtrlOriginIdx.at("grad"); EXC_IF_VEC_IDX_OOB(inputs_, origin_grad_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_grad_index]); return inputs_[origin_grad_index]; } const AddressPtr &SparseFtrlOptimInfo::indices() { size_t origin_indices_index = kSparseFtrlOriginIdx.at("indices"); EXC_IF_VEC_IDX_OOB(inputs_, origin_indices_index); + MS_EXCEPTION_IF_NULL(inputs_[origin_indices_index]); return inputs_[origin_indices_index]; } diff --git a/mindspore/ccsrc/ps/optimizer_info_builder.cc b/mindspore/ccsrc/ps/optimizer_info_builder.cc index 5a1f60149c7..68db3d280c0 100644 --- a/mindspore/ccsrc/ps/optimizer_info_builder.cc +++ b/mindspore/ccsrc/ps/optimizer_info_builder.cc @@ -29,6 +29,7 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr const Lengths &lens, const InputsShapePtr &inputs_shape, size_t worker_num, bool sharded) { MS_EXCEPTION_IF_NULL(pserver_kernel); + MS_EXCEPTION_IF_NULL(weight); MS_EXCEPTION_IF_NULL(inputs_shape); OptimizerInfo *optim_info = BuildInputs(weight, keys, values, lens, inputs_shape, worker_num, pserver_kernel, sharded); @@ -40,6 +41,7 @@ OptimizerInfo *OptimizerInfoBuilder::Build(const std::shared_ptr } void OptimizerInfoBuilder::BuildWorkspaces(OptimizerInfo *info, const std::vector &ws_sizes, size_t) { + MS_EXCEPTION_IF_NULL(info); for (size_t i = 0; i < ws_sizes.size(); i++) { size_t size = ws_sizes[i]; AddressPtr workspace = std::make_shared(); @@ -116,6 +118,7 @@ AddressPtr OptimizerInfoBuilder::GenInputAddrPtr(const std::string &optim_type, OptimizerInfo 
*MomentumOptimInfoBuilder::BuildInputs(const WeightPtr &weight, const Keys &, const Values &values, const Lengths &lens, const InputsShapePtr &, size_t, const std::shared_ptr &, bool) { + MS_EXCEPTION_IF_NULL(weight); AddressPtr weight_addr = std::make_shared(); MS_EXCEPTION_IF_NULL(weight_addr); weight_addr->addr = weight->data(); diff --git a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc index 4c4e97f7939..353136b83c2 100644 --- a/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc +++ b/mindspore/ccsrc/ps/ps_cache/ps_cache_manager.cc @@ -237,9 +237,6 @@ void PsCacheManager::AllocMemForHashTable() { embedding_device_cache_->hash_swap_value_addr_ = reinterpret_cast( embedding_device_cache_->cache_->MallocMemory(max_embedding_size * batch_elements_ * sizeof(float))); MS_EXCEPTION_IF_NULL(embedding_device_cache_->hash_swap_value_addr_); - if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) { - MS_LOG(EXCEPTION) << "MallocConstantMemory failed."; - } } void PsCacheManager::SetLocalIdRank() { @@ -328,6 +325,14 @@ void PsCacheManager::ProcessDataTask(uint32_t device_id, const void *context) { MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_); MS_ERROR_IF_NULL_WO_RET_VAL(embedding_device_cache_->cache_); embedding_device_cache_->cache_->InitDevice(device_id, context); + + // MallocConstantMemory need stream on device Ascend, should be called after InitDevice. 
+ if (!(embedding_device_cache_->cache_->MallocConstantMemory(vocab_cache_size_))) { + MS_LOG(ERROR) << "MallocConstantMemory failed."; + running_ = false; + return; + } + InitParameterServer(); InitDataChannel(); while (running_) { @@ -636,6 +641,7 @@ bool PsCacheManager::ParseHostDataHostToDevice(size_t id) { bool PsCacheManager::ParseHostDataDeviceToHost() { MS_ERROR_IF_NULL(embedding_device_cache_); + MS_ERROR_IF_NULL(embedding_host_cache_); int *device_to_host_ids = embedding_device_cache_->device_to_host_ids.get(); int *device_to_host_index = embedding_host_cache_->device_to_host_index.get(); MS_ERROR_IF_NULL(device_to_host_ids); @@ -1053,6 +1059,7 @@ bool PsCacheManager::SyncHostEmbeddingTable() { bool PsCacheManager::SyncDeviceEmbeddingTable() { MS_ERROR_IF_NULL(embedding_device_cache_); + MS_ERROR_IF_NULL(embedding_device_cache_->cache_); const auto &device_hash_map = embedding_device_cache_->device_hash_map_; MS_ERROR_IF_NULL(device_hash_map); const auto &hash_id_to_index = device_hash_map->hash_id_to_index(); @@ -1105,6 +1112,8 @@ bool PsCacheManager::SyncDeviceEmbeddingTable() { } void PsCacheManager::DumpHashTables(bool dump_device_tables) const { + MS_EXCEPTION_IF_NULL(embedding_device_cache_); + MS_EXCEPTION_IF_NULL(embedding_device_cache_->cache_); for (const auto &item : hash_tables_) { const auto ¶m_name = item.first; size_t cache_vocab_size = item.second.cache_vocab_size; diff --git a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc index 17df2f0ad28..eca9209af37 100644 --- a/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc +++ b/mindspore/ccsrc/ps/ps_cache/ps_data/ps_data_prefetch.cc @@ -31,6 +31,7 @@ void PsDataPrefetch::CreateDataChannel(const std::string &channel_name, size_t s if (iter != ps_data_channel_map_.end()) { MS_LOG(WARNING) << "The ps data channel already exists, channel name:" << channel_name; auto channel = iter->second; + 
MS_ERROR_IF_NULL_WO_RET_VAL(channel); channel->set_step_num(step_num); } else { auto channel = std::make_shared(channel_name, step_num); diff --git a/mindspore/ccsrc/ps/ps_context.cc b/mindspore/ccsrc/ps/ps_context.cc index cbaeec47987..36a48183055 100644 --- a/mindspore/ccsrc/ps/ps_context.cc +++ b/mindspore/ccsrc/ps/ps_context.cc @@ -270,6 +270,7 @@ void PSContext::GenerateResetterRound() { bool is_parameter_server_mode = false; bool is_federated_learning_mode = false; bool is_mixed_training_mode = false; + bool use_pairwise_encrypt = (encrypt_type_ == kPWEncryptType); if (server_mode_ == kServerModePS) { is_parameter_server_mode = true; @@ -285,7 +286,7 @@ void PSContext::GenerateResetterRound() { binary_server_context = ((unsigned int)is_parameter_server_mode << 0) | ((unsigned int)is_federated_learning_mode << 1) | - ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)secure_aggregation_ << 3); + ((unsigned int)is_mixed_training_mode << 2) | ((unsigned int)use_pairwise_encrypt << 3); if (kServerContextToResetRoundMap.count(binary_server_context) == 0) { resetter_round_ = ResetterRound::kNoNeedToReset; } else { diff --git a/mindspore/ccsrc/ps/ps_context.h b/mindspore/ccsrc/ps/ps_context.h index ddf88d8fe05..291a7246038 100644 --- a/mindspore/ccsrc/ps/ps_context.h +++ b/mindspore/ccsrc/ps/ps_context.h @@ -44,14 +44,13 @@ constexpr char kNotEncryptType[] = "NOT_ENCRYPT"; // 0: Server is in parameter server mode. // 1: Server is in federated learning mode. // 2: Server is in mixed training mode. -// 3: Server enables sucure aggregation. -// For example: 1010 stands for that the server is in federated learning mode and sucure aggregation is enabled. +// 3: Server enables pairwise encrypt algorithm. +// For example: 1010 stands for that the server is in federated learning mode and pairwise encrypt algorithm is enabled. 
enum class ResetterRound { kNoNeedToReset, kUpdateModel, kReconstructSeccrets, kPushWeight }; const std::map kServerContextToResetRoundMap = {{0b0010, ResetterRound::kUpdateModel}, {0b1010, ResetterRound::kReconstructSeccrets}, {0b1100, ResetterRound::kPushWeight}, - {0b0100, ResetterRound::kPushWeight}, - {0b0100, ResetterRound::kUpdateModel}}; + {0b0100, ResetterRound::kPushWeight}}; class PSContext { public: diff --git a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc index d59c197fc55..3a44d7a8ee1 100644 --- a/mindspore/ccsrc/pybind_api/ir/param_info_py.cc +++ b/mindspore/ccsrc/pybind_api/ir/param_info_py.cc @@ -34,6 +34,7 @@ REGISTER_PYBIND_DEFINE(ParamInfo, ([](const py::module *m) { .def_property("comm_fusion", &ParamInfo::comm_fusion, &ParamInfo::set_comm_fusion) .def_property("cache_enable", &ParamInfo::cache_enable, &ParamInfo::set_cache_enable) .def_property("cache_shape", &ParamInfo::cache_shape, &ParamInfo::set_cache_shape) + .def_property("requires_aggr", &ParamInfo::requires_aggr, &ParamInfo::set_requires_aggr) .def(py::pickle( [](const ParamInfo &p) { // __getstate__ return py::make_tuple(p.name(), p.requires_grad(), p.layerwise_parallel()); diff --git a/mindspore/ccsrc/runtime/device/CMakeLists.txt b/mindspore/ccsrc/runtime/device/CMakeLists.txt index c7e99adbbea..56cd9fe6275 100644 --- a/mindspore/ccsrc/runtime/device/CMakeLists.txt +++ b/mindspore/ccsrc/runtime/device/CMakeLists.txt @@ -42,7 +42,7 @@ if(ENABLE_MPI) if(ENABLE_GPU) set_property(SOURCE "gpu/mpi/mpi_initializer.cc" PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_DEVICE) - pybind11_add_module(_ms_mpi "gpu/mpi/mpi_initializer.cc") + pybind11_add_module(_ms_mpi NO_EXTRAS "gpu/mpi/mpi_initializer.cc") target_link_libraries(_ms_mpi PRIVATE mindspore::pybind11_module mindspore::ompi) endif() diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc index 
666d79f2fc9..c7473255a40 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_bucket.cc @@ -124,8 +124,8 @@ void AscendBucket::LaunchAllReduce() { MS_LOG(EXCEPTION) << "allreduce input have different dtype"; } - auto iter = CONST_OP_HCOM_DATA_TYPE_MAP.find(type); - if (iter == CONST_OP_HCOM_DATA_TYPE_MAP.end()) { + auto iter = kConstOpHcomDataTypeMap.find(type); + if (iter == kConstOpHcomDataTypeMap.end()) { MS_LOG(EXCEPTION) << "unknown data type:" << type; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc index 9ddd0ef3f95..aa2874e022a 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_device_address.cc @@ -175,6 +175,9 @@ bool AscendDeviceAddress::SyncDeviceToHost(const ShapeVector &shape, size_t size void *host_ptr) const { MS_LOG(INFO) << "SyncDeviceToHost, Device(format:" << format_ << ", type_id:" << TypeIdLabel(type_id_) << ", size:" << size_ << "), Host(type_id:" << TypeIdLabel(type) << ", size:" << size << ")"; + if (type_id_ > kMonadTypeBegin && type_id_ < kMonadTypeEnd) { + return true; + } SyncStream(); bool sync_ok = false; std::vector host_shape; diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc index 676b311244c..eb063c54c6a 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_event.cc @@ -53,6 +53,10 @@ void AscendEvent::WaitEvent() { if (ret != RT_ERROR_NONE) { MS_LOG(EXCEPTION) << "rtStreamWaitEvent failed, ret:" << ret; } + ret = rtEventReset(event_, wait_stream_); + if (ret != RT_ERROR_NONE) { + MS_LOG(EXCEPTION) << "rtEventReset failed, ret:" << ret; + } need_wait_ = false; } diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc 
b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc index 950c9aa97a2..6dad9375810 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_kernel_runtime.cc @@ -22,6 +22,7 @@ #include "utils/signal_util.h" #include "debug/data_dump/e2e_dump.h" #include "runtime/device/ascend/ascend_device_address.h" +#include "runtime/device/ascend/distribute/ascend_collective.h" #include "utils/ms_context.h" #include "utils/context/context_extends.h" #include "utils/mpi/mpi_config.h" @@ -46,7 +47,6 @@ #include "backend/optimizer/mem_reuse/mem_reuse_checker.h" #include "debug/env_config_parser.h" #endif -#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "runtime/device/ascend/executor/hccl_dynamic_kernel.h" #include "utils/config_manager.h" #include "runtime/device/ascend/profiling/reporter/op_name_task_stream_reporter.h" @@ -64,6 +64,7 @@ using mindspore::device::ascend::ProfilingManager; using mindspore::device::ascend::ProfilingUtils; using mindspore::device::ascend::tasksink::TaskGenerator; using mindspore::ge::model_runner::ModelRunner; +using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup; using mindspore::kernel::tbe::TbeUtils; using std::vector; @@ -78,32 +79,17 @@ namespace mindspore::device::ascend { static thread_local rtContext_t thread_local_rt_context{nullptr}; namespace { std::string GetRankId() { - std::string rank_id_str; -#ifdef ENABLE_MPI - auto mpi_config_ptr = MpiConfig::GetInstance(); - MS_EXCEPTION_IF_NULL(mpi_config_ptr); - if (mpi_config_ptr->enable_mpi()) { - int rank_id = GetMPIRankId(); - const std::string offset = common::GetEnv("RANK_OFFSET"); - if (offset.empty()) { - try { - int rank_offset = std::stoi(offset); - rank_id += rank_offset; - } catch (std::invalid_argument) { - MS_LOG(EXCEPTION) << "Call stoi invalid argument:" << offset; - } catch (std::out_of_range) { - MS_LOG(EXCEPTION) << "Call stoi 
out_of_range:" << offset; - } - } - rank_id_str = std::to_string(rank_id); - } else { - rank_id_str = common::GetEnv("RANK_ID"); + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + if (!context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK)) { + MS_LOG(INFO) << "Get hccl rankid from mpi"; + auto rank = HcclCollectiveGroup::instance().GetRankId(); + return std::to_string(rank); } -#else - rank_id_str = common::GetEnv("RANK_ID"); -#endif + std::string rank_id_str; + rank_id_str = std::getenv("RANK_ID"); if (rank_id_str.empty()) { - MS_LOG(ERROR) << "Get hccl rankid failed, please set env RANK_ID"; + MS_LOG(EXCEPTION) << "Get hccl rankid failed, please set env RANK_ID"; } return rank_id_str; } @@ -246,7 +232,10 @@ void AscendKernelRuntime::ReleaseDeviceRes() { #ifdef ENABLE_DEBUGGER if (debugger_ && debugger_->debugger_enabled()) { debugger_->SetTrainingDone(true); - debugger_->SendMetadata(false); + bool ret = debugger_->SendMetadata(false); + if (!ret) { + MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; + } } #endif if (!initialized_) { @@ -304,9 +293,7 @@ bool AscendKernelRuntime::Init() { MS_LOG(WARNING) << "Init ErrorManager failed."; } try { - OpTilingCalculater::GetInstance().Init(); // Start up profiling before rtSetDevice - bool ret = InitDevice(); if (!ret) { return ret; @@ -744,6 +731,7 @@ bool AscendKernelRuntime::SyncStream() { MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; return false; } + if (RT_ERROR_NONE != rtStreamSynchronize(communication_stream_)) { // o for switch stream MS_LOG(ERROR) << "Call runtime rtStreamSynchronize error."; return false; @@ -832,7 +820,6 @@ bool AscendKernelRuntime::ResetDevice(uint32_t device_id) { } stream_ = nullptr; } - if (communication_stream_ != nullptr) { ret = rtStreamDestroy(communication_stream_); if (ret != RT_ERROR_NONE) { @@ -840,7 +827,6 @@ bool AscendKernelRuntime::ResetDevice(uint32_t device_id) { } communication_stream_ = nullptr; } - ret = 
rtDeviceReset(device_id); if (ret != RT_ERROR_NONE) { MS_EXCEPTION(DeviceProcessError) << "Call rtDeviceReset, ret[" << ret << "]"; @@ -857,6 +843,19 @@ bool AscendKernelRuntime::HcclInit() { MS_LOG(EXCEPTION) << "Hccl dependent tsd is not open"; } MS_LOG(INFO) << "Do hcom init"; + bool is_task_sink = context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK); + auto mode = context_ptr->get_param(MS_CTX_EXECUTION_MODE); + if (!is_task_sink && mode == kGraphMode) { + hccl::HcclAdapter::GetInstance().InitHccl(); + std::vector ranks; + auto rank_size = HcclCollectiveGroup::instance().GetRankSize(); + for (size_t i = 0; i < IntToSize(rank_size); ++i) { + ranks.push_back(i); + } + HcclCollectiveGroup::instance().CreateCommGroup(kHcclWorldGroup, ranks); + return true; + } + auto config_path_str = std::getenv("MINDSPORE_HCCL_CONFIG_PATH"); if (config_path_str == nullptr) { config_path_str = std::getenv("RANK_TABLE_FILE"); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc index e1a773864c8..a2850cdc33d 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.cc @@ -1992,6 +1992,28 @@ CNodePtr AscendStreamAssign::CreateRecvApplyKernel(const NotNull return recv_node_ptr; } +bool AscendStreamAssign::IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node, + const CNodePtr &cur_node, bool exclude_hcom) { + MS_EXCEPTION_IF_NULL(nop_node); + auto cnode = nop_node->cast(); + auto new_inputs = cnode->inputs(); + for (size_t i = 1; i < new_inputs.size(); i++) { + if (opt::IsNopNode(new_inputs[i])) { + if (IsNopNodeTarget(new_inputs[i], target_node, cur_node, exclude_hcom)) { + return true; + } + } else { + auto new_real_input = AnfAlgo::VisitKernel(new_inputs[i], 0); + if (target_node == new_real_input.first) { + if (!(exclude_hcom && IsHcom(cur_node))) { + return true; + } + } + } + } + return false; +} + 
vector::iterator AscendStreamAssign::FindTargetOp(vector::iterator begin, vector::iterator end, const CNodePtr &node, bool exclude_hcom) { @@ -2000,18 +2022,8 @@ vector::iterator AscendStreamAssign::FindTargetOp(vector::it for (size_t i = 1; i < inputs.size(); i++) { auto input = inputs[i]; if (opt::IsNopNode(input)) { - CNodePtr cnode = input->cast(); - auto new_inputs = cnode->inputs(); - for (size_t j = 1; j < new_inputs.size(); j++) { - auto new_real_input = AnfAlgo::VisitKernel(new_inputs[j], 0); - // find target node except hcom op. insert event for hcom in:InsertEventHcomDependCommonBak function - // only insert one time - if (node == new_real_input.first) { - if (!(exclude_hcom && IsHcom(*begin))) { - MS_LOG(DEBUG) << "Nop node find target op[" << (*begin)->DebugString() << "]"; - return begin; - } - } + if (IsNopNodeTarget(input, node, *begin, exclude_hcom)) { + return begin; } } else { auto real_input = AnfAlgo::VisitKernel(input, 0); diff --git a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h index 8f7773e77bd..bfe55a440dc 100644 --- a/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h +++ b/mindspore/ccsrc/runtime/device/ascend/ascend_stream_assign.h @@ -175,7 +175,8 @@ class AscendStreamAssign { uint32_t GetIndexByKey(const NotNull &graph_ptr, const CNodeKey &key); uint32_t GetIndependentStreamSwitchStreamId(const NotNull &graph_ptr); void GetIndependentMaxTarget(const NotNull &graph_ptr); - + bool IsNopNodeTarget(const AnfNodePtr &nop_node, const CNodePtr &target_node, const CNodePtr &cur_node, + bool exclude_hcom); bool IsTaskSink(); bool IsHcom(const CNodePtr &cur_cnode_ptr); bool IsIndependentNode(const CNodePtr &node_ptr); diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc index 21fa2d4263c..c37be1a0f43 100644 --- 
a/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_core_dynamic_kernel.cc @@ -19,12 +19,12 @@ #include #include "framework/common/debug/log.h" #include "utils/log_adapter.h" -#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "register/op_tiling.h" #include "utils/convert_utils_base.h" #include "utils/ms_context.h" #include "runtime/device/kernel_runtime_manager.h" #include "pipeline/jit/static_analysis/static_analysis.h" +#include "runtime/device/ascend/executor/tiling/op_tiling_adapter.h" #include "common/trans.h" namespace mindspore { @@ -131,14 +131,17 @@ void AiCoreDynamicKernel::ComputeTiling() { auto cnode = cnode_ptr_.lock(); MS_EXCEPTION_IF_NULL(cnode); MS_LOG(INFO) << "Start compute tiling of:" << cnode->fullname_with_scope(); - optiling::OpRunInfo op_run_info; + // start compute tiling + optiling::utils::OpRunInfo op_run_info_v2(-1, true, 0); + tiling::OpTilingCalculateAdapter converter; + ge::ComputeGraphPtr ge_graph = std::make_shared("default"); + auto ge_node = converter.AnfNodeToGeNodeAdapter(cnode, &ge_graph, depend_tensor_map_); + (void)optiling::OpParaCalculateV2(*ge_node, op_run_info_v2); - OpTilingCalculater::GetInstance().CalculateTiling(NOT_NULL(cnode), op_compile_info_, depend_tensor_map_, - NOT_NULL(&op_run_info)); - block_dim_ = op_run_info.block_dim; - workspaces_size_ = op_run_info.workspaces; - tiling_data_ = op_run_info.tiling_data.str(); - tiling_key_ = op_run_info.tiling_key; + block_dim_ = op_run_info_v2.GetBlockDim(); + op_run_info_v2.GetAllWorkspaces(workspaces_size_); + tiling_data_ = op_run_info_v2.GetAllTilingData().str(); + tiling_key_ = op_run_info_v2.GetTilingKey(); } void AiCoreDynamicKernel::AllocateWorkspace() { diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc index 706c1dd46c3..c3b897ca391 100644 --- 
a/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/ai_cpu_dynamic_kernel.cc @@ -182,7 +182,7 @@ bool AiCpuDynamicKernel::UpdateOutputShapeFromExtInfo() { MS_LOG(INFO) << "Get output:" << output_num_ << " Shape"; std::vector shape; TypeId type_id; - ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id)); + (void)ext_info_handler_->GetOutputShapeAndType(SizeToUint(i), NOT_NULL(&shape), NOT_NULL(&type_id)); for (auto x : shape) { MS_LOG(INFO) << "Update output:" << i << " shape:" << x; diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc index f7ebd026df5..5d22d300520 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc +++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.cc @@ -138,10 +138,14 @@ void OpTilingCalculateAdapter::ConvertOutputShapeAndType(const CNodePtr &node, g void OpTilingCalculateAdapter::ConvertCompileInfo(const CNodePtr &node, ge::OpDescPtr *op_desc) { MS_EXCEPTION_IF_NULL(node); MS_EXCEPTION_IF_NULL(*op_desc); - MS_LOG(INFO) << "For op " << op_name_ << ", get compile_info: " << op_compile_info_; - std::string compile_info_key = std::to_string(std::hash()(op_compile_info_)); + if (!AnfAlgo::HasNodeAttr(kAttrCompileInfo, node)) { + MS_LOG(EXCEPTION) << "Get compile_info failed"; + } + auto compile_info_attr = AnfAlgo::GetNodeAttr(node, kAttrCompileInfo); + MS_LOG(INFO) << "For op " << op_name_ << ", get compile_info: " << compile_info_attr; + std::string compile_info_key = std::to_string(std::hash()(compile_info_attr)); (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_KEY, compile_info_key); - (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_JSON, op_compile_info_); + (void)ge::AttrUtils::SetStr(*(*op_desc), COMPILE_INFO_JSON, compile_info_attr); } 
ge::NodePtr OpTilingCalculateAdapter::NewConstantOp(const CNodePtr &node, const std::string &name, @@ -265,11 +269,9 @@ void OpTilingCalculateAdapter::InitOpIoName(const CNodePtr &node) { } ge::NodePtr OpTilingCalculateAdapter::AnfNodeToGeNodeAdapter( - const CNodePtr &node, ge::ComputeGraphPtr *ge_graph, const std::map &depend_tensor_map, - const std::string &op_compile_info) { + const CNodePtr &node, ge::ComputeGraphPtr *ge_graph, const std::map &depend_tensor_map) { MS_EXCEPTION_IF_NULL(node); op_name_ = AnfAlgo::GetCNodeName(node); - op_compile_info_ = op_compile_info; auto op_type = GetRealOpType(op_name_); (void)InitOpIoName(node); ge::OpDescPtr op_desc = std::make_shared(op_name_, op_type); diff --git a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h index 5c92c2bfc0d..9dbfd7ab8ca 100644 --- a/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h +++ b/mindspore/ccsrc/runtime/device/ascend/executor/tiling/op_tiling_adapter.h @@ -37,8 +37,7 @@ class OpTilingCalculateAdapter { ~OpTilingCalculateAdapter() = default; ge::NodePtr AnfNodeToGeNodeAdapter(const CNodePtr &node, ge::ComputeGraphPtr *ge_graph, - const std::map &depend_tensor_map, - const std::string &op_compile_info); + const std::map &depend_tensor_map); private: void ConvertInputShapeAndType(const CNodePtr &node, ge::OpDescPtr *op_desc); @@ -56,7 +55,6 @@ class OpTilingCalculateAdapter { std::string GetOutputName(const CNodePtr &node, size_t index); void InitOpIoName(const CNodePtr &node); std::string op_name_; - std::string op_compile_info_; std::vector input_names_; std::vector output_names_; }; diff --git a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc index 48725ce7f6d..60ff6bc8b2f 100644 --- a/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc +++ 
b/mindspore/ccsrc/runtime/device/ascend/kernel_select_ascend.cc @@ -525,7 +525,7 @@ KernelSelectStatus SelectKernelInfo(const CNodePtr &kernel_node, KernelType kern } void SetKernelInfo(const CNodePtr &kernel_node, KernelType kernel_type) { - auto kernel_info = static_cast(kernel_node->kernel_info()); + auto kernel_info = dynamic_cast(kernel_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto kernel_build_info = kernel_info->select_kernel_build_info(); MS_EXCEPTION_IF_NULL(kernel_build_info); diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h index ace8c4631d3..61f9b268c05 100644 --- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_callback_register.h @@ -18,10 +18,10 @@ #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_PROFILING_PROFILING_CALLBACK_REGISTER_H_ #include "toolchain/prof_callback.h" +#include "toolchain/prof_acl_api.h" #define MAX_DEV_NUM (64) -using Status = uint32_t; enum ProfCommandHandleType { kProfCommandhandleInit = 0, kProfCommandhandleStart, diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc index 0d33fa4219e..666d266bc74 100644 --- a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.cc @@ -24,7 +24,6 @@ #include "utils/ms_utils.h" #include "utils/convert_utils.h" #include "runtime/base.h" -#include "toolchain/prof_acl_api.h" #include "runtime/device/ascend/profiling/profiling_callback_register.h" #include diff --git a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h index 0ca8d7971a2..d6b57f373b9 100644 --- 
a/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h +++ b/mindspore/ccsrc/runtime/device/ascend/profiling/profiling_manager.h @@ -24,11 +24,11 @@ #include "utils/contract.h" #include "utils/ms_context.h" #include "toolchain/prof_callback.h" +#include "toolchain/prof_acl_api.h" #include "runtime/device/ascend/profiling/profiling_callback_register.h" using std::map; using std::string; -using Status = uint32_t; namespace mindspore { namespace device { namespace ascend { diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc index 96c51cd1c34..d096401288f 100644 --- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.cc @@ -31,6 +31,8 @@ namespace cpu { using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm; using mindspore::kernel::KernelBuildInfo; namespace { +constexpr auto kParamDynamic = "dynamic"; + bool IsInputNotCNode(const CNodePtr &kernel_node, size_t input_index) { auto input_node = AnfAlgo::VisitKernel(kernel_node->input(input_index + 1), 0).first; MS_EXCEPTION_IF_NULL(input_node); @@ -66,6 +68,13 @@ void GetOutputDtypes(const CNodePtr &kernel_node, std::vector *output_ty } } +void GetOutputFormat(const CNodePtr &kernel_node, std::vector *output_formats) { + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + for (size_t output_index = 0; output_index < output_num; ++output_index) { + output_formats->emplace_back(kOpFormat_DEFAULT); + } +} + void GetInputDtypes(const CNodePtr &kernel_node, std::vector *input_types, std::vector *input_no_cnode_indexes) { size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); @@ -81,6 +90,13 @@ void GetInputDtypes(const CNodePtr &kernel_node, std::vector *input_type } } +void GetInputFormat(const CNodePtr &kernel_node, std::vector *input_formats) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + for (size_t input_index = 0; input_index < 
input_num; ++input_index) { + input_formats->emplace_back(kOpFormat_DEFAULT); + } +} + void GetOutputFormatsAndDtypes(const CNodePtr &kernel_node, const KernelAttr &kernel_attr, std::vector *output_formats, std::vector *output_types) { size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); @@ -200,7 +216,57 @@ void KernelNotSupportException(const AnfNodePtr &kernel_node, const std::vector< operator_info << "is not support."; MS_EXCEPTION(TypeError) << operator_info.str() << " Trace: " << trace::DumpSourceLines(kernel_node); } + +void UpdateDynamicKernelBuildInfoAndAttrs(const CNodePtr &kernel_node) { + const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node); + MS_LOG(INFO) << "Operator name: " << op_name; + // Set kernel build info + std::vector input_types; + std::vector input_not_cnode_indexes; + GetInputDtypes(kernel_node, &input_types, &input_not_cnode_indexes); + std::vector output_types; + GetOutputDtypes(kernel_node, &output_types); + std::vector input_formats; + GetInputFormat(kernel_node, &input_formats); + std::vector output_formats; + GetOutputFormat(kernel_node, &output_formats); + SetKernelBuildInfo(input_formats, input_types, output_formats, output_types, kernel_node.get()); + + // Set kernel attrs + KernelAttr attr; + for (size_t i = 0; i < input_types.size(); i++) { + attr.AddInputAttr(input_types[i]); + } + for (size_t j = 0; j < output_types.size(); j++) { + attr.AddInputAttr(output_types[j]); + } + std::vector kernel_attrs = + kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node)); + kernel_attrs.emplace_back(attr); + kernel::CPUKernelFactory::GetInstance().UpdateKernelAttrs(op_name, kernel_attrs); + return; +} } // namespace + +bool IsDynamicParamKernel(const std::string &op_name) { + const auto &op_info = kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU); + if (op_info == nullptr) { + return false; + } + + const auto &input_io_info = op_info->inputs_ptr(); + if 
(input_io_info.size() != 1 || input_io_info[0]->param_type() != kParamDynamic) { + return false; + } + + const auto &output_io_info = op_info->outputs_ptr(); + if (output_io_info.size() != 1 || output_io_info[0]->param_type() != kParamDynamic) { + return false; + } + + return true; +} + bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, const std::vector &kernel_attrs, const std::vector &input_types, const std::vector &input_not_cnode_indexes, const std::vector &output_types, @@ -229,7 +295,14 @@ bool SelectKernel(const CNodePtr &kernel_node, KernelAttr *selected_kernel_attr, } return false; } + void SetKernelInfo(const CNodePtr &kernel_node) { + // Select for dynamic kernel(both the number and data type are undetermined). + const std::string &op_name = AnfAlgo::GetCNodeName(kernel_node); + if (IsDynamicParamKernel(op_name)) { + return UpdateDynamicKernelBuildInfoAndAttrs(kernel_node); + } + std::vector input_formats; std::vector input_types; std::vector input_not_cnode_indexes; @@ -241,7 +314,6 @@ void SetKernelInfo(const CNodePtr &kernel_node) { kernel::CPUKernelFactory::GetInstance().GetSupportedKernelAttrList(AnfAlgo::GetCNodeName(kernel_node)); if (kernel_attrs.empty() || (kernel_attrs[0].GetInputSize() == 0 && kernel_attrs[0].GetOutputSize() == 0)) { MS_LOG(DEBUG) << "Operator[" << AnfAlgo::GetCNodeName(kernel_node) << "] will get ops attr info."; - std::string op_name = AnfAlgo::GetCNodeName(kernel_node); auto op_info_ptr = mindspore::kernel::OpLib::FindOp(op_name, kernel::OpImplyType::kCPU); if (op_info_ptr == nullptr) { MS_LOG(EXCEPTION) << "Not find op[" << op_name << "] in cpu"; diff --git a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h index 9fd5c55b7d5..867676cd85e 100644 --- a/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h +++ b/mindspore/ccsrc/runtime/device/cpu/kernel_select_cpu.h @@ -29,6 +29,8 @@ namespace mindspore { namespace device { 
namespace cpu { void SetKernelInfo(const CNodePtr &apply_kernel_ptr); +// Indicate whether the kernel input/output number are variable. +bool IsDynamicParamKernel(const std::string &op_name); class KernelAttr { public: diff --git a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h index ebb97f0866b..5c9275e36c1 100644 --- a/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h +++ b/mindspore/ccsrc/runtime/device/gpu/blocking_queue.h @@ -30,7 +30,7 @@ namespace mindspore { namespace device { -enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_NOT_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT }; +enum BlockQueueStatus_T : int { SUCCESS = 0, QUEUE_EXIST, HANDLE_NOT_EXIST, ERROR_INPUT, INTERNAL_ERROR, TIMEOUT }; struct DataItemGpu { int32_t worker_id_; diff --git a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc index 9186488945c..e162ffd157f 100644 --- a/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc +++ b/mindspore/ccsrc/runtime/device/gpu/cuda_env_checker.cc @@ -54,6 +54,10 @@ bool CudaEnvChecker::CheckNvccInPath() { } void CudaEnvChecker::GetRealPaths(std::set *paths) const { + if (paths == nullptr) { + MS_LOG(ERROR) << "The pointer paths is nullptr"; + return; + } auto env_paths_ptr = std::getenv(kPathEnv); if (env_paths_ptr == nullptr) { MS_LOG(ERROR) << "Please export environment variable PATH"; diff --git a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc index eac50cb9369..3c4745cb0b5 100644 --- a/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc +++ b/mindspore/ccsrc/runtime/device/gpu/distribution/mpi_wrapper.cc @@ -55,6 +55,9 @@ bool MPIWrapper::CreateCommGroup(const std::string &group_name, const std::vecto } ncclUniqueId group_unique_id; + if (ranks.size() == 0) { + return false; + } if (rank_id_ == ranks[0]) { 
group_unique_id = NCCLWrapper::instance().nccl_unique_id(); } @@ -138,9 +141,10 @@ void MPIWrapper::AssignLocalRankID() { const int kRankSize = rank_size_; size_t all_host_hashs[kRankSize]; + CHECK_RET((rank_id_ < kRankSize), true, "The rank id is not less than rank size."); all_host_hashs[rank_id_] = host_hash; CHECK_RET(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs, sizeof(size_t), MPI_BYTE, MPI_COMM_WORLD), - MPI_SUCCESS, "MPI_Allgather host hashs failed."); + MPI_SUCCESS, "MPI_Allgather host hashes failed."); for (int global_rank = 0; global_rank < kRankSize; global_rank++) { if (global_rank == rank_id_) { break; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc index 947490dfde9..02f0bf2fcb6 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_buffer_mgr.cc @@ -52,8 +52,8 @@ BlockQueueStatus_T GpuBufferMgr::Create(unsigned int device_id, const std::strin const std::vector &shape, const size_t &capacity) { std::string name = std::to_string(device_id) + std::string("_") + channel_name; if (name_queue_map_.count(name)) { - MS_LOG(ERROR) << "Queue not exist " << name; - return QUEUE_NOT_EXIST; + MS_LOG(ERROR) << "Queue already exist: " << name; + return QUEUE_EXIST; } std::shared_ptr queue = std::make_shared(); BlockQueueStatus_T rt = queue->Create(addr, shape, capacity); @@ -205,6 +205,10 @@ size_t GpuBufferMgr::Size(unsigned int handle) { MS_LOG(ERROR) << "handle is invalid"; return 0; } + if (handle_queue_map_.count(handle) == 0) { + MS_LOG(ERROR) << "Handle not exist " << handle; + return 0; + } return handle_queue_map_.at(handle)->Size(); } @@ -222,6 +226,10 @@ size_t GpuBufferMgr::Capacity(unsigned int handle) { MS_LOG(ERROR) << "handle is invalid"; return 0; } + if (handle_queue_map_.count(handle) == 0) { + MS_LOG(ERROR) << "Handle not exist " << handle; + return 0; + } return 
handle_queue_map_.at(handle)->Capacity(); } diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc index eed333d7a17..604ca05328c 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_device_address.cc @@ -135,6 +135,7 @@ bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int executi return true; } + MS_EXCEPTION_IF_NULL(Debugger::GetInstance()); if (Debugger::GetInstance()->TensorExistsInCurrent(tensor_name)) { MS_LOG(INFO) << tensor_name << " already loaded for this step so not loading it again."; return true; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc index 5be77aef128..b176799dfae 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_build.cc @@ -33,6 +33,7 @@ void CreateGPUKernel(const std::vector &kernels) { bool already_check_nvcc = false; std::vector akg_nodes; for (const auto &kernel : kernels) { + MS_EXCEPTION_IF_NULL(kernel); std::string kernel_name = session::AnfRuntimeAlgorithm::GetCNodeName(kernel); if (kernel_name == prim::kPrimTupleGetItem->name() || kernel_name == prim::kPrimMakeTuple->name() || kernel_name == prim::kPrimDepend->name() || kernel_name == prim::kPrimStateSetItem->name()) { @@ -41,8 +42,7 @@ void CreateGPUKernel(const std::vector &kernels) { if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) == KernelType::AKG_KERNEL) { if (!bin_map->initialized()) { - auto pid = mindspore::kernel::GpuKernelBuildClient::Instance().AkgGetPid(); - bin_map->Initialize(pid); + bin_map->Initialize(); } if (!already_check_nvcc) { already_check_nvcc = true; diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc index e56bdcfa5ad..a6ba90f0ee4 100644 --- 
a/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_kernel_runtime.cc @@ -124,6 +124,8 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, const std::vector &kernel_workspaces, const std::vector &kernel_outputs, int exec_order, void *stream_ptr, bool dump_enabled, bool last_kernel) { + MS_EXCEPTION_IF_NULL(debugger); + MS_EXCEPTION_IF_NULL(kernel); // check if we should read the kernel data bool read_data = false; auto &dump_json_parser = DumpJsonParser::GetInstance(); @@ -147,6 +149,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, auto input_size = AnfAlgo::GetInputTensorNum(kernel); for (size_t j = 0; j < input_size; ++j) { auto input_kernel = kernel->input(j + 1); + MS_EXCEPTION_IF_NULL(input_kernel); std::string input_kernel_name = input_kernel->fullname_with_scope(); auto addr = kernel_inputs[j]; auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX); @@ -155,6 +158,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, continue; } auto format = kOpFormat_DEFAULT; + MS_EXCEPTION_IF_NULL(addr); auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); string input_tensor_name = input_kernel_name + ':' + "0"; ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX); @@ -181,6 +185,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel, continue; } auto format = kOpFormat_DEFAULT; + MS_EXCEPTION_IF_NULL(addr); auto gpu_addr = std::make_unique(addr->addr, addr->size, format, type); string tensor_name = kernel_name + ':' + std::to_string(j); ShapeVector int_shapes = trans::GetRuntimePaddingShape(kernel, j); @@ -246,7 +251,10 @@ void GPUKernelRuntime::ReleaseDeviceRes() { #ifdef ENABLE_DEBUGGER if (debugger_ && debugger_->debugger_enabled()) { debugger_->SetTrainingDone(true); - debugger_->SendMetadata(false); + bool ret = debugger_->SendMetadata(false); + if (!ret) { + 
MS_LOG(ERROR) << "Failed to SendMetadata when finalize"; + } } #endif if (GpuBufferMgr::GetInstance().IsInit()) { @@ -272,14 +280,6 @@ void GPUKernelRuntime::ReleaseDeviceRes() { if (mem_manager_ != nullptr) { mem_manager_->FreeDeviceMemory(); } - - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { - kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - MS_EXCEPTION_IF_NULL(bin_map); - bin_map->RemoveKernelCache(); - } } void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::vector &inputs, @@ -292,6 +292,7 @@ void GPUKernelRuntime::ClearGraphRuntimeResource(uint32_t graph_id, const std::v } void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); if (is_alloc_inplace_res_[graph->graph_id()]) { return; } @@ -304,6 +305,7 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) continue; } auto primitive = AnfAlgo::GetCNodePrimitive(kernel); + MS_EXCEPTION_IF_NULL(primitive); auto group_attr = primitive->GetAttr("inplace_group"); MS_EXCEPTION_IF_NULL(group_attr); auto group_id = GetValue(group_attr); @@ -318,14 +320,18 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) } auto primitive = AnfAlgo::GetCNodePrimitive(item[0]); + MS_EXCEPTION_IF_NULL(primitive); auto output_index = GetValue(primitive->GetAttr("inplace_output_index")); auto device_address = GetMutableOutputAddr(item[0], output_index, false); + MS_EXCEPTION_IF_NULL(device_address); if (device_address->GetPtr() != nullptr) { continue; } auto kernel_mod = AnfAlgo::GetKernelMod(item[0]); + MS_EXCEPTION_IF_NULL(kernel_mod); auto output_size = kernel_mod->GetOutputSizeList(); + MS_EXCEPTION_IF_NULL(mem_manager_); auto ret = mem_manager_->MallocMemFromMemPool(device_address, output_size[output_index]); if (!ret) { MS_LOG(EXCEPTION) << "Device memory isn't enough and 
alloc failed, alloc size:" << output_size[output_index]; @@ -333,6 +339,7 @@ void GPUKernelRuntime::AllocInplaceNodeMemory(const session::KernelGraph *graph) for (auto &node : item) { auto prim = AnfAlgo::GetCNodePrimitive(node); + MS_EXCEPTION_IF_NULL(prim); auto index = GetValue(prim->GetAttr("inplace_output_index")); AnfAlgo::SetOutputAddr(device_address, index, node.get()); } @@ -486,6 +493,7 @@ std::shared_ptr GPUKernelRuntime::CreateDeviceEvent() { } bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); auto graph_id = graph->graph_id(); if (!is_first_step_map_[graph_id] || graph->is_dynamic_shape()) { // Normally run graph @@ -508,6 +516,8 @@ bool GPUKernelRuntime::RunOneStep(const session::KernelGraph *graph) { } bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); MS_LOG(INFO) << "Run out of memory and try memory swapping, it may take some time, please wait a moment."; bool ret = false; ClearKernelOldOutputAndWorkspace(graph); @@ -538,6 +548,8 @@ bool GPUKernelRuntime::SearchMemSwapScheme(const session::KernelGraph *graph) { } bool GPUKernelRuntime::RefineMemSwapScheme(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_swap_manager_); MS_LOG(INFO) << "Refine memory swap scheme, it may take some time, please wait a moment."; auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { @@ -650,6 +662,7 @@ void GPUKernelRuntime::ClearKernelOldOutputAndWorkspace(const session::KernelGra void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { if (IsGraphOutput(graph, kernel)) { @@ -674,6 +687,7 @@ void GPUKernelRuntime::ClearKernelOutputAddress(const session::KernelGraph *grap void 
GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *graph) { MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(mem_manager_); auto &kernels = graph->execution_order(); for (const auto &kernel : kernels) { auto kernel_mod = AnfAlgo::GetKernelMod(kernel); @@ -690,6 +704,7 @@ void GPUKernelRuntime::ClearKernelWorkspaceAddress(const session::KernelGraph *g } CNodePtr GetLastKernel(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); const auto &kernels = graph->execution_order(); CNodePtr last_kernel; for (const auto &kernel : kernels) { @@ -735,6 +750,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo kernel::GpuKernel *gpu_kernel = nullptr; if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) { gpu_kernel = dynamic_cast(kernel_mod); + MS_EXCEPTION_IF_NULL(gpu_kernel); dynamic_kernel = gpu_kernel->DynamicKernel(); } @@ -749,6 +765,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo auto ret = AllocKernelDynamicRes(*kernel_mod, kernel, &kernel_inputs, &kernel_workspaces, &kernel_outputs, mock); if (!ret) { if (!mock) { + MS_EXCEPTION_IF_NULL(debugger_); // invalidate current data collected by the debugger debugger_->ClearCurrentData(); } @@ -796,6 +813,9 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, bo void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph, const AnfNodePtr &kernel, const AddressPtrList &inputs, const AddressPtrList &workspaces, const AddressPtrList &outputs, bool profiling) { + MS_EXCEPTION_IF_NULL(graph); + MS_EXCEPTION_IF_NULL(kernel); + auto profiler_inst = profiler::gpu::GPUProfiler::GetInstance(); MS_EXCEPTION_IF_NULL(profiler_inst); @@ -810,6 +830,7 @@ void GPUKernelRuntime::LaunchKernelWithoutMock(const session::KernelGraph *graph profiler_inst->OpDataProducerBegin(kernel->fullname_with_scope(), stream_); } auto kernel_mod = 
AnfAlgo::GetKernelMod(kernel); + MS_EXCEPTION_IF_NULL(kernel_mod); if (!kernel_mod->Launch(inputs, workspaces, outputs, stream_)) { MS_LOG(EXCEPTION) << "Launch kernel failed: " << kernel->fullname_with_scope(); } @@ -836,6 +857,7 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap kernel::GpuKernel *gpu_kernel = nullptr; if (session::AnfRuntimeAlgorithm::GetKernelType(kernel) != KernelType::AKG_KERNEL) { gpu_kernel = dynamic_cast(kernel_mod); + MS_EXCEPTION_IF_NULL(gpu_kernel); dynamic_kernel = gpu_kernel->DynamicKernel(); } // pre-processing for dynamic shape kernel @@ -862,6 +884,7 @@ bool GPUKernelRuntime::RunOpLaunchKernelDynamic(const session::KernelGraph *grap void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, const AddressPtrList &inputs, const AddressPtrList &workspace, const AddressPtrList &outputs) { + MS_EXCEPTION_IF_NULL(mem_swap_manager_); auto kernel_mod = AnfAlgo::GetKernelMod(kernel); MS_EXCEPTION_IF_NULL(kernel_mod); float cost_time = 0; @@ -886,6 +909,7 @@ void GPUKernelRuntime::LaunchKernelWithTimeProfiling(const AnfNodePtr &kernel, c bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bool profiling) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); + MS_EXCEPTION_IF_NULL(mem_manager_); const MemSwapInfoSet &mem_swap_info_set = mem_swap_manager_->QueryKernelMemSwapInfo(kernel); for (auto &mem_swap_info : mem_swap_info_set) { auto need_swap_kernel = mem_swap_manager_->QueryKernelByTopoOrder(mem_swap_info.topo_order_); @@ -893,6 +917,7 @@ bool GPUKernelRuntime::AddMemorySwapTask(const AnfNodePtr &kernel, bool mock, bo const HostAddress &host_address = mem_swap_manager_->QueryKernelHostAddr(need_swap_kernel, mem_swap_info.output_idx_); auto device_address = GetMutableOutputAddr(need_swap_kernel, mem_swap_info.output_idx_, false); + MS_EXCEPTION_IF_NULL(device_address); if (mem_swap_info.swap_kind_ == SwapKind::kDeviceToHost) { if 
(mem_swap_manager_->QueryKernelHostAddrIsDirty(need_swap_kernel, mem_swap_info.output_idx_)) { @@ -943,6 +968,7 @@ bool GPUKernelRuntime::UpdateMemorySwapTask(const AnfNodePtr &kernel, bool mock, } void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_address, bool mock) { + MS_EXCEPTION_IF_NULL(device_address); MS_EXCEPTION_IF_NULL(mem_swap_manager_); if (!mem_swap_manager_->trigger_swap()) { return; @@ -977,6 +1003,7 @@ void GPUKernelRuntime::UpdateHostSwapInQueue(const DeviceAddressPtr device_addre void GPUKernelRuntime::UpdateHostSwapOutQueue(bool mock) { MS_EXCEPTION_IF_NULL(mem_swap_manager_); + MS_EXCEPTION_IF_NULL(mem_manager_); if (!mem_swap_manager_->trigger_swap()) { return; } @@ -1059,6 +1086,7 @@ bool GPUKernelRuntime::AllocKernelInputDynamicRes(const mindspore::AnfNodePtr &k // Get in-place output_address if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) { auto primitive = AnfAlgo::GetCNodePrimitive(kernel); + MS_EXCEPTION_IF_NULL(primitive); auto input_index = GetValue(primitive->GetAttr("aggregate_input_index")); if (i == input_index) { auto skip_node = AnfAlgo::GetInputNode(utils::cast(kernel), input_index); @@ -1115,6 +1143,7 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K continue; } auto device_address = AnfAlgo::GetMutableWorkspaceAddr(kernel, i); + MS_EXCEPTION_IF_NULL(device_address); if (device_address->ptr_ == nullptr && !AttemptMallocMem(device_address, workspace_sizes[i], mock)) { return false; } @@ -1128,12 +1157,12 @@ bool GPUKernelRuntime::AllocKernelWorkspaceDynamicRes(const mindspore::kernel::K } void GPUKernelRuntime::AllocCommunicationOpDynamicRes(const session::KernelGraph *graph) { + MS_EXCEPTION_IF_NULL(graph); if (is_alloc_communication_res_[graph->graph_id()]) { return; } is_alloc_communication_res_[graph->graph_id()] = true; - MS_EXCEPTION_IF_NULL(graph); auto &kernels = graph->execution_order(); for (auto &kernel : kernels) { MS_EXCEPTION_IF_NULL(kernel); @@ -1226,6 
+1255,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) for (size_t i = 0; i < input_num; ++i) { if (AnfAlgo::IsInplaceNode(kernel, "aggregate")) { auto primitive = AnfAlgo::GetCNodePrimitive(kernel); + MS_EXCEPTION_IF_NULL(primitive); auto index = GetValue(primitive->GetAttr("aggregate_input_index")); if (i == index) { continue; @@ -1250,6 +1280,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) device_address = GetPrevNodeMutableOutputAddr(kernel, i, true); } mem_manager_->FreeMemFromMemPool(device_address); + MS_EXCEPTION_IF_NULL(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } } @@ -1262,6 +1293,7 @@ void GPUKernelRuntime::FreeKernelDynamicRes(const mindspore::AnfNodePtr &kernel) } if (kernel_ref_count_ptr->ref_count_dynamic_use_ == 0) { auto device_address = GetMutableOutputAddr(kernel, i, false); + MS_EXCEPTION_IF_NULL(device_address); mem_manager_->FreeMemFromMemPool(device_address); device_address->set_status(DeviceAddressStatus::kInDevice); } @@ -1296,7 +1328,7 @@ DeviceAddressPtr GPUKernelRuntime::GetPrevNodeMutableOutputAddr(const AnfNodePtr } session::KernelWithIndex prev_node_with_index = addr_iter->second[i]; - auto kernel_info = static_cast(prev_node_with_index.first->kernel_info()); + auto kernel_info = dynamic_cast(prev_node_with_index.first->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto addr = kernel_info->GetMutableOutputAddr(prev_node_with_index.second); diff --git a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc index 574ddca14d7..60f06c5733a 100644 --- a/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc +++ b/mindspore/ccsrc/runtime/device/gpu/gpu_stream_assign.cc @@ -61,6 +61,8 @@ void AssignGpuStream(const std::shared_ptr &kernel_graph) bool FindAllReduceStreamSwitchPos(const std::shared_ptr &kernel_graph, std::vector *send_recv_pairs) { + 
MS_EXCEPTION_IF_NULL(kernel_graph); + MS_EXCEPTION_IF_NULL(send_recv_pairs); auto execution_kernels = kernel_graph->execution_order(); std::vector::iterator iter, iter_begin; iter = iter_begin = execution_kernels.begin(); @@ -126,6 +128,7 @@ std::vector::iterator FindRecvNodePos(std::vector::iterator for (auto iter = begin; iter != end; iter++) { auto node = *iter; if (stream_switch_type == kAllReduceStreamSwitch) { + MS_EXCEPTION_IF_NULL(node); for (auto input : node->inputs()) { if (mock_send_node == AnfAlgo::VisitKernel(input, 0).first) { if (AnfAlgo::GetCNodeName(node) != kAllReduceOpName) { @@ -142,6 +145,7 @@ std::vector::iterator FindRecvNodePos(std::vector::iterator void InsertStreamSwitchNode(const std::shared_ptr &kernel_graph, const std::vector &send_recv_pairs) { + MS_EXCEPTION_IF_NULL(kernel_graph); std::set ordered_stream_switch_nodes; for (SendRecvPair pair : send_recv_pairs) { StreamSwitchType stream_switch_type = pair.stream_switch_type; @@ -194,6 +198,7 @@ bool GenSendRecvCNodesForAllReduce(const std::shared_ptr & } CNodePtr CreateStreamSwitchNode(const std::shared_ptr &kernel_graph, const std::string &name) { + MS_EXCEPTION_IF_NULL(kernel_graph); auto op = std::make_shared(name); MS_EXCEPTION_IF_NULL(op); auto apply = std::make_shared(op); diff --git a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc index 46b99a7766c..36a5271cfcd 100644 --- a/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc +++ b/mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc @@ -242,6 +242,9 @@ bool IsNeedProcessFormatInfo(const CNodePtr &kernel_node, const std::vector &inputs_type, std::vector *inputs_format, std::vector *outputs_format, std::string *origin_data_format) { + MS_EXCEPTION_IF_NULL(kernel_node); + MS_EXCEPTION_IF_NULL(inputs_format); + MS_EXCEPTION_IF_NULL(outputs_format); auto kernel_name = AnfAlgo::GetCNodeName(kernel_node); auto iter = 
kKernelFormatPositionMap.find(kernel_name); if (iter == kKernelFormatPositionMap.end()) { @@ -351,6 +354,7 @@ void PrintUnsupportedTypeException(const CNodePtr &kernel_node, const std::vecto } // namespace void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptr &kernel_graph) { + MS_EXCEPTION_IF_NULL(kernel_graph); // TensorCore can be used only in Volta or newer devices. const int marjor_sm = GET_MAJOR_SM; if (marjor_sm < RECOMMEND_SM) { @@ -387,6 +391,7 @@ void FormatTransformChecker::CheckSupportFormatTransform(const std::shared_ptrinputs().size() - 1; for (size_t k = 0; k < input_size; ++k) { - auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0); + auto kernel_index = AnfAlgo::VisitKernelWithReturnType(AnfAlgo::GetInputNode(child, k), 0, true); if (kernel_index.first == kernel) { found_nearest_child = true; break; @@ -617,7 +620,6 @@ void KernelRuntime::AssignCommunicationNodeInputMem(MemType type, const AnfNodeP if (addr_size.empty()) { return; } - if (type == kSomasReuseDynamicMem) { bool not_reuse = KernelMemNotReuse(node); if (not_reuse) { @@ -695,7 +697,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const std::vector tensors; TensorValueToTensor(node_value, &tensors); // Graph id should be passed to record static memory if profiling is enabled. 
- auto kernel_info = static_cast(value_node->kernel_info()); + auto kernel_info = dynamic_cast(value_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); uint32_t graph_id = kernel_info->graph_id(); for (const auto &tensor : tensors) { @@ -709,7 +711,7 @@ void KernelRuntime::AssignValueNodeTensor(const ValueNodePtr &value_node, const value_node.get()); continue; } - size_t tensor_size = tensor->data().nbytes(); + size_t tensor_size = LongToSize(tensor->data().nbytes()); auto node_size = AnfAlgo::GetOutputTensorMemSize(value_node, output_idx); TypeId output_type_id = AnfAlgo::GetOutputDeviceDataType(value_node, output_idx); if (output_type_id == kTypeUnknown) { diff --git a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc index afd9f03e5c4..d525045a003 100644 --- a/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/debug_actor.cc @@ -29,7 +29,6 @@ namespace mindspore { namespace runtime { - void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_, const DeviceContext *device_context, OpContext *const op_context, const AID *from_aid) { diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc index fe867d82e30..84996aa42fc 100644 --- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.cc @@ -75,7 +75,7 @@ void GatherActor::RunOpControl(AID *input_control, OpContext *cont } } -void GatherActor::CollectBranchId(const int branch_id, OpContext *context) { +void GatherActor::CollectBranchId(const int branch_id, OpContext *const context) { MS_EXCEPTION_IF_NULL(context); auto &sequential_num = context->sequential_num_; input_branch_ids_[sequential_num] = branch_id; @@ -97,7 +97,7 @@ void GatherActor::FetchBackendInputNode(const FuncGraphPtr &func_graph, const Co } } -void 
GatherActor::SendOutput(OpContext *context) const { +void GatherActor::SendOutput(OpContext *const context) const { MS_EXCEPTION_IF_NULL(context); // Must be the execution order: send branch id --> send result --> send data --> send control, avoid the illegal // timing problem. @@ -138,7 +138,7 @@ void GatherActor::SendOutput(OpContext *context) const { } } -void GatherActor::FetchInputDeviceTensor(OpContext *context) { +void GatherActor::FetchInputDeviceTensor(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); auto data_iter = input_data_.find(context->sequential_num_); if (data_iter != input_data_.end()) { @@ -175,7 +175,7 @@ void GatherActor::FetchInputDeviceTensor(OpContext *context) { } } -bool GatherActor::CheckLaunchCondition(OpContext *context) const { +bool GatherActor::CheckLaunchCondition(OpContext *const context) const { MS_EXCEPTION_IF_NULL(context); // Fetch input data. @@ -214,7 +214,7 @@ bool GatherActor::CheckLaunchCondition(OpContext *context) const { return true; } -void GatherActor::EraseInput(OpContext *context) { +void GatherActor::EraseInput(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); // Erase input data. diff --git a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h index 3a0f45de737..e446ca59e8c 100644 --- a/mindspore/ccsrc/runtime/framework/actor/gather_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/gather_actor.h @@ -67,7 +67,7 @@ class GatherActor : public OpActor { // The gather actor run when receive the input control. void RunOpControl(AID *input_control, OpContext *context) override; // The gather actor run when receive the input branch id. - void CollectBranchId(const int branch_id, OpContext *context); + void CollectBranchId(const int branch_id, OpContext *const context); void Init() override; private: @@ -75,12 +75,12 @@ class GatherActor : public OpActor { // Collect the inputs of gather actor. 
void FetchBackendInputNode(const FuncGraphPtr &func_graph, const ControlNodeParserPtr &parser); - void FetchInputDeviceTensor(OpContext *context); + void FetchInputDeviceTensor(OpContext *const context); // Check whether satisfy the condition for launch. - bool CheckLaunchCondition(OpContext *context) const; - void SendOutput(OpContext *context) const; + bool CheckLaunchCondition(OpContext *const context) const; + void SendOutput(OpContext *const context) const; // Erase input data and input controls when finish gather launch. - void EraseInput(OpContext *context); + void EraseInput(OpContext *const context); // The device tensors for launch. std::vector input_device_tensors_; diff --git a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc index 9754a5a8fac..cae678fa23d 100644 --- a/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/kernel_actor.cc @@ -30,7 +30,7 @@ void KernelActor::Init() { MS_EXCEPTION_IF_NULL(kernel_); real_input_num_ = AnfAlgo::GetInputTensorNum(kernel_); - kernel_info_ = static_cast(kernel_->kernel_info()); + kernel_info_ = dynamic_cast(kernel_->kernel_info()); is_dynamic_shape_ = AnfAlgo::IsDynamicShape(kernel_); // Init the device tensors and kernel launch info. 
diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc index 26753a2a02b..30527331fda 100644 --- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc +++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.cc @@ -66,7 +66,7 @@ void SwitchActor::RunOpControl(AID *input_control, OpContext *cont } } -void SwitchActor::CollectBranchId(const int branch_id, OpContext *context) { +void SwitchActor::CollectBranchId(const int branch_id, OpContext *const context) { MS_EXCEPTION_IF_NULL(context); auto &sequential_num = context->sequential_num_; input_branch_ids_[sequential_num].push(branch_id); @@ -262,7 +262,7 @@ void SwitchActor::AddInput(const AnfNodePtr &node, const size_t branch) { } } -size_t SwitchActor::GetIndex(OpContext *context) { +size_t SwitchActor::GetIndex(const OpContext *const context) { if (need_branch_id_input_) { if (input_branch_ids_.find(context->sequential_num_) == input_branch_ids_.end() || input_branch_ids_[context->sequential_num_].empty()) { @@ -313,7 +313,7 @@ size_t SwitchActor::GetIndex(OpContext *context) { return static_cast(index); } -bool SwitchActor::CheckLaunchCondition(OpContext *context) const { +bool SwitchActor::CheckLaunchCondition(OpContext *const context) const { MS_EXCEPTION_IF_NULL(context); if (input_datas_num_ != 0) { auto data_iter = input_data_.find(context->sequential_num_); @@ -346,7 +346,7 @@ bool SwitchActor::CheckLaunchCondition(OpContext *context) const { return true; } -void SwitchActor::FetchInputDeviceTensor(OpContext *context) { +void SwitchActor::FetchInputDeviceTensor(OpContext *const context) { MS_EXCEPTION_IF_NULL(context); input_device_tensors_.resize(input_nodes_.size()); auto data_iter = input_data_.find(context->sequential_num_); @@ -452,7 +452,7 @@ void SwitchActor::SendOutput(OpContext *context) { } } -void SwitchActor::EraseInput(OpContext *context) { +void SwitchActor::EraseInput(OpContext *const context) { 
MS_EXCEPTION_IF_NULL(context); auto data_iter = input_data_.find(context->sequential_num_); if (data_iter != input_data_.end() && std::all_of(data_iter->second.begin(), data_iter->second.end(), diff --git a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h index 42fb313bb71..5337c520799 100644 --- a/mindspore/ccsrc/runtime/framework/actor/switch_actor.h +++ b/mindspore/ccsrc/runtime/framework/actor/switch_actor.h @@ -75,7 +75,7 @@ class SwitchActor : public SwitchActorBase { // The switch actor run when receive the input control. void RunOpControl(AID *input_control, OpContext *context); // The switch actor run when receive the input branch id. - void CollectBranchId(const int branch_id, OpContext *context); + void CollectBranchId(const int branch_id, OpContext *const context); // Parse the input node information of the switch actor according to node_. void ParseInput(const ControlNodeParserPtr &parser); // Add input for all branches. @@ -96,18 +96,18 @@ class SwitchActor : public SwitchActorBase { // Initialize the size of the vector members. void InitVectorSize(const size_t num); // Get index from DeviceTensor. - size_t GetIndex(OpContext *context); + size_t GetIndex(const OpContext *const context); // Add input for the branch. void AddInput(const AnfNodePtr &node, size_t branch); void AddInput(const KernelWithIndex node_with_index, const size_t branch); // Check whether satisfy the condition for send outputs. - bool CheckLaunchCondition(OpContext *context) const; + bool CheckLaunchCondition(OpContext *const context) const; // Fetch the args of switch branch. - void FetchInputDeviceTensor(OpContext *context); - void SendOutput(OpContext *context); + void FetchInputDeviceTensor(OpContext *const context); + void SendOutput(OpContext *const context); // Erase input data and input controls when finish switch launch. 
- void EraseInput(OpContext *context); + void EraseInput(OpContext *const context); void SendMemoryFreeReq(OpContext *const context); // Collect all the backend inputs of switch actor. diff --git a/mindspore/ccsrc/runtime/framework/control_node_parser.cc b/mindspore/ccsrc/runtime/framework/control_node_parser.cc index 1cbf40b8e3c..6e73837fc78 100644 --- a/mindspore/ccsrc/runtime/framework/control_node_parser.cc +++ b/mindspore/ccsrc/runtime/framework/control_node_parser.cc @@ -157,7 +157,7 @@ void CreateDeviceTensorForValueNode(const AnfNodePtr &front_node, const AnfNodeP } // Get the select kernel build info. - auto kernel_info = static_cast(backend_node->kernel_info()); + auto kernel_info = dynamic_cast(backend_node->kernel_info()); MS_EXCEPTION_IF_NULL(kernel_info); auto build_info = kernel_info->GetMutableSelectKernelBuildInfo(); MS_EXCEPTION_IF_NULL(build_info); diff --git a/mindspore/ccsrc/runtime/framework/graph_compiler.cc b/mindspore/ccsrc/runtime/framework/graph_compiler.cc index df56b0412e3..ad225024aee 100644 --- a/mindspore/ccsrc/runtime/framework/graph_compiler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_compiler.cc @@ -320,19 +320,24 @@ GraphId GraphCompiler::CompileGraphImpl(const KernelGraphPtr &graph, const Devic DumpIRProto(graph, "before_opt_" + std::to_string(graph->graph_id())); } - // Execute optimization pass. + MS_LOG(INFO) << "Get graph outputs before optimizer, graph id: " << graph->graph_id(); auto outputs_before_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output()); + + // Execute optimization pass. device_context->OptimizeGraph(graph); - auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output()); - // Update the output map of kernel graph by modified output nodes. - graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer); // Generate 'KernelMod' for all kernels and set 'KernelMod' into kernel, // 'KernelMod' is real executive object of kernel. 
device_context->CreateKernel(graph->execution_order()); + // Adjust kernel graph before run graph. device_context->PreprocessBeforeRunGraph(graph); + MS_LOG(INFO) << "Get graph outputs after optimizer, graph id: " << graph->graph_id(); + auto outputs_after_optimizer = AnfAlgo::GetAllOutputWithIndex(graph->output()); + // Update the output map of kernel graph by modified output nodes. + graph->UpdateGraphOutputMap(outputs_before_optimizer, outputs_after_optimizer); + if (ms_context->get_param(MS_CTX_EXECUTION_MODE) == kGraphMode) { // Create device address for all anf nodes of graph. CreateDeviceAddress(graph, device_context); diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc index c8e88ee3adb..a3ddbd9d0a0 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.cc +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.cc @@ -282,7 +282,7 @@ void PrepareDataForControlWeightNode( void PrepareDataForHostDataSourceActor(const std::unordered_map &data_node_position_map, const AnfNodePtr &node, const TensorPtr &tensor, - std::vector *host_tensors) { + std::vector *const host_tensors) { MS_EXCEPTION_IF_NULL(tensor); // Fill the host tensors for non weighted parameters. @@ -417,10 +417,6 @@ void GraphScheduler::Clear() { graph_output_to_actor_.clear(); front_node_to_actor_.clear(); copy_actors_.clear(); - - // Delete the thread pool. - delete thread_pool_; - thread_pool_ = nullptr; } void GraphScheduler::Initialize() { @@ -434,16 +430,15 @@ void GraphScheduler::Initialize() { } init_ = true; - auto actorMgr = ActorMgr::GetActorMgrRef(); - MS_EXCEPTION_IF_NULL(actorMgr); - actorMgr->Initialize(); - // Create the thread pool of actor runtime and Set the OMP_NUM_THREADS env. 
size_t actor_thread_num = 0; size_t OMP_thread_num = 0; ComputeThreadNums(&actor_thread_num, &OMP_thread_num); - thread_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num); - MS_EXCEPTION_IF_NULL(thread_pool_); + + auto actor_manager = ActorMgr::GetActorMgrRef(); + MS_EXCEPTION_IF_NULL(actor_manager); + actor_manager->Initialize(true, actor_thread_num); + std::string OMP_env = std::to_string(OMP_thread_num); (void)common::SetEnv("OMP_NUM_THREADS", OMP_env.c_str(), 0); auto OMP_thread_num_used = common::GetEnv("OMP_NUM_THREADS"); @@ -463,7 +458,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(memory_manager_actor); memory_manager_aid_ = memory_manager_actor->GetAID(); auto base_actor = static_cast(memory_manager_actor); - base_actor->set_thread_pool(thread_pool_); // Bind single thread to response to memory alloc and free quickly. (void)actorMgr->Spawn(base_actor, false); @@ -472,7 +466,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(recorder_actor); recorder_aid_ = &(recorder_actor->GetAID()); auto base_recorder_actor = static_cast(recorder_actor); - base_recorder_actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(base_recorder_actor, true); // Create and schedule debug actor. 
@@ -487,7 +480,6 @@ void GraphScheduler::BuildAndScheduleGlobalActor() { MS_EXCEPTION_IF_NULL(debug_actor); debug_aid_ = &(debug_actor->GetAID()); auto base_debug_actor = static_cast(debug_actor); - base_debug_actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(base_debug_actor, true); } } @@ -561,7 +553,6 @@ void GraphScheduler::Schedule(const ActorSet *actor_set) { auto actorMgr = ActorMgr::GetActorMgrRef(); MS_EXCEPTION_IF_NULL(actorMgr); for (auto actor : actors) { - actor->set_thread_pool(thread_pool_); (void)actorMgr->Spawn(actor); } } @@ -687,11 +678,11 @@ void GraphScheduler::PrepareRunOp(const ActorSet *actor_set, const GraphCompiler } } -void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor, +void GraphScheduler::PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor, const ControlNodeParserPtr &control_node_parser, const std::vector &origin_parameters, const std::vector &tensors, - std::vector *host_tensors) { + std::vector *const host_tensors) { const auto &control_node_parameters = control_node_parser->GetControlNodeParameter(); for (size_t j = 0; j < control_node_parameters.size(); ++j) { @@ -800,6 +791,10 @@ ActorSetPtr GraphScheduler::Build(const GraphCompilerInfo &graph_compiler_info) } void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_compiler_info) { + if (graph_compiler_info.strategy_ == GraphExecutionStrategy::kStep) { + return; + } + for (const auto &graph : graph_compiler_info.graphs_) { MS_EXCEPTION_IF_NULL(graph); auto outputs = AnfAlgo::GetAllOutputWithIndex(graph->output()); @@ -808,6 +803,8 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp MS_EXCEPTION_IF_NULL(output_kernel); auto origin_output_with_index = graph->GetFrontNodeWithIndexByGraphOutput(output_with_index); if (origin_output_with_index.first == nullptr) { + MS_LOG(WARNING) << "The graph " << graph->graph_id() << " output node:" << 
output_kernel->fullname_with_scope() + << " with index: " << output_with_index.second << " has no actor."; continue; } @@ -837,7 +834,9 @@ void GraphScheduler::CacheGraphOutputToActor(const GraphCompilerInfo &graph_comp MS_EXCEPTION_IF_NULL(actor); MS_LOG(INFO) << "Cache the graph " << graph->graph_id() << " output node:" << output_kernel->fullname_with_scope() << " with index: " << output_with_index.second << " to actor:" << actor->GetAID().Name() - << " with index:" << actor_output_index; + << " with index:" << actor_output_index + << ", from front node:" << origin_output_with_index.first->fullname_with_scope() + << " with index: " << origin_output_with_index.second; (void)graph_output_to_actor_.emplace(origin_output_with_index, GraphOutputPair(actor, actor_output_index)); } } @@ -968,7 +967,7 @@ std::vector GraphScheduler::BuildDataSourceActor(const Graph InsertActor(device_queue_ds_actor.get()); (void)data_source_actors.emplace_back(device_queue_ds_actor); device_queue_ds_actor->data_kernel_ = *iter; - device_queue_ds_actor->kernel_info_ = static_cast((*iter)->kernel_info()); + device_queue_ds_actor->kernel_info_ = dynamic_cast((*iter)->kernel_info()); } } @@ -1282,9 +1281,9 @@ std::vector GraphScheduler::BuildGatherActor(const GraphCompiler return gather_actors; } -void GraphScheduler::LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info, - const KernelGraphPtr &graph, KernelWithIndex from_kernel_with_output_idx, - KernelWithIndex to_kernel_with_input_idx) { +void GraphScheduler::LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info, + const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx, + const KernelWithIndex &to_kernel_with_input_idx) { MS_EXCEPTION_IF_NULL(to_actor); MS_EXCEPTION_IF_NULL(graph); @@ -2063,7 +2062,7 @@ void GraphScheduler::PrepareInputNodeForSwitchActor(const std::vector *to_actor, const size_t to_index) { + OpActor *const to_actor, const size_t 
to_index) { // Fetch all the funcgraph that call node would call. const auto cnode = call_node_with_index.first->cast(); std::vector func_graphs = FetchFuncGraphbyCallNode(cnode); @@ -2233,8 +2232,8 @@ void GraphScheduler::LinkDataArrowForSwitchActor(SwitchActor *from_actor, const void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, const KernelWithIndex &input_with_index, - const FuncGraphPtr &from_func_graph, OpActor *to_actor, - const size_t to_index) { + const FuncGraphPtr &from_func_graph, + OpActor *const to_actor, const size_t to_index) { const auto ¶meters = graph_compiler_info.origin_parameters_order_; const auto &front_to_backend_parameter = graph_compiler_info.control_node_parser_->front_to_backend_parameters_; const auto &input_node = input_with_index.first; @@ -2314,7 +2313,8 @@ void GraphScheduler::LinkDataArrowByControlNode(const GraphCompilerInfo &graph_c } } -void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor) { +void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, + SwitchActor *const actor) { // Link switch input. 
const auto &inputs = actor->input_nodes_; for (size_t i = 0; i < inputs.size(); ++i) { @@ -2342,13 +2342,14 @@ void GraphScheduler::LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_ auto to_actor = dynamic_cast(actor_name_to_actor_[gather_name]); for (size_t j = 0; j < actor->branch_inputs_pos_[i].size(); ++j) { auto pos = actor->branch_inputs_pos_[i][j]; - auto op_arrow = std::make_shared(pos, to_actor->GetAID(), j); + auto to_actor_index = j; + auto op_arrow = std::make_shared(pos, to_actor->GetAID(), to_actor_index); (void)actor->output_branch_arrows_[i].emplace_back(op_arrow); } } } -void GraphScheduler::LinkControlArrowForGatherActor(std::vector *kernel_actors, +void GraphScheduler::LinkControlArrowForGatherActor(std::vector *const kernel_actors, const std::vector &graphs, const ControlNodeParserPtr &parser) { // Link control arrow to kernel actor. @@ -2426,8 +2427,8 @@ void GraphScheduler::LinkControlArrowForGatherActor(std::vector } } -void GraphScheduler::LinkControlArrowForSwitchActor(std::vector *switch_actors, - LoopCountActor *to_actor, +void GraphScheduler::LinkControlArrowForSwitchActor(std::vector *const switch_actors, + LoopCountActor *const to_actor, const KernelMapPosition &origin_outputs_order) { if (to_actor == nullptr || (*switch_actors).empty()) { return; diff --git a/mindspore/ccsrc/runtime/framework/graph_scheduler.h b/mindspore/ccsrc/runtime/framework/graph_scheduler.h index 2a149307c09..63c7fc0572a 100644 --- a/mindspore/ccsrc/runtime/framework/graph_scheduler.h +++ b/mindspore/ccsrc/runtime/framework/graph_scheduler.h @@ -195,8 +195,9 @@ class GraphScheduler { // The processing of actors link statically. // 1. The processing of linking data arrows. // The gather of linking data arrows of kernel, it will call following functions by the different from actor type. 
- void LinkDataArrow(KernelActor *to_actor, const GraphCompilerInfo &graph_compiler_info, const KernelGraphPtr &graph, - KernelWithIndex from_kernel_with_output_idx, KernelWithIndex to_kernel_with_input_idx); + void LinkDataArrow(KernelActor *const to_actor, const GraphCompilerInfo &graph_compiler_info, + const KernelGraphPtr &graph, const KernelWithIndex &from_kernel_with_output_idx, + const KernelWithIndex &to_kernel_with_input_idx); // Link data arrows for internal parameter, convert internal parameter to actor by internal parameter cache to link. void LinkDataArrowForInternalParameter(const AnfNodePtr &internal_parameter, const std::vector &host_parameters, const KernelGraphPtr &graph, @@ -232,37 +233,38 @@ class GraphScheduler { void LinkOutputResultArrowForOutputActor(OutputActor *to_actor, const GraphCompilerInfo &graph_compiler_info); // 4. The processing of control flow linking. - void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *actor_set); - void LinkDataArrowForGatherActor(GatherActor *from_actor, KernelActor *to_actor, + void LinkArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, ActorSet *const actor_set); + void LinkDataArrowForGatherActor(GatherActor *const from_actor, KernelActor *const to_actor, const KernelWithIndex &front_node_with_index, const KernelWithIndex &to_node_with_index); - void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *actor); + void LinkDataArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, SwitchActor *const actor); // Connect the input of the actor. void LinkDataArrowByControlNode(const GraphCompilerInfo &graph_compiler_info, const KernelWithIndex &input_node, - const FuncGraphPtr &from_func_graph, OpActor *to_actor, + const FuncGraphPtr &from_func_graph, OpActor *const to_actor, const size_t to_index); // When the input of the actor is a call node, the output of the funcgraph called by the call node needs to be // connected. 
void LinkDataArrowByCallInput(const KernelWithIndex &call_node_with_index, const ControlNodeParserPtr &parser, - const FuncGraphPtr &from_func_graph, OpActor *to_actor, + const FuncGraphPtr &from_func_graph, OpActor *const to_actor, const size_t to_index); - void LinkDataArrowForSwitchActor(SwitchActor *from_actor, const size_t from_index, OpActor *to_actor, - const size_t to_index, const size_t branch_index = SIZE_MAX); + void LinkDataArrowForSwitchActor(SwitchActor *const from_actor, const size_t from_index, + OpActor *const to_actor, const size_t to_index, + const size_t branch_index = SIZE_MAX); - void LinkControlArrowForGatherActor(std::vector *kernel_actors, + void LinkControlArrowForGatherActor(std::vector *const kernel_actors, const std::vector &graphs, const ControlNodeParserPtr &parser); - void LinkControlArrowForSwitchActor(std::vector *switch_actors, LoopCountActor *to_actor, + void LinkControlArrowForSwitchActor(std::vector *const switch_actors, LoopCountActor *const to_actor, const KernelMapPosition &origin_outputs_order); // In control flow, there are scenarios where there are multi-branch outputs, and the gather actor needs to // send the branch id to the loop count actor. void LinkBranchArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info); void LinkBranchArrowForGatherActor(const GraphCompilerInfo &graph_compiler_info); void LinkOutputResultArrowForSwitchActor(const GraphCompilerInfo &graph_compiler_info, const ActorSet *actor_set); - void PrepareDataForControlNode(HostQueueDataSourceActor *host_data_source_actor, + void PrepareDataForControlNode(HostQueueDataSourceActor *const host_data_source_actor, const ControlNodeParserPtr &control_node_parser, const std::vector &origin_parameters, - const std::vector &tensors, std::vector *host_tensors); + const std::vector &tensors, std::vector *const host_tensors); // Add input for switch actor. 
Since part of the input of funcgraph is on call node, these inputs need to be added // to switch actor. void PrepareInputNodeForSwitchActor(const std::vector &control_nodes); @@ -330,8 +332,6 @@ class GraphScheduler { const AID *recorder_aid_{nullptr}; const AID *debug_aid_{nullptr}; - ActorThreadPool *thread_pool_{nullptr}; - bool init_{false}; }; } // namespace runtime diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc index 010f2682795..52bf733402b 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.cc @@ -57,6 +57,14 @@ void CPUDeviceContext::Initialize() { initialized_ = true; } +void CPUDeviceContext::Destroy() { + // Release memory. + if (mem_manager_ != nullptr) { + mem_manager_->FreeDeviceMemory(); + mem_manager_ = nullptr; + } +} + bool CPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const { MS_EXCEPTION_IF_NULL(address); MS_EXCEPTION_IF_NULL(mem_manager_); diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h index f7dbdddfa85..7fb859324f5 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_device_context.h @@ -35,6 +35,8 @@ class CPUDeviceContext : public DeviceContext { void Initialize() override; + void Destroy() override; + bool AllocateMemory(DeviceAddress *const &address, size_t size) const override; void FreeMemory(DeviceAddress *const &address) const override; diff --git a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc index 14093a5f989..c7b1a706ca1 100644 --- a/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc +++ b/mindspore/ccsrc/runtime/hardware/cpu/cpu_memory_pool.cc @@ -17,6 +17,7 @@ #include "runtime/hardware/cpu/cpu_memory_pool.h" 
#include #include "utils/log_adapter.h" +#include "utils/convert_utils_base.h" namespace mindspore { namespace device { @@ -42,14 +43,13 @@ size_t GetSystemMemorySize(const std::string &key) { std::string line(buf); auto title_end_pos = line.find(":"); auto title = line.substr(0, title_end_pos); - // Get mem size. if (title == key) { auto mem_size_end_pos = line.find_last_of(" "); auto mem_size_begin_pos = line.find_last_of(" ", mem_size_end_pos - 1); if ((mem_size_end_pos != std::string::npos) && (mem_size_begin_pos != std::string::npos)) { auto mem_size_string = line.substr(mem_size_begin_pos, mem_size_end_pos - mem_size_begin_pos); - mem_size = std::atol(mem_size_string.c_str()); + mem_size = LongToSize(std::atol(mem_size_string.c_str())); } break; } diff --git a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc index 4264cdf6d81..fa92a5aac3f 100644 --- a/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc +++ b/mindspore/ccsrc/runtime/hardware/gpu/gpu_device_context.cc @@ -152,15 +152,6 @@ void GPUDeviceContext::Destroy() { mem_manager_->FreeDeviceMemory(); mem_manager_ = nullptr; } - - // Clean GPU cache kernels which is generated by AKG - auto context_ptr = MsContext::GetInstance(); - MS_EXCEPTION_IF_NULL(context_ptr); - if (!(context_ptr->get_param(MS_CTX_SAVE_GRAPHS_FLAG))) { - kernel::KernelMeta *bin_map = kernel::KernelMeta::GetInstance(); - MS_EXCEPTION_IF_NULL(bin_map); - bin_map->RemoveKernelCache(); - } } bool GPUDeviceContext::AllocateMemory(DeviceAddress *const &address, size_t size) const { diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc index 24fb30f82f6..3fef5113bdf 100644 --- a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc +++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.cc @@ -26,7 +26,10 @@ #include "hccl/hcom.h" #include "utils/log_adapter.h" #include "utils/ms_utils.h" +#include 
"utils/ms_context.h" #include "runtime/hccl_adapter/converter.h" +#include "runtime/device/ascend/distribute/ascend_collective.h" +using HcclCollectiveGroup = mindspore::device::ascend::collective::HcclCollectiveGroup; static constexpr const char *kHcclPluginFileName = "libhccl_plugin.so"; static constexpr const char *kHcclDeployModeEnv = "DEPLOY_MODE"; @@ -75,7 +78,6 @@ void HcclAdapter::InitPlugin() { if (plugin_handle_ == nullptr) { MS_LOG(EXCEPTION) << "Dlopen " << kHcclPluginFileName << " failed, result = " << GetDlErrorMsg(); } - init_hcom_graph_adapter_ = DlsymFuncObj(InitHcomGraphAdapter, plugin_handle_); finalize_hcom_graph_adapter_ = DlsymFuncObj(FinalizeHcomGraphAdapter, plugin_handle_); get_hccl_kernel_info_store_ = DlsymFuncObj(GetHcclKernelInfoStore, plugin_handle_); @@ -98,7 +100,6 @@ void HcclAdapter::FinalizePlugin() { if (plugin_handle_ == nullptr) { return; } - init_hcom_graph_adapter_ = nullptr; finalize_hcom_graph_adapter_ = nullptr; get_hccl_kernel_info_store_ = nullptr; @@ -107,6 +108,10 @@ void HcclAdapter::FinalizePlugin() { finalize_hccl_comm_ = nullptr; launch_hccl_broadcast_ = nullptr; launch_hccl_all_reduce_ = nullptr; + launch_hccl_reduce_scatter_ = nullptr; + launch_hccl_all_gather_ = nullptr; + launch_hccl_send_ = nullptr; + launch_hccl_recv_ = nullptr; hccl_create_group_ = nullptr; hccl_destroy_group_ = nullptr; hccl_get_rank_id_ = nullptr; @@ -119,6 +124,19 @@ void HcclAdapter::FinalizePlugin() { plugin_handle_ = nullptr; } +bool HcclAdapter::InitHccl() { + MS_LOG(INFO) << "Start init hccl adapter."; + std::lock_guard lock(init_mutex_); + if (init_flag_) { + MS_LOG(INFO) << "Hccl has been inited, skip."; + return true; + } + InitPlugin(); + init_flag_ = true; + MS_LOG(INFO) << "Init hccl adapter success."; + return true; +} + bool HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) { MS_LOG(INFO) << "Start init hccl adapter."; std::lock_guard lock(init_mutex_); @@ -136,12 +154,10 @@ bool 
HcclAdapter::InitHccl(uint32_t device_id, std::string_view rank_id, std::st if (!ret) { return false; } - ret = InitHcclExec(); if (!ret) { return false; } - init_flag_ = true; MS_LOG(INFO) << "Init hccl adapter success."; return true; @@ -238,10 +254,69 @@ HcclResult HcclAdapter::HcclBroadcast(void *buf, uint64_t count, HcclDataType da return launch_hccl_broadcast_(buf, count, dataType, root, hccl_comm_, stream); } -HcclResult HcclAdapter::HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, - HcclReduceOp op, aclrtStream stream) const { +HcclResult HcclAdapter::HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, + HcclReduceOp op, aclrtStream stream, const std::string &group) const { MS_EXCEPTION_IF_NULL(launch_hccl_all_reduce_); - return launch_hccl_all_reduce_(sendBuf, recvBuf, count, dataType, op, hccl_comm_, stream); + HcclComm hccl_comm; + if (hccl_comm_ != nullptr) { + hccl_comm = hccl_comm_; + } else { + hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); + MS_EXCEPTION_IF_NULL(hccl_comm); + } + return launch_hccl_all_reduce_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream); +} + +HcclResult HcclAdapter::HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, + HcclReduceOp op, aclrtStream stream, const std::string &group) const { + MS_EXCEPTION_IF_NULL(launch_hccl_reduce_scatter_); + HcclComm hccl_comm; + if (hccl_comm_ != nullptr) { + hccl_comm = hccl_comm_; + } else { + hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); + MS_EXCEPTION_IF_NULL(hccl_comm); + } + return launch_hccl_reduce_scatter_(send_buf, recv_buf, count, dataType, op, hccl_comm, stream); +} + +HcclResult HcclAdapter::HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, + aclrtStream stream, const std::string &group) const { + MS_EXCEPTION_IF_NULL(launch_hccl_all_gather_); + HcclComm hccl_comm; + if (hccl_comm_ != nullptr) { 
+ hccl_comm = hccl_comm_; + } else { + hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); + MS_EXCEPTION_IF_NULL(hccl_comm); + } + return launch_hccl_all_gather_(send_buf, recv_buf, count, dataType, hccl_comm, stream); +} + +HcclResult HcclAdapter::HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank, + aclrtStream stream, const std::string &group) const { + MS_EXCEPTION_IF_NULL(launch_hccl_send_); + HcclComm hccl_comm; + if (hccl_comm_ != nullptr) { + hccl_comm = hccl_comm_; + } else { + hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); + MS_EXCEPTION_IF_NULL(hccl_comm); + } + return launch_hccl_send_(send_buf, count, dataType, destRank, hccl_comm, stream); +} + +HcclResult HcclAdapter::HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank, + aclrtStream stream, const std::string &group) const { + MS_EXCEPTION_IF_NULL(launch_hccl_recv_); + HcclComm hccl_comm; + if (hccl_comm_ != nullptr) { + hccl_comm = hccl_comm_; + } else { + hccl_comm = HcclCollectiveGroup::instance().GetGroupComm(group); + MS_EXCEPTION_IF_NULL(hccl_comm); + } + return launch_hccl_recv_(recv_buf, count, dataType, srcRank, hccl_comm, stream); } bool HcclAdapter::InitKernelInfoStore(uint32_t device_id, std::string_view rank_id, std::string_view rank_file) { @@ -338,6 +413,12 @@ bool HcclAdapter::InitHcclComm(std::string_view rank_id, std::string_view rank_f bool HcclAdapter::FinalizeHcclComm() { MS_LOG(INFO) << "Start finalize hccl comm."; + auto context_ptr = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(context_ptr); + auto task_sink = context_ptr->get_param(MS_CTX_ENABLE_TASK_SINK); + if (!task_sink) { + HcclCollectiveGroup::instance().DestroyCommGroup(); + } if (hccl_comm_ == nullptr) { return true; } diff --git a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h index f3c39937405..2ed8685d9fd 100644 --- 
a/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h +++ b/mindspore/ccsrc/runtime/hccl_adapter/hccl_adapter.h @@ -43,6 +43,7 @@ class HcclAdapter { // common bool InitHccl(uint32_t device_id, std::string_view rank_id, std::string_view rank_file); + bool InitHccl(); bool FinalizeHccl(); HcclResult HcclCreateGroup(const std::string &group, uint32_t rank_num, uint32_t *rank_ids) const; @@ -58,8 +59,16 @@ class HcclAdapter { // for single op HcclResult HcclBroadcast(void *buf, uint64_t count, HcclDataType dataType, uint32_t root, aclrtStream stream) const; - HcclResult HcclAllReduce(void *sendBuf, void *recvBuf, uint64_t count, HcclDataType dataType, HcclReduceOp op, - aclrtStream stream) const; + HcclResult HcclAllReduce(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + aclrtStream stream, const std::string &group = "") const; + HcclResult HcclAllGather(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, aclrtStream stream, + const std::string &group = "") const; + HcclResult HcclReduceScatter(void *send_buf, void *recv_buf, uint64_t count, HcclDataType dataType, HcclReduceOp op, + aclrtStream stream, const std::string &group = "") const; + HcclResult HcclSend(void *send_buf, uint64_t count, HcclDataType dataType, uint32_t destRank, aclrtStream stream, + const std::string &group = "") const; + HcclResult HcclRecv(void *recv_buf, uint64_t count, HcclDataType dataType, uint32_t srcRank, aclrtStream stream, + const std::string &group = "") const; // for enqueue op HcclResult HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const; @@ -91,6 +100,10 @@ class HcclAdapter { HcclCommDestroyFunObj finalize_hccl_comm_ = nullptr; HcclBroadcastFunObj launch_hccl_broadcast_ = nullptr; HcclAllReduceFunObj launch_hccl_all_reduce_ = nullptr; + HcclReduceScatterFunObj launch_hccl_reduce_scatter_ = nullptr; + HcclAllGatherFunObj launch_hccl_all_gather_ = nullptr; + HcclSendFunObj 
launch_hccl_send_ = nullptr; + HcclRecvFunObj launch_hccl_recv_ = nullptr; HcomCreateGroupFunObj hccl_create_group_ = nullptr; HcomDestroyGroupFunObj hccl_destroy_group_ = nullptr; diff --git a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h index a4b5fa3b0ae..82e0156abe5 100644 --- a/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h +++ b/mindspore/ccsrc/runtime/hccl_adapter/plugin/hccl_plugin.h @@ -47,6 +47,12 @@ PLUGIN_METHOD(GetAllKernelBuilder, void, OpsKernelBuilderMap *); ORIGIN_METHOD(HcclBroadcast, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream); ORIGIN_METHOD(HcclAllReduce, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm, aclrtStream); +ORIGIN_METHOD(HcclReduceScatter, HcclResult, void *, void *, uint64_t, HcclDataType, HcclReduceOp, HcclComm, + aclrtStream); +ORIGIN_METHOD(HcclAllGather, HcclResult, void *, void *, uint64_t, HcclDataType, HcclComm, aclrtStream); +ORIGIN_METHOD(HcclSend, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream); +ORIGIN_METHOD(HcclRecv, HcclResult, void *, uint64_t, HcclDataType, uint32_t, HcclComm, aclrtStream); + ORIGIN_METHOD(HcclCommInitClusterInfo, HcclResult, const char *, uint32_t, HcclComm *); ORIGIN_METHOD(HcclCommDestroy, HcclResult, HcclComm); ORIGIN_METHOD(HcomCreateGroup, HcclResult, const char *, uint32_t, uint32_t *); diff --git a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc index 53626814add..4ee7217e8a9 100644 --- a/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc +++ b/mindspore/ccsrc/transform/express_ir/mindir_exporter.cc @@ -138,6 +138,7 @@ class IrExportBuilder { mind_ir::NodeProto *last_node_{nullptr}; std::list todo_; std::map node_index_map_; + std::set nodeName_; size_t node_index_{0}; size_t shape_index_{0}; }; @@ -145,16 +146,7 @@ class IrExportBuilder { using 
IrExporterPtr = std::shared_ptr; std::string IrExporter::GetDumpString(const FuncGraphPtr &func_graph) { - if ((builder_ == nullptr) || (func_graph == nullptr)) { - MS_LOG(EXCEPTION) << "Input params is null."; - } - - // Export model info - builder_->BuildModelInfo(); - - // Export model and return string - builder_->BuildModel(func_graph); - + (void)GetDumpProto(func_graph); return builder_->GetProtoString(func_graph); } @@ -168,7 +160,6 @@ mind_ir::ModelProto IrExporter::GetDumpProto(const FuncGraphPtr &func_graph, boo // Export model and return string builder_->BuildModel(func_graph, save_tensor_data); - return builder_->Model(); } @@ -191,16 +182,34 @@ void IrExportBuilder::BuildModel(const FuncGraphPtr &func_graph, bool save_tenso graph_proto->set_bprop_hash(func_graph->bprop_hash()); ResetNodeIndex(); todo_.clear(); - todo_.push_back(func_graph); + nodeName_.clear(); + // Build the main funcGraph + nodeName_.insert(func_graph->ToString()); + BuildFuncGraph(func_graph, graph_proto, save_tensor_data); + std::set graphVisited; + graphVisited.insert(func_graph); while (!todo_.empty()) { FuncGraphPtr fg = todo_.back(); todo_.pop_back(); - BuildFuncGraph(fg, graph_proto, save_tensor_data); + if (graphVisited.count(fg) > 0) { + continue; + } + if (nodeName_.count(fg->ToString()) > 0) { + MS_LOG(EXCEPTION) << "There is a duplicate name: " << fg->ToString(); + } + nodeName_.insert(fg->ToString()); + graphVisited.insert(fg); + auto graph = model_.add_functions(); + BuildFuncGraph(fg, graph, save_tensor_data); } + // Release resource + nodeName_.clear(); } void IrExportBuilder::BuildFuncGraph(const FuncGraphPtr &func_graph, mind_ir::GraphProto *const graph_proto, bool save_tensor_data) { + // Export funcGraph name. + graph_proto->set_name(func_graph->ToString()); // Export parameters // 1. parameters should be mapped to ValueInfoProto // 2. 
parameters with default value should be mapped to Initializer @@ -232,6 +241,10 @@ void IrExportBuilder::BuildParameters(const FuncGraphPtr &func_graph, mind_ir::G input_proto->set_name(param_name); SetValueInfoProto(param, input_proto); } + if (nodeName_.count(param_name) > 0) { + MS_LOG(EXCEPTION) << "parameter name is duplicate:" << param_name; + } + nodeName_.insert(param_name); } } @@ -383,9 +396,13 @@ std::string IrExportBuilder::GetOpTypeName(const AnfNodePtr &node) { } else if (IsValueNode(node)) { FuncGraphPtr fg = GetValueNode(node); todo_.push_back(fg); - type_name = fg->ToString(); + type_name = "REF::" + fg->ToString(); } else if (node->isa() || node->isa()) { - type_name = node->ToString(); + auto nodeName = GetUniqueNodeName(node); + type_name = "REF::" + nodeName; + if (nodeName_.count(nodeName) == 0) { + MS_LOG(EXCEPTION) << "There is not the name: " << nodeName; + } } else { MS_LOG(EXCEPTION) << "Need to support op type: " << node->type_name(); } @@ -424,6 +441,9 @@ void IrExportBuilder::SetShapeToNodeProto(const TypePtr &type, const BaseShapePt tensor_proto->set_data_type(mind_ir::TensorProto_DataType_UINT64); tensor_proto->add_dims(1); } + } else if (type->isa()) { + attr_proto->set_type(mind_ir::AttributeProto_AttributeType_GRAPH); + *seq_string += type->type_name() + ","; } else if (type->isa() || type->isa() || type->isa()) { *seq_string += type->type_name() + ","; } else { @@ -468,6 +488,10 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *cons // Build cnode mind_ir::NodeProto *node_proto = graph_proto->add_node(); std::string output_name = GetUniqueNodeName(node); + if (nodeName_.count(output_name) > 0) { + MS_LOG(EXCEPTION) << "There is a duplicate name: " << output_name; + } + nodeName_.insert(output_name); node_proto->add_output(output_name); node_proto->set_name(output_name); node_proto->set_domain(node->fullname_with_scope()); @@ -475,7 +499,9 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, 
mind_ir::GraphProto *cons std::string type_name = GetOpTypeName(op); node_proto->set_op_type(type_name); last_node_ = node_proto; + // Maybe Tensor or Function or nullptr SetShapeToNodeProto(node, node_proto); + (void)std::for_each(input_names.begin(), input_names.end(), [&node_proto](const string &name) { node_proto->add_input(name); }); @@ -490,13 +516,17 @@ void IrExportBuilder::BuildCNode(const CNodePtr &node, mind_ir::GraphProto *cons CheckAndConvertUtils::ConvertAttrValueInExport(type_name, attr.first, &attr_value); SetValueToAttributeProto(attr_value, attr_proto); } - } else { - MS_LOG(EXCEPTION) << "Need to support op type: " << op->type_name(); } } std::string IrExportBuilder::BuildInputNode(const AnfNodePtr &node, mind_ir::GraphProto *const graph_proto) { std::string node_name = GetUniqueNodeName(node); + // FuncGraph will be added to functions and the input name is the function name. + if (IsValueNode(node)) { + FuncGraphPtr fg = GetValueNode(node); + todo_.push_back(fg); + return fg->ToString(); + } if (node->isa()) { // When node input is a ValueNode, need to create a Constant Node mind_ir::NodeProto *node_proto = graph_proto->add_node(); @@ -539,7 +569,12 @@ std::string IrExportBuilder::GetNodeName(const AnfNodePtr &node) { if ((node != nullptr) && (node->func_graph() != nullptr)) { node_name = node->func_graph()->ToString() + ":"; } - node_name += node->ToString(); + if (node->isa()) { + // Needn't value + node_name += node->AnfNode::ToString(); + } else { + node_name += node->ToString(); + } MS_LOG(DEBUG) << "GetNodeName: " << node_name; return node_name; } diff --git a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc index dfd09a79356..d44baf4c70b 100644 --- a/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc +++ b/mindspore/ccsrc/transform/express_ir/onnx_exporter.cc @@ -29,6 +29,11 @@ namespace mindspore { const int ONNX_VERSION = 11; +const int kZeroNum = 0; +const int kOneNum = 1; 
+const int kTwoNum = 2; +const int kThreeNum = 3; +const int kFourNum = 4; enum OpMergeMode { OP_MERGE_UNDEFINED = 0, // undefined behavior OP_MERGE_IGNORE = 1, // indicate an input op merged into other op in compute node list @@ -36,6 +41,7 @@ enum OpMergeMode { OP_MERGE_GEMM = 3, // indicate `MindSpore MatMul + BiasAdd` --> `ONNX Gemm` OP_MERGE_BATCH_NORM = 4, // indicate `MindSpore BatchNorm(x)[0]` --> `ONNX Batch Normalization` OP_MERGE_MAXPOOL_WITH_ARGMAX = 5, // indicate `MindSpore MaxPoolWithArgmax(x)[0]` --> `ONNX MaxPool` + OP_MERGE_LAYER_NORM = 6, // indicate `MindSpore LayerNorm(x)[0]` --> `ONNX MeanVarianceNormalization` }; struct OpMergedInfo { @@ -99,6 +105,9 @@ void SetAttrTupleValueToProto(const ValuePtr &value, onnx::AttributeProto_Attrib attr_proto->add_ints(GetValue((*tuple_ptr)[i])); } break; + case onnx::AttributeProto_AttributeType_INT: + attr_proto->set_i(GetValue((*tuple_ptr)[beg_idx])); + break; case onnx::AttributeProto_AttributeType_FLOATS: for (size_t i = beg_idx; i < tuple_ptr->size(); ++i) { attr_proto->add_floats(GetValue((*tuple_ptr)[i])); @@ -266,25 +275,39 @@ OPERATOR_ONNX_CONVERT_DEFINE(RealDiv, Div, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(ReduceSum, ReduceSum, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Sub, Sub, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Maximum, Max, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(Minimum, Min, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Transpose, Transpose, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(StridedSlice, Slice, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Exp, Exp, OpNameInfo()) -OPERATOR_ONNX_CONVERT_DEFINE(ResizeNearestNeighbor, Resize, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Softplus, Softplus, OpNameInfo()) OPERATOR_ONNX_CONVERT_DEFINE(Tanh, Tanh, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(Abs, Abs, OpNameInfo()) + +// MindSpore Softmax axis(int, Tuple) +OPERATOR_ONNX_CONVERT_DEFINE(Softmax, Softmax, + OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT, + 
SetAttrTupleValueToProto<0>)) + +// MindSpore LogSoftmax axis(int) +OPERATOR_ONNX_CONVERT_DEFINE(LogSoftmax, LogSoftmax, + OpNameInfo().Attr("axis", "axis", onnx::AttributeProto_AttributeType_INT, + SetAttrValueToProto)) + +OPERATOR_ONNX_CONVERT_DEFINE(Softsign, Softsign, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(Sqrt, Sqrt, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(Equal, Equal, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(Floor, Floor, OpNameInfo()) +OPERATOR_ONNX_CONVERT_DEFINE(ACos, Acos, OpNameInfo()) #define OP_CONVERT_FUNCTION_NAME(name) GetOpOnnxConvertInfo_##name void RegisterOpConverters(const std::function &fn) { fn(OP_CONVERT_FUNCTION_NAME(Add)()); fn(OP_CONVERT_FUNCTION_NAME(Mul)()); - fn(OP_CONVERT_FUNCTION_NAME(ReLU)()); fn(OP_CONVERT_FUNCTION_NAME(Sigmoid)()); - fn(OP_CONVERT_FUNCTION_NAME(Conv2D)()); fn(OP_CONVERT_FUNCTION_NAME(Argmax)()); - fn(OP_CONVERT_FUNCTION_NAME(Flatten)()); fn(OP_CONVERT_FUNCTION_NAME(MaxPool)()); fn(OP_CONVERT_FUNCTION_NAME(MaxPoolWithArgmax)()); @@ -293,16 +316,24 @@ void RegisterOpConverters(const std::function &fn) { fn(OP_CONVERT_FUNCTION_NAME(Squeeze)()); fn(OP_CONVERT_FUNCTION_NAME(BatchNorm)()); fn(OP_CONVERT_FUNCTION_NAME(MatMul)()); - fn(OP_CONVERT_FUNCTION_NAME(MakeTuple)()); fn(OP_CONVERT_FUNCTION_NAME(RealDiv)()); fn(OP_CONVERT_FUNCTION_NAME(BiasAdd)()); fn(OP_CONVERT_FUNCTION_NAME(Sub)()); fn(OP_CONVERT_FUNCTION_NAME(Maximum)()); + fn(OP_CONVERT_FUNCTION_NAME(Minimum)()); fn(OP_CONVERT_FUNCTION_NAME(Exp)()); - fn(OP_CONVERT_FUNCTION_NAME(ResizeNearestNeighbor)()); + fn(OP_CONVERT_FUNCTION_NAME(Softplus)()); fn(OP_CONVERT_FUNCTION_NAME(Tanh)()); + fn(OP_CONVERT_FUNCTION_NAME(Softmax)()); + fn(OP_CONVERT_FUNCTION_NAME(LogSoftmax)()); + fn(OP_CONVERT_FUNCTION_NAME(Abs)()); + fn(OP_CONVERT_FUNCTION_NAME(Softsign)()); + fn(OP_CONVERT_FUNCTION_NAME(Sqrt)()); + fn(OP_CONVERT_FUNCTION_NAME(Equal)()); + fn(OP_CONVERT_FUNCTION_NAME(Floor)()); + fn(OP_CONVERT_FUNCTION_NAME(ACos)()); } class OpConvertRegistry { @@ 
-367,6 +398,12 @@ class OnnxExporter { std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); + void ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, onnx::GraphProto *graph_proto); + void ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, onnx::GraphProto *graph_proto); + void ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, + onnx::GraphProto *graph_proto); void ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportPrimCast(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, @@ -383,7 +420,6 @@ class OnnxExporter { std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); - void ExportMergeConv(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, @@ -392,6 +428,8 @@ class OnnxExporter { std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); + void ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, onnx::GraphProto *graph_proto); void ExportOutput(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *graph_proto); @@ -400,6 +438,16 @@ class OnnxExporter { void ConvertTupleToTensor(const ValuePtr &value, onnx::TensorProto *tensor_proto); void 
SetNodeAttribute(const ValuePtr &value, onnx::NodeProto *node_proto); + void SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output, + onnx::AttributeProto *const attr_proto, onnx::TensorProto *const tensor_proto, + std::string tensor_name, float float_data); + void SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, + std::string input_x, std::string input_y); + void SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, + std::string input); + + void SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input, + onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type); size_t AllocateNodeIndex() { return ++onnx_node_index_; } @@ -526,6 +574,9 @@ void OnnxExporter::SetValueInfoType(const AnfNodePtr &node, onnx::ValueInfoProto for (const auto &dim : dims) { type_proto->mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(dim); } + if (dims.empty()) { + type_proto->mutable_tensor_type()->mutable_shape(); + } } } @@ -593,6 +644,12 @@ void OnnxExporter::MatchAndMark(const FuncGraphPtr &func_graph, const std::vecto op_merged_infos[cnode].mode = OP_MERGE_MAXPOOL_WITH_ARGMAX; op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; op_merged_infos[cnode->input(1)].referred_count -= 1; + } else if (cnode->IsApply(prim::kPrimTupleGetItem) && + IsPrimitiveCNode(cnode->input(1), std::make_shared("LayerNorm")) && + GetInt64Value(cnode->input(2)) == 0) { + op_merged_infos[cnode].mode = OP_MERGE_LAYER_NORM; + op_merged_infos[cnode->input(1)].mode = OP_MERGE_IGNORE; + op_merged_infos[cnode->input(1)].referred_count -= 1; } } } @@ -612,6 +669,7 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapisa()) { continue; @@ -623,9 +681,8 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapcast(); - const int INDEX = 2; if 
(nextCNode->IsApply(prim::kPrimUpdateState) && - IsPrimitiveCNode(nextCNode->input(INDEX), std::make_shared("MakeTuple"))) { + IsPrimitiveCNode(nextCNode->input(kTwoNum), std::make_shared("MakeTuple"))) { continue; } } @@ -644,6 +701,18 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::mapIsApply(prim::kPrimExpandDims)) { + ExportPrimExpandDims(func_graph, cnode, node_map_ptr, graph_proto); + continue; + } + if (cnode->IsApply(prim::kPrimBatchMatMul)) { + ExportPrimBatchMatMul(func_graph, cnode, node_map_ptr, graph_proto); + continue; + } + if (cnode->IsApply(prim::kPrimGeLU)) { + ExportPrimGeLU(func_graph, cnode, node_map_ptr, graph_proto); + continue; + } switch (merged_info.mode) { case OP_MERGE_CONV: ExportMergeConv(func_graph, cnode, node_map_ptr, graph_proto); @@ -657,6 +726,9 @@ void OnnxExporter::ExportNodes(const FuncGraphPtr &func_graph, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto input_shape = node->input(2); + auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_shape = node->input(kTwoNum); std::string name_shape; if (input_shape->isa()) { auto const_node_idx = AllocateNodeIndex(); (*node_map_ptr)[input_shape] = const_node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); name_shape = std::to_string(const_node_idx); - node_proto->add_output(name_shape); + auto name = prim::kPrimReshape->name(); + node_proto->set_name(name_shape + name); + node_proto->add_output(name_shape); node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_name("value"); - attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(input_shape)->value(), attr_proto->mutable_t()); } else { @@ -698,8 +771,8 @@ void OnnxExporter::ExportPrimReshape(const FuncGraphPtr &, const CNodePtr &node, void 
OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto input_axis = node->input(2); + auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_axis = node->input(kTwoNum); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -708,6 +781,7 @@ void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, if (node->IsApply(prim::kPrimReduceSum)) { name = prim::kPrimReduceSum->name(); } + node_proto->set_name(std::to_string(node_idx) + name); node_proto->set_op_type(name); node_proto->add_output(std::to_string(node_idx)); node_proto->add_input(input_data); @@ -735,14 +809,14 @@ void OnnxExporter::ExportPrimReduce(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - const int PERM_INDEX = 2; - auto input_perm = node->input(PERM_INDEX); - + auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_perm = node->input(kTwoNum); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); auto name = prim::kPrimTranspose->name(); + + node_proto->set_name(std::to_string(node_idx) + name); node_proto->set_op_type(name); node_proto->add_output(std::to_string(node_idx)); node_proto->add_input(input_data); @@ -771,9 +845,8 @@ void OnnxExporter::ExportPrimTranspose(const FuncGraphPtr &func_graph, const CNo void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = 
GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - const int BEGIN_INDEX = 2; - auto begin = node->input(BEGIN_INDEX); + auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto begin = node->input(kTwoNum); auto name = prim::kPrimStridedSlice->name(); std::string name_begin; if (begin->isa()) { @@ -785,7 +858,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); - attr_proto->set_name("starts"); + attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(begin)->value(), attr_proto->mutable_t()); @@ -794,8 +867,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const << "Need to insert op convert variable from tuple to tensor for " << name; } - const int END_INDEX = 3; - auto end = node->input(END_INDEX); + auto end = node->input(kThreeNum); std::string name_end; if (end->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -806,7 +878,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); - attr_proto->set_name("ends"); + attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(end)->value(), attr_proto->mutable_t()); @@ -832,12 +904,11 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto_axes->add_output(name_axes); node_proto_axes->set_op_type("Constant"); onnx::AttributeProto *attr_proto_axes = node_proto_axes->add_attribute(); - attr_proto_axes->set_name("axes"); + attr_proto_axes->set_name("value"); attr_proto_axes->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(axes)->value(), attr_proto_axes->mutable_t()); - 
const int STRIDES_INDEX = 4; - auto strides = node->input(STRIDES_INDEX); + auto strides = node->input(kFourNum); std::string name_strides; if (strides->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -848,7 +919,7 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto_steps = node_proto->add_attribute(); - attr_proto_steps->set_name("steps"); + attr_proto_steps->set_name("value"); attr_proto_steps->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(strides)->value(), attr_proto_steps->mutable_t()); } else { @@ -871,18 +942,17 @@ void OnnxExporter::ExportPrimStridedSlice(const FuncGraphPtr &func_graph, const void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(1)->Shape()); + auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); - AnfNodePtr op = node->input(0); + AnfNodePtr op = node->input(kZeroNum); auto op_value = dyn_cast(op); auto prim = dyn_cast(op_value->value()); std::vector resize_size; auto tuple_ptr = dyn_cast(prim->GetAttr("size")); - const int NUM = 2; - for (size_t i = 0; i < x_shape->shape().size() - NUM; i++) { + for (size_t i = 0; i < x_shape->shape().size() - kTwoNum; i++) { resize_size.push_back(x_shape->shape()[i]); } for (size_t i = 0; i < tuple_ptr->size(); i++) { @@ -900,7 +970,7 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap node_proto_size->add_output(name_size); node_proto_size->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto_size->add_attribute(); - attr_proto->set_name("sizes"); + attr_proto->set_name("value"); 
attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(resize_size_ptr, attr_proto->mutable_t()); @@ -929,6 +999,293 @@ void OnnxExporter::ExportPrimResizeNearestNeighbor(const FuncGraphPtr &func_grap node_proto->add_input(name_size); } +// MindSpore ExpandDims -> ONNX Reshape +void OnnxExporter::ExportPrimExpandDims(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, + onnx::GraphProto *const graph_proto) { + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto axis = GetInt64Value(node->input(kTwoNum)); + auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); + auto name = prim::kPrimExpandDims->name(); + + std::vector new_shape; + for (size_t i = 0; i < x_shape->shape().size(); i++) { + new_shape.push_back(x_shape->shape()[i]); + } + if (axis < 0) { + axis = axis + 1 + x_shape->shape().size(); + } + new_shape.insert(new_shape.begin() + axis, kOneNum); + auto new_shape_value = MakeValue>(new_shape); + auto shape = NewValueNode(new_shape_value)->cast(); + std::string name_shape; + + if (shape->isa()) { + auto const_node_idx = AllocateNodeIndex(); + (*node_map_ptr)[shape] = const_node_idx; + onnx::NodeProto *node_proto = graph_proto->add_node(); + name_shape = std::to_string(const_node_idx); + node_proto->add_output(name_shape); + node_proto->set_op_type("Constant"); + onnx::AttributeProto *attr_proto = node_proto->add_attribute(); + attr_proto->set_name("value"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + ConvertTupleToTensor(dyn_cast(shape)->value(), attr_proto->mutable_t()); + } else { + name_shape = GetNodeInputName(shape, node_map_ptr, graph_proto); + MS_LOG(EXCEPTION) << "Need to insert op convert variable from tuple to tensor for " << name; + } + + auto node_idx = AllocateNodeIndex(); + (*node_map_ptr)[node] = node_idx; + onnx::NodeProto *node_proto = graph_proto->add_node(); + node_proto->set_op_type("Reshape"); + 
node_proto->add_output(std::to_string(node_idx)); + node_proto->add_input(input_x); + node_proto->add_input(name_shape); +} + +// MindSpore BatchMatMul -> ONNX Transpose + MatMul +void OnnxExporter::ExportPrimBatchMatMul(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, + onnx::GraphProto *const graph_proto) { + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_y = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); + + AnfNodePtr batchmatmul_op = node->input(kZeroNum); + auto op_value = dyn_cast(batchmatmul_op); + auto prim = dyn_cast(op_value->value()); + auto transpose_a = GetValue(prim->GetAttr("transpose_a")); + auto transpose_b = GetValue(prim->GetAttr("transpose_b")); + std::string transpose_input_x_name = ""; + std::string transpose_input_y_name = ""; + + if (transpose_a) { + auto input_x_shape = dyn_cast(node->input(kOneNum)->Shape()); + // Add Transpose node after input_x of BatchMatMul + auto transpose_input_x_index = AllocateNodeIndex(); + onnx::NodeProto *transpose_inputx_node_proto = graph_proto->add_node(); + transpose_inputx_node_proto->add_input(input_x); + transpose_inputx_node_proto->add_output(std::to_string(transpose_input_x_index)); + transpose_inputx_node_proto->set_op_type(prim::kPrimTranspose->name()); + onnx::AttributeProto *attr_proto = transpose_inputx_node_proto->add_attribute(); + attr_proto->set_name("perm"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); + for (size_t i = 0; i < input_x_shape->shape().size() - kTwoNum; i++) { + attr_proto->add_ints(i); + } + attr_proto->add_ints(input_x_shape->shape().size() - kOneNum); + attr_proto->add_ints(input_x_shape->shape().size() - kTwoNum); + transpose_input_x_name = std::to_string(transpose_input_x_index); + } + if (transpose_b) { + auto input_y_shape = dyn_cast(node->input(kTwoNum)->Shape()); + // Add Transpose node after input_y of BatchMatMul + auto transpose_input_y_index = 
AllocateNodeIndex(); + onnx::NodeProto *transpose_inputy_node_proto = graph_proto->add_node(); + transpose_inputy_node_proto->add_input(input_y); + transpose_inputy_node_proto->add_output(std::to_string(transpose_input_y_index)); + transpose_inputy_node_proto->set_op_type(prim::kPrimTranspose->name()); + onnx::AttributeProto *attr_proto = transpose_inputy_node_proto->add_attribute(); + attr_proto->set_name("perm"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); + for (size_t i = 0; i < input_y_shape->shape().size() - kTwoNum; i++) { + attr_proto->add_ints(i); + } + attr_proto->add_ints(input_y_shape->shape().size() - kOneNum); + attr_proto->add_ints(input_y_shape->shape().size() - kTwoNum); + transpose_input_y_name = std::to_string(transpose_input_y_index); + } + + auto node_idx = AllocateNodeIndex(); + (*node_map_ptr)[node] = node_idx; + onnx::NodeProto *node_proto = graph_proto->add_node(); + node_proto->set_op_type("MatMul"); + node_proto->add_output(std::to_string(node_idx)); + node_proto->set_name(std::to_string(node_idx) + "MatMul"); + if (transpose_a) { + node_proto->add_input(transpose_input_x_name); + } else { + node_proto->add_input(input_x); + } + if (transpose_b) { + node_proto->add_input(transpose_input_y_name); + } else { + node_proto->add_input(input_y); + } +} + +void OnnxExporter::SetConstantNodeProtoInfoForGeLU(onnx::NodeProto *const node_proto, std::string output, + onnx::AttributeProto *const attr_proto, + onnx::TensorProto *const tensor_proto, std::string tensor_name, + float float_data) { + node_proto->set_op_type("Constant"); + node_proto->add_output(output); + + attr_proto->set_name("value"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + + tensor_proto->set_name(tensor_name); + tensor_proto->add_dims(static_cast<::google::protobuf::int64>(kOneNum)); + tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32)); + tensor_proto->add_float_data(float_data); +} + +void 
OnnxExporter::SetCastNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string input, + onnx::AttributeProto *const attr_proto, onnx::TensorProto_DataType i_type) { + node_proto->set_op_type(prim::kPrimCast->name()); + node_proto->add_output(output); + node_proto->add_input(input); + + attr_proto->set_name("to"); + attr_proto->set_type(onnx::AttributeProto_AttributeType_INT); + attr_proto->set_i(i_type); +} + +void OnnxExporter::SetTwoInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, + std::string input_x, std::string input_y) { + node_proto->add_output(output); + node_proto->set_op_type(op_type); + node_proto->add_input(input_x); + node_proto->add_input(input_y); +} + +void OnnxExporter::SetOneInputNodeProtoInfo(onnx::NodeProto *const node_proto, std::string output, std::string op_type, + std::string input) { + node_proto->add_output(output); + node_proto->set_op_type(op_type); + node_proto->add_input(input); +} + +// MindSpore GeLU -> ONNX 0.5 * X * (1.0 + tanh((sqrt(2/pi) * (x + 0.044715 * pow(x, 3))))) +void OnnxExporter::ExportPrimGeLU(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_x_node = node->input(kOneNum); + auto dtype = input_x_node->Type(); + auto elem_type = dyn_cast(dtype)->element()->type_id(); + auto pre_cast_node_idx = 0; + + // if type is float16, add cast node cast float16 to float32 + if (elem_type == kNumberTypeFloat16) { + pre_cast_node_idx = AllocateNodeIndex(); + onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node(); + onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute(); + SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), input_x, pre_cast_attr_proto, + onnx::TensorProto_DataType_FLOAT); + } + + // Add Pow node + // Add input exponent node for 
Pow node + auto exp_node_idx = AllocateNodeIndex(); + onnx::NodeProto *exp_node_proto = graph_proto->add_node(); + onnx::AttributeProto *exp_attr_proto = exp_node_proto->add_attribute(); + onnx::TensorProto *exp_tensor_proto = exp_attr_proto->mutable_t(); + SetConstantNodeProtoInfoForGeLU(exp_node_proto, std::to_string(exp_node_idx), exp_attr_proto, exp_tensor_proto, + "exponent", 3.0); + // Add pow node + auto pow_idx = AllocateNodeIndex(); + auto pow_name = std::to_string(pow_idx); + onnx::NodeProto *pow_node_proto = graph_proto->add_node(); + pow_node_proto->set_op_type("Pow"); + pow_node_proto->add_output(pow_name); + if (elem_type == kNumberTypeFloat16) { + pow_node_proto->add_input(std::to_string(pre_cast_node_idx)); + } else { + pow_node_proto->add_input(input_x); + } + pow_node_proto->add_input(std::to_string(exp_node_idx)); + + // Add first Mul node + // Add input node for first Mul node + auto fmul_input_node_idx = AllocateNodeIndex(); + onnx::NodeProto *fmul_input_node_proto = graph_proto->add_node(); + onnx::AttributeProto *fmul_input_attr_proto = fmul_input_node_proto->add_attribute(); + onnx::TensorProto *fmul_input_tensor_proto = fmul_input_attr_proto->mutable_t(); + SetConstantNodeProtoInfoForGeLU(fmul_input_node_proto, std::to_string(fmul_input_node_idx), fmul_input_attr_proto, + fmul_input_tensor_proto, "input_y_for_mul", 0.044715); + // Add first Mul Node + auto fmul_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *fmul_node_proto = graph_proto->add_node(); + SetTwoInputNodeProtoInfo(fmul_node_proto, fmul_name, "Mul", pow_name, std::to_string(fmul_input_node_idx)); + + // Add first Add node + auto fadd_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *fadd_node_proto = graph_proto->add_node(); + if (elem_type == kNumberTypeFloat16) { + fadd_node_proto->add_input(std::to_string(pre_cast_node_idx)); + } else { + fadd_node_proto->add_input(input_x); + } + SetOneInputNodeProtoInfo(fadd_node_proto, fadd_name, "Add", 
fmul_name); + + // Add second Mul node + // Add input node for second Mul node + auto smul_input_node_idx = AllocateNodeIndex(); + onnx::NodeProto *smul_input_node_proto = graph_proto->add_node(); + onnx::AttributeProto *smul_input_attr_proto = smul_input_node_proto->add_attribute(); + onnx::TensorProto *smul_input_tensor_proto = smul_input_attr_proto->mutable_t(); + SetConstantNodeProtoInfoForGeLU(smul_input_node_proto, std::to_string(smul_input_node_idx), smul_input_attr_proto, + smul_input_tensor_proto, "input_y_for_smul", 0.79788456); + // Add second Mul Node + auto smul_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *smul_node_proto = graph_proto->add_node(); + SetTwoInputNodeProtoInfo(smul_node_proto, smul_name, "Mul", fadd_name, std::to_string(smul_input_node_idx)); + + // Add tanh node + auto tanh_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *tanh_node_proto = graph_proto->add_node(); + SetOneInputNodeProtoInfo(tanh_node_proto, tanh_name, "Tanh", smul_name); + + // Add second Add node + // Add input node for second add node + auto sadd_input_node_idx = AllocateNodeIndex(); + onnx::NodeProto *sadd_input_node_proto = graph_proto->add_node(); + onnx::AttributeProto *sadd_input_attr_proto = sadd_input_node_proto->add_attribute(); + onnx::TensorProto *sadd_input_tensor_proto = sadd_input_attr_proto->mutable_t(); + SetConstantNodeProtoInfoForGeLU(sadd_input_node_proto, std::to_string(sadd_input_node_idx), sadd_input_attr_proto, + sadd_input_tensor_proto, "input_y_for_sadd", 1.0); + // Add second Add node + auto sadd_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *sadd_node_proto = graph_proto->add_node(); + SetTwoInputNodeProtoInfo(sadd_node_proto, sadd_name, "Add", tanh_name, std::to_string(sadd_input_node_idx)); + + // Add third Mul node + // Add input node for third Mul node + auto tmul_input_node_idx = AllocateNodeIndex(); + onnx::NodeProto *tmul_input_node_proto = graph_proto->add_node(); + onnx::AttributeProto 
*tmul_input_attr_proto = tmul_input_node_proto->add_attribute(); + onnx::TensorProto *tmul_input_tensor_proto = tmul_input_attr_proto->mutable_t(); + SetConstantNodeProtoInfoForGeLU(tmul_input_node_proto, std::to_string(tmul_input_node_idx), tmul_input_attr_proto, + tmul_input_tensor_proto, "input_y_for_tmul", 0.5); + // Add third Mul Node + auto tmul_name = std::to_string(AllocateNodeIndex()); + onnx::NodeProto *tmul_node_proto = graph_proto->add_node(); + SetTwoInputNodeProtoInfo(tmul_node_proto, tmul_name, "Mul", sadd_name, std::to_string(tmul_input_node_idx)); + + // Add fourth Mul Node + auto fomul_node_idx = AllocateNodeIndex(); + onnx::NodeProto *fomul_node_proto = graph_proto->add_node(); + if (elem_type == kNumberTypeFloat16) { + fomul_node_proto->add_input(std::to_string(pre_cast_node_idx)); + } else { + fomul_node_proto->add_input(input_x); + } + SetOneInputNodeProtoInfo(fomul_node_proto, std::to_string(fomul_node_idx), "Mul", tmul_name); + + // if type is float16, add cast node cast output node from float16 to float32 + if (elem_type == kNumberTypeFloat16) { + auto aft_cast_node_idx = AllocateNodeIndex(); + (*node_map_ptr)[node] = aft_cast_node_idx; + onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node(); + onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute(); + SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx), std::to_string(fomul_node_idx), + aft_cast_attr_proto, onnx::TensorProto_DataType_FLOAT16); + } else { + (*node_map_ptr)[node] = fomul_node_idx; + } +} + void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); @@ -936,10 +1293,10 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); - 
AnfNodePtr op = node->input(0); + AnfNodePtr op = node->input(kZeroNum); auto op_value = dyn_cast(op); auto prim = dyn_cast(op_value->value()); - auto input_node = node->input(1)->cast(); + auto input_node = node->input(kOneNum)->cast(); if (input_node->IsApply(prim::kPrimMakeTuple)) { node_proto->set_op_type("ConcatFromSequence"); @@ -957,8 +1314,8 @@ void OnnxExporter::ExportPrimConcat(const FuncGraphPtr &func_graph, const CNodeP void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_data = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto input_type = node->input(2); + auto input_data = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_type = node->input(kTwoNum); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -982,16 +1339,16 @@ void OnnxExporter::ExportPrimCast(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto input_slope = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_slope = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(1)->Shape()); - auto slope_shape = dyn_cast(node->input(2)->Shape()); + auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); + auto slope_shape = dyn_cast(node->input(kTwoNum)->Shape()); MS_EXCEPTION_IF_NULL(x_shape); MS_EXCEPTION_IF_NULL(slope_shape); // format of x is NCHW, input format is NCHW, if length of input_slope is 1, insert Unsqueeze [1,2] - if (x_shape->shape().size() == 4 && slope_shape->shape().size() == 1) { + if (x_shape->shape().size() == kFourNum && 
slope_shape->shape().size() == kOneNum) { auto node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto = graph_proto->add_node(); node_proto->set_op_type("Unsqueeze"); @@ -1000,8 +1357,8 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, onnx::AttributeProto *attr_proto = node_proto->add_attribute(); attr_proto->set_type(onnx::AttributeProto_AttributeType_INTS); attr_proto->set_name("axes"); - attr_proto->add_ints(1); - attr_proto->add_ints(2); + attr_proto->add_ints(kOneNum); + attr_proto->add_ints(kTwoNum); node_proto->add_input(input_slope); input_slope = std::to_string(node_idx); @@ -1018,7 +1375,7 @@ void OnnxExporter::ExportPrimPReLU(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); @@ -1038,16 +1395,16 @@ void OnnxExporter::ExportPrimReLU6(const FuncGraphPtr &, const CNodePtr &node, void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto input_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto input_w = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); - auto x_shape = dyn_cast(node->input(1)->Shape()); - auto w_shape = dyn_cast(node->input(2)->Shape()); + auto input_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto input_w = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); + auto x_shape = dyn_cast(node->input(kOneNum)->Shape()); + auto w_shape = dyn_cast(node->input(kTwoNum)->Shape()); MS_EXCEPTION_IF_NULL(x_shape); 
MS_EXCEPTION_IF_NULL(w_shape); - if (x_shape->shape().size() != 4 || w_shape->shape().size() != 4) { + if (x_shape->shape().size() != kFourNum || w_shape->shape().size() != kFourNum) { MS_LOG(EXCEPTION) << "DepthwiseConv2d input shape should be 4d."; } - if (w_shape->shape()[0] != 1 && w_shape->shape()[1] != 1) { + if (w_shape->shape()[kZeroNum] != kOneNum && w_shape->shape()[kOneNum] != kOneNum) { MS_LOG(EXCEPTION) << "DepthwiseConv2d weight shape[0] != 1 and shape[1] != 1, cannot reshape"; } // create w_shape constant node @@ -1128,8 +1485,8 @@ void OnnxExporter::ExportPrimDepthwiseConv2d(const FuncGraphPtr &, const CNodePt void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto multiples = node->input(2); + auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto multiples = node->input(kTwoNum); std::string name_multiples; if (multiples->isa()) { auto const_node_idx = AllocateNodeIndex(); @@ -1137,11 +1494,9 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr onnx::NodeProto *node_proto = graph_proto->add_node(); name_multiples = std::to_string(const_node_idx); node_proto->add_output(name_multiples); - node_proto->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto->add_attribute(); - attr_proto->set_name("repeat"); - + attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); ConvertTupleToTensor(dyn_cast(multiples)->value(), attr_proto->mutable_t()); } else { @@ -1160,7 +1515,7 @@ void OnnxExporter::ExportPrimTile(const FuncGraphPtr &func_graph, const CNodePtr void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(1), 
node_map_ptr, graph_proto); + auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); std::string name_exponent; auto const_node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto_exp = graph_proto->add_node(); @@ -1169,12 +1524,13 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP node_proto_exp->set_op_type("Constant"); onnx::AttributeProto *attr_proto = node_proto_exp->add_attribute(); + attr_proto->set_name("value"); attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); onnx::TensorProto *tensor_proto = attr_proto->mutable_t(); tensor_proto->set_name("exponent"); tensor_proto->add_dims(static_cast<::google::protobuf::int64>(1)); - tensor_proto->set_data_type(onnx::TensorProto_DataType_INT64); - tensor_proto->add_int64_data(2); + tensor_proto->set_data_type(GetOnnxDataType(kNumberTypeFloat32)); + tensor_proto->add_float_data(2.0); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; @@ -1187,10 +1543,9 @@ void OnnxExporter::ExportPrimSquare(const FuncGraphPtr &func_graph, const CNodeP void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto name_x = GetNodeInputName(node->input(1), node_map_ptr, graph_proto); - auto name_indices = GetNodeInputName(node->input(2), node_map_ptr, graph_proto); - auto axis = node->input(3)->cast()->value(); - + auto name_x = GetNodeInputName(node->input(kOneNum), node_map_ptr, graph_proto); + auto name_indices = GetNodeInputName(node->input(kTwoNum), node_map_ptr, graph_proto); + auto axis = node->input(kThreeNum)->cast()->value(); auto node_idx = AllocateNodeIndex(); (*node_map_ptr)[node] = node_idx; onnx::NodeProto *node_proto = graph_proto->add_node(); @@ -1199,6 +1554,7 @@ void OnnxExporter::ExportPrimGatherV2(const FuncGraphPtr &func_graph, const CNod node_proto->add_input(name_x); node_proto->add_input(name_indices); 
onnx::AttributeProto *attr_proto = node_proto->add_attribute(); + attr_proto->set_name("axis"); attr_proto->set_type(onnx::AttributeProto_AttributeType_INT); attr_proto->set_i(static_cast<::google::protobuf::int64>(dyn_cast(axis)->value())); } @@ -1209,11 +1565,9 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n if (node->IsApply(prim::kPrimReshape)) { return ExportPrimReshape(func_graph, node, node_map_ptr, graph_proto); } - if (node->IsApply(prim::kPrimReduceMean) || node->IsApply(prim::kPrimReduceSum)) { return ExportPrimReduce(func_graph, node, node_map_ptr, graph_proto); } - if (node->IsApply(prim::kPrimTranspose)) { return ExportPrimTranspose(func_graph, node, node_map_ptr, graph_proto); } @@ -1257,7 +1611,7 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n return ExportPrimSquare(func_graph, node, node_map_ptr, graph_proto); } - // MindSpore GatherV2(x, indices, axis) --> ONNX Pow(x, indices) + // MindSpore GatherV2(x, indices, axis) --> ONNX Gather(x, indices) if (node->IsApply(prim::kPrimGather)) { return ExportPrimGatherV2(func_graph, node, node_map_ptr, graph_proto); } @@ -1267,7 +1621,7 @@ void OnnxExporter::ExportCNode(const FuncGraphPtr &func_graph, const CNodePtr &n MS_LOG(EXCEPTION) << "Inputs of apply node is empty"; } - AnfNodePtr op = inputs[0]; + AnfNodePtr op = inputs[kZeroNum]; std::vector op_inputs; // first process node input 1,2,..., since when node input is a ValueNode, here need to create a Constant Operator for (size_t i = 1; i < inputs.size(); i++) { @@ -1296,15 +1650,23 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::mapname() << " in convert map. 
" << "Exporting " << prim->name() << " operator is not yet supported."; } + // Get input first, because input maybe valuenode which need create constant node + std::vector input_list; + for (const auto &input : inputs) { + auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); + input_list.push_back(input_name); + } + const OpNameInfo &op_convert_info = op_iter->second; auto node_idx = AllocateNodeIndex(); onnx::NodeProto *node_proto = graph_proto->add_node(); + node_proto->set_name(std::to_string(node_idx) + op_convert_info.onnx_type()); node_proto->add_output(std::to_string(node_idx)); node_proto->set_op_type(op_convert_info.onnx_type()); // Set inputs - for (const auto &input : inputs) { - auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); + for (const auto &input_name : input_list) { + // auto input_name = GetNodeInputName(input, node_map_ptr, graph_proto); node_proto->add_input(input_name); } @@ -1327,24 +1689,24 @@ size_t OnnxExporter::ExportPrimitive(const FuncGraphPtr &, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto conv_node = dyn_cast(node->input(1)); - auto input_x = conv_node->input(1); // conv input x - auto input_w = conv_node->input(2); // conv weight(filter) - auto input_b = node->input(2); // conv bias + auto conv_node = dyn_cast(node->input(kOneNum)); + auto input_x = conv_node->input(kOneNum); // conv input x + auto input_w = conv_node->input(kTwoNum); // conv weight(filter) + auto input_b = node->input(kTwoNum); // conv bias - PrimitivePtr prim_conv = dyn_cast((dyn_cast(conv_node->input(0)))->value()); + PrimitivePtr prim_conv = dyn_cast((dyn_cast(conv_node->input(kZeroNum)))->value()); std::vector inputs{input_x, input_w, input_b}; (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_conv, inputs, graph_proto); } void OnnxExporter::ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto 
matmul_node = dyn_cast(node->input(1)); - auto input_x = matmul_node->input(1); // matmul input x - auto input_y = matmul_node->input(2); // matmul input y - auto input_b = node->input(2); // matmul bias + auto matmul_node = dyn_cast(node->input(kOneNum)); + auto input_x = matmul_node->input(kOneNum); // matmul input x + auto input_y = matmul_node->input(kTwoNum); // matmul input y + auto input_b = node->input(kTwoNum); // matmul bias - PrimitivePtr prim_matmul = dyn_cast((dyn_cast(matmul_node->input(0)))->value()); + PrimitivePtr prim_matmul = dyn_cast((dyn_cast(matmul_node->input(kZeroNum)))->value()); std::vector inputs{input_x, input_y, input_b}; (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_matmul, inputs, graph_proto); } @@ -1352,9 +1714,9 @@ void OnnxExporter::ExportMergeGemm(const FuncGraphPtr &func_graph, const CNodePt void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto batch_norm_node = dyn_cast(node->input(1)); + auto batch_norm_node = dyn_cast(node->input(kOneNum)); - PrimitivePtr prim_batch_norm = dyn_cast((dyn_cast(batch_norm_node->input(0)))->value()); + PrimitivePtr prim_batch_norm = dyn_cast((dyn_cast(batch_norm_node->input(kZeroNum)))->value()); std::vector inputs; for (size_t i = 1; i < batch_norm_node->inputs().size(); i++) { inputs.push_back(batch_norm_node->input(i)); @@ -1365,10 +1727,10 @@ void OnnxExporter::ExportMergeBatchNorm(const FuncGraphPtr &func_graph, const CN void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - auto maxpool_with_argmax_node = dyn_cast(node->input(1)); + auto maxpool_with_argmax_node = dyn_cast(node->input(kOneNum)); PrimitivePtr prim_maxpool_with_argmax = - dyn_cast((dyn_cast(maxpool_with_argmax_node->input(0)))->value()); + 
dyn_cast((dyn_cast(maxpool_with_argmax_node->input(kZeroNum)))->value()); std::vector inputs; for (size_t i = 1; i < maxpool_with_argmax_node->inputs().size(); i++) { inputs.push_back(maxpool_with_argmax_node->input(i)); @@ -1376,9 +1738,132 @@ void OnnxExporter::ExportMergeMaxPoolWithArgmax(const FuncGraphPtr &func_graph, (*node_map_ptr)[node] = ExportPrimitive(func_graph, node_map_ptr, prim_maxpool_with_argmax, inputs, graph_proto); } +// LayerNorm(N, C1, H, W) --> reshape(1, C2, 1, W) + MeanVarianceNormalization + reshape(N, C1, H, W) +void OnnxExporter::ExportMergeLayerNorm(const FuncGraphPtr &func_graph, const CNodePtr &node, + std::map *node_map_ptr, + onnx::GraphProto *const graph_proto) { + auto LayerNormNode = dyn_cast(node->input(kOneNum)); + auto layernorm_input_x = GetNodeInputName(LayerNormNode->input(kOneNum), node_map_ptr, graph_proto); + auto layernorm_input_gamma = GetNodeInputName(LayerNormNode->input(kTwoNum), node_map_ptr, graph_proto); + auto layernorm_input_beta = GetNodeInputName(LayerNormNode->input(kThreeNum), node_map_ptr, graph_proto); + + auto layernorm_input_x_node = LayerNormNode->input(kOneNum); + auto dtype = layernorm_input_x_node->Type(); + auto elem_type = dyn_cast(dtype)->element()->type_id(); + auto pre_cast_node_idx = 0; + + // if type is float16, add cast node cast type from float16 to float32 + if (elem_type == kNumberTypeFloat16) { + pre_cast_node_idx = AllocateNodeIndex(); + onnx::NodeProto *pre_cast_node_proto = graph_proto->add_node(); + onnx::AttributeProto *pre_cast_attr_proto = pre_cast_node_proto->add_attribute(); + SetCastNodeProtoInfo(pre_cast_node_proto, std::to_string(pre_cast_node_idx), layernorm_input_x, pre_cast_attr_proto, + onnx::TensorProto_DataType_FLOAT); + } + + // reshape before MeanVarianceNormalization + auto input_shape = dyn_cast(LayerNormNode->input(kOneNum)->Shape()); + std::vector new_input_shape; + int64_t n_shape = 1; + int64_t c_shape = 1; + int64_t h_shape = 1; + size_t input_shape_size = 
input_shape->shape().size(); + for (size_t i = 0; i < input_shape_size - 1; i++) { + c_shape = c_shape * input_shape->shape()[i]; + } + new_input_shape.push_back(n_shape); + new_input_shape.push_back(c_shape); + new_input_shape.push_back(h_shape); + new_input_shape.push_back(input_shape->shape()[input_shape_size - kOneNum]); + + // Add shape node for reshape(before MeanVarianceNormalization) + auto new_shape_value = MakeValue>(new_input_shape); + auto shape_node = NewValueNode(new_shape_value)->cast(); + auto shape_node_idx = AllocateNodeIndex(); + + // (*node_map_ptr)[shape_node] = shape_node_idx; + onnx::NodeProto *shape_node_proto = graph_proto->add_node(); + shape_node_proto->add_output(std::to_string(shape_node_idx)); + shape_node_proto->set_op_type("Constant"); + onnx::AttributeProto *shape_attr_proto = shape_node_proto->add_attribute(); + shape_attr_proto->set_name("value"); + shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + ConvertTupleToTensor(dyn_cast(shape_node)->value(), shape_attr_proto->mutable_t()); + + // Add reshape node before MeanVarianceNormalization + auto pre_reshape_node_idx = AllocateNodeIndex(); + onnx::NodeProto *pre_reshape_node_proto = graph_proto->add_node(); + pre_reshape_node_proto->set_op_type("Reshape"); + pre_reshape_node_proto->add_output(std::to_string(pre_reshape_node_idx)); + if (elem_type == kNumberTypeFloat16) { + pre_reshape_node_proto->add_input(std::to_string(pre_cast_node_idx)); + } else { + pre_reshape_node_proto->add_input(layernorm_input_x); + } + pre_reshape_node_proto->add_input(std::to_string(shape_node_idx)); + + // MeanVarianceNormalization + auto meanvariancenormal_node_idx = AllocateNodeIndex(); + onnx::NodeProto *meanvariancenormal_node_proto = graph_proto->add_node(); + meanvariancenormal_node_proto->set_op_type("MeanVarianceNormalization"); + meanvariancenormal_node_proto->add_output(std::to_string(meanvariancenormal_node_idx)); + 
meanvariancenormal_node_proto->add_input(std::to_string(pre_reshape_node_idx)); + + // if cast type from float16 to float32, add cast node cast type from float32 to float16 + auto aft_cast_node_idx = 0; + if (elem_type == kNumberTypeFloat16) { + aft_cast_node_idx = AllocateNodeIndex(); + onnx::NodeProto *aft_cast_node_proto = graph_proto->add_node(); + onnx::AttributeProto *aft_cast_attr_proto = aft_cast_node_proto->add_attribute(); + SetCastNodeProtoInfo(aft_cast_node_proto, std::to_string(aft_cast_node_idx), + std::to_string(meanvariancenormal_node_idx), aft_cast_attr_proto, + onnx::TensorProto_DataType_FLOAT16); + } + + // Add mul and add node + auto mul_node_idx = AllocateNodeIndex(); + onnx::NodeProto *mul_node_proto = graph_proto->add_node(); + mul_node_proto->set_op_type("Mul"); + if (elem_type == kNumberTypeFloat16) { + mul_node_proto->add_input(std::to_string(aft_cast_node_idx)); + } else { + mul_node_proto->add_input(std::to_string(meanvariancenormal_node_idx)); + } + mul_node_proto->add_input(layernorm_input_gamma); + mul_node_proto->add_output(std::to_string(mul_node_idx)); + + // add beta + auto add_node_idx = AllocateNodeIndex(); + onnx::NodeProto *add_node_proto = graph_proto->add_node(); + SetTwoInputNodeProtoInfo(add_node_proto, std::to_string(add_node_idx), "Add", std::to_string(mul_node_idx), + layernorm_input_beta); + + // reshape after MeanVarianceNormalization + // Add shape node for reshape(after MeanVarianceNormalization) + auto output_shape_value = MakeValue>(input_shape->shape()); + auto output_shape_node = NewValueNode(output_shape_value)->cast(); + auto output_shape_node_idx = AllocateNodeIndex(); + + onnx::NodeProto *output_shape_node_proto = graph_proto->add_node(); + output_shape_node_proto->add_output(std::to_string(output_shape_node_idx)); + output_shape_node_proto->set_op_type("Constant"); + onnx::AttributeProto *output_shape_attr_proto = output_shape_node_proto->add_attribute(); + output_shape_attr_proto->set_name("value"); + 
output_shape_attr_proto->set_type(onnx::AttributeProto_AttributeType_TENSOR); + ConvertTupleToTensor(dyn_cast(output_shape_node)->value(), output_shape_attr_proto->mutable_t()); + // Add reshape node after MeanVarianceNormalization + auto aft_reshape_node_idx = AllocateNodeIndex(); + (*node_map_ptr)[node] = aft_reshape_node_idx; + onnx::NodeProto *aft_reshape_node_proto = graph_proto->add_node(); + aft_reshape_node_proto->set_op_type("Reshape"); + aft_reshape_node_proto->add_output(std::to_string(aft_reshape_node_idx)); + aft_reshape_node_proto->add_input(std::to_string(add_node_idx)); + aft_reshape_node_proto->add_input(std::to_string(output_shape_node_idx)); +} + void OnnxExporter::ExportOutput(const FuncGraphPtr &, const CNodePtr &node, std::map *node_map_ptr, onnx::GraphProto *const graph_proto) { - if (node->inputs().size() != 2) { + if (node->inputs().size() != kTwoNum) { MS_LOG(EXCEPTION) << "Number of inputs of return node is not equal to 2."; } AnfNodePtr arg = node->input(1); @@ -1416,7 +1901,6 @@ std::string OnnxExporter::GetNodeInputName(const AnfNodePtr &orig_node, std::map onnx::NodeProto *node_proto = graph_proto->add_node(); node_proto->add_output(node_name); - SetNodeAttribute(node->cast()->value(), node_proto); return node_name; diff --git a/mindspore/ccsrc/transform/graph_ir/convert.h b/mindspore/ccsrc/transform/graph_ir/convert.h index 00bde36780d..504bda92482 100644 --- a/mindspore/ccsrc/transform/graph_ir/convert.h +++ b/mindspore/ccsrc/transform/graph_ir/convert.h @@ -78,7 +78,8 @@ class DfGraphConvertor { void DrawComputeGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!"; + MS_LOG(ERROR) << "Open file '" << name << "' failed!" 
+ << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } fout << compute_sout_.str(); @@ -87,7 +88,8 @@ class DfGraphConvertor { void DrawInitGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!"; + MS_LOG(ERROR) << "Open file '" << name << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } fout << init_sout_.str(); @@ -96,7 +98,8 @@ class DfGraphConvertor { void DrawSaveCheckpointGraph(const std::string &name) { std::ofstream fout(name); if (!fout.is_open()) { - MS_LOG(ERROR) << "Open file '" << name << "' failed!"; + MS_LOG(ERROR) << "Open file '" << name << "' failed!" + << " Errno:" << errno << " ErrInfo:" << strerror(errno); return; } fout << checkpoint_sout_.str(); diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc index b43049a8b3b..b522c010357 100644 --- a/mindspore/ccsrc/utils/context/graph_kernel_flags.cc +++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.cc @@ -184,6 +184,7 @@ void GraphKernelFlags::RegisterFlags(std::map *flag_ma reg.AddFlag("enable_stitch_fusion", &enable_stitch_fusion, opt_level == OptLevel_3); reg.AddFlag("enable_recompute_fusion", &enable_recompute_fusion, opt_level >= OptLevel_2); reg.AddFlag("enable_parallel_fusion", &enable_parallel_fusion, opt_level == OptLevel_3); + reg.AddFlag("enable_low_precision", &enable_low_precision); // Integer flags reg.AddFlag("online_tuning", &online_tuning); @@ -211,6 +212,7 @@ std::string GraphKernelFlags::DumpAllFlags() const { json["enable_stitch_fusion"] = enable_stitch_fusion; json["enable_recompute_fusion"] = enable_recompute_fusion; json["enable_parallel_fusion"] = enable_parallel_fusion; + json["enable_low_precision"] = enable_low_precision; json["opt_level"] = opt_level; json["online_tuning"] = online_tuning; diff --git a/mindspore/ccsrc/utils/context/graph_kernel_flags.h 
b/mindspore/ccsrc/utils/context/graph_kernel_flags.h index 6be617452f2..7691609853f 100644 --- a/mindspore/ccsrc/utils/context/graph_kernel_flags.h +++ b/mindspore/ccsrc/utils/context/graph_kernel_flags.h @@ -79,6 +79,13 @@ class GraphKernelFlags { */ bool enable_parallel_fusion; + /** + * Enable low precision both for data transferred between graph kernels + * and for computation inside graph kernels. + * Experimental feature, enabled by the enable_low_precision flag. + */ + bool enable_low_precision; + /** * Optimization level, value from 0 to 3. * 0: Disable GraphKernel diff --git a/mindspore/ccsrc/utils/utils.h b/mindspore/ccsrc/utils/utils.h index 7e884d52645..a4983b310a4 100644 --- a/mindspore/ccsrc/utils/utils.h +++ b/mindspore/ccsrc/utils/utils.h @@ -203,6 +203,7 @@ constexpr auto kSoftmaxGradExtOpName = "SoftmaxGradExt"; constexpr auto kStridedReadOpName = "StridedRead"; constexpr auto kStridedWriteOpName = "StridedWrite"; constexpr auto kFusedAdamWeightDecayName = "FusedAdamWeightDecay"; +constexpr auto kAdamWeightDecayName = "AdamWeightDecay"; constexpr auto kFusedAdamName = "FusedAdam"; constexpr auto kFusedSparseAdamName = "FusedSparseAdam"; constexpr auto kFusedMatMulBiasAddName = "FusedMatMulBiasAdd"; @@ -322,6 +323,7 @@ constexpr auto kAttrInputNames = "input_names"; constexpr auto kAttrIsAICPUKernel = "is_AICPU_kernel"; constexpr auto kIsBackendCast = "is_backed_cast"; constexpr auto kAttrOutputNames = "output_names"; +constexpr auto kAttrAsync = "async"; constexpr auto kAttrVisited = "visited"; constexpr auto kAttrShape = "shape"; constexpr auto kAttrMomentum = "momentum"; @@ -333,6 +335,7 @@ constexpr auto kAttrDataShape = "data_shape"; constexpr auto kAttrFormat = "format"; constexpr auto kAttrReshapeType = "reshape_type"; constexpr auto kAttrAxis = "axis"; +constexpr auto kAttrAxes = "axes"; constexpr auto kAttrKeepDims = "keep_dims"; constexpr auto kAttrShapeGamma = "shape_gamma"; constexpr auto kAttrPerm = "perm"; @@ -589,6 +592,7 @@ const
std::set kOptOperatorSet = {kMomentumOpName, kAdamApplyOneWithDecayOpName, kAdamApplyOneWithDecayAssignOpName, kFusedAdamWeightDecayName, + kAdamWeightDecayName, kFusedAdamName, kFusedSparseAdamName, kFusedMulApplyMomentumOpName, @@ -628,6 +632,10 @@ const std::set k3DFormatSet = {kOpFormat_NCDHW, kOpFormat_NDC1HWC0, kOpFormat_NDHWC, kOpFormat_DHWCN, kOpFormat_DHWNC}; const std::set DynamicShapeConstInputToAttr = { + kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceMinOpName, + kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; + +const std::set DynamicShapeConstInputToAttrGPU = { kCastOpName, kExpandDimsOpName, kReshapeOpName, kEmbeddingLookupOpName, kTransposeOpName, kReduceSumOpName, kReduceMinOpName, kReduceMeanOpName, kReduceMaxOpName, kReduceAllOpName, kReduceAnyOpName, kConcatOpName}; diff --git a/mindspore/ccsrc/vm/transform.cc b/mindspore/ccsrc/vm/transform.cc index 374685aa085..4a363b65cbb 100644 --- a/mindspore/ccsrc/vm/transform.cc +++ b/mindspore/ccsrc/vm/transform.cc @@ -388,6 +388,13 @@ int64_t CompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) { MS_LOG(DEBUG) << "Call:" << Ref(fn) << ", " << height_ << ", " << (size - 1); AddInst(Instruction::kCall, Ref(fn)); Ret(static_cast(size - 1)); + + for (size_t i = size - 1; i > 0; i--) { + const auto iter = slots_.find(inputs[i]); + if (iter != slots_.end() && iter->second >= height_) { + slots_.erase(inputs[i]); + } + } return RET_SUCCESS; } diff --git a/mindspore/common/parameter.py b/mindspore/common/parameter.py index 496c94e4148..018ebaf5190 100644 --- a/mindspore/common/parameter.py +++ b/mindspore/common/parameter.py @@ -136,7 +136,6 @@ class Parameter(Tensor_): def __init__(self, default_input, name=None, requires_grad=True, layerwise_parallel=False, parallel_optimizer=True): self.param_info = ParamInfo() - self.init_param_info = True self.init_in_server = False self.cache_enable = False 
self.name = name @@ -152,6 +151,7 @@ class Parameter(Tensor_): self.is_param_ps = False self.push_weight_to_server = False self.pull_weight_from_server = False + self.requires_aggr = True self._cast_type = None self._unique = False self.is_in_parallel = _is_in_parallel_mode() @@ -236,18 +236,22 @@ class Parameter(Tensor_): self.init_in_server = init_in_server self.param_info.init_in_server = init_in_server - def set_param_fl(self, push_to_server=False, pull_from_server=False): + def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True): """ Set the way of parameter and server interaction. Args: push_to_server (bool): Whether the parameter should be pushed to server. Default: False. pull_from_server (bool): Whether the parameter should be pulled from server. Default: False. + requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True. """ if push_to_server: self.push_weight_to_server = True if pull_from_server: self.pull_weight_from_server = True + if not requires_aggr: + self.requires_aggr = False + self.param_info.requires_aggr = False @property def inited_param(self): @@ -376,6 +380,7 @@ class Parameter(Tensor_): x.is_param_ps = self.is_param_ps x.init_in_server = self.init_in_server x.cache_enable = self.cache_enable + x.requires_aggr = self.requires_aggr if self.cache_shape: x.cache_shape = self.cache_shape if init != 'same': @@ -581,11 +586,6 @@ class Parameter(Tensor_): obj.sliced = set_sliced return obj - def __del__(self): - if hasattr(self, "init_param_info"): - if self.init_param_info is True and context.get_context("mode") == context.GRAPH_MODE: - self.param_info = None - class ParameterTuple(tuple): """ diff --git a/mindspore/common/seed.py b/mindspore/common/seed.py index 7839cbe1cc5..84157a2771c 100644 --- a/mindspore/common/seed.py +++ b/mindspore/common/seed.py @@ -59,10 +59,8 @@ def set_seed(seed): Examples: >>> import numpy as np >>> import mindspore.ops as ops - >>> from mindspore 
import Tensor - >>> from mindspore.common import set_seed + >>> from mindspore import Tensor, set_seed, Parameter >>> from mindspore.common.initializer import initializer - >>> from mindspore.common.parameter import Parameter >>> >>> # Note: (1) Please make sure the code is running in PYNATIVE MODE; >>> # (2) Because Composite-level ops need parameters to be Tensors, for below examples, diff --git a/mindspore/common/tensor.py b/mindspore/common/tensor.py index 80a32b93a89..12b11905f87 100644 --- a/mindspore/common/tensor.py +++ b/mindspore/common/tensor.py @@ -40,13 +40,13 @@ class Tensor(Tensor_): input_data (Union[Tensor, float, int, bool, tuple, list, numpy.ndarray]): Input data of the tensor. dtype (:class:`mindspore.dtype`): Input data should be None, bool or numeric type defined in `mindspore.dtype`. The argument is used to define the data type of the output tensor. If it is None, the data type of the - output tensor will be as same as the `input_data`. Default: None. + output tensor will be the same as the `input_data`. Default: None. shape (Union[tuple, list, int]): A list of integers, a tuple of integers or an integer as the shape of output. If `input_data` is available, `shape` doesn't need to be set. Default: None. init (Initializer): the information of init data. 'init' is used for delayed initialization in parallel mode. Usually, it is not recommended to use 'init' interface to initialize parameters in other conditions. If 'init' interface is used to initialize - parameters, the `Tensor.init_data` API need to be called to convert `Tensor` to the actual data. + parameters, the `Tensor.init_data` API needs to be called to convert `Tensor` to the actual data. Outputs: Tensor. If `dtype` and `shape` are not set, return a tensor with the same dtype and shape as `input_data`. @@ -425,12 +425,12 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)): Dimensions of reduction, - when axis is None or empty tuple, reduce all dimensions. Default: (). 
+ when the axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: Tensor, if all array elements along the given axis evaluate to True, its value is True, - otherwise its value is False. If axis is None or empty tuple, reduce all dimensions. + otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -454,12 +454,12 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)): Dimensions of reduction, - when axis is None or empty tuple, reduce all dimensions. Default: (). + when the axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: Tensor, if any array element along the given axis evaluates to True, its value is True, - otherwise its value is False. If axis is None or empty tuple, reduce all dimensions. + otherwise its value is False. If the axis is None or empty tuple, reduce all dimensions. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -536,7 +536,7 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int), list(int)]): Dimensions of reduction, - when axis is None or empty tuple, reduce all dimensions. Default: (). + when the axis is None or empty tuple, reduce all dimensions. Default: (). keep_dims (bool): Whether to keep the reduced dimensions. Default: False. Returns: @@ -772,7 +772,7 @@ class Tensor(Tensor_): def astype(self, dtype, copy=True): """ - Return a copy of the tensor, casted to a specified type. + Return a copy of the tensor, cast to a specified type. Args: dtype (Union[:class:`mindspore.dtype`, str]): Designated tensor dtype, can be in format @@ -818,7 +818,7 @@ class Tensor(Tensor_): shape as self.shape with the dimension along axis removed. Raises: - ValueError: if axis is out of range. + ValueError: if the axis is out of range. 
Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -852,7 +852,7 @@ class Tensor(Tensor_): shape as self.shape with the dimension along axis removed. Raises: - ValueError: if axis is out of range. + ValueError: if the axis is out of range. Supported Platforms: ``Ascend`` ``GPU`` ``CPU`` @@ -890,7 +890,7 @@ class Tensor(Tensor_): In that case, :class:`float32` is used. Default: None. Raises: - ValueError: if axis is out of range. + ValueError: if the axis is out of range. Returns: Tensor. @@ -1024,7 +1024,7 @@ class Tensor(Tensor_): is passed, initial must also be provided. Default: True. Returns: - Tensor or scalar, minimum of input tensor. If axis is None, the result is a scalar + Tensor or scalar, minimum of input tensor. If the axis is None, the result is a scalar value. If `axis` is given, the result is an array of dimension ``self.ndim - 1``. Raises: @@ -1533,7 +1533,7 @@ class Tensor(Tensor_): Args: choices (Union[tuple, list, Tensor]): Choice arrays. `a` and all of the `choices` must - be broadcastable to the same shape. If `choices` is itself an array, then + be broadcasted to the same shape. If `choices` is itself an array, then its outermost dimension (i.e., the one corresponding to ``choices.shape[0]``) is taken as defining the “sequence”. mode (‘raise’, ‘wrap’, ‘clip’, optional): Specifies how indices outside @@ -1764,8 +1764,8 @@ class Tensor(Tensor_): Args: axis (Union[None, int, tuple(int)]): Axis or axes along which a sum is performed. Default: None. If None, sum all of the elements of the input array. - If axis is negative it counts from the last to the first axis. - If axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple + If the axis is negative, it counts from the last to the first axis. + If the axis is a tuple of ints, a sum is performed on all of the axes specified in the tuple instead of a single axis or all the axes as before. dtype (:class:`mindspore.dtype`, optional): defaults to None.
Overrides the dtype of the output Tensor. @@ -1778,7 +1778,7 @@ class Tensor(Tensor_): Returns: Tensor. A tensor with the same shape as input, with the specified axis removed. - If input tensor is a 0-d array, or if axis is None, a scalar is returned. + If input tensor is a 0-d array, or if the axis is None, a scalar is returned. Raises: TypeError: If input is not array_like, or `axis` is not int or tuple of ints, @@ -1798,7 +1798,8 @@ class Tensor(Tensor_): >>> print(input_x.sum(axis=1)) [10. 35.] """ - dtype = self.dtype if dtype is None else dtype + input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self + dtype = input_x.dtype if dtype is None else dtype if not isinstance(keepdims, int): raise TypeError(f"integer argument expected, but got {type(keepdims)}") if initial is not None and not isinstance(initial, (int, float, bool)): @@ -1808,7 +1809,9 @@ class Tensor(Tensor_): else: axis = validator.check_and_canonicalize_axes(axis, self.ndim) - input_x = self.astype(mstype.int32) if self.dtype == mstype.bool_ else self + if not validator.check_type_support(input_x.dtype, 'GPU', + (mstype.float64, mstype.float32, mstype.float16)): + input_x = input_x.astype(mstype.float32) if 0 in self.shape: input_x = tensor_operator_registry.get('make_tensor')([0], self.dtype) res = tensor_operator_registry.get('sum')(bool(keepdims))(input_x, axis) @@ -1830,7 +1833,7 @@ class Tensor(Tensor_): Tensor, has the same shape as input tensor except along the given axis. Raises: - ValueError: if axis is out of range. + ValueError: if the axis is out of range. TypeError: if arguments have types not specified above. Supported Platforms: diff --git a/mindspore/context.py b/mindspore/context.py index bcccdd44bf4..c6262fd4e79 100644 --- a/mindspore/context.py +++ b/mindspore/context.py @@ -353,11 +353,11 @@ def set_auto_parallel_context(**kwargs): Note: Attribute name is required for setting attributes. 
- If a program has tasks with different parallel modes, then before setting new parallel mode for the - next task, interface mindspore.context.reset_auto_parallel_context() needs to be called to reset + If a program has tasks on different parallel modes, before setting a new parallel mode for the + next task, interface mindspore.context.reset_auto_parallel_context() should be called to reset the configuration. - Setting or changing parallel modes must be called before any creating Initializer, otherwise, - RuntimeError may be raised when compiling the network. + Setting or changing parallel modes must be called before creating any Initializer, otherwise, + it may have RuntimeError when compiling the network. Some configurations are parallel mode specific, see the below table for details: @@ -410,7 +410,7 @@ def set_auto_parallel_context(**kwargs): strategy_ckpt_load_file (str): The path to load parallel strategy checkpoint. Default: '' strategy_ckpt_save_file (str): The path to save parallel strategy checkpoint. Default: '' full_batch (bool): If you load whole batch datasets in auto_parallel mode, this parameter - should be set with True. Default: False. + should be set as True. Default: False. enable_parallel_optimizer (bool): This is a developing feature, which shards the weight update computation for data parallel training in the benefit of time and memory saving. Currently, auto and semi auto parallel mode support all optimizers in both Ascend and GPU. Data parallel mode only supports @@ -419,7 +419,7 @@ def set_auto_parallel_context(**kwargs): and HCCL_WORLD_GROUP/NCCL_WORLD_GROUP. No Default, if it is not set, the fusion is closed. pipeline_stages (int): Set the stage information for pipeline parallel. This indicates how the devices are distributed alone the pipeline. The total devices will be divided into - 'pipeline_stags' stages. This currently could only be used when + 'pipeline_stags' stages. 
Currently this could only be used when parallel mode semi_auto_parallel is enabled. Default: 1. grad_accumulation_step (int): Set the accumulation steps of gradients in auto and semi auto parallel mode. This should be a positive int. Default: 1. @@ -520,14 +520,14 @@ def set_context(**kwargs): Set context for running environment. Context should be configured before running your program. If there is no configuration, - it will automatic acquisition according to device target by default. GRAPH_MODE or + it will be automatically obtained according to the device target by default. GRAPH_MODE or PYNATIVE_MODE can be set by `mode` attribute and both modes support all backends, default mode is GRAPH_MODE. - When the `save_graphs` attribute is set to True, attribute of `save_graphs_path` is used to set the + When the `save_graphs` attribute is set as True, attribute of `save_graphs_path` is used to set the intermediate compilation graph storage path. By default, the graphs are saved in the current directory. For other configurations and arguments, please refer to the corresponding module - description, the configuration is optional and can be enabled when needed. + description. Additionally, the configuration is optional and can be enabled when needed. Note: Attribute name is required for setting attributes. @@ -579,7 +579,7 @@ def set_context(**kwargs): equivalently by setting opt_level greater than 0. - dump_as_text: dump detail info as text files. Default: false. - More options can be referred from the implementation code. + More options can refer to the implementation code. These options can also be set by environment variable `MS_GRAPH_KERNEL_FLAGS`, without modifying network source code. For example, `export MS_GRAPH_KERNEL_FLAGS="--opt_level=2 --dump_as_text"`. reserve_class_name_in_scope (bool) : Whether to save the network class name in the scope. Default: True. 
@@ -597,15 +597,15 @@ def set_context(**kwargs): profiling_options (str): Set profiling collection options, operators can profiling data here. The values of profiling collection options are as follows, supporting the collection of multiple data. - - output: the saving the path of the profiling collection result file. The directory spectified by this - parameter needs to be created in advance on the training environment (container or host side) and ensure + - output: The saving path of the profiling collection result. The directory specified by this + parameter should be created in advance in the training environment (container or host side) and ensure that the running user configured during installation has read and write permissions.It supports the configuration of absolute or relative paths(relative to the current path when executing the command line). The absolute path configuration starts with '/', for example:/home/data/output. - The relative path configuration directly starts with the directory name,for example:output. + The relative path configuration starts with the directory name,for example:output. - training_trace: collect iterative trajectory data, that is, the training task and software information of - the AI software stack, to achieve performance analysis of the training task, focusing on data + the AI software stack, to realize performance analysis of the training task, focusing on data enhancement, forward and backward calculation, gradient aggregation update and other related data. The value is on/off. @@ -640,11 +640,11 @@ def set_context(**kwargs): max_device_memory (str): Sets the maximum memory available for devices. Currently, it is only supported on GPU. The format is "xxGB". Default: "1024GB". print_file_path (str): The path of saving print data. If this parameter is set, print data is saved to - a file by default, and turns off printing to the screen. 
If the file already exists, add a timestamp + a file by default, and turns off printing to the screen. If the file exists already, add a timestamp suffix to the file. Default: ''. enable_sparse (bool): Whether to enable sparsity feature. Default: False. For details of sparsity and sparse tensor, please check - ``_. + ``_. max_call_depth (int): Specify the maximum depth of function call. Must be positive integer. Default: 1000. env_config_path (str): Config path for DFX. auto_tune_mode (str): The mode of auto tune when op building, get the best tiling performance, @@ -652,7 +652,7 @@ def set_context(**kwargs): RL: rl_tune; GA: ga_tune; RL,GA: rl_tune/ga_tune(Automatic selection). - - rl_tune: Reinforecement Learning tune. + - rl_tune: Reinforcement Learning tune. - ga_tune: Genetic Algorithm tune. grad_for_scalar (bool): Whether to get gradient for scalar. If set, the gradient of scalar input parameter can be calculated. Now, only part of the scalar operators support this calculation. Default: False. @@ -660,8 +660,8 @@ def set_context(**kwargs): This is an experimental prototype that is subject to change and/or deletion. load_compile_cache (bool): Whether to use the cache of the graph compiled by frontend. When it is true, the graph compilation will skip the frontend compilation process. It means that - you should make sure the network has not been changed since the last execution. Currently we have - not support automatic checking the changes yet. Default: False. + you should make sure the network has not been changed since the last execution. By now, we have + not support automatically checking the changes yet. Default: False. This is an experimental prototype that is subject to change and/or deletion. Raises: @@ -715,7 +715,7 @@ def set_context(**kwargs): def get_context(attr_key): """ Get context attribute value according to the input key. - If some attribute are not set, it will be automatically obtained. 
+ If some attributes are not set, they will be automatically obtained. Args: attr_key (str): The key of the attribute. diff --git a/mindspore/core/abstract/abstract_value.cc b/mindspore/core/abstract/abstract_value.cc index e6c81dc8268..4f93df83a16 100644 --- a/mindspore/core/abstract/abstract_value.cc +++ b/mindspore/core/abstract/abstract_value.cc @@ -271,10 +271,14 @@ const AbstractBasePtr AbstractSequeue::operator[](const std::size_t &dim) const std::string AbstractSequeue::ToString() const { std::ostringstream buffer; - int64_t i = 0; + size_t i = 0; + size_t size = elements_.size(); for (const auto &ele : elements_) { MS_EXCEPTION_IF_NULL(ele); - buffer << "element[" << i << "]: " << ele->ToString() << ","; + buffer << "element[" << i << "]: " << ele->ToString(); + if (i < size - 1) { + buffer << ", "; + } i++; } return buffer.str(); diff --git a/mindspore/core/abstract/analysis_context.cc b/mindspore/core/abstract/analysis_context.cc index 99facd66845..561fa777a43 100644 --- a/mindspore/core/abstract/analysis_context.cc +++ b/mindspore/core/abstract/analysis_context.cc @@ -23,6 +23,7 @@ namespace mindspore { namespace abstract { +std::list AnalysisContext::all_context_; AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph, const AbstractBasePtrList &args_spec_list) { // Find func graph's parent and its parent context firstly. @@ -56,7 +57,7 @@ AnalysisContextPtr AnalysisContext::NewContext(const FuncGraphPtr &func_graph, } // Create a new context for the func graph and its specific arguments. - AnalysisContextPtr new_context = std::make_shared(parent_context, func_graph, args_spec_list); + AnalysisContextPtr new_context = CreateContext(parent_context, func_graph, args_spec_list); // To avoid cycle-reference, use weak_ptr here. 
auto weak_new_context = std::weak_ptr(new_context); new_context->extant_context_cache_[func_graph] = weak_new_context; @@ -102,7 +103,7 @@ AnalysisContextPtr AnalysisContext::FindOwnOrParentContext(const FuncGraphPtr &f } AnalysisContextPtr AnalysisContext::DummyContext() { - AnalysisContextPtr dummy_context = std::make_shared(nullptr, nullptr, AbstractBasePtrList()); + AnalysisContextPtr dummy_context = CreateContext(nullptr, nullptr, AbstractBasePtrList()); dummy_context->extant_context_cache_[nullptr] = std::weak_ptr(dummy_context); return dummy_context; } @@ -112,7 +113,7 @@ bool AnalysisContext::IsDummyContext() { } const AnalysisContextPtr kDummyAnalysisContext = - std::make_shared(nullptr, nullptr, AbstractBasePtrList()); + AnalysisContext::CreateContext(nullptr, nullptr, AbstractBasePtrList()); bool AnalysisContext::operator==(const AnalysisContext &other) const { if (func_graph_ != other.func_graph_) { @@ -174,7 +175,7 @@ AnalysisContextPtr AnalysisContext::SpecializeKey() const { } return arg; }); - AnalysisContextPtr context_new = std::make_shared(nullptr, func_graph_, args_broad_shp); + AnalysisContextPtr context_new = CreateContext(nullptr, func_graph_, args_broad_shp); context_new->parent_ = parent_; return context_new; } @@ -209,5 +210,23 @@ std::string AnalysisContext::ToString() const { buffer << "}"; return buffer.str(); } + +void AnalysisContext::ClearContext() { + for (auto &item : all_context_) { + item->parent_ = nullptr; + item->func_graph_ = nullptr; + item->args_spec_list_.clear(); + item->extant_context_cache_.clear(); + item->children_cache_.clear(); + } + all_context_.clear(); +} + +AnalysisContextPtr AnalysisContext::CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg, + const AbstractBasePtrList &args_spec_list) { + auto context = std::make_shared(parent, fg, args_spec_list); + all_context_.emplace_back(context); + return context; +} } // namespace abstract } // namespace mindspore diff --git 
a/mindspore/core/abstract/analysis_context.h b/mindspore/core/abstract/analysis_context.h index e097888ebc7..926697b5759 100644 --- a/mindspore/core/abstract/analysis_context.h +++ b/mindspore/core/abstract/analysis_context.h @@ -22,6 +22,7 @@ #include #include #include +#include #include "abstract/abstract_value.h" #include "ir/meta_func_graph.h" @@ -42,7 +43,6 @@ class AnalysisContext { extant_context_cache_ = parent_->extant_context_cache_; } } - ~AnalysisContext() = default; // Extend this context with values for another graph. @@ -59,6 +59,9 @@ class AnalysisContext { std::string ToString() const; AnalysisContextPtr SpecializeKey() const; AbstractBasePtrList args_spec_list() { return args_spec_list_; } + static void ClearContext(); + static AnalysisContextPtr CreateContext(const AnalysisContextPtr &parent, const FuncGraphPtr &fg, + const AbstractBasePtrList &args_spec_list); private: AnalysisContextPtr parent_; @@ -70,6 +73,11 @@ class AnalysisContext { // Record all created child contexts from this context. // Like: key: [func_graph & arguments], value: [child_context] std::unordered_map children_cache_; + + // There may be a shared_ptr reference cycle like: + // FuncGraphAbstractClosure->AnalysisContext->children_cache_->ArgsSpec->FuncGraphAbstractClosure. + // To break the cycle, all_context_ records every created context so that they can be cleared. 
+ static std::list all_context_; }; struct ContextHasher { diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc index 4b5aefeac1a..9c72ad800f2 100644 --- a/mindspore/core/abstract/prim_arrays.cc +++ b/mindspore/core/abstract/prim_arrays.cc @@ -140,7 +140,7 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p auto shape = input->shape(); MS_EXCEPTION_IF_NULL(shape); - if (shape->shape().empty()) { + if (shape->shape().size() != 1) { MS_LOG(EXCEPTION) << "Rank of " << op_name << "'s input must be 1."; } ShapeVector ids_shape = {Shape::SHP_ANY}; diff --git a/mindspore/core/abstract/prim_structures.cc b/mindspore/core/abstract/prim_structures.cc index a94311edd40..fd429717c0e 100644 --- a/mindspore/core/abstract/prim_structures.cc +++ b/mindspore/core/abstract/prim_structures.cc @@ -318,8 +318,11 @@ AbstractBasePtr InferImplListAppend(const AnalysisEnginePtr &, const PrimitivePt const std::string op_name = primitive->name(); CheckArgsSize(op_name, args_spec_list, 2); AbstractListPtr list = CheckArg(op_name, args_spec_list, 0); - (void)AbstractJoin(list->elements()); - return list; + AbstractBasePtr item = dyn_cast(args_spec_list[1]); + MS_EXCEPTION_IF_NULL(item); + auto new_list = AbstractBasePtrList(list->elements()); + new_list.emplace_back(item); + return std::make_shared(new_list); } AbstractBasePtr InferImplTupleLen(const AnalysisEnginePtr &, const PrimitivePtr &primitive, diff --git a/mindspore/core/abstract/primitive_infer_map.cc b/mindspore/core/abstract/primitive_infer_map.cc index ce46a71137f..f56fbd7ba80 100644 --- a/mindspore/core/abstract/primitive_infer_map.cc +++ b/mindspore/core/abstract/primitive_infer_map.cc @@ -31,13 +31,16 @@ #include "ops/mul.h" #include "ops/sub.h" #include "ops/strided_slice.h" +#include "ops/reduce_sum.h" #include "abstract/abstract_function.h" #include "abstract/infer_functions.h" +#include "utils/ms_context.h" #include "ops/tile.h" namespace mindspore { 
namespace abstract { std::vector GetDependsFormMap(const CNodePtr &cnode) { + const auto kReduceSum = prim::kPrimReduceSum->name(); const auto kUnsortedSegmentSum = prim::kPrimUnsortedSegmentSum->name(); const auto kUnsortedSegmentMin = prim::kPrimUnsortedSegmentMin->name(); const auto kUnsortedSegmentMax = prim::kPrimUnsortedSegmentMax->name(); @@ -49,6 +52,13 @@ std::vector GetDependsFormMap(const CNodePtr &cnode) { {kUnsortedSegmentSum, {2}}, {kUnsortedSegmentMin, {2}}, {kUnsortedSegmentMax, {2}}, {kGather, {2}}, {kGatherV2, {2}}, {kDynamicShape, {0}}, {kRange, {0, 1, 2}}, }; + auto ms_context = MsContext::GetInstance(); + MS_EXCEPTION_IF_NULL(ms_context); + auto device = ms_context->get_param(MS_CTX_DEVICE_TARGET); + if (device == kAscendDevice) { + dynamic_shape_depends.insert({kReduceSum, {1}}); + } + MS_EXCEPTION_IF_NULL(cnode); if (cnode->inputs().empty()) { MS_LOG(EXCEPTION) << "Invalid inputs"; @@ -194,7 +204,7 @@ PrimitiveEvalImplMap &GetPrimitiveToBackendEvalImplMap() { {prim::kPrimNotEqual, {ops::NotEqualInfer, nullptr, true}}, {prim::kPrimLog, {ops::LogInfer, nullptr, true}}, {prim::kPrimReciprocal, {ops::ReciprocalInfer, nullptr, true}}, - {prim::kPrimReduceSum, {InferImplReduceFunc, nullptr, true}}, + {prim::kPrimReduceSum, {ops::ReduceSumInfer, nullptr, true}}, {prim::kPrimReduceMean, {InferImplReduceFunc, nullptr, true}}, {prim::kPrimReduceAll, {InferImplReduceFunc, nullptr, true}}, {prim::kPrimReduceAny, {InferImplReduceFunc, nullptr, true}}, diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h index ab7c128ffbf..9ad67236fc6 100644 --- a/mindspore/core/base/core_ops.h +++ b/mindspore/core/base/core_ops.h @@ -312,6 +312,8 @@ inline const PrimitivePtr kPrimBinaryCrossEntropy = std::make_shared( inline const PrimitivePtr kPrimBinaryCrossEntropyGrad = std::make_shared("BinaryCrossEntropyGrad"); inline const PrimitivePtr kPrimSmoothL1Loss = std::make_shared("SmoothL1Loss"); inline const PrimitivePtr kPrimSmoothL1LossGrad = 
std::make_shared("SmoothL1LossGrad"); +inline const PrimitivePtr kPrimSoftMarginLoss = std::make_shared("SoftMarginLoss"); +inline const PrimitivePtr kPrimSoftMarginLossGrad = std::make_shared("SoftMarginLossGrad"); inline const PrimitivePtr kPrimSoftmaxCrossEntropyWithLogits = std::make_shared("SoftmaxCrossEntropyWithLogits"); inline const PrimitivePtr kPrimSigmoidCrossEntropyWithLogits = @@ -346,6 +348,7 @@ inline const PrimitivePtr kPrimRelu6 = std::make_shared(kReLU6); inline const PrimitivePtr kPrimReluV2 = std::make_shared(kReLUV2); inline const PrimitivePtr kPrimPRelu = std::make_shared("PReLU"); inline const PrimitivePtr kPrimSoftplus = std::make_shared("Softplus"); +inline const PrimitivePtr kPrimSoftplusGrad = std::make_shared("SoftplusGrad"); inline const PrimitivePtr kPrimZeros = std::make_shared("Zeros"); inline const PrimitivePtr kPrimZerosLike = std::make_shared(kZerosLike); inline const PrimitivePtr kPrimOnesLike = std::make_shared(kOnesLike); @@ -375,6 +378,8 @@ inline const PrimitivePtr kSquareSumV1 = std::make_shared("SquareSumV inline const PrimitivePtr kFusedMulAdd = std::make_shared("FusedMulAdd"); inline const PrimitivePtr kPrimSoftShrink = std::make_shared("SoftShrink"); inline const PrimitivePtr kPrimSoftShrinkGrad = std::make_shared("SoftShrinkGrad"); +inline const PrimitivePtr kPrimHShrink = std::make_shared("HShrink"); +inline const PrimitivePtr kPrimHShrinkGrad = std::make_shared("HShrinkGrad"); // Comm ops inline const PrimitivePtr kPrimMirror = std::make_shared("_MirrorOperator"); @@ -472,6 +477,7 @@ inline const PrimitivePtr kPrimSqrtGrad = std::make_shared("SqrtGrad" inline const PrimitivePtr kPrimReciprocal = std::make_shared(kReciprocal); inline const PrimitivePtr kPrimExpandDims = std::make_shared("ExpandDims"); inline const PrimitivePtr kPrimAbs = std::make_shared("Abs"); +inline const PrimitivePtr kPrimAbsGrad = std::make_shared("AbsGrad"); inline const PrimitivePtr kPrimRint = std::make_shared("Rint"); inline const 
PrimitivePtr kPrimRound = std::make_shared("Round"); inline const PrimitivePtr kPrimExp = std::make_shared(kExp); @@ -487,6 +493,8 @@ inline const PrimitivePtr kPrimACos = std::make_shared("ACos"); inline const PrimitivePtr kPrimAsinGrad = std::make_shared("AsinGrad"); inline const PrimitivePtr kPrimACosGrad = std::make_shared("ACosGrad"); inline const PrimitivePtr kPrimAtanGrad = std::make_shared("AtanGrad"); +inline const PrimitivePtr kPrimAsinhGrad = std::make_shared("AsinhGrad"); +inline const PrimitivePtr kPrimAcoshGrad = std::make_shared("AcoshGrad"); inline const PrimitivePtr kPrimFloorMod = std::make_shared("FloorMod"); inline const PrimitivePtr kPrimWhere = std::make_shared("Where"); inline const PrimitivePtr kPrimIdentityMath = std::make_shared("Identity", kSideEffectPropagate); @@ -554,7 +562,9 @@ inline const PrimitivePtr kPrimPriorBox = std::make_shared("PriorBox" inline const PrimitivePtr kPrimQuantDTypeCast = std::make_shared("QuantDTypeCast"); inline const PrimitivePtr kPrimWhile = std::make_shared("While"); inline const PrimitivePtr kPrimPull = std::make_shared("Pull"); +inline const PrimitivePtr kPrimPush = std::make_shared("Push"); inline const PrimitivePtr kPrimNPUAllocFloatStatus = std::make_shared("NPUAllocFloatStatus"); +inline const PrimitivePtr kPyFunc = std::make_shared("PyFunc"); // Structures inline const PrimitivePtr kPrimMakeList = std::make_shared("make_list"); diff --git a/mindspore/core/ir/param_info.h b/mindspore/core/ir/param_info.h index cba7dbc4071..490218c8cf0 100644 --- a/mindspore/core/ir/param_info.h +++ b/mindspore/core/ir/param_info.h @@ -72,6 +72,7 @@ class ParamInfo { this->be_cloned_ = true; this->be_cloned_index_.push_back(index); clone->init_in_server_ = this->init_in_server_; + clone->requires_aggr_ = this->requires_aggr_; clone->ClearParameter(); return clone; } @@ -91,6 +92,9 @@ class ParamInfo { void set_parameter(const ParameterPtr &parameter) { parameter_ = parameter; } void ClearParameter() { parameter_ = nullptr; 
} + bool requires_aggr() const { return requires_aggr_; } + void set_requires_aggr(bool requires_aggr) { requires_aggr_ = requires_aggr; } + private: std::string name_{"Parameter"}; bool requires_grad_{true}; @@ -105,6 +109,7 @@ class ParamInfo { bool cache_enable_{false}; std::vector cache_shape_; ParameterPtr parameter_{nullptr}; + bool requires_aggr_{true}; }; } // namespace mindspore #endif // MINDSPORE_CORE_IR_PARAM_INFO_H_ diff --git a/mindspore/core/load_mindir/anf_model_parser.cc b/mindspore/core/load_mindir/anf_model_parser.cc index 68c1bbd0e8b..c38868d0d42 100644 --- a/mindspore/core/load_mindir/anf_model_parser.cc +++ b/mindspore/core/load_mindir/anf_model_parser.cc @@ -635,14 +635,12 @@ bool MSANFModelParser::ObtainValueNodeInMonadForm(const std::string &value_node_ const mind_ir::AttributeProto &attr_proto) { const std::string &ref_attr_name = attr_proto.ref_attr_name(); if (ref_attr_name.find("UMonad") != std::string::npos) { - const ValuePtr kUMonad = std::make_shared(); auto monad_abs = kUMonad->ToAbstract(); auto new_value_node = NewValueNode(kUMonad); MS_EXCEPTION_IF_NULL(new_value_node); new_value_node->set_abstract(monad_abs); anfnode_build_map_[value_node_name] = new_value_node; } else if (ref_attr_name.find("IOMonad") != std::string::npos) { - const ValuePtr kIOMonad = std::make_shared(); auto monad_abs = kIOMonad->ToAbstract(); auto new_value_node = NewValueNode(kIOMonad); MS_EXCEPTION_IF_NULL(new_value_node); @@ -768,17 +766,22 @@ std::unordered_map MSANFModelParser::Get return kv; } -CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, - const mind_ir::NodeProto &node_proto) { - MS_EXCEPTION_IF_NULL(outputFuncGraph); - if (!node_proto.has_op_type()) { - MS_LOG(ERROR) << "Get CNode op_type failed!"; - return nullptr; - } - const std::string &node_name = node_proto.output(0); - const std::string &fullname_with_scope = node_proto.domain(); +AnfNodePtr MSANFModelParser::BuildOperatorNode(const 
mind_ir::NodeProto &node_proto) { + const std::string kOperatorTypeFlag = std::string("REF::"); + const size_t kOpTypeFlagSize = kOperatorTypeFlag.length(); const std::string &node_type = node_proto.op_type(); + MS_LOG(DEBUG) << "Process Operator :" << node_type; + // Operator maybe CNode,FuncGraph or Parameter. + if (node_type.size() > kOpTypeFlagSize && node_type.substr(0, kOpTypeFlagSize) == kOperatorTypeFlag) { + auto it = anfnode_build_map_.find(node_type.substr(kOpTypeFlagSize)); + if (it != anfnode_build_map_.end()) { + return it->second; + } + MS_LOG(EXCEPTION) << "Can't find the ref:" << node_type; + } + + // Operator is primitive. std::shared_ptr prim; auto op_primc_fns = ops::OpPrimCRegister::GetInstance().GetPrimCMap(); if (op_primc_fns.find(node_type) != op_primc_fns.end()) { @@ -794,52 +797,65 @@ CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFunc } } MS_EXCEPTION_IF_NULL(prim); + for (int i = 0; i < node_proto.attribute_size(); ++i) { + const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i); + // CNode abstract + if (attr_proto.ref_attr_name().find("shape:") != string::npos) { + continue; + } + if (!GetAttrValueForCNode(prim, attr_proto)) { + MS_LOG(EXCEPTION) << "Parser prim: " << node_type << " attributes error : " << attr_proto.DebugString(); + } + } + prim->set_attr("is_load", MakeValue(true)); + return std::make_shared(prim); +} + +// Set CNode abstract. +void MSANFModelParser::SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr) { + const std::string &node_type = node_proto.op_type(); + // Handle control flow operator. 
+ auto operatorPtr = cnode_ptr->input(0); + // Set abstract of switch(c,f,t),switchLayer(c,tup) and + // partial(func,args) to null + auto prim = GetValueNode(operatorPtr); + if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim) || + IsPrimitiveEquals(prim::kPrimPartial, prim)) { + cnode_ptr->set_abstract(nullptr); + return; + } + // Set abstract of switch(c,f,t)() to null + prim = GetCNodePrimitive(operatorPtr); + if (IsPrimitiveEquals(prim::kPrimSwitch, prim) || IsPrimitiveEquals(prim::kPrimSwitchLayer, prim)) { + cnode_ptr->set_abstract(nullptr); + return; + } std::unordered_map kv; string shape_ref_attr_name; + for (int i = 0; i < node_proto.attribute_size(); ++i) { const mind_ir::AttributeProto &attr_proto = node_proto.attribute(i); if (attr_proto.ref_attr_name().find("shape:") != string::npos) { shape_ref_attr_name = attr_proto.ref_attr_name(); kv = GetAbstractForCNode(attr_proto); - continue; - } - - if (!GetAttrValueForCNode(prim, attr_proto)) { - MS_LOG(ERROR) << "Get CNode attr failed!"; - return nullptr; + break; } } - std::vector inputs; - inputs.clear(); - for (int i = 0; i < node_proto.input_size(); ++i) { - const std::string &input_name = node_proto.input(i); - if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) { - MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed"; - return nullptr; - } - - inputs.push_back(anfnode_build_map_[input_name]); - } - prim->set_attr("is_load", MakeValue(true)); - CNodePtr cnode_ptr; - cnode_ptr = outputFuncGraph->NewCNode(prim, inputs); - MS_EXCEPTION_IF_NULL(cnode_ptr); - + // Because there is not context in unit test, + // abstract->broaden() is replaced by abstract->set_value(kAnyValue). 
if (kv.size() == 0) { if (node_type == "UpdateState") { - const ValuePtr kUMonad = std::make_shared(); - auto monad_abs = kUMonad->ToAbstract(); - cnode_ptr->set_abstract(monad_abs); + cnode_ptr->set_abstract(kUMonad->ToAbstract()); } else if (node_type == "Depend") { - const ValuePtr kBool = std::make_shared(true); cnode_ptr->set_abstract(kBool->ToAbstract()); } else { AbstractBasePtrList elem; for (size_t index = 1; index < cnode_ptr->inputs().size(); ++index) { auto abs = cnode_ptr->input(index)->abstract(); if (abs != nullptr) { + abs->set_value(kAnyValue); elem.push_back(abs); } } @@ -849,22 +865,56 @@ CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFunc } } else if (kv.size() == 1) { std::unordered_map::iterator iter = kv.begin(); - cnode_ptr->set_abstract(iter->second); + if (iter->second != nullptr) { + iter->second->set_value(kAnyValue); + cnode_ptr->set_abstract(iter->second); + } } else { auto abstract = ParserAttrShape(shape_ref_attr_name, kv); if (abstract == nullptr) { + cnode_ptr->set_abstract(nullptr); MS_LOG(ERROR) << "Node's attribute is nullptr."; + } else { + abstract->set_value(kAnyValue); + cnode_ptr->set_abstract(abstract); + } + } +} + +CNodePtr MSANFModelParser::BuildCNodeForFuncGraph(const FuncGraphPtr &outputFuncGraph, + const mind_ir::NodeProto &node_proto) { + MS_EXCEPTION_IF_NULL(outputFuncGraph); + if (!node_proto.has_op_type()) { + MS_LOG(ERROR) << "Get CNode op_type failed!"; + return nullptr; + } + const std::string &node_name = node_proto.output(0); + MS_LOG(DEBUG) << "Process CNode: " << node_name; + // Build inputs. 
+ std::vector inputs; + inputs.push_back(BuildOperatorNode(node_proto)); + for (int i = 0; i < node_proto.input_size(); ++i) { + const std::string &input_name = node_proto.input(i); + if (anfnode_build_map_.find(input_name) == anfnode_build_map_.end()) { + MS_LOG(ERROR) << node_name << " input " << i << input_name << "can't find in nodes have parsed"; return nullptr; } - cnode_ptr->set_abstract(abstract); + inputs.push_back(anfnode_build_map_[input_name]); } + CNodePtr cnode_ptr = outputFuncGraph->NewCNode(inputs); + MS_EXCEPTION_IF_NULL(cnode_ptr); + SetCNodeAbastract(node_proto, cnode_ptr); + + const std::string &fullname_with_scope = node_proto.domain(); string debug_info_name = ParseCNodeName(node_name); auto debug_info_ptr = std::make_shared(debug_info_name); cnode_ptr->set_debug_info(debug_info_ptr); cnode_ptr->set_fullname_with_scope(fullname_with_scope); cnode_ptr->set_load_flag(true); - + if (anfnode_build_map_.count(node_name) > 0) { + MS_LOG(EXCEPTION) << "Duplicate CNode name: " << node_name; + } anfnode_build_map_[node_name] = cnode_ptr; return cnode_ptr; } @@ -992,11 +1042,41 @@ FuncGraphPtr MSANFModelParser::Parse(const mind_ir::ModelProto &model_proto) { MS_LOG(ERROR) << "Parse configuration info for pb file failed!"; } const mind_ir::GraphProto &graphBuild = model_proto.graph(); + + // Forward declare FuncGraph name + // Compatible with the previous proto. + if (graphBuild.has_name()) { + anfnode_build_map_[graphBuild.name()] = std::make_shared(dstGraph); + } + for (int i = 0; i < model_proto.functions_size(); ++i) { + FuncGraphPtr graph = std::make_shared(); + const auto &graph_proto = model_proto.functions(i); + if (!graph_proto.has_name()) { + MS_LOG(EXCEPTION) << "The function has not a name. Please export mindIR again. 
"; + } + if (anfnode_build_map_.count(graph_proto.name()) > 0) { + MS_LOG(EXCEPTION) << "There is a duplication function graph name: " << graph_proto.name(); + } + anfnode_build_map_[graph_proto.name()] = std::make_shared(graph); + } + + // Parser the proto. if (!BuildFuncGraph(dstGraph, graphBuild)) { MS_LOG(ERROR) << "Build funcgraph failed!"; return nullptr; } - MS_LOG(INFO) << "Parse pb to build FuncGraph Success!"; + MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success! " << graphBuild.name(); + for (int i = 0; i < model_proto.functions_size(); ++i) { + const auto &graph_proto = model_proto.functions(i); + FuncGraphPtr graph = GetValueNode(anfnode_build_map_[graph_proto.name()]); + if (!BuildFuncGraph(graph, graph_proto)) { + MS_LOG(ERROR) << "Build funcgraph failed!"; + return nullptr; + } + MS_LOG(DEBUG) << "Parse pb to build FuncGraph Success! " << graph_proto.name(); + } + // Release resource + anfnode_build_map_.clear(); return dstGraph; } } // namespace mindspore diff --git a/mindspore/core/load_mindir/anf_model_parser.h b/mindspore/core/load_mindir/anf_model_parser.h index 4d7ce1adecb..dffc78deeff 100644 --- a/mindspore/core/load_mindir/anf_model_parser.h +++ b/mindspore/core/load_mindir/anf_model_parser.h @@ -62,6 +62,8 @@ class MSANFModelParser { ValuePtr ObtainCNodeAttrInSingleScalarForm(const mind_ir::AttributeProto &attr_proto); bool ObtainCNodeAttrInTensorForm(const PrimitivePtr &prim, const mind_ir::AttributeProto &attr_proto); bool BuildValueNodeForFuncGraph(const mind_ir::NodeProto &node_proto); + AnfNodePtr BuildOperatorNode(const mind_ir::NodeProto &node_proto); + void SetCNodeAbastract(const mind_ir::NodeProto &node_proto, CNodePtr cnode_ptr); bool ObtainValueNodeInTensorForm(const string &value_node_name, const mind_ir::TensorProto &attr_tensor); bool ObtainValueNodeInTupleTensorForm(const string &value_node_name, const mind_ir::AttributeProto &attr_proto); bool GetAttrValueForValueNode(const std::string &value_node_name, const 
mind_ir::AttributeProto &attr_tensor); diff --git a/mindspore/core/load_mindir/load_model.cc b/mindspore/core/load_mindir/load_model.cc index 62574ee7db2..afc37e9ad45 100644 --- a/mindspore/core/load_mindir/load_model.cc +++ b/mindspore/core/load_mindir/load_model.cc @@ -92,7 +92,7 @@ bool get_all_files(const std::string &dir_in, std::vector *files) { return false; } DIR *open_dir = opendir(dir_in.c_str()); - if (NULL == open_dir) { + if (open_dir == NULL) { MS_LOG(EXCEPTION) << "open dir " << dir_in.c_str() << " failed"; } dirent *p = nullptr; @@ -217,7 +217,7 @@ std::shared_ptr LoadMindIR(const std::string &file_name, bool is_lite return nullptr; } abs_path[path_len] = '\0'; - snprintf(abs_path + path_len, sizeof(abs_path), "variables"); + snprintf(abs_path + path_len, sizeof(abs_path) - path_len, "variables"); std::ifstream ifs(abs_path); if (ifs.good()) { MS_LOG(DEBUG) << "MindIR file has variables path, load parameter into graph."; diff --git a/mindspore/core/mindrt/src/actor/actormgr.cc b/mindspore/core/mindrt/src/actor/actormgr.cc index 686942aecc0..4c28eea3de4 100644 --- a/mindspore/core/mindrt/src/actor/actormgr.cc +++ b/mindspore/core/mindrt/src/actor/actormgr.cc @@ -46,6 +46,30 @@ ActorMgr::ActorMgr() : actors(), procotols(), urls() { ActorMgr::~ActorMgr() {} +void ActorMgr::Initialize(bool use_inner_pool, size_t thread_num) { + bool expected = false; + if (!initialized_.compare_exchange_strong(expected, true)) { + MS_LOG(DEBUG) << "Actor Manager has been initialized before"; + return; + } + // create inner thread pool only when specified use_inner_pool + if (use_inner_pool) { + inner_pool_ = ActorThreadPool::CreateThreadPool(thread_num); + } +} + +void ActorMgr::SetActorReady(const ActorReference &actor) const { + // use inner thread pool or actor thread pool created externally + // priority to use actor thread pool + ActorThreadPool *pool = actor->pool_ ? 
actor->pool_ : inner_pool_; + if (pool == nullptr) { + MS_LOG(ERROR) << "ThreadPool is nullptr, " << actor->pool_ << ", " << inner_pool_ + << ", actor: " << actor->GetAID().Name(); + return; + } + pool->PushActorToQueue(actor.get()); +} + const std::string ActorMgr::GetUrl(const std::string &protocol) { auto it = procotols.find(protocol); if (it != procotols.end()) { @@ -109,6 +133,10 @@ void ActorMgr::Finalize() { MS_LOG(INFO) << "finalize IOMgr=" << mgrIt->first.c_str(); mgrIt->second->Finish(); } + + // delete actor thread pool if use_inner_pool + delete inner_pool_; + inner_pool_ = nullptr; MS_LOG(INFO) << "mindrt IOMGRS finish exiting."; } @@ -171,7 +199,7 @@ int ActorMgr::Send(const AID &to, std::unique_ptr &&msg, bool remot } } -AID ActorMgr::Spawn(ActorReference &actor, bool shareThread, bool start) { +AID ActorMgr::Spawn(const ActorReference &actor, bool shareThread, bool start) { actorsMutex.lock(); if (actors.find(actor->GetAID().Name()) != actors.end()) { actorsMutex.unlock(); diff --git a/mindspore/core/mindrt/src/actor/actormgr.h b/mindspore/core/mindrt/src/actor/actormgr.h index c4273b821b7..967b77a0b3e 100644 --- a/mindspore/core/mindrt/src/actor/actormgr.h +++ b/mindspore/core/mindrt/src/actor/actormgr.h @@ -17,6 +17,7 @@ #ifndef MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H #define MINDSPORE_CORE_MINDRT_SRC_ACTOR_ACTORMGR_H +#include #include #include #include @@ -51,28 +52,24 @@ class ActorMgr { ~ActorMgr(); void Finalize(); - void Initialize() {} + // initialize actor manager resource, do not create inner thread pool by default + void Initialize(bool use_inner_pool = false, size_t thread_num = 1); + void RemoveActor(const std::string &name); ActorBase *GetActor(const AID &id); const std::string GetUrl(const std::string &protocol = "tcp"); void AddUrl(const std::string &protocol, const std::string &url); void AddIOMgr(const std::string &protocol, const std::shared_ptr &ioMgr); int Send(const AID &to, std::unique_ptr &&msg, bool remoteLink = false, 
bool isExactNotRemote = false); - AID Spawn(ActorReference &actor, bool shareThread = true, bool start = true); + AID Spawn(const ActorReference &actor, bool shareThread = true, bool start = true); void Terminate(const AID &id); void TerminateAll(); void Wait(const AID &pid); inline const std::string &GetDelegate() const { return delegate; } inline void SetDelegate(const std::string &d) { delegate = d; } - inline void SetActorReady(std::shared_ptr &actor) const { - auto pool = actor->pool_; - if (pool == nullptr) { - MS_LOG(ERROR) << "ThreadPool is nullptr, actor: " << actor->GetAID().Name(); - return; - } - pool->PushActorToQueue(actor.get()); - } + + void SetActorReady(const ActorReference &actor) const; void SetActorStatus(const AID &pid, bool start); private: @@ -83,6 +80,13 @@ class ActorMgr { return false; } } + // in order to avoid being initialized many times + std::atomic_bool initialized_{false}; + + // actor manager support running on inner thread pool, + // or running on other thread pool created independently externally + ActorThreadPool *inner_pool_{nullptr}; + // Map of all local spawned and running processes. std::map actors; #ifndef MS_COMPILE_IOS diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.cc b/mindspore/core/mindrt/src/thread/actor_threadpool.cc index 2427a84da48..58966fca13b 100644 --- a/mindspore/core/mindrt/src/thread/actor_threadpool.cc +++ b/mindspore/core/mindrt/src/thread/actor_threadpool.cc @@ -13,7 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - +#ifndef _MSC_VER +#include +#include +#endif #include "thread/actor_threadpool.h" #include "thread/core_affinity.h" @@ -26,6 +29,7 @@ void ActorWorker::CreateThread(ActorThreadPool *pool) { } void ActorWorker::RunWithSpin() { + SetAffinity(); #if !defined(__APPLE__) && !defined(SUPPORT_MSVC) static std::atomic_int index = {0}; pthread_setname_np(pthread_self(), ("ActorThread_" + std::to_string(index++)).c_str()); @@ -116,7 +120,7 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) { actor_queue_.push(actor); #endif } - THREAD_INFO("actor[%s] enqueue success", actor->GetAID().Name().c_str()); + THREAD_DEBUG("actor[%s] enqueue success", actor->GetAID().Name().c_str()); // active one idle actor thread if exist for (size_t i = 0; i < actor_thread_num_; ++i) { auto worker = reinterpret_cast(workers_[i]); @@ -126,11 +130,13 @@ void ActorThreadPool::PushActorToQueue(ActorBase *actor) { } } -int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num) { +int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector &core_list) { #ifdef USE_HQUEUE actor_queue_.Init(MAX_READY_ACTOR_NR); #endif - +#ifdef BIND_CORE + affinity_->SetCoreId(core_list); +#endif size_t core_num = std::thread::hardware_concurrency(); THREAD_INFO("ThreadInfo, Actor: [%zu], All: [%zu], CoreNum: [%zu]", actor_thread_num, all_thread_num, core_num); actor_thread_num_ = actor_thread_num < core_num ? 
actor_thread_num : core_num; @@ -142,27 +148,56 @@ int ActorThreadPool::CreateThreads(size_t actor_thread_num, size_t all_thread_nu std::lock_guard _l(pool_mutex_); auto worker = new (std::nothrow) ActorWorker(); THREAD_ERROR_IF_NULL(worker); +#ifdef BIND_CORE + cpu_set_t mask; + CPU_ZERO(&mask); + if (core_list.size() > 0) { + CPU_SET(core_list[workers_.size() % core_list.size()], &mask); + } + worker->set_mask(mask); +#endif worker->CreateThread(this); workers_.push_back(worker); THREAD_INFO("create actor thread[%zu]", i); } size_t kernel_thread_num = all_thread_num - actor_thread_num_; if (kernel_thread_num > 0) { - return ThreadPool::CreateThreads(kernel_thread_num); + return ThreadPool::CreateThreads(kernel_thread_num, core_list); } return THREAD_OK; } -ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num) { +ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode) { ActorThreadPool *pool = new (std::nothrow) ActorThreadPool(); if (pool == nullptr) { return nullptr; } - int ret = pool->CreateThreads(actor_thread_num, all_thread_num); + int ret; + std::vector core_list; +#ifdef BIND_CORE + ret = pool->InitAffinityInfo(); if (ret != THREAD_OK) { delete pool; return nullptr; } + core_list = pool->affinity_->GetCoreId(all_thread_num, bind_mode); +#endif // BIND_CORE + ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list); + if (ret != THREAD_OK) { + delete pool; + return nullptr; + } + + return pool; +} + +ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, + const std::vector &core_list) { + ActorThreadPool *pool = new (std::nothrow) ActorThreadPool(); + if (pool == nullptr) { + return nullptr; + } + int ret; #ifdef BIND_CORE ret = pool->InitAffinityInfo(); if (ret != THREAD_OK) { @@ -170,6 +205,12 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t actor_thread_num, size return 
nullptr; } #endif // BIND_CORE + ret = pool->CreateThreads(actor_thread_num, all_thread_num, core_list); + if (ret != THREAD_OK) { + delete pool; + return nullptr; + } + return pool; } @@ -178,7 +219,7 @@ ActorThreadPool *ActorThreadPool::CreateThreadPool(size_t thread_num) { if (pool == nullptr) { return nullptr; } - int ret = pool->CreateThreads(thread_num, thread_num); + int ret = pool->CreateThreads(thread_num, thread_num, {}); if (ret != THREAD_OK) { delete pool; return nullptr; diff --git a/mindspore/core/mindrt/src/thread/actor_threadpool.h b/mindspore/core/mindrt/src/thread/actor_threadpool.h index b588844388c..bb4bc4f57ba 100644 --- a/mindspore/core/mindrt/src/thread/actor_threadpool.h +++ b/mindspore/core/mindrt/src/thread/actor_threadpool.h @@ -18,6 +18,7 @@ #define MINDSPORE_CORE_MINDRT_RUNTIME_ACTOR_THREADPOOL_H_ #include +#include #include #include #include @@ -43,7 +44,10 @@ class ActorWorker : public Worker { class ActorThreadPool : public ThreadPool { public: // create ThreadPool that contains actor thread and kernel thread - static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num); + static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, BindMode bind_mode); + + static ActorThreadPool *CreateThreadPool(size_t actor_thread_num, size_t all_thread_num, + const std::vector &core_list); // create ThreadPool that contains only actor thread static ActorThreadPool *CreateThreadPool(size_t thread_num); ~ActorThreadPool() override; @@ -53,7 +57,7 @@ class ActorThreadPool : public ThreadPool { private: ActorThreadPool() {} - int CreateThreads(size_t actor_thread_num, size_t all_thread_num); + int CreateThreads(size_t actor_thread_num, size_t all_thread_num, const std::vector &core_list); size_t actor_thread_num_{0}; std::mutex actor_mutex_; diff --git a/mindspore/core/mindrt/src/thread/core_affinity.cc b/mindspore/core/mindrt/src/thread/core_affinity.cc index 72417f018c7..f24f0d613cd 100644 
--- a/mindspore/core/mindrt/src/thread/core_affinity.cc +++ b/mindspore/core/mindrt/src/thread/core_affinity.cc @@ -248,21 +248,31 @@ int CoreAffinity::InitHardwareCoreInfo() { return THREAD_OK; } -int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { +std::vector CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) { + std::vector bind_id; if (core_num_ != sorted_id_.size()) { THREAD_ERROR("init sorted core id failed"); - return THREAD_ERROR; + return bind_id; } - bind_id_.clear(); if (bind_mode == Power_Higher || bind_mode == Power_NoBind) { for (size_t i = 0; i < thread_num; ++i) { - bind_id_.push_back(sorted_id_[i % core_num_]); + bind_id.push_back(sorted_id_[i % core_num_]); } } else if (bind_mode == Power_Middle) { for (size_t i = 0; i < thread_num; ++i) { - bind_id_.push_back(sorted_id_[(i + higher_num_) % core_num_]); + bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]); } } else { + return bind_id; + } + return bind_id; +} +void CoreAffinity::SetCoreId(const std::vector &core_list) { bind_id_ = core_list; } + +int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) { + bind_id_.clear(); + bind_id_ = GetCoreId(thread_num, bind_mode); + if (bind_id_.empty()) { return THREAD_ERROR; } return THREAD_OK; diff --git a/mindspore/core/mindrt/src/thread/core_affinity.h b/mindspore/core/mindrt/src/thread/core_affinity.h index 6dc3aae44ae..7138e41d131 100644 --- a/mindspore/core/mindrt/src/thread/core_affinity.h +++ b/mindspore/core/mindrt/src/thread/core_affinity.h @@ -43,6 +43,8 @@ class CoreAffinity { int BindThreads(const std::vector &workers, const std::vector &core_list); int BindThreads(const std::vector &workers, BindMode bind_mode); int BindProcess(BindMode bind_mode) const; + std::vector GetCoreId(size_t thread_num, BindMode bind_mode); + void SetCoreId(const std::vector &core_list); private: #ifdef BIND_CORE diff --git a/mindspore/core/mindrt/src/thread/threadlog.h 
b/mindspore/core/mindrt/src/thread/threadlog.h index 5318fa9d899..8594d852daa 100644 --- a/mindspore/core/mindrt/src/thread/threadlog.h +++ b/mindspore/core/mindrt/src/thread/threadlog.h @@ -20,14 +20,23 @@ namespace mindspore { #ifdef THREAD_POOL_DEBUG #include +#define THREAD_DEBUG(content, args...) \ + { printf("[DEBUG] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #define THREAD_INFO(content, args...) \ { printf("[INFO] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #define THREAD_ERROR(content, args...) \ { printf("[ERROR] %s|%d: " #content "\r\n", __func__, __LINE__, ##args); } #else +#define THREAD_DEBUG(content, ...) #define THREAD_INFO(content, ...) +#if defined(__ANDROID__) +#include +#define THREAD_ERROR(content, args...) \ + { __android_log_print(ANDROID_LOG_ERROR, "MS_LITE", "%s|%d: " #content "\r\n", __func__, __LINE__, ##args); } +#else #define THREAD_ERROR(content, ...) #endif +#endif #define THREAD_ERROR_IF_NULL(ptr) \ do { \ diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc index 1690e91f291..fec5dedfd8f 100644 --- a/mindspore/core/mindrt/src/thread/threadpool.cc +++ b/mindspore/core/mindrt/src/thread/threadpool.cc @@ -13,7 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - +#ifndef _MSC_VER +#include +#include +#endif #include "thread/threadpool.h" #include "thread/core_affinity.h" @@ -31,7 +34,28 @@ Worker::~Worker() { void Worker::CreateThread() { thread_ = std::thread(&Worker::Run, this); } +void Worker::SetAffinity() { +#ifdef BIND_CORE +#ifdef __ANDROID__ + int ret = sched_setaffinity(gettid(), sizeof(cpu_set_t), &mask_); + if (ret != THREAD_OK) { + THREAD_ERROR("bind thread %d to cpu failed. 
ERROR %d", gettid(), errno); + } + return; +#else +#if !defined(__APPLE__) && !defined(SUPPORT_MSVC) + int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &mask_); + if (ret != THREAD_OK) { + THREAD_ERROR("bind thread %lu to cpu failed. ERROR %d", pthread_self(), errno); + } + return; +#endif +#endif +#endif +} + void Worker::Run() { + SetAffinity(); #if !defined(__APPLE__) && !defined(SUPPORT_MSVC) static std::atomic_int index = {0}; pthread_setname_np(pthread_self(), ("KernelThread_" + std::to_string(index++)).c_str()); @@ -105,7 +129,7 @@ ThreadPool::~ThreadPool() { THREAD_INFO("destruct success"); } -int ThreadPool::CreateThreads(size_t thread_num) { +int ThreadPool::CreateThreads(size_t thread_num, const std::vector &core_list) { size_t core_num = std::thread::hardware_concurrency(); thread_num = thread_num < core_num ? thread_num : core_num; THREAD_INFO("ThreadInfo, Num: [%zu], CoreNum: [%zu]", thread_num, core_num); @@ -117,6 +141,14 @@ int ThreadPool::CreateThreads(size_t thread_num) { for (size_t i = 0; i < thread_num; ++i) { auto worker = new (std::nothrow) Worker(); THREAD_ERROR_IF_NULL(worker); +#ifdef BIND_CORE + cpu_set_t mask; + CPU_ZERO(&mask); + if (core_list.size() > 0) { + CPU_SET(core_list[workers_.size() % core_list.size()], &mask); + } + worker->set_mask(mask); +#endif worker->CreateThread(); workers_.push_back(worker); THREAD_INFO("create kernel thread[%zu]", i); @@ -127,7 +159,7 @@ int ThreadPool::CreateThreads(size_t thread_num) { int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) const { // distribute task to the KernelThread and the idle ActorThread, // if the task num is greater than the KernelThread num - THREAD_INFO("launch: %d", task_num); + THREAD_DEBUG("launch: %d", task_num); Task task = {func, content}; DistributeTask(&task, task_num); @@ -266,12 +298,12 @@ int ThreadPool::SetProcessAffinity(BindMode bind_mode) const { #endif // BIND_CORE } -ThreadPool 
*ThreadPool::CreateThreadPool(size_t thread_num) { +ThreadPool *ThreadPool::CreateThreadPool(size_t thread_num, const std::vector &core_list) { ThreadPool *pool = new (std::nothrow) ThreadPool(); if (pool == nullptr) { return nullptr; } - int ret = pool->CreateThreads(thread_num); + int ret = pool->CreateThreads(thread_num, core_list); if (ret != THREAD_OK) { delete pool; return nullptr; diff --git a/mindspore/core/mindrt/src/thread/threadpool.h b/mindspore/core/mindrt/src/thread/threadpool.h index f6b478391ac..4db2c8e4aea 100644 --- a/mindspore/core/mindrt/src/thread/threadpool.h +++ b/mindspore/core/mindrt/src/thread/threadpool.h @@ -73,16 +73,21 @@ class Worker { std::thread::id thread_id() const { return thread_.get_id(); } #ifdef BIND_CORE + void set_mask(const cpu_set_t &mask) { mask_ = mask; } pthread_t handle() { return thread_.native_handle(); } #endif protected: + void SetAffinity(); void Run(); void YieldAndDeactive(); void WaitUntilActive(); bool alive_{true}; std::thread thread_; +#ifdef BIND_CORE + cpu_set_t mask_; +#endif std::atomic_int status_{kThreadBusy}; std::mutex mutex_; @@ -98,7 +103,7 @@ class Worker { class ThreadPool { public: - static ThreadPool *CreateThreadPool(size_t thread_num); + static ThreadPool *CreateThreadPool(size_t thread_num, const std::vector &core_list = {}); virtual ~ThreadPool(); size_t thread_num() const { return workers_.size(); } @@ -112,7 +117,7 @@ class ThreadPool { protected: ThreadPool() = default; - int CreateThreads(size_t thread_num); + int CreateThreads(size_t thread_num, const std::vector &core_list); int InitAffinityInfo(); diff --git a/mindspore/core/ops/apply_momentum.cc b/mindspore/core/ops/apply_momentum.cc index 1d1c38c319f..888081700fc 100644 --- a/mindspore/core/ops/apply_momentum.cc +++ b/mindspore/core/ops/apply_momentum.cc @@ -63,6 +63,9 @@ AbstractBasePtr ApplyMomentumInfer(const abstract::AnalysisEnginePtr &, const Pr auto prim_name = primitive->name(); 
(void)CheckAndConvertUtils::CheckInteger("apply_momentum_infer", SizeToLong(input_args.size()), kEqual, 5, prim_name); + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } // Infer shape auto v_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; diff --git a/mindspore/core/ops/arg_min.cc b/mindspore/core/ops/arg_min.cc index 532a2f9b6e6..ae92481a448 100644 --- a/mindspore/core/ops/arg_min.cc +++ b/mindspore/core/ops/arg_min.cc @@ -42,6 +42,7 @@ AbstractBasePtr ArgMinInfer(const abstract::AnalysisEnginePtr &, const Primitive // Infer shape auto axis = GetValue(primitive->GetAttr(kAxis)); + MS_EXCEPTION_IF_NULL(input_args[0]); auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; auto x_rank = SizeToLong(x_shape.size()); CheckAndConvertUtils::CheckInRange("axis", axis, kIncludeLeft, {-x_rank, x_rank}, prim_name); diff --git a/mindspore/core/ops/asin.cc b/mindspore/core/ops/asin.cc index fb78967c815..dfdcabec6e1 100644 --- a/mindspore/core/ops/asin.cc +++ b/mindspore/core/ops/asin.cc @@ -32,6 +32,7 @@ AbstractBasePtr AsinInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; auto infer_shape = std::make_shared(x_shape); + MS_EXCEPTION_IF_NULL(input_args[0]); // Infer Type auto dtype = input_args[0]->BuildType(); const std::set valid_types = {kFloat16, kFloat32, kInt32}; diff --git a/mindspore/core/ops/assert.cc b/mindspore/core/ops/assert.cc index 1900e484c09..22755b87630 100644 --- a/mindspore/core/ops/assert.cc +++ b/mindspore/core/ops/assert.cc @@ -38,6 +38,9 @@ AbstractBasePtr AssertInfer(const abstract::AnalysisEnginePtr &, const Primitive const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto op_name = primitive->name(); + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } TypePtr condition; if 
(!(input_args[0]->BuildType()->type_id() == kObjectTypeTensorType)) { auto condition_values = GetValue>(input_args[0]->BuildValue()); diff --git a/mindspore/core/ops/batch_to_space_nd.cc b/mindspore/core/ops/batch_to_space_nd.cc index 2ba2a24a106..ffb6e66e6a6 100644 --- a/mindspore/core/ops/batch_to_space_nd.cc +++ b/mindspore/core/ops/batch_to_space_nd.cc @@ -92,7 +92,7 @@ std::vector BatchToSpaceND::get_block_shape() const { return GetValue>(value_ptr); } -void BatchToSpaceND::Init(std::vector block_shape, std::vector> crops) { +void BatchToSpaceND::Init(const std::vector block_shape, const std::vector> crops) { this->set_crops(crops); this->set_block_shape(block_shape); } diff --git a/mindspore/core/ops/batch_to_space_nd.h b/mindspore/core/ops/batch_to_space_nd.h index 3a745b5f42e..99df67a6ba7 100644 --- a/mindspore/core/ops/batch_to_space_nd.h +++ b/mindspore/core/ops/batch_to_space_nd.h @@ -33,7 +33,7 @@ class BatchToSpaceND : public PrimitiveC { BatchToSpaceND() : PrimitiveC(kNameBatchToSpaceND) {} ~BatchToSpaceND() = default; MS_DECLARE_PARENT(BatchToSpaceND, PrimitiveC); - void Init(std::vector block_shape, std::vector> crops); + void Init(const std::vector block_shape, const std::vector> crops); void set_crops(std::vector> crops); void set_block_shape(std::vector block_shape); std::vector get_block_shape() const; diff --git a/mindspore/core/ops/conv2d.cc b/mindspore/core/ops/conv2d.cc index c579f0ce6bb..07c493a5840 100644 --- a/mindspore/core/ops/conv2d.cc +++ b/mindspore/core/ops/conv2d.cc @@ -144,6 +144,9 @@ void Conv2DPadFunction(std::vector *output_hw, std::vector *pa abstract::ShapePtr Conv2dInferShape(const PrimitivePtr &primitive, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto prim_name = primitive->name(); + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } auto x_shape_map = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape()); auto w_shape_map = 
CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[1]->BuildShape()); auto x_shape = x_shape_map[kShape]; diff --git a/mindspore/core/ops/cos.cc b/mindspore/core/ops/cos.cc index 845261b3f6e..be4e80b1b62 100644 --- a/mindspore/core/ops/cos.cc +++ b/mindspore/core/ops/cos.cc @@ -32,7 +32,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { - if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } std::map types; diff --git a/mindspore/core/ops/fake_quant_with_min_max_vars.cc b/mindspore/core/ops/fake_quant_with_min_max_vars.cc index 6c5fa3e8fd0..21ffb6c4dd7 100644 --- a/mindspore/core/ops/fake_quant_with_min_max_vars.cc +++ b/mindspore/core/ops/fake_quant_with_min_max_vars.cc @@ -47,7 +47,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { const std::set valid_types = {kFloat16, kFloat32}; - if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr arg) { return arg == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } std::map types; diff --git a/mindspore/core/ops/grad/hshrink_grad.h b/mindspore/core/ops/grad/hshrink_grad.h index 45e92b79b33..210b8b47965 100644 --- a/mindspore/core/ops/grad/hshrink_grad.h +++ b/mindspore/core/ops/grad/hshrink_grad.h @@ -25,7 +25,7 @@ namespace mindspore { namespace ops { constexpr auto kNameHShrinkGrad = "HShrinkGrad"; -class MS_CORE_API HShrinkGrad : public PrimitiveC { +class HShrinkGrad : public PrimitiveC { public: HShrinkGrad() : PrimitiveC(kNameHShrinkGrad) { InitIOName({"gradients", "features"}, {"backprops"}); } ~HShrinkGrad() = default; diff --git a/mindspore/core/ops/grad/soft_margin_loss_grad.h 
b/mindspore/core/ops/grad/soft_margin_loss_grad.h index e5a47350ab1..152ff646fe6 100644 --- a/mindspore/core/ops/grad/soft_margin_loss_grad.h +++ b/mindspore/core/ops/grad/soft_margin_loss_grad.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr auto kNameSoftMarginLossGrad = "SoftMarginLossGrad"; -class MS_CORE_API SoftMarginLossGrad : public PrimitiveC { +class SoftMarginLossGrad : public PrimitiveC { public: SoftMarginLossGrad() : PrimitiveC(kNameSoftMarginLossGrad) { InitIOName({"predict", "label", "dout"}, {"gradient"}); } ~SoftMarginLossGrad() = default; diff --git a/mindspore/core/ops/hshrink.h b/mindspore/core/ops/hshrink.h index 5bff01a8319..582e8847dea 100644 --- a/mindspore/core/ops/hshrink.h +++ b/mindspore/core/ops/hshrink.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameHShrink = "HShrink"; -class MS_CORE_API HShrink : public PrimitiveC { +class HShrink : public PrimitiveC { public: HShrink() : PrimitiveC(kNameHShrink) { InitIOName({"input_x"}, {"output"}); } ~HShrink() = default; diff --git a/mindspore/core/ops/logical_not.cc b/mindspore/core/ops/logical_not.cc index cc215908fbc..5b71d133ee9 100644 --- a/mindspore/core/ops/logical_not.cc +++ b/mindspore/core/ops/logical_not.cc @@ -32,6 +32,7 @@ abstract::ShapePtr LogicalNotInferShape(const PrimitivePtr &primitive, const std TypePtr LogicalNotInferType(const PrimitivePtr &prim, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(prim); auto op_name = prim->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_dtype = input_args[0]->BuildType(); std::set local_bool = {kBool}; return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, local_bool, op_name); diff --git a/mindspore/core/ops/lrn.cc b/mindspore/core/ops/lrn.cc index d7025310d65..d4eadbd1360 100644 --- a/mindspore/core/ops/lrn.cc +++ b/mindspore/core/ops/lrn.cc @@ -86,10 +86,11 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { const 
std::set valid_types = {kFloat16, kFloat32}; - if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } std::map types; + MS_EXCEPTION_IF_NULL(input_args[0]); types.emplace("x", input_args[0]->BuildType()); return CheckAndConvertUtils::CheckTensorTypeSame(types, valid_types, prim->name()); } diff --git a/mindspore/core/ops/max_pool.cc b/mindspore/core/ops/max_pool.cc index c7e1618c459..4583fe0a196 100644 --- a/mindspore/core/ops/max_pool.cc +++ b/mindspore/core/ops/max_pool.cc @@ -82,6 +82,7 @@ namespace { abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto op_name = primitive->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto in_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->GetShapeTrack())[kShape]; auto format = Format(GetValue(primitive->GetAttr(kFormat))); if (format == NHWC) { @@ -123,7 +124,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { - if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), [](const AbstractBasePtr arg) { return arg == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } auto input_type = input_args[0]->BuildType(); diff --git a/mindspore/core/ops/ones_like.cc b/mindspore/core/ops/ones_like.cc index 90e07ae6e88..d2b85398c0d 100644 --- a/mindspore/core/ops/ones_like.cc +++ b/mindspore/core/ops/ones_like.cc @@ -34,7 +34,9 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(primitive); auto op_name = primitive->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_type = input_args[0]->BuildType(); auto valid_type = 
common_valid_types; valid_type.insert(kBool); diff --git a/mindspore/core/ops/pack.cc b/mindspore/core/ops/pack.cc index 6bb6ce9a577..08965c86c18 100644 --- a/mindspore/core/ops/pack.cc +++ b/mindspore/core/ops/pack.cc @@ -58,6 +58,7 @@ AbstractBasePtr PackInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt MS_EXCEPTION_IF_NULL(primitive); auto prim_name = primitive->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto x_shapes = input_args[0]->BuildShape()->cast()->shape(); auto x_types = input_args[0]->BuildType()->cast()->elements(); auto all_shape = _get_pack_shape(x_shapes, x_types, GetValue(primitive->GetAttr(kAxis)), prim_name); diff --git a/mindspore/core/ops/rank.cc b/mindspore/core/ops/rank.cc index b969ec1fd05..1bf9569ccc3 100644 --- a/mindspore/core/ops/rank.cc +++ b/mindspore/core/ops/rank.cc @@ -22,6 +22,7 @@ namespace { TypePtr RankInferType(const PrimitivePtr &prim, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(prim); auto op_name = prim->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_dtype = input_args[0]->BuildType(); CheckAndConvertUtils::CheckTensorTypeValid("x", infer_dtype, {kTensorType}, op_name); return kTypeNone; diff --git a/mindspore/core/ops/reduce_sum.cc b/mindspore/core/ops/reduce_sum.cc index ec4e5f7ee1f..7bf3912b729 100644 --- a/mindspore/core/ops/reduce_sum.cc +++ b/mindspore/core/ops/reduce_sum.cc @@ -15,12 +15,178 @@ */ #include +#include #include "ops/reduce_sum.h" #include "ops/op_utils.h" namespace mindspore { namespace ops { -REGISTER_PRIMITIVE_C(kNameReduceSum, ReduceSum); +namespace { +int64_t InferImplReduceFuncCheckAxis(const int64_t &axis, const size_t dim) { + int64_t dim_ = static_cast(dim); + if (axis < -dim_ || axis >= dim_) { + MS_LOG(EXCEPTION) << "axis should be in [" << -dim_ << ", " << dim_ << "). 
But got axis = " << axis; + } + int64_t ret_axis = axis; + if (axis >= -dim_ && axis < 0) { + ret_axis += dim_; + } + return ret_axis; +} + +void InferImplReduceFuncCalShape(ShapeVector *shape, const ShapeVector &x_shape, const ValuePtr &axis, + bool keep_dims_value) { + if (axis->isa() || axis->isa()) { + auto axis_ptr_list = + axis->isa() ? axis->cast()->value() : axis->cast()->value(); + if (!axis_ptr_list.size()) { + if (keep_dims_value) (void)shape->insert(shape->end(), x_shape.size(), 1); + } else { + (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end()); + ValuePtrList axis_items = axis_ptr_list; + ValuePtrList::iterator it; + if (keep_dims_value) { + for (it = axis_items.begin(); it != axis_items.end(); ++it) { + auto axis_value = GetValue(*it); + shape->at(axis_value) = 1; + } + } else { + std::vector axis_value_list; + for (it = axis_items.begin(); it != axis_items.end(); ++it) { + auto axis_value = GetValue(*it); + auto axis_positive_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size()); + axis_value_list.push_back(axis_positive_value); + } + std::sort(axis_value_list.begin(), axis_value_list.end()); + std::vector::reverse_iterator it_re; + for (it_re = axis_value_list.rbegin(); it_re != axis_value_list.rend(); ++it_re) { + (void)shape->erase(shape->begin() + *it_re); + } + } + } + } else if (axis->isa() || axis->isa()) { + (void)shape->insert(shape->end(), x_shape.begin(), x_shape.end()); + int64_t axis_value = GetValue(axis); + axis_value = InferImplReduceFuncCheckAxis(axis_value, x_shape.size()); + if (keep_dims_value) { + shape->at(axis_value) = 1; + } else { + (void)shape->erase(shape->begin() + axis_value); + } + } else { + MS_LOG(EXCEPTION) << "Axis should be one of types: [int/tuple/list]."; + } + return; +} + +abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(primitive); + auto shape_ptr = CheckAndConvertUtils::GetTensorInputShape("ReduceSum", input_args, 
0); + auto input_shape = shape_ptr->shape(); + auto input_min_shape = shape_ptr->min_shape(); + auto input_max_shape = shape_ptr->max_shape(); + auto keep_dimis_value_ptr = primitive->GetAttr(kKeepDims); + MS_EXCEPTION_IF_NULL(keep_dimis_value_ptr); + if (!keep_dimis_value_ptr->isa()) { + MS_LOG(EXCEPTION) << "Keep_dims should be Bool."; + } + bool keep_dims = GetValue(keep_dimis_value_ptr); + ShapeVector out_shape = {}; + ShapeVector out_min_shape = {}; + ShapeVector out_max_shape = {}; + int64_t max_v; + if (shape_ptr->IsDynamic()) { + max_v = *max_element(input_max_shape.begin(), input_max_shape.end()); + } else { + max_v = *max_element(input_shape.begin(), input_shape.end()); + } + const int64_t input_num_ascend = 2; + if (input_args.size() == input_num_ascend && input_args[1]->isa() && + input_args[1]->BuildValue()->isa()) { + auto axis_tensor = input_args[1]->cast(); + auto axis_shape = axis_tensor->shape()->shape(); + if (axis_shape.size() == 1 && axis_shape[0] == -1 && !keep_dims) { + out_shape.push_back(-2); + for (size_t i = 0; i < input_shape.size(); ++i) { + out_min_shape.push_back(1); + out_max_shape.push_back(max_v); + } + } else if (!keep_dims) { + for (size_t i = 0; i < input_shape.size() - axis_shape.size(); ++i) { + out_shape.push_back(-1); + out_min_shape.push_back(1); + out_max_shape.push_back(max_v); + } + } else { + for (size_t i = 0; i < input_shape.size(); ++i) { + out_shape.push_back(-1); + out_min_shape.push_back(1); + out_max_shape.push_back(max_v); + } + } + return std::make_shared(out_shape, out_min_shape, out_max_shape); + } else { + ValuePtr axis_value; + ValuePtr axis_ptr; + if (input_args.size() == input_num_ascend) { + axis_ptr = input_args[1]->BuildValue(); + } else { + axis_ptr = primitive->GetAttr("axis"); + } + MS_EXCEPTION_IF_NULL(axis_ptr); + if (axis_ptr->isa()) { + MS_LOG(ERROR) << "Tensor with value"; + auto axis_type = input_args[1]->BuildType(); + MS_EXCEPTION_IF_NULL(axis_type); + auto axis_type_id = axis_type->cast(); 
+ MS_EXCEPTION_IF_NULL(axis_type_id); + auto axis_tensor = axis_ptr->cast(); + MS_EXCEPTION_IF_NULL(axis_tensor); + size_t data_size = LongToSize(axis_tensor->DataSize()); + std::vector value_list; + if (axis_type_id->element()->type_id() == kNumberTypeInt32) { + auto shape_data = reinterpret_cast(axis_tensor->data_c()); + MS_EXCEPTION_IF_NULL(shape_data); + for (size_t i = 0; i < data_size; i++) { + value_list.push_back(MakeValue(static_cast(*shape_data))); + ++shape_data; + } + } else { + auto shape_data2 = reinterpret_cast(axis_tensor->data_c()); + for (size_t i = 0; i < data_size; i++) { + value_list.push_back(MakeValue(static_cast(*shape_data2))); + ++shape_data2; + } + } + axis_value = std::make_shared(value_list); + } else { + axis_value = axis_ptr; + } + InferImplReduceFuncCalShape(&out_shape, input_shape, axis_value, keep_dims); + + if (!input_min_shape.empty() && !input_max_shape.empty()) { + ShapeVector shape_min = {}; + ShapeVector shape_max = {}; + InferImplReduceFuncCalShape(&shape_min, input_min_shape, axis_value, keep_dims); + InferImplReduceFuncCalShape(&shape_max, input_max_shape, axis_value, keep_dims); + return std::make_shared(out_shape, shape_min, shape_max); + } + return std::make_shared(out_shape); + } +} + +TypePtr InferType(const PrimitivePtr &prim, const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(prim); + return CheckAndConvertUtils::CheckTensorTypeValid("x dtype", input_args[0]->BuildType(), common_valid_types, + "ReduceSum"); +} +} // namespace + +AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args) { + CheckAndConvertUtils::CheckInteger("input size", input_args.size(), kGreaterEqual, 1, primitive->name()); + return abstract::MakeAbstract(InferShape(primitive, input_args), InferType(primitive, input_args)); +} } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/reduce_sum.h b/mindspore/core/ops/reduce_sum.h index 
ec9e4a499df..3c67e181b79 100644 --- a/mindspore/core/ops/reduce_sum.h +++ b/mindspore/core/ops/reduce_sum.h @@ -29,11 +29,13 @@ namespace ops { constexpr auto kNameReduceSum = "ReduceSum"; class ReduceSum : public Reduce { public: - ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"input_x", "axis"}, {"y"}); } + ReduceSum() : Reduce(kNameReduceSum) { InitIOName({"x", "axis"}, {"y"}); } ~ReduceSum() = default; MS_DECLARE_PARENT(ReduceSum, Reduce); void Init() {} }; +AbstractBasePtr ReduceSumInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive, + const std::vector &input_args); } // namespace ops } // namespace mindspore diff --git a/mindspore/core/ops/round.cc b/mindspore/core/ops/round.cc index fb1d345a2e3..a8c4a59e9b9 100644 --- a/mindspore/core/ops/round.cc +++ b/mindspore/core/ops/round.cc @@ -28,6 +28,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { + MS_EXCEPTION_IF_NULL(input_args[0]); auto infer_type = input_args[0]->BuildType(); return CheckAndConvertUtils::CheckTensorTypeValid("x", infer_type, common_valid_types, prim->name()); } diff --git a/mindspore/core/ops/scatter_nd_update.h b/mindspore/core/ops/scatter_nd_update.h index 03b42cc86b7..5909f0ef48a 100644 --- a/mindspore/core/ops/scatter_nd_update.h +++ b/mindspore/core/ops/scatter_nd_update.h @@ -26,7 +26,7 @@ namespace mindspore { namespace ops { constexpr auto kNameScatterNdUpdate = "ScatterNdUpdate"; -class MS_CORE_API ScatterNdUpdate : public PrimitiveC { +class ScatterNdUpdate : public PrimitiveC { public: ScatterNdUpdate() : PrimitiveC(kNameScatterNdUpdate) { InitIOName({"input_x", "indices", "update"}, {"output"}); } ~ScatterNdUpdate() = default; diff --git a/mindspore/core/ops/soft_margin_loss.h b/mindspore/core/ops/soft_margin_loss.h index 53f63fa38be..e670d99dc51 100644 --- a/mindspore/core/ops/soft_margin_loss.h +++ b/mindspore/core/ops/soft_margin_loss.h @@ -28,7 +28,7 @@ namespace mindspore { namespace ops { constexpr 
auto kNameSoftMarginLoss = "SoftMarginLoss"; -class MS_CORE_API SoftMarginLoss : public PrimitiveC { +class SoftMarginLoss : public PrimitiveC { public: SoftMarginLoss() : PrimitiveC(kNameSoftMarginLoss) { InitIOName({"predict", "label"}, {"loss"}); } ~SoftMarginLoss() = default; diff --git a/mindspore/core/ops/space_to_batch_nd.cc b/mindspore/core/ops/space_to_batch_nd.cc index 98efcf0a5b6..91d27235405 100644 --- a/mindspore/core/ops/space_to_batch_nd.cc +++ b/mindspore/core/ops/space_to_batch_nd.cc @@ -89,7 +89,7 @@ std::vector SpaceToBatchND::get_block_shape() const { return GetValue>(GetAttr(kBlockShape)); } -void SpaceToBatchND::Init(std::vector block_shape, std::vector> paddings) { +void SpaceToBatchND::Init(const std::vector block_shape, const std::vector> paddings) { this->set_paddings(paddings); this->set_block_shape(block_shape); } diff --git a/mindspore/core/ops/space_to_batch_nd.h b/mindspore/core/ops/space_to_batch_nd.h index 8ca02e35fe4..dafd345d262 100644 --- a/mindspore/core/ops/space_to_batch_nd.h +++ b/mindspore/core/ops/space_to_batch_nd.h @@ -33,7 +33,7 @@ class SpaceToBatchND : public PrimitiveC { SpaceToBatchND() : PrimitiveC(kNameSpaceToBatchND) {} ~SpaceToBatchND() = default; MS_DECLARE_PARENT(SpaceToBatchND, PrimitiveC); - void Init(std::vector block_shape, const std::vector> paddings); + void Init(const std::vector block_shape, const std::vector> paddings); void set_paddings(const std::vector> paddings); void set_block_shape(std::vector block_shape); std::vector get_block_shape() const; diff --git a/mindspore/core/ops/squeeze.cc b/mindspore/core/ops/squeeze.cc index fd0139d8599..f144611cd7a 100644 --- a/mindspore/core/ops/squeeze.cc +++ b/mindspore/core/ops/squeeze.cc @@ -54,7 +54,7 @@ abstract::ShapePtr InferShape(const PrimitivePtr &primitive, const std::vector &input_args) { - if (std::any_of(input_args.begin(), input_args.end(), [](AbstractBasePtr a) { return a == nullptr; })) { + if (std::any_of(input_args.begin(), input_args.end(), 
[](const AbstractBasePtr arg) { return arg == nullptr; })) { MS_LOG(EXCEPTION) << "nullptr"; } return input_args[0]->BuildType(); diff --git a/mindspore/core/ops/stack.cc b/mindspore/core/ops/stack.cc index 9740a757ed7..fe47c844b49 100644 --- a/mindspore/core/ops/stack.cc +++ b/mindspore/core/ops/stack.cc @@ -28,6 +28,9 @@ abstract::AbstractBasePtr StackInfer(const PrimitivePtr &primitive, const std::v if (input_args.size() < 1) { MS_LOG(ERROR) << "Invalid input size " << input_args.size(); } + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } auto input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; for (int64_t i = 1; i < SizeToLong(input_args.size()); ++i) { auto input_shape_tmp = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[i]->BuildShape())[kShape]; diff --git a/mindspore/core/ops/strided_slice.cc b/mindspore/core/ops/strided_slice.cc index bc7d459e41e..0292bd4d0a7 100644 --- a/mindspore/core/ops/strided_slice.cc +++ b/mindspore/core/ops/strided_slice.cc @@ -28,6 +28,20 @@ namespace mindspore { namespace ops { namespace { +std::vector TenToTwo(int64_t num) { + std::vector output; + if (num == 0) { + output.push_back(0); + return output; + } + while (num) { + output.push_back(num % 2); + num /= 2; + } + + return output; +} + void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector &x_shape, const std::vector &begin_v, const std::vector &end_v, const std::vector &strides_v, std::vector *infer_shape, size_t i, size_t j, @@ -40,10 +54,11 @@ void EllipsisInferShape(const PrimitivePtr &primitive, const std::vector begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask()); - std::vector end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask()); - std::vector new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask()); - std::vector shrink_axis_pos = 
strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask()); + std::vector begin_pos = TenToTwo(GetValue(primitive->GetAttr(kBeginMask))); + std::vector end_pos = TenToTwo(GetValue(primitive->GetAttr(kEndMask))); + std::vector ellipsis_pos = TenToTwo(GetValue(primitive->GetAttr(kEllipsisMask))); + std::vector new_axis_pos = TenToTwo(GetValue(primitive->GetAttr(kNewAxisMask))); + std::vector shrink_axis_pos = TenToTwo(GetValue(primitive->GetAttr(kShrinkAxisMask))); (void)CheckAndConvertUtils::CheckInteger("infer", SizeToLong(new_axis_pos.size()), kGreaterEqual, SizeToLong(slice_len), primitive->name()); @@ -105,10 +120,12 @@ const std::vector CheckAndGetValidStrides(const AbstractBasePtr &stride std::vector ComputeInferShape(const PrimitivePtr &primitive, const std::vector &begin_v, const std::vector &end_v, const std::vector &x_shape, - const std::vector &strides_v, const std::vector &begin_pos, - const std::vector &shrink_axis_pos, const std::vector &end_pos, - const std::vector &new_axis_pos, - const std::vector &ellipsis_pos) { + const std::vector &strides_v) { + std::vector begin_pos = TenToTwo(GetValue(primitive->GetAttr(kBeginMask))); + std::vector end_pos = TenToTwo(GetValue(primitive->GetAttr(kEndMask))); + std::vector ellipsis_pos = TenToTwo(GetValue(primitive->GetAttr(kEllipsisMask))); + std::vector new_axis_pos = TenToTwo(GetValue(primitive->GetAttr(kNewAxisMask))); + std::vector shrink_axis_pos = TenToTwo(GetValue(primitive->GetAttr(kShrinkAxisMask))); size_t i = 0; size_t j = 0; int64_t start; @@ -171,8 +188,6 @@ std::vector ComputeInferShape(const PrimitivePtr &primitive, const std: abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive, const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); - auto strided_slice_prim = primitive->cast(); - MS_EXCEPTION_IF_NULL(strided_slice_prim); auto tuple_begin_v = input_args[1]->cast(); MS_EXCEPTION_IF_NULL(tuple_begin_v); auto temp_begin_v = tuple_begin_v->BuildValue(); @@ 
-189,20 +204,12 @@ abstract::ShapePtr StridedSliceInferShape(const PrimitivePtr &primitive, auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; auto min_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMinShape]; auto max_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kMaxShape]; - std::vector begin_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_begin_mask()); - std::vector end_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_end_mask()); - std::vector ellipsis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_ellipsis_mask()); - std::vector new_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_new_axis_mask()); - std::vector shrink_axis_pos = strided_slice_prim->TenToTwo(strided_slice_prim->get_shrink_axis_mask()); - auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v, begin_pos, shrink_axis_pos, - end_pos, new_axis_pos, ellipsis_pos); + auto ret_in_shape = ComputeInferShape(primitive, begin_v, end_v, x_shape, strides_v); if (min_shape.empty() || max_shape.empty()) { return std::make_shared(ret_in_shape); } - auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v, begin_pos, shrink_axis_pos, - end_pos, new_axis_pos, ellipsis_pos); - auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v, begin_pos, shrink_axis_pos, - end_pos, new_axis_pos, ellipsis_pos); + auto ret_min_shape = ComputeInferShape(primitive, begin_v, end_v, min_shape, strides_v); + auto ret_max_shape = ComputeInferShape(primitive, begin_v, end_v, max_shape, strides_v); return std::make_shared(ret_in_shape, ret_min_shape, ret_max_shape); } @@ -267,20 +274,6 @@ void StridedSlice::Init(const int64_t begin_mask, const int64_t end_mask, const this->set_shrink_axis_mask(shrink_axis_mask); } -std::vector StridedSlice::TenToTwo(int64_t num) { - 
std::vector output; - if (num == 0) { - output.push_back(0); - return output; - } - while (num) { - output.push_back(num % 2); - num /= 2; - } - - return output; -} - int64_t StridedSlice::compute_slicing_length(int64_t start_pos, int64_t end_pos, int64_t strides, int64_t x_dim) const { int64_t slicing_length = 0; if (strides > 0) { diff --git a/mindspore/core/ops/topk.cc b/mindspore/core/ops/topk.cc index e861dc7d4e5..c1fa50e0c62 100644 --- a/mindspore/core/ops/topk.cc +++ b/mindspore/core/ops/topk.cc @@ -35,6 +35,9 @@ AbstractBasePtr TopKInfer(const abstract::AnalysisEnginePtr &, const PrimitivePt (void)CheckAndConvertUtils::CheckInteger("top_k_infer", SizeToLong(input_args.size()), kEqual, 2, prim_name); // Infer dtype + for (const auto &item : input_args) { + MS_EXCEPTION_IF_NULL(item); + } auto output1_type = kInt32; const std::set valid_types = {kFloat16, kFloat32}; auto output0_type = diff --git a/mindspore/core/ops/unpack.cc b/mindspore/core/ops/unpack.cc index 2a7a19a7667..faf02a802be 100644 --- a/mindspore/core/ops/unpack.cc +++ b/mindspore/core/ops/unpack.cc @@ -26,6 +26,7 @@ AbstractBasePtr UnpackInfer(const abstract::AnalysisEnginePtr &, const Primitive const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto prim_name = primitive->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); CheckAndConvertUtils::CheckSubClass("x", input_args[0]->BuildType(), {TypeIdToType(kObjectTypeTensorType)}, prim_name); auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; diff --git a/mindspore/core/ops/unsorted_segment_sum.cc b/mindspore/core/ops/unsorted_segment_sum.cc index 8a6f914fd54..8b84c30759b 100644 --- a/mindspore/core/ops/unsorted_segment_sum.cc +++ b/mindspore/core/ops/unsorted_segment_sum.cc @@ -31,6 +31,9 @@ AbstractBasePtr UnsortedSegmentSumInfer(const abstract::AnalysisEnginePtr &, con auto prim_name = primitive->name(); // Infer type + for (const auto &item : input_args) { + 
MS_EXCEPTION_IF_NULL(item); + } auto x_type = input_args[0]->BuildType()->cast()->element(); // Infer shape auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; diff --git a/mindspore/core/ops/unstack.cc b/mindspore/core/ops/unstack.cc index 29f4a8eca2d..01159252aef 100644 --- a/mindspore/core/ops/unstack.cc +++ b/mindspore/core/ops/unstack.cc @@ -25,6 +25,7 @@ AbstractBasePtr UnstackInfer(const abstract::AnalysisEnginePtr &, const Primitiv const std::vector &input_args) { MS_EXCEPTION_IF_NULL(primitive); auto prim_name = primitive->name(); + MS_EXCEPTION_IF_NULL(input_args[0]); auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape]; int64_t dim = x_shape.size(); int64_t axis = GetValue(primitive->GetAttr(kAxis)); diff --git a/mindspore/core/proto/mind_ir.proto b/mindspore/core/proto/mind_ir.proto index cd6182b9e15..8d9c9ecc434 100644 --- a/mindspore/core/proto/mind_ir.proto +++ b/mindspore/core/proto/mind_ir.proto @@ -23,6 +23,9 @@ message AttributeProto { TENSOR = 17; GRAPH = 18; TENSORS = 19; + TUPLE = 20; // tuple + LIST = 21; // list + DICT = 22; // dictionary } optional string name = 1; optional float f = 2; @@ -40,6 +43,8 @@ message AttributeProto { optional string doc_string = 14; optional string ref_attr_name = 15; optional AttributeType type = 16; + repeated AttributeProto values = 17; // tuple, list,dict of value + optional AttributeType type_val = 18; // type type info } @@ -70,6 +75,7 @@ message ModelProto { optional string model_version = 5; optional string doc_string = 6; optional GraphProto graph = 7; + repeated GraphProto functions = 8; // all the graphs without the main graph. 
} diff --git a/mindspore/core/utils/check_convert_utils.cc b/mindspore/core/utils/check_convert_utils.cc index e5553cf2ab1..6be7796aebc 100644 --- a/mindspore/core/utils/check_convert_utils.cc +++ b/mindspore/core/utils/check_convert_utils.cc @@ -175,6 +175,21 @@ void CheckAndConvertUtils::GetPadModEnumValue(const ValuePtr &value, int64_t *en } } +void CheckAndConvertUtils::GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value) { + MS_EXCEPTION_IF_NULL(value); + if (value->isa()) { + auto attr_value_str = GetValue(value); + + std::map pad_map = ReductionToEnumMap; + if (pad_map.find(attr_value_str) == pad_map.end()) { + MS_LOG(EXCEPTION) << "Invalid pad mode " << attr_value_str << " use pad, valid or same"; + } + *enum_value = pad_map[attr_value_str]; + } else { + *enum_value = GetValue(value); + } +} + AttrConverterPair CheckAndConvertUtils::GetAttrConvertPair(const std::string &op_type, const std::string &attr_name) { AttrConverterPair attr_pair; if (op_type.empty() || attr_name.empty()) { diff --git a/mindspore/core/utils/check_convert_utils.h b/mindspore/core/utils/check_convert_utils.h index ac7aa08a8c0..6e8820c0402 100644 --- a/mindspore/core/utils/check_convert_utils.h +++ b/mindspore/core/utils/check_convert_utils.h @@ -297,6 +297,7 @@ class CheckAndConvertUtils { static AttrConverterPair GetAttrConvertPair(const std::string &op_type, const std::string &attr_name); static bool GetDataFormatEnumValue(const ValuePtr &value, int64_t *enum_value); static void GetPadModEnumValue(const ValuePtr &value, int64_t *enum_value, bool is_upper = false); + static void GetReductionEnumValue(const ValuePtr &value, int64_t *enum_value); static bool CheckIrAttrtoOpAttr(const std::string &op_type, const std::string &attr_name, ValuePtr *const value); static void CheckSummaryParam(const AbstractBasePtr &name, const AbstractBasePtr &value, const std::string &class_name); diff --git a/mindspore/core/utils/log_adapter.cc b/mindspore/core/utils/log_adapter.cc index 
7358cadbbe5..1bd1c7888fb 100644 --- a/mindspore/core/utils/log_adapter.cc +++ b/mindspore/core/utils/log_adapter.cc @@ -437,7 +437,9 @@ void common_log_init(void) { if (logtostderr.empty()) { FLAGS_logtostderr = true; } else if (logtostderr == "0" && mindspore::GetEnv("GLOG_log_dir").empty()) { - MS_LOG(EXCEPTION) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0."; + MS_LOG(ERROR) << "`GLOG_log_dir` is empty, it must be set while 'logtostderr' equals to 0."; + // Here can not throw exception and use python to catch, because the PYBIND11_MODULE is not yet been initialed. + exit(EXIT_FAILURE); } // default GLOG_stderrthreshold level to WARNING diff --git a/mindspore/core/utils/parallel_node_check.cc b/mindspore/core/utils/parallel_node_check.cc index 2259be72856..85a077918c9 100644 --- a/mindspore/core/utils/parallel_node_check.cc +++ b/mindspore/core/utils/parallel_node_check.cc @@ -30,7 +30,7 @@ static const std::set PARALLEL_BLACK_LIST_ = {prim::kTupleGetItem, "get_ref_value", "get_ref_origin", "dot", "im2col", "col2im", "im2col_v1", "state_setitem", "ScalarSummary", "ImageSummary", "TensorSummary", "Debug", "HistogramSummary", "col2im_v1", "resolve", "BroadcastGradientArgs", "InvertPermutation", "DropoutGenMask", "embed", "create_instance", "RefToEmbed", - "stop_gradient", "UpdateState", "Load", "Switch"}; + "stop_gradient", "UpdateState", "Load", "Switch", "Print"}; static const std::set ALLGATHER_NODE_LIST_ = {prim::kPrimAllGather, prim::kPrimMiniStepAllGather, prim::kPrimMicroStepAllGather}; static const std::set TRIVIAL_NODE_LIST_ = {prim::kPrimCast, prim::kPrimDepend}; diff --git a/mindspore/core/utils/trace_info.h b/mindspore/core/utils/trace_info.h index e9b29c7b478..22f7252d141 100644 --- a/mindspore/core/utils/trace_info.h +++ b/mindspore/core/utils/trace_info.h @@ -430,6 +430,14 @@ class TraceOpt : public TraceInfo { ~TraceOpt() override = default; TraceInfoPtr clone() override { return std::make_shared(*shared_from_base()); } 
}; + +class TraceListComp : public TraceInfo { + public: + explicit TraceListComp(const DebugInfoPtr &info) : TraceInfo(info, "ListComp", "G-") {} + MS_DECLARE_PARENT(TraceListComp, TraceInfo); + ~TraceListComp() override = default; + TraceInfoPtr clone() override { return std::make_shared(*shared_from_base()); } +}; } // namespace mindspore #endif // MINDSPORE_CORE_UTILS_TRACE_INFO_H_ diff --git a/mindspore/dataset/audio/transforms.py b/mindspore/dataset/audio/transforms.py index f6f97ac0e95..aff46d944f4 100644 --- a/mindspore/dataset/audio/transforms.py +++ b/mindspore/dataset/audio/transforms.py @@ -20,7 +20,9 @@ to improve their training models. import mindspore._c_dataengine as cde import numpy as np from ..transforms.c_transforms import TensorOperation -from .validators import check_band_biquad +from .utils import ScaleType +from .validators import check_allpass_biquad, check_amplitude_to_db, check_band_biquad, check_bandpass_biquad, \ + check_bandreject_biquad, check_bass_biquad, check_time_stretch class AudioTensorOperation(TensorOperation): @@ -40,6 +42,94 @@ class AudioTensorOperation(TensorOperation): "AudioTensorOperation has to implement parse() method.") +class AllpassBiquad(AudioTensorOperation): + """ + Design two-pole all-pass filter for audio waveform of dimension of `(..., time)` + + Args: + sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz), + the value must be greater than 0 . + central_freq (float): central frequency (in Hz), + the value must be greater than 0 . + Q(float, optional): Quality factor,https://en.wikipedia.org/wiki/Q_factor, + Range: (0, 1] (Default=0.707). 
+ + Examples: + >>> import mindspore.dataset.audio.transforms as audio + >>> import numpy as np + + >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) + >>> allpass_biquad_op = audio.AllpassBiquad(44100, 200.0) + >>> waveform_filtered = allpass_biquad_op(waveform) + + References: + https://www.w3.org/2011/audio/audio-eq-cookbook.html#APF + """ + @check_allpass_biquad + def __init__(self, sample_rate, central_freq, Q=0.707): + self.sample_rate = sample_rate + self.central_freq = central_freq + self.Q = Q + + def parse(self): + return cde.AllpassBiquadOperation(self.sample_rate, self.central_freq, self.Q) + + +DE_C_SCALETYPE_TYPE = {ScaleType.MAGNITUDE: cde.ScaleType.DE_SCALETYPE_MAGNITUDE, + ScaleType.POWER: cde.ScaleType.DE_SCALETYPE_POWER} + + +class AmplitudeToDB(AudioTensorOperation): + """ + Converts the input tensor from amplitude/power scale to decibel scale. + + Args: + stype (ScaleType, optional): Scale of the input tensor. (Default="ScaleType.POWER"). + It can be any of [ScaleType.MAGNITUDE, ScaleType.POWER]. + ref_value (float, optional): Param for generate db_multiplier. + amin (float, optional): Lower bound to clamp the input waveform. + top_db (float, optional): Minimum cut-off decibels. The range of values is non-negative. Commonly set at 80.
+ (Default=80.0) + Examples: + >>> channel = 1 + >>> n_fft = 400 + >>> n_frame = 30 + >>> specrogram = np.random.random([channel, n_fft//2+1, n_frame]) + >>> numpy_slices_dataset = ds.NumpySlicesDataset(data=specrogram, column_names=["audio"]) + >>> transforms = [audio.AmplitudeToDB(stype=ScaleType.POWER)] + >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=transforms, input_columns=["audio"]) + """ + + @ check_amplitude_to_db + def __init__(self, stype=ScaleType.POWER, ref_value=1.0, amin=1e-10, top_db=80.0): + self.stype = stype + self.ref_value = ref_value + self.amin = amin + self.top_db = top_db + + def parse(self): + return cde.AmplitudeToDBOperation(DE_C_SCALETYPE_TYPE[self.stype], self.ref_value, self.amin, self.top_db) + + +class Angle(AudioTensorOperation): + """ + Calculate the angle of the complex number sequence of shape (..., 2). + The first dimension represents the real part while the second represents the imaginary. + Args: + + Examples: + >>> import mindspore.dataset.audio.transforms as audio + >>> import numpy as np + + >>> input_complex = np.array([[1.43, 5.434], [23.54, 89.38]]) + >>> angle_op = audio.Angle() + >>> angles = angle_op(input_complex) + """ + + def parse(self): + return cde.AngleOperation() + + class BandBiquad(AudioTensorOperation): """ Design two-pole band filter for audio waveform of dimension of `(..., time)` @@ -69,3 +159,126 @@ class BandBiquad(AudioTensorOperation): def parse(self): return cde.BandBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.noise) + + +class BandpassBiquad(TensorOperation): + """ + Design two-pole band-pass filter. Similar to SoX implementation. + + Args: + sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz) + central_freq (float): central frequency (in Hz) + Q (float, optional): https://en.wikipedia.org/wiki/Q_factor Range: (0,1] (Default=0.707). + const_skirt_gain (bool, optional) : If ``True``, uses a constant skirt gain (peak gain = Q). 
+ If ``False``, uses a constant 0dB peak gain. (Default: ``False``) + + Examples: + >>> import mindspore.dataset.audio.transforms as audio + >>> import numpy as np + + >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) + >>> bandpass_biquad_op = audio.BandpassBiquad(44100, 200.0) + >>> waveform_filtered = bandpass_biquad_op(waveform) + """ + @check_bandpass_biquad + def __init__(self, sample_rate, central_freq, Q=0.707, const_skirt_gain=False): + self.sample_rate = sample_rate + self.central_freq = central_freq + self.Q = Q + self.const_skirt_gain = const_skirt_gain + + def parse(self): + return cde.BandpassBiquadOperation(self.sample_rate, self.central_freq, self.Q, self.const_skirt_gain) + + +class BandrejectBiquad(AudioTensorOperation): + """ + Design two-pole band-reject filter for audio waveform of dimension of `(..., time)` + + Args: + sample_rate (int): sampling rate of the waveform, e.g. 44100 (Hz), + the value must be greater than 0 . + central_freq (float): central frequency (in Hz), + the value must be greater than 0 . + Q(float, optional): Quality factor,https://en.wikipedia.org/wiki/Q_factor, + Range: (0, 1] (Default=0.707). + + Examples: + >>> import mindspore.dataset.audio.transforms as audio + >>> import numpy as np + + >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) + >>> bandreject_biquad_op = audio.BandrejectBiquad(44100, 200.0) + >>> waveform_filtered = bandreject_biquad_op(waveform) + """ + + @check_bandreject_biquad + def __init__(self, sample_rate, central_freq, Q=0.707): + self.sample_rate = sample_rate + self.central_freq = central_freq + self.Q = Q + + def parse(self): + return cde.BandrejectBiquadOperation(self.sample_rate, self.central_freq, self.Q) + + +class BassBiquad(AudioTensorOperation): + """ + Design a bass tone-control effect for audio waveform of dimension of `(..., time)` + + Args: + sample_rate (int): sampling rate of the waveform, e.g.
44100 (Hz) + gain (float): desired gain at the boost (or attenuation) in dB. + central_freq (float): central frequency (in Hz)(Default=100.0). + Q(float, optional): Quality factor, https://en.wikipedia.org/wiki/Q_factor, Range: (0, 1] (Default=0.707). + + Examples: + >>> import mindspore.dataset.audio.transforms as audio + >>> import numpy as np + + >>> waveform = np.array([[2.716064453125e-03, 6.34765625e-03],[9.246826171875e-03, 1.0894775390625e-02]]) + >>> bass_biquad_op = audio.BassBiquad(44100, 100.0) + >>> waveform_filtered = bass_biquad_op(waveform) + """ + @check_bass_biquad + def __init__(self, sample_rate, gain, central_freq=100.0, Q=0.707): + self.sample_rate = sample_rate + self.gain = gain + self.central_freq = central_freq + self.Q = Q + + def parse(self): + return cde.BassBiquadOperation(self.sample_rate, self.gain, self.central_freq, self.Q) + + +class TimeStretch(AudioTensorOperation): + """ + Stretch STFT in time at a given rate, without changing the pitch. + + Args: + hop_length (int, optional): Length of hop between STFT windows (default=None). + n_freq (int, optional): Number of filter banks from STFT (default=201). + fixed_rate (float, optional): Rate to speed up or slow down the input in time (default=None). + + Examples: + >>> freq = 44100 + >>> num_frame = 30 + >>> def gen(): + ... np.random.seed(0) + ... data = np.random.random([freq, num_frame]) + ...
yield (np.array(data, dtype=np.float32), ) + >>> data1 = ds.GeneratorDataset(source=gen, column_names=["multi_dimensional_data"]) + >>> transforms = [py_audio.TimeStretch()] + >>> data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) + """ + @check_time_stretch + def __init__(self, hop_length=None, n_freq=201, fixed_rate=None): + self.n_freq = n_freq + self.fixed_rate = fixed_rate + + n_fft = (n_freq - 1) * 2 + self.hop_length = hop_length if hop_length is not None else n_fft // 2 + self.fixed_rate = fixed_rate if fixed_rate is not None else np.nan + + def parse(self): + return cde.TimeStretchOperation(self.hop_length, self.n_freq, self.fixed_rate) diff --git a/mindspore/dataset/audio/utils.py b/mindspore/dataset/audio/utils.py index 3b1f42579eb..1bf00f2da0d 100644 --- a/mindspore/dataset/audio/utils.py +++ b/mindspore/dataset/audio/utils.py @@ -11,11 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# ============================================================================== """ enum for audio ops """ - from enum import Enum diff --git a/mindspore/dataset/audio/validators.py b/mindspore/dataset/audio/validators.py index da3d4b045f6..ad10b842b68 100644 --- a/mindspore/dataset/audio/validators.py +++ b/mindspore/dataset/audio/validators.py @@ -16,8 +16,41 @@ Validators for TensorOps. 
""" from functools import wraps +from mindspore.dataset.core.validator_helpers import check_not_zero, check_int32, check_float32, check_value, \ + check_value_normalize_std, check_value_ratio, FLOAT_MAX_INTEGER, INT64_MAX, parse_user_args, type_check +from .utils import ScaleType -from mindspore.dataset.core.validator_helpers import check_not_zero, check_int32, check_float32, check_value_normalize_std, parse_user_args, type_check + +def check_amplitude_to_db(method): + """Wrapper method to check the parameters of amplitude_to_db.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [stype, ref_value, amin, top_db], _ = parse_user_args(method, *args, **kwargs) + + # type check stype + type_check(stype, (ScaleType,), "stype") + + # type check ref_value + type_check(ref_value, (int, float), "ref_value") + # value check ref_value + if not ref_value is None: + check_value_ratio(ref_value, (0, FLOAT_MAX_INTEGER), "ref_value") + + # type check amin + type_check(amin, (int, float), "amin") + # value check amin + if not amin is None: + check_value_ratio(amin, (0, FLOAT_MAX_INTEGER), "amin") + + # type check top_db + type_check(top_db, (int, float), "top_db") + # value check top_db + if not top_db is None: + check_value_ratio(top_db, (0, FLOAT_MAX_INTEGER), "top_db") + + return method(self, *args, **kwargs) + return new_method def check_biquad_sample_rate(sample_rate): @@ -44,6 +77,17 @@ def check_biquad_noise(noise): type_check(noise, (bool,), "noise") +def check_biquad_const_skirt_gain(const_skirt_gain): + """Wrapper method to check the parameters of const_skirt_gain.""" + type_check(const_skirt_gain, (bool,), "const_skirt_gain") + + +def check_biquad_gain(gain): + """Wrapper method to check the parameters of gain.""" + type_check(gain, (float, int), "gain") + check_float32(gain, "gain") + + def check_band_biquad(method): """Wrapper method to check the parameters of BandBiquad.""" @@ -58,3 +102,87 @@ def check_band_biquad(method): return method(self, *args, 
**kwargs) return new_method + + +def check_allpass_biquad(method): + """Wrapper method to check the parameters of AllpassBiquad.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [sample_rate, central_freq, Q], _ = parse_user_args( + method, *args, **kwargs) + check_biquad_sample_rate(sample_rate) + check_biquad_central_freq(central_freq) + check_biquad_Q(Q) + return method(self, *args, **kwargs) + + return new_method + + +def check_bandpass_biquad(method): + """Wrapper method to check the parameters of BandpassBiquad.""" + + @ wraps(method) + def new_method(self, *args, **kwargs): + [sample_rate, central_freq, Q, const_skirt_gain], _ = parse_user_args( + method, *args, **kwargs) + check_biquad_sample_rate(sample_rate) + check_biquad_central_freq(central_freq) + check_biquad_Q(Q) + check_biquad_const_skirt_gain(const_skirt_gain) + return method(self, *args, **kwargs) + + return new_method + + +def check_bandreject_biquad(method): + """Wrapper method to check the parameters of BandrejectBiquad.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [sample_rate, central_freq, Q], _ = parse_user_args( + method, *args, **kwargs) + check_biquad_sample_rate(sample_rate) + check_biquad_central_freq(central_freq) + check_biquad_Q(Q) + return method(self, *args, **kwargs) + + return new_method + + +def check_bass_biquad(method): + """Wrapper method to check the parameters of BassBiquad.""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [sample_rate, gain, central_freq, Q], _ = parse_user_args( + method, *args, **kwargs) + check_biquad_sample_rate(sample_rate) + check_biquad_gain(gain) + check_biquad_central_freq(central_freq) + check_biquad_Q(Q) + return method(self, *args, **kwargs) + + return new_method + + +def check_time_stretch(method): + """Wrapper method to check the parameters of time_stretch.""" + @wraps(method) + def new_method(self, *args, **kwargs): + [hop_length, n_freq, fixed_rate], _ = parse_user_args(method, *args,
**kwargs) + # type check + type_check(hop_length, (int, type(None)), "hop_length") + type_check(n_freq, (int,), "n_freq") + type_check(fixed_rate, (int, float, type(None)), "fixed_rate") + + # value check + if hop_length is not None: + check_value(hop_length, (1, INT64_MAX), "hop_length") + check_value(n_freq, (1, INT64_MAX), "n_freq") + if fixed_rate is not None: + check_value_ratio(fixed_rate, (0, FLOAT_MAX_INTEGER), "fixed_rate") + + return method(self, *args, **kwargs) + + return new_method diff --git a/mindspore/dataset/core/validator_helpers.py b/mindspore/dataset/core/validator_helpers.py index 55591b9d9d2..7ca1696e857 100644 --- a/mindspore/dataset/core/validator_helpers.py +++ b/mindspore/dataset/core/validator_helpers.py @@ -210,6 +210,11 @@ def check_2tuple(value, arg_name=""): raise ValueError("Value {0} needs to be a 2-tuple.".format(arg_name)) +def check_int32(value, arg_name=""): + type_check(value, (int,), arg_name) + check_value(value, [INT32_MIN, INT32_MAX], arg_name) + + def check_uint8(value, arg_name=""): """ Validates the value of a variable is within the range of uint8. @@ -246,11 +251,6 @@ def check_pos_uint32(value, arg_name=""): check_value(value, [POS_INT_MIN, UINT32_MAX]) -def check_int32(value, arg_name=""): - type_check(value, (int,), arg_name) - check_value(value, [INT32_MIN, INT32_MAX], arg_name) - - def check_pos_int32(value, arg_name=""): """ Validates the value of a variable is within the range of int32. 
@@ -482,8 +482,6 @@ def check_filename(path): if filename.startswith(' ') or filename.endswith(' '): raise ValueError("filename should not start/end with space.") - return True - def check_dir(dataset_dir): """ @@ -682,3 +680,4 @@ def check_c_tensor_op(param, param_name): def replace_none(value, default): """ replaces None with a default value.""" return value if value is not None else default + \ No newline at end of file diff --git a/mindspore/dataset/engine/__init__.py b/mindspore/dataset/engine/__init__.py index 51103dcd204..c445542f630 100644 --- a/mindspore/dataset/engine/__init__.py +++ b/mindspore/dataset/engine/__init__.py @@ -33,7 +33,7 @@ from .serializer_deserializer import compare, deserialize, serialize, show __all__ = ["CelebADataset", "Cifar100Dataset", "Cifar10Dataset", "CLUEDataset", "CocoDataset", "CSVDataset", "GeneratorDataset", "GraphData", "ImageFolderDataset", "ManifestDataset", "MindDataset", "MnistDataset", - "LibriSpeechDataset", + "CmuArcticDataset", "NumpySlicesDataset", "PaddedDataset", "TextFileDataset", "TFRecordDataset", "VOCDataset", "DistributedSampler", "PKSampler", "RandomSampler", "SequentialSampler", "SubsetRandomSampler", "WeightedRandomSampler", "SubsetSampler", diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 7168200e0ca..eab49e87cbd 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -58,7 +58,7 @@ from .queue import _SharedQueue from .validators import check_batch, check_shuffle, check_map, check_filter, check_repeat, check_skip, check_zip, \ check_rename, check_numpyslicesdataset, check_device_send, check_take, check_project, check_imagefolderdataset, \ check_mnist_cifar_dataset, check_manifestdataset, check_tfrecorddataset, check_vocdataset, check_cocodataset, \ - check_celebadataset, check_minddataset,check_libri_speech_dataset, check_generatordataset, check_sync_wait, check_zip_dataset, \ + check_celebadataset, 
check_minddataset,check_cmu_arctic_dataset, check_generatordataset, check_sync_wait, check_zip_dataset, \ check_add_column, check_textfiledataset, check_concat, check_random_dataset, check_split, \ check_bucket_batch_by_length, check_cluedataset, check_save, check_csvdataset, check_paddeddataset, \ check_tuple_iterator, check_dict_iterator, check_schema, check_to_device_send @@ -4369,19 +4369,19 @@ class Cifar10Dataset(MappableDataset): return cde.Cifar10Node(self.dataset_dir, self.usage, self.sampler) -class LibriSpeechDataset(MappableDataset): +class CmuArcticDataset(MappableDataset): - @check_libri_speech_dataset + @check_cmu_arctic_dataset def __init__(self, dataset_dir, usage=None, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None, num_shards=None, shard_id=None, cache=None): super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples, shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache) self.dataset_dir = dataset_dir - self.usage = replace_none(usage, "test-other") + self.usage = replace_none(usage, "aew") def parse(self, children=None): - return cde.LibriSpeechNode(self.dataset_dir, self.usage, self.sampler) + return cde.CmuArcticNode(self.dataset_dir, self.usage, self.sampler) class Cifar100Dataset(MappableDataset): """ diff --git a/mindspore/dataset/engine/serializer_deserializer.py b/mindspore/dataset/engine/serializer_deserializer.py index deacd6e2408..0ec39085a28 100644 --- a/mindspore/dataset/engine/serializer_deserializer.py +++ b/mindspore/dataset/engine/serializer_deserializer.py @@ -17,12 +17,9 @@ Functions to support dataset serialize and deserialize. """ import json import os -import sys -import mindspore.common.dtype as mstype from mindspore import log as logger from . 
import datasets as de -from ..vision.utils import Inter, Border, ImageBatchFormat def serialize(dataset, json_filepath=""): @@ -87,15 +84,10 @@ def deserialize(input_dict=None, json_filepath=None): """ data = None if input_dict: - data = construct_pipeline(input_dict) + data = de.DeserializedDataset(input_dict) if json_filepath: - dict_pipeline = dict() - real_file_path = os.path.realpath(json_filepath) - with open(real_file_path, 'r') as json_file: - dict_pipeline = json.load(json_file) - data = construct_pipeline(dict_pipeline) - + data = de.DeserializedDataset(json_filepath) return data @@ -146,341 +138,3 @@ def compare(pipeline1, pipeline2): """ return pipeline1.to_json() == pipeline2.to_json() - - -def construct_pipeline(node): - """Construct the Python Dataset objects by following the dictionary deserialized from JSON file.""" - op_type = node.get('op_type') - if not op_type: - raise ValueError("op_type field in the json file can't be None.") - - # Instantiate Python Dataset object based on the current dictionary element - dataset = create_node(node) - # Initially it is not connected to any other object. - dataset.children = [] - - # Construct the children too and add edge between the children and parent. - for child in node['children']: - dataset.children.append(construct_pipeline(child)) - - return dataset - - -def create_node(node): - """Parse the key, value in the node dictionary and instantiate the Python Dataset object""" - logger.info('creating node: %s', node['op_type']) - dataset_op = node['op_type'] - op_module = "mindspore.dataset" - - # Get the Python class to be instantiated. - # Example: - # "op_type": "MapDataset", - # "op_module": "mindspore.dataset.datasets", - if node.get("children"): - pyclass = getattr(sys.modules[op_module], "Dataset") - else: - pyclass = getattr(sys.modules[op_module], dataset_op) - - pyobj = None - # Find a matching Dataset class and call the constructor with the corresponding args. 
- # When a new Dataset class is introduced, another if clause and parsing code needs to be added. - # Dataset Source Ops (in alphabetical order) - pyobj = create_dataset_node(pyclass, node, dataset_op) - if not pyobj: - # Dataset Ops (in alphabetical order) - pyobj = create_dataset_operation_node(node, dataset_op) - - return pyobj - - -def create_dataset_node(pyclass, node, dataset_op): - """Parse the key, value in the dataset node dictionary and instantiate the Python Dataset object""" - pyobj = None - if dataset_op == 'CelebADataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node.get('num_parallel_workers'), node.get('shuffle'), node.get('usage'), - sampler, node.get('decode'), node.get('extensions'), num_samples, node.get('num_shards'), - node.get('shard_id')) - - elif dataset_op == 'Cifar10Dataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), - node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'Cifar100Dataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), - node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'ClueDataset': - shuffle = to_shuffle_mode(node.get('shuffle')) - if isinstance(shuffle, str): - shuffle = de.Shuffle(shuffle) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_files'], node.get('task'), - node.get('usage'), num_samples, node.get('num_parallel_workers'), shuffle, - node.get('num_shards'), node.get('shard_id')) - - 
elif dataset_op == 'CocoDataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node.get('annotation_file'), node.get('task'), num_samples, - node.get('num_parallel_workers'), node.get('shuffle'), node.get('decode'), sampler, - node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'CSVDataset': - shuffle = to_shuffle_mode(node.get('shuffle')) - if isinstance(shuffle, str): - shuffle = de.Shuffle(shuffle) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_files'], node.get('field_delim'), - node.get('column_defaults'), node.get('column_names'), num_samples, - node.get('num_parallel_workers'), shuffle, - node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'ImageFolderDataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], num_samples, node.get('num_parallel_workers'), - node.get('shuffle'), sampler, node.get('extensions'), - node.get('class_indexing'), node.get('decode'), node.get('num_shards'), - node.get('shard_id')) - - elif dataset_op == 'ManifestDataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_file'], node['usage'], num_samples, - node.get('num_parallel_workers'), node.get('shuffle'), sampler, - node.get('class_indexing'), node.get('decode'), node.get('num_shards'), - node.get('shard_id')) - - elif dataset_op == 'MnistDataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node['usage'], num_samples, node.get('num_parallel_workers'), - node.get('shuffle'), sampler, node.get('num_shards'), node.get('shard_id')) - - 
elif dataset_op == 'TextFileDataset': - shuffle = to_shuffle_mode(node.get('shuffle')) - if isinstance(shuffle, str): - shuffle = de.Shuffle(shuffle) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_files'], num_samples, - node.get('num_parallel_workers'), shuffle, - node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'TFRecordDataset': - shuffle = to_shuffle_mode(node.get('shuffle')) - if isinstance(shuffle, str): - shuffle = de.Shuffle(shuffle) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_files'], node.get('schema'), node.get('columns_list'), - num_samples, node.get('num_parallel_workers'), - shuffle, node.get('num_shards'), node.get('shard_id')) - - elif dataset_op == 'VOCDataset': - sampler = construct_sampler(node.get('sampler')) - num_samples = check_and_replace_input(node.get('num_samples'), 0, None) - pyobj = pyclass(node['dataset_dir'], node.get('task'), node.get('usage'), node.get('class_indexing'), - num_samples, node.get('num_parallel_workers'), node.get('shuffle'), - node.get('decode'), sampler, node.get('num_shards'), node.get('shard_id')) - - return pyobj - - -def create_dataset_operation_node(node, dataset_op): - """Parse the key, value in the dataset operation node dictionary and instantiate the Python Dataset object""" - pyobj = None - if dataset_op == 'Batch': - pyobj = de.Dataset().batch(node['batch_size'], node.get('drop_remainder')) - - elif dataset_op == 'Map': - tensor_ops = construct_tensor_ops(node.get('operations')) - pyobj = de.Dataset().map(tensor_ops, node.get('input_columns'), node.get('output_columns'), - node.get('column_order'), node.get('num_parallel_workers'), - False, None, node.get('callbacks')) - - elif dataset_op == 'Project': - pyobj = de.Dataset().project(node['columns']) - - elif dataset_op == 'Rename': - pyobj = de.Dataset().rename(node['input_columns'], node['output_columns']) - - elif 
dataset_op == 'Repeat': - pyobj = de.Dataset().repeat(node.get('count')) - - elif dataset_op == 'Shuffle': - pyobj = de.Dataset().shuffle(node.get('buffer_size')) - - elif dataset_op == 'Skip': - pyobj = de.Dataset().skip(node.get('count')) - - elif dataset_op == 'Take': - pyobj = de.Dataset().take(node.get('count')) - - elif dataset_op == 'Transfer': - pyobj = de.Dataset().to_device(node.get('send_epoch_end'), node.get('create_data_info_queue')) - - elif dataset_op == 'Zip': - # Create ZipDataset instance, giving dummy input dataset that will be overrode in the caller. - pyobj = de.ZipDataset((de.Dataset(), de.Dataset())) - - else: - raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize().") - - return pyobj - - -def construct_sampler(in_sampler): - """Instantiate Sampler object based on the information from dictionary['sampler']""" - sampler = None - if in_sampler is not None: - if "num_samples" in in_sampler: - num_samples = check_and_replace_input(in_sampler['num_samples'], 0, None) - sampler_name = in_sampler['sampler_name'] - sampler_module = "mindspore.dataset" - sampler_class = getattr(sys.modules[sampler_module], sampler_name) - if sampler_name == 'DistributedSampler': - sampler = sampler_class(in_sampler['num_shards'], in_sampler['shard_id'], in_sampler.get('shuffle')) - elif sampler_name == 'PKSampler': - sampler = sampler_class(in_sampler['num_val'], in_sampler.get('num_class'), in_sampler('shuffle')) - elif sampler_name == 'RandomSampler': - sampler = sampler_class(in_sampler.get('replacement'), num_samples) - elif sampler_name == 'SequentialSampler': - sampler = sampler_class(in_sampler.get('start_index'), num_samples) - elif sampler_name == 'SubsetRandomSampler': - sampler = sampler_class(in_sampler['indices'], num_samples) - elif sampler_name == 'WeightedRandomSampler': - sampler = sampler_class(in_sampler['weights'], num_samples, in_sampler.get('replacement')) - else: - raise ValueError("Sampler type is unknown: 
{}.".format(sampler_name)) - if in_sampler.get("child_sampler"): - for child in in_sampler["child_sampler"]: - sampler.add_child(construct_sampler(child)) - - return sampler - - -def construct_tensor_ops(operations): - """Instantiate tensor op object(s) based on the information from dictionary['operations']""" - result = [] - for op in operations: - op_name = op.get('tensor_op_name') - op_params = op.get('tensor_op_params') - - if op.get('is_python_front_end_op'): # check if it's a py_transform op - raise NotImplementedError("python function is not yet supported by de.deserialize().") - - if op_name == "HwcToChw": - op_name = "HWC2CHW" - if op_name == "UniformAug": - op_name = "UniformAugment" - op_module_vis = sys.modules["mindspore.dataset.vision.c_transforms"] - op_module_trans = sys.modules["mindspore.dataset.transforms.c_transforms"] - - if hasattr(op_module_vis, op_name): - op_class = getattr(op_module_vis, op_name, None) - elif hasattr(op_module_trans, op_name): - op_class = getattr(op_module_trans, op_name, None) - else: - raise RuntimeError(op_name + " is not yet supported by deserialize().") - - if op_params is None: # If no parameter is specified, call it directly - result.append(op_class()) - else: - # Input parameter type cast - for key, val in op_params.items(): - if key in ['center', 'fill_value']: - op_params[key] = tuple(val) - elif key in ['interpolation', 'resample']: - op_params[key] = Inter(to_interpolation_mode(val)) - elif key in ['padding_mode']: - op_params[key] = Border(to_border_mode(val)) - elif key in ['data_type']: - op_params[key] = to_mstype(val) - elif key in ['image_batch_format']: - op_params[key] = to_image_batch_format(val) - elif key in ['policy']: - op_params[key] = to_policy(val) - elif key in ['transform', 'transforms']: - op_params[key] = construct_tensor_ops(val) - - result.append(op_class(**op_params)) - return result - - -def to_policy(op_list): - """ op_list to policy """ - policy_tensor_ops = [] - for policy_list in 
op_list: - sub_policy_tensor_ops = [] - for policy_item in policy_list: - sub_policy_tensor_ops.append( - (construct_tensor_ops(policy_item.get('tensor_op')), policy_item.get('prob'))) - policy_tensor_ops.append(sub_policy_tensor_ops) - return policy_tensor_ops - - -def to_shuffle_mode(shuffle): - """ int to shuffle mode """ - ret_val = False - if shuffle == 2: - ret_val = "global" - elif shuffle == 1: - ret_val = "files" - return ret_val - - -def to_interpolation_mode(inter): - """ int to interpolation mode """ - return { - 0: Inter.LINEAR, - 1: Inter.NEAREST, - 2: Inter.CUBIC, - 3: Inter.AREA - }[inter] - - -def to_border_mode(border): - """ int to border mode """ - return { - 0: Border.CONSTANT, - 1: Border.EDGE, - 2: Border.REFLECT, - 3: Border.SYMMETRIC - }[border] - - -def to_mstype(data_type): - """ str to mstype """ - return { - "bool": mstype.bool_, - "int8": mstype.int8, - "int16": mstype.int16, - "int32": mstype.int32, - "int64": mstype.int64, - "uint8": mstype.uint8, - "uint16": mstype.uint16, - "uint32": mstype.uint32, - "uint64": mstype.uint64, - "float16": mstype.float16, - "float32": mstype.float32, - "float64": mstype.float64, - "string": mstype.string - }[data_type] - - -def to_image_batch_format(image_batch_format): - """ int to image batch format """ - return { - 0: ImageBatchFormat.NHWC, - 1: ImageBatchFormat.NCHW - }[image_batch_format] - - -def check_and_replace_input(input_value, expect, replace): - """ check and replace input arg """ - return replace if input_value == expect else input_value diff --git a/mindspore/dataset/engine/validators.py b/mindspore/dataset/engine/validators.py index 083bfa66f01..2db4b32ef16 100644 --- a/mindspore/dataset/engine/validators.py +++ b/mindspore/dataset/engine/validators.py @@ -92,8 +92,8 @@ def check_mnist_cifar_dataset(method): return new_method -def check_libri_speech_dataset(method): - """A wrapper that wraps a parameter checker around the original LirbiSpeechDataset.""" +def 
check_cmu_arctic_dataset(method): + """A wrapper that wraps a parameter checker around the original CmuArcticDataset.""" @wraps(method) def new_method(self, *args, **kwargs): @@ -107,7 +107,7 @@ def check_libri_speech_dataset(method): usage = param_dict.get('usage') if usage is not None: - check_valid_str(usage, ['dev-clean', 'dev-other', 'test-clean','test-other', 'train-clean-100', 'train-clean-360','train-other-500'], "usage") + check_valid_str(usage, ['aew', 'ahw', 'aup', 'awb', 'axb', 'bdl', 'clb', 'eey', 'fem', 'gka', 'jmk', 'ksp', 'ljm', 'lnh', 'rms', 'rxr', 'slp' , 'slt'], "usage") validate_dataset_param_value(nreq_param_int, param_dict, int) validate_dataset_param_value(nreq_param_bool, param_dict, bool) diff --git a/mindspore/dataset/vision/c_transforms.py b/mindspore/dataset/vision/c_transforms.py index fd6e1a0c2a5..1f7fb720e82 100644 --- a/mindspore/dataset/vision/c_transforms.py +++ b/mindspore/dataset/vision/c_transforms.py @@ -54,7 +54,7 @@ from .validators import check_prob, check_crop, check_center_crop, check_resize_ check_uniform_augment_cpp, \ check_bounding_box_augment_cpp, check_random_select_subpolicy_op, check_auto_contrast, check_random_affine, \ check_random_solarize, check_soft_dvpp_decode_random_crop_resize_jpeg, check_positive_degrees, FLOAT_MAX_INTEGER, \ - check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches + check_cut_mix_batch_c, check_posterize, check_gaussian_blur, check_rotate, check_slice_patches, check_adjust_gamma from ..transforms.c_transforms import TensorOperation @@ -107,6 +107,37 @@ def parse_padding(padding): return padding +class AdjustGamma(ImageTensorOperation): + r""" + Apply gamma correction on input image. Input image is expected to be in [..., H, W, C] or [H, W, C] format. + .. math:: + I_{\text{out}} = 255 \times \text{gain} \times \left(\frac{I_{\text{in}}}{255}\right)^{\gamma} + + See `Gamma Correction`_ for more details. + + .. 
_Gamma Correction: https://en.wikipedia.org/wiki/Gamma_correction + + Args: + gamma (float): Non negative real number. + The output image pixel value is exponentially related to the input image pixel value. + gamma larger than 1 make the shadows darker, + while gamma smaller than 1 make dark regions lighter. + gain (float, optional): The constant multiplier (default=1). + + Examples: + >>> transforms_list = [c_vision.Decode(), c_vision.AdjustGamma(gamma=10.0, gain=1.0)] + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns=["image"]) + """ + @check_adjust_gamma + def __init__(self, gamma, gain=1): + self.gamma = gamma + self.gain = gain + + def parse(self): + return cde.AdjustGammaOperation(self.gamma, self.gain) + + class AutoContrast(ImageTensorOperation): """ Apply automatic contrast on input image. This operator calculates histogram of image, reassign cutoff percent diff --git a/mindspore/dataset/vision/py_transforms.py b/mindspore/dataset/vision/py_transforms.py index af0ae88bc8e..989d53c7a39 100644 --- a/mindspore/dataset/vision/py_transforms.py +++ b/mindspore/dataset/vision/py_transforms.py @@ -31,7 +31,8 @@ from .validators import check_prob, check_center_crop, check_five_crop, check_re check_normalize_py, check_normalizepad_py, check_random_crop, check_random_color_adjust, check_random_rotation, \ check_ten_crop, check_num_channels, check_pad, check_rgb_to_hsv, check_hsv_to_rgb, \ check_random_perspective, check_random_erasing, check_cutout, check_linear_transform, check_random_affine, \ - check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr + check_mix_up, check_positive_degrees, check_uniform_augment_py, check_auto_contrast, check_rgb_to_bgr, \ + check_adjust_gamma from .utils import Inter, Border from .py_transforms_util import is_pil @@ -1375,7 +1376,6 @@ class RgbToBgr: return util.rgb_to_bgrs(rgb_imgs, self.is_hwc) - class RgbToHsv: """ Convert a NumPy 
RGB image or a batch of NumPy RGB images to HSV images. @@ -1525,6 +1525,44 @@ class RandomSharpness: return util.random_sharpness(img, self.degrees) +class AdjustGamma: + """ + Adjust gamma of the input PIL image. + + Args: + gamma (float): Non negative real number, same as gamma in the equation. + gain (float, optional): The constant multiplier. + + Examples: + >>> from mindspore.dataset.transforms.py_transforms import Compose + >>> transforms_list = Compose([py_vision.Decode(), + ... py_vision.AdjustGamma(gamma=10.0), + ... py_vision.ToTensor()]) + >>> # apply the transform to dataset through map function + >>> image_folder_dataset = image_folder_dataset.map(operations=transforms_list, + ... input_columns="image") + """ + + @check_adjust_gamma + def __init__(self, gamma, gain=1.0): + self.gamma = gamma + self.gain = gain + self.random = False + + def __call__(self, img): + """ + Call method. + + Args: + img (PIL image): Image to be augmented with AdjustGamma. + + Returns: + img (PIL image), Augmented image. + """ + + return util.adjust_gamma(img, self.gamma, self.gain) + + class AutoContrast: """ Automatically maximize the contrast of the input PIL image. diff --git a/mindspore/dataset/vision/py_transforms_util.py b/mindspore/dataset/vision/py_transforms_util.py index 475a4bab9bc..48ed3457837 100644 --- a/mindspore/dataset/vision/py_transforms_util.py +++ b/mindspore/dataset/vision/py_transforms_util.py @@ -19,7 +19,6 @@ import math import numbers import random import colorsys - import numpy as np from PIL import Image, ImageOps, ImageEnhance, __version__ @@ -1243,6 +1242,7 @@ def rgb_to_bgr(np_rgb_img, is_hwc): np_bgr_img = np_rgb_img[::-1, :, :] return np_bgr_img + def rgb_to_bgrs(np_rgb_imgs, is_hwc): """ Convert RGB imgs to BGR imgs. @@ -1473,6 +1473,32 @@ def random_sharpness(img, degrees): return ImageEnhance.Sharpness(img).enhance(v) +def adjust_gamma(img, gamma, gain): + """ + Adjust gamma of the input PIL image. 
+ + Args: + img (PIL image): Image to be augmented with AdjustGamma. + gamma (float): Non negative real number, same as gamma in the equation. + gain (float, optional): The constant multiplier. + + Returns: + img (PIL image), Augmented image. + + """ + + if not is_pil(img): + raise TypeError("img should be PIL image. Got {}.".format(type(img))) + + gamma_table = [(255 + 1 - 1e-3) * gain * pow(x / 255., gamma) for x in range(256)] + if len(img.split()) == 3: + gamma_table = gamma_table * 3 + img = img.point(gamma_table) + elif len(img.split()) == 1: + img = img.point(gamma_table) + return img + + def auto_contrast(img, cutoff, ignore): """ Automatically maximize the contrast of the input PIL image. diff --git a/mindspore/dataset/vision/validators.py b/mindspore/dataset/vision/validators.py index baecbabce73..d8d7b84385b 100644 --- a/mindspore/dataset/vision/validators.py +++ b/mindspore/dataset/vision/validators.py @@ -19,10 +19,10 @@ from functools import wraps import numpy as np from mindspore._c_dataengine import TensorOp, TensorOperation -from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MAX_INTEGER, check_pos_float32, \ - check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, parse_user_args, type_check, \ - type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, check_value_cutoff, check_value_ratio, \ - check_odd +from mindspore.dataset.core.validator_helpers import check_value, check_uint8, FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER, \ + check_pos_float32, check_float32, check_2tuple, check_range, check_positive, INT32_MAX, INT32_MIN, \ + parse_user_args, type_check, type_check_list, check_c_tensor_op, UINT8_MAX, check_value_normalize_std, \ + check_value_cutoff, check_value_ratio, check_odd from .utils import Inter, Border, ImageBatchFormat, SliceMode @@ -788,6 +788,22 @@ def check_bounding_box_augment_cpp(method): return new_method +def check_adjust_gamma(method): + """Wrapper method 
to check the parameters of AdjustGamma ops (Python and C++).""" + + @wraps(method) + def new_method(self, *args, **kwargs): + [gamma, gain], _ = parse_user_args(method, *args, **kwargs) + type_check(gamma, (float, int), "gamma") + check_value(gamma, (0, FLOAT_MAX_INTEGER)) + if gain is not None: + type_check(gain, (float, int), "gain") + check_value(gain, (FLOAT_MIN_INTEGER, FLOAT_MAX_INTEGER)) + return method(self, *args, **kwargs) + + return new_method + + def check_auto_contrast(method): """Wrapper method to check the parameters of AutoContrast ops (Python and C++).""" diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 3731938f1a2..17d306a98de 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -33,6 +33,10 @@ option(MSLITE_ENABLE_TESTCASES "enable testcase" off) option(MSLITE_ENABLE_NNIE "enable NNIE" off) option(MSLITE_COMPILE_NNIE "compile NNIE" off) option(MSLITE_ENABLE_HIGH_PERFORMANCE "enable high performance" on) +option(MSLITE_STRING_KERNEL "enable string kernel" on) +option(MSLITE_CONTROL_TENSORLIST "enable control and tensorlist" on) +option(MSLITE_AUTO_PARALLEL "enable automatic parallelism" on) +option(MSLITE_HUFFMAN_DECODE "enable huffman decode" on) # Option that can be configured through manually option(ENABLE_VERBOSE "" off) @@ -82,6 +86,32 @@ endif() if(DEFINED ENV{MSLITE_ENABLE_HIGH_PERFORMANCE}) set(MSLITE_ENABLE_HIGH_PERFORMANCE $ENV{MSLITE_ENABLE_HIGH_PERFORMANCE}) endif() +if(DEFINED ENV{MSLITE_STRING_KERNEL}) + set(MSLITE_STRING_KERNEL $ENV{MSLITE_STRING_KERNEL}) +endif() +if(DEFINED ENV{MSLITE_CONTROL_TENSORLIST}) + set(MSLITE_CONTROL_TENSORLIST $ENV{MSLITE_CONTROL_TENSORLIST}) +endif() +if(DEFINED ENV{MSLITE_AUTO_PARALLEL}) + set(MSLITE_AUTO_PARALLEL $ENV{MSLITE_AUTO_PARALLEL}) +endif() +if(DEFINED ENV{MSLITE_HUFFMAN_DECODE}) + set(MSLITE_HUFFMAN_DECODE $ENV{MSLITE_HUFFMAN_DECODE}) +endif() + + +if(MSLITE_STRING_KERNEL) + add_compile_definitions(ENABLE_STRING_KERNEL) +endif() 
+if(MSLITE_CONTROL_TENSORLIST) + add_compile_definitions(ENABLE_CONTROL_TENSORLIST) +endif() +if(MSLITE_AUTO_PARALLEL) + add_compile_definitions(ENABLE_AUTO_PARALLEL) +endif() +if(MSLITE_HUFFMAN_DECODE) + add_compile_definitions(ENABLE_HUFFMAN_DECODE) +endif() if(PLATFORM_ARM64) if(MSLITE_GPU_BACKEND STREQUAL "") @@ -191,6 +221,11 @@ else() set(CMAKE_CXX_FLAGS "${LITE_COMPILE_FLAGS} -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS} -std=c++17") set(CMAKE_CXX_FLAGS_DEBUG "-DDebug -g -fvisibility=default") + if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + string(REPLACE "-O2" "-O0" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-O2" "-O0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + endif() + if(WIN32) if(CMAKE_SIZEOF_VOID_P EQUAL 4) set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--no-seh ${CMAKE_SHARED_LINKER_FLAGS}") @@ -201,6 +236,10 @@ else() else() set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s ${CMAKE_SHARED_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack -s -pie ${CMAKE_EXE_LINKER_FLAGS}") + if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") + string(REPLACE "-s " "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") + string(REPLACE "-s " "" CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + endif() endif() endif() @@ -216,7 +255,10 @@ if(SUPPORT_NPU) endif() add_compile_definitions(NO_DLIB) -add_compile_options(-fPIC) + +if(NOT MSVC) + add_compile_options(-fPIC) +endif() if(PLATFORM_ARM64) set(RUNTIME_COMPONENT_NAME "android-aarch64") diff --git a/mindspore/lite/OWNERS b/mindspore/lite/OWNERS index 65b4352238e..e2e7476b4a9 100644 --- a/mindspore/lite/OWNERS +++ b/mindspore/lite/OWNERS @@ -1,18 +1,4 @@ approvers: -- zhang_xue_tong +- zhaizhiqiang - zhanghaibo5 -- ddwsky -- HilbertDavid -- jpc_chenjianping -- hangangqiang -- zqstar -reviewers: -- yangruoqi713 -- yeyunpeng2020 -- ling_qiao_min -- mengyuanli -- zhujingxuan -- zhanyuan1 -- cjh9368 -- zhaozhenlong diff --git a/mindspore/lite/build_lite.sh 
b/mindspore/lite/build_lite.sh index 14f8f14fe01..65cec694837 100755 --- a/mindspore/lite/build_lite.sh +++ b/mindspore/lite/build_lite.sh @@ -371,7 +371,7 @@ build_aar() { cp ${LITE_JAVA_PATH}/java/common/build/libs/mindspore-lite-java-common.jar ${LITE_JAVA_PATH}/java/app/libs ${LITE_JAVA_PATH}/java/gradlew clean -p ${LITE_JAVA_PATH}/java/app - ${LITE_JAVA_PATH}/java/gradlew build -p ${LITE_JAVA_PATH}/java/app + ${LITE_JAVA_PATH}/java/gradlew assembleRelease -p ${LITE_JAVA_PATH}/java/app ${LITE_JAVA_PATH}/java/gradlew publish -PLITE_VERSION=${VERSION_STR} -p ${LITE_JAVA_PATH}/java/app cd ${LITE_JAVA_PATH}/java/app/build diff --git a/mindspore/lite/examples/export_models/models/densenet_train_export.py b/mindspore/lite/examples/export_models/models/densenet_train_export.py index 14c36475890..ea801e5403a 100644 --- a/mindspore/lite/examples/export_models/models/densenet_train_export.py +++ b/mindspore/lite/examples/export_models/models/densenet_train_export.py @@ -21,10 +21,9 @@ from train_utils import save_inout, train_wrap import mindspore.common.dtype as mstype from mindspore import context, Tensor, nn from mindspore.train.serialization import export +from src.network.densenet import DenseNet121 #pylint: disable=wrong-import-position sys.path.append(os.environ['CLOUD_MODEL_ZOO'] + 'official/cv/densenet121/') -from src.network.densenet import DenseNet121 - context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False) diff --git a/mindspore/lite/examples/quick_start_cpp/build.sh b/mindspore/lite/examples/quick_start_cpp/build.sh index 76f3e1407a7..9e12c9b086d 100644 --- a/mindspore/lite/examples/quick_start_cpp/build.sh +++ b/mindspore/lite/examples/quick_start_cpp/build.sh @@ -37,8 +37,8 @@ if [ ! 
-e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL} fi tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/ -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/ +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/ cd ${BASEPATH}/build || exit cmake ${BASEPATH} make diff --git a/mindspore/lite/examples/quick_start_cpp/main.cc b/mindspore/lite/examples/quick_start_cpp/main.cc index 5c3585f4a44..3d4bfe509d1 100644 --- a/mindspore/lite/examples/quick_start_cpp/main.cc +++ b/mindspore/lite/examples/quick_start_cpp/main.cc @@ -19,10 +19,11 @@ #include #include #include -#include "include/errorcode.h" -#include "include/model.h" -#include "include/context.h" -#include "include/lite_session.h" +#include +#include "include/api/model.h" +#include "include/api/context.h" +#include "include/api/status.h" +#include "include/api/types.h" namespace { constexpr int kNumPrintOfOutData = 50; } @@ -95,81 +96,19 @@ void GenerateRandomData(int size, void *data, Distribution distribution) { [&distribution, &random_engine]() { return static_cast(distribution(random_engine)); }); } -int GenerateInputDataWithRandom(std::vector inputs) { +int GenerateInputDataWithRandom(std::vector inputs) { for (auto tensor : inputs) { - auto input_data = tensor->MutableData(); + auto input_data = tensor.MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." 
<< std::endl; return -1; } - GenerateRandomData(tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + GenerateRandomData(tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); } - return mindspore::lite::RET_OK; + return mindspore::kSuccess; } -int Run(mindspore::session::LiteSession *session) { - auto inputs = session->GetInputs(); - - // Generate random data as input data. - auto ret = GenerateInputDataWithRandom(inputs); - if (ret != mindspore::lite::RET_OK) { - std::cerr << "Generate Random Input Data failed." << std::endl; - return ret; - } - - // Run Inference. - ret = session->RunGraph(); - if (ret != mindspore::lite::RET_OK) { - std::cerr << "Inference error " << ret << std::endl; - return ret; - } - - // Get Output Tensor Data. - auto out_tensors = session->GetOutputs(); - for (auto tensor : out_tensors) { - std::cout << "tensor name is:" << tensor.first << " tensor size is:" << tensor.second->Size() - << " tensor elements num is:" << tensor.second->ElementsNum() << std::endl; - auto out_data = reinterpret_cast(tensor.second->MutableData()); - std::cout << "output data is:"; - for (int i = 0; i < tensor.second->ElementsNum() && i <= kNumPrintOfOutData; i++) { - std::cout << out_data[i] << " "; - } - std::cout << std::endl; - } - return mindspore::lite::RET_OK; -} - -mindspore::session::LiteSession *Compile(mindspore::lite::Model *model) { - // Create and init context. - auto context = std::make_shared(); - if (context == nullptr) { - std::cerr << "New context failed while." << std::endl; - return nullptr; - } - - // Create the session. - mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get()); - if (session == nullptr) { - std::cerr << "CreateSession failed while running." << std::endl; - return nullptr; - } - - // Compile graph. 
- auto ret = session->CompileGraph(model); - if (ret != mindspore::lite::RET_OK) { - delete session; - std::cerr << "Compile failed while running." << std::endl; - return nullptr; - } - - // Note: when use model->Free(), the model can not be compiled again. - if (model != nullptr) { - model->Free(); - } - return session; -} - -int CompileAndRun(int argc, const char **argv) { +int QuickStart(int argc, const char **argv) { if (argc < 2) { std::cerr << "Model file must be provided.\n"; return -1; @@ -177,7 +116,7 @@ int CompileAndRun(int argc, const char **argv) { // Read model file. auto model_path = RealPath(argv[1]); if (model_path.empty()) { - std::cerr << "model path " << argv[1] << " is invalid."; + std::cerr << "Model path " << argv[1] << " is invalid."; return -1; } size_t size = 0; @@ -186,33 +125,74 @@ int CompileAndRun(int argc, const char **argv) { std::cerr << "Read model file failed." << std::endl; return -1; } - // Load the .ms model. - auto model = mindspore::lite::Model::Import(model_buf, size); - delete[](model_buf); + + // Create and init context, add CPU device info + auto context = std::make_shared(); + if (context == nullptr) { + delete[](model_buf); + std::cerr << "New context failed." << std::endl; + return -1; + } + auto &device_list = context->MutableDeviceInfo(); + auto device_info = std::make_shared(); + if (device_info == nullptr) { + delete[](model_buf); + std::cerr << "New CPUDeviceInfo failed." << std::endl; + return -1; + } + device_list.push_back(device_info); + + // Create model + auto model = new (std::nothrow) mindspore::Model(); if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; + delete[](model_buf); + std::cerr << "New Model failed." << std::endl; return -1; } - // Compile MindSpore Lite model. 
- auto session = Compile(model); - if (session == nullptr) { + // Build model + auto build_ret = model->Build(model_buf, size, mindspore::kMindIR, context); + delete[](model_buf); + if (build_ret != mindspore::kSuccess) { delete model; - std::cerr << "Create session failed." << std::endl; + std::cerr << "Build model failed." << std::endl; return -1; } - // Run inference. - auto ret = Run(session); - if (ret != mindspore::lite::RET_OK) { + + // Get Input + auto inputs = model->GetInputs(); + // Generate random data as input data. + auto ret = GenerateInputDataWithRandom(inputs); + if (ret != mindspore::kSuccess) { delete model; - delete session; - std::cerr << "MindSpore Lite run failed." << std::endl; + std::cerr << "Generate Random Input Data failed." << std::endl; return -1; } - // Delete model buffer. + // Get Output + auto outputs = model->GetOutputs(); + + // Model Predict + auto predict_ret = model->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Predict error " << ret << std::endl; + return ret; + } + + // Print Output Tensor Data. + for (auto tensor : outputs) { + std::cout << "tensor name is:" << tensor.Name() << " tensor size is:" << tensor.DataSize() + << " tensor elements num is:" << tensor.ElementNum() << std::endl; + auto out_data = reinterpret_cast(tensor.Data().get()); + std::cout << "output data is:"; + for (int i = 0; i < tensor.ElementNum() && i <= 50; i++) { + std::cout << out_data[i] << " "; + } + std::cout << std::endl; + } + + // Delete model. delete model; - // Delete session buffer. 
- delete session; - return mindspore::lite::RET_OK; + return mindspore::kSuccess; } -int main(int argc, const char **argv) { return CompileAndRun(argc, argv); } +int main(int argc, const char **argv) { return QuickStart(argc, argv); } diff --git a/mindspore/lite/examples/runtime_cpp/build.sh b/mindspore/lite/examples/runtime_cpp/build.sh index 4fafbfc8922..75b9553d11e 100644 --- a/mindspore/lite/examples/runtime_cpp/build.sh +++ b/mindspore/lite/examples/runtime_cpp/build.sh @@ -54,7 +54,7 @@ checkopts() continue elif [[ "X${DEVICE}" == "Xnpu" ]]; then MINDSPORE_FILE_NAME="mindspore-lite-${VERSION_STR}-android-aarch64" - MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/${MINDSPORE_FILE}" + MINDSPORE_LITE_DOWNLOAD_URL="https://ms-release.obs.cn-north-4.myhuaweicloud.com/${VERSION_STR}/MindSpore/lite/release/android/npu/${MINDSPORE_FILE}" SUPPORT_NPU="on" else echo "Unknown DEVICE option ${OPTARG}!" @@ -89,10 +89,10 @@ if [ ! 
-e ${BASEPATH}/build/${MINDSPORE_FILE} ]; then wget -c -O ${BASEPATH}/build/${MINDSPORE_FILE} --no-check-certificate ${MINDSPORE_LITE_DOWNLOAD_URL} fi tar xzvf ${BASEPATH}/build/${MINDSPORE_FILE} -C ${BASEPATH}/build/ -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/lib/libmindspore-lite.a ${BASEPATH}/lib -cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/include ${BASEPATH}/ +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/lib/libmindspore-lite.a ${BASEPATH}/lib +cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/include ${BASEPATH}/ if [[ "X${DEVICE}" == "Xnpu" ]]; then - cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/inference/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib + cp -r ${BASEPATH}/build/${MINDSPORE_FILE_NAME}/runtime/third_party/hiai_ddk/lib/*.so ${BASEPATH}/lib fi cd ${BASEPATH}/build || exit cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \ diff --git a/mindspore/lite/examples/runtime_cpp/main.cc b/mindspore/lite/examples/runtime_cpp/main.cc index 564f16fccf8..cef2f4845a6 100644 --- a/mindspore/lite/examples/runtime_cpp/main.cc +++ b/mindspore/lite/examples/runtime_cpp/main.cc @@ -20,11 +20,11 @@ #include #include #include -#include "include/errorcode.h" -#include "include/model.h" -#include "include/context.h" -#include "include/lite_session.h" -#include "include/version.h" +#include "include/api/allocator.h" +#include "include/api/model.h" +#include "include/api/context.h" +#include "include/api/types.h" +#include "include/api/serialization.h" std::string RealPath(const char *path) { const size_t max = 4096; @@ -99,218 +99,231 @@ void GenerateRandomData(int size, void *data, Distribution distribution) { [&]() { return static_cast(distribution(random_engine)); }); } -std::shared_ptr CreateCPUContext() { - auto context = std::make_shared(); - if (context == nullptr) { - std::cerr << "New context failed while running." 
<< std::endl; +std::shared_ptr CreateCPUDeviceInfo() { + auto device_info = std::make_shared(); + if (device_info == nullptr) { + std::cerr << "New CPUDeviceInfo failed." << std::endl; return nullptr; } - // Configure the number of worker threads in the thread pool to 2, including the main thread. - context->thread_num_ = 2; - // CPU device context has default values. - auto &cpu_device_info = context->device_list_[0].device_info_.cpu_device_info_; - // The large core takes priority in thread and core binding methods. This parameter will work in the BindThread - // interface. For specific binding effect, see the "Run Graph" section. - cpu_device_info.cpu_bind_mode_ = mindspore::lite::HIGHER_CPU; // Use float16 operator as priority. - cpu_device_info.enable_float16_ = true; - return context; + device_info->SetEnableFP16(true); + return device_info; } -std::shared_ptr CreateGPUContext() { - auto context = std::make_shared(); - if (context == nullptr) { - std::cerr << "New context failed while running. " << std::endl; +std::shared_ptr CreateGPUDeviceInfo() { + auto device_info = std::make_shared(); + if (device_info == nullptr) { + std::cerr << "New GPUDeviceInfo failed." << std::endl; return nullptr; } - - // If GPU device context is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on + // If GPU device info is set. The preferred backend is GPU, which means, if there is a GPU operator, it will run on // the GPU first, otherwise it will run on the CPU. - mindspore::lite::DeviceContext gpu_device_ctx{mindspore::lite::DT_GPU, {false}}; // GPU use float16 operator as priority. - gpu_device_ctx.device_info_.gpu_device_info_.enable_float16_ = true; - // The GPU device context needs to be push_back into device_list to work. 
- context->device_list_.push_back(gpu_device_ctx); - return context; + device_info->SetEnableFP16(true); + return device_info; } -std::shared_ptr CreateNPUContext() { - auto context = std::make_shared(); - if (context == nullptr) { - std::cerr << "New context failed while running. " << std::endl; +std::shared_ptr CreateNPUDeviceInfo() { + auto device_info = std::make_shared(); + if (device_info == nullptr) { + std::cerr << "New KirinNPUDeviceInfo failed." << std::endl; return nullptr; } - mindspore::lite::DeviceContext npu_device_ctx{mindspore::lite::DT_NPU}; - npu_device_ctx.device_info_.npu_device_info_.frequency_ = 3; - // The NPU device context needs to be push_back into device_list to work. - context->device_list_.push_back(npu_device_ctx); - return context; + device_info->SetFrequency(3); + return device_info; } -int GetInputsAndSetData(mindspore::session::LiteSession *session) { - auto inputs = session->GetInputs(); - +mindspore::Status GetInputsAndSetData(mindspore::Model *model) { + auto inputs = model->GetInputs(); // The model has only one input tensor. auto in_tensor = inputs.front(); if (in_tensor == nullptr) { std::cerr << "Input tensor is nullptr" << std::endl; - return -1; + return mindspore::kLiteNullptr; } - auto input_data = in_tensor->MutableData(); + auto input_data = in_tensor.MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." 
<< std::endl; - return -1; + return mindspore::kLiteNullptr; } - GenerateRandomData(in_tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); - - return 0; + GenerateRandomData(in_tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + return mindspore::kSuccess; } -int GetInputsByTensorNameAndSetData(mindspore::session::LiteSession *session) { - auto in_tensor = session->GetInputsByTensorName("graph_input-173"); +mindspore::Status GetInputsByTensorNameAndSetData(mindspore::Model *model) { + auto in_tensor = model->GetInputByTensorName("graph_input-173"); if (in_tensor == nullptr) { std::cerr << "Input tensor is nullptr" << std::endl; - return -1; + return mindspore::kLiteNullptr; } - auto input_data = in_tensor->MutableData(); + auto input_data = in_tensor.MutableData(); if (input_data == nullptr) { std::cerr << "MallocData for inTensor failed." << std::endl; - return -1; + return mindspore::kLiteNullptr; } - GenerateRandomData(in_tensor->Size(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); - return 0; + GenerateRandomData(in_tensor.DataSize(), input_data, std::uniform_real_distribution(0.1f, 1.0f)); + return mindspore::kSuccess; } -void GetOutputsByNodeName(mindspore::session::LiteSession *session) { +void GetOutputsByNodeName(mindspore::Model *model) { // model has a output node named output_node_name_0. - auto output_vec = session->GetOutputsByNodeName("Softmax-65"); + auto output_vec = model->GetOutputsByNodeName("Softmax-65"); // output node named output_node_name_0 has only one output tensor. auto out_tensor = output_vec.front(); if (out_tensor == nullptr) { std::cerr << "Output tensor is nullptr" << std::endl; return; } - std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum() + std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum() << std::endl; // The model output data is float 32. 
- if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) { + if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor->MutableData()); + auto out_data = reinterpret_cast(out_tensor.MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) { + for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } -void GetOutputByTensorName(mindspore::session::LiteSession *session) { +void GetOutputByTensorName(mindspore::Model *model) { // We can use GetOutputTensorNames method to get all name of output tensor of model which is in order. - auto tensor_names = session->GetOutputTensorNames(); - // Use output tensor name returned by GetOutputTensorNames as key + auto tensor_names = model->GetOutputTensorNames(); for (const auto &tensor_name : tensor_names) { - auto out_tensor = session->GetOutputByTensorName(tensor_name); + auto out_tensor = model->GetOutputByTensorName(tensor_name); if (out_tensor == nullptr) { std::cerr << "Output tensor is nullptr" << std::endl; return; } - std::cout << "tensor size is:" << out_tensor->Size() << " tensor elements num is:" << out_tensor->ElementsNum() + std::cout << "tensor size is:" << out_tensor.DataSize() << " tensor elements num is:" << out_tensor.ElementNum() << std::endl; // The model output data is float 32. 
- if (out_tensor->data_type() != mindspore::TypeId::kNumberTypeFloat32) { + if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor->MutableData()); + auto out_data = reinterpret_cast(out_tensor.MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor->ElementsNum() && i < 10; i++) { + for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } } -void GetOutputs(mindspore::session::LiteSession *session) { - auto out_tensors = session->GetOutputs(); +void GetOutputs(mindspore::Model *model) { + auto out_tensors = model->GetOutputs(); for (auto out_tensor : out_tensors) { - std::cout << "tensor name is:" << out_tensor.first << " tensor size is:" << out_tensor.second->Size() - << " tensor elements num is:" << out_tensor.second->ElementsNum() << std::endl; + std::cout << "tensor name is:" << out_tensor.Name() << " tensor size is:" << out_tensor.DataSize() + << " tensor elements num is:" << out_tensor.ElementNum() << std::endl; // The model output data is float 32. 
- if (out_tensor.second->data_type() != mindspore::TypeId::kNumberTypeFloat32) { + if (out_tensor.DataType() != mindspore::DataType::kNumberTypeFloat32) { std::cerr << "Output should in float32" << std::endl; return; } - auto out_data = reinterpret_cast(out_tensor.second->MutableData()); + auto out_data = reinterpret_cast(out_tensor.MutableData()); if (out_data == nullptr) { std::cerr << "Data of out_tensor is nullptr" << std::endl; return; } std::cout << "output data is:"; - for (int i = 0; i < out_tensor.second->ElementsNum() && i < 10; i++) { + for (int i = 0; i < out_tensor.ElementNum() && i < 10; i++) { std::cout << out_data[i] << " "; } std::cout << std::endl; } } -mindspore::session::LiteSession *CreateSessionAndCompileByModel(mindspore::lite::Model *model) { - // Create and init CPU context. - // If you need to use GPU or NPU, you can refer to CreateGPUContext() or CreateNPUContext(). - auto context = CreateCPUContext(); +mindspore::Model *CreateAndBuildModel(char *model_buf, size_t model_size) { + // Create and init context, add CPU device info + auto context = std::make_shared(); if (context == nullptr) { - std::cerr << "New context failed while." << std::endl; + std::cerr << "New context failed." << std::endl; return nullptr; } - - // Create the session. - mindspore::session::LiteSession *session = mindspore::session::LiteSession::CreateSession(context.get()); - if (session == nullptr) { - std::cerr << "CreateSession failed while running." << std::endl; + auto &device_list = context->MutableDeviceInfo(); + // If you need to use GPU or NPU, you can refer to CreateGPUDeviceInfo() or CreateNPUDeviceInfo(). + auto cpu_device_info = CreateCPUDeviceInfo(); + if (cpu_device_info == nullptr) { + std::cerr << "Create CPUDeviceInfo failed." << std::endl; return nullptr; } + device_list.push_back(cpu_device_info); - // Compile graph. 
- auto ret = session->CompileGraph(model); - if (ret != mindspore::lite::RET_OK) { - delete session; - std::cerr << "Compile failed while running." << std::endl; + // Create model + auto model = new (std::nothrow) mindspore::Model(); + if (model == nullptr) { + std::cerr << "New Model failed." << std::endl; return nullptr; } - - return session; + // Build model + auto build_ret = model->Build(model_buf, model_size, mindspore::kMindIR, context); + if (build_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Build model failed." << std::endl; + return nullptr; + } + return model; } -mindspore::session::LiteSession *CreateSessionAndCompileByModelBuffer(char *model_buf, size_t size) { - auto context = std::make_shared(); +mindspore::Model *CreateAndBuildModelComplicated(char *model_buf, size_t size) { + // Create and init context, add CPU device info + auto context = std::make_shared(); if (context == nullptr) { - std::cerr << "New context failed while running" << std::endl; + std::cerr << "New context failed." << std::endl; return nullptr; } - // Use model buffer and context to create Session. - auto session = mindspore::session::LiteSession::CreateSession(model_buf, size, context.get()); - if (session == nullptr) { - std::cerr << "CreateSession failed while running" << std::endl; + auto &device_list = context->MutableDeviceInfo(); + auto cpu_device_info = CreateCPUDeviceInfo(); + if (cpu_device_info == nullptr) { + std::cerr << "Create CPUDeviceInfo failed." << std::endl; return nullptr; } - return session; + device_list.push_back(cpu_device_info); + + // Load graph + mindspore::Graph graph; + auto load_ret = mindspore::Serialization::Load(model_buf, size, mindspore::kMindIR, &graph); + if (load_ret != mindspore::kSuccess) { + std::cerr << "Load graph failed." << std::endl; + return nullptr; + } + + // Create model + auto model = new (std::nothrow) mindspore::Model(); + if (model == nullptr) { + std::cerr << "New Model failed." 
<< std::endl; + return nullptr; + } + // Build model + mindspore::GraphCell graph_cell(graph); + auto build_ret = model->Build(graph_cell, context); + if (build_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Build model failed." << std::endl; + return nullptr; + } + return model; } -int ResizeInputsTensorShape(mindspore::session::LiteSession *session) { - auto inputs = session->GetInputs(); - std::vector resize_shape = {1, 128, 128, 3}; +mindspore::Status ResizeInputsTensorShape(mindspore::Model *model) { + auto inputs = model->GetInputs(); + std::vector resize_shape = {1, 128, 128, 3}; // Assume the model has only one input,resize input shape to [1, 128, 128, 3] - std::vector> new_shapes; + std::vector> new_shapes; new_shapes.push_back(resize_shape); - return session->Resize(inputs, new_shapes); + return model->Resize(inputs, new_shapes); } int Run(const char *model_path) { @@ -321,47 +334,40 @@ int Run(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - // Load the .ms model. - auto model = mindspore::lite::Model::Import(model_buf, size); + + // Create and Build MindSpore model. + auto model = CreateAndBuildModel(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; + std::cerr << "Create and build model failed." << std::endl; return -1; } - // Compile MindSpore Lite model. - auto session = CreateSessionAndCompileByModel(model); - if (session == nullptr) { - delete model; - std::cerr << "Create session failed." << std::endl; - return -1; - } - - // Note: when use model->Free(), the model can not be compiled again. - model->Free(); // Set inputs data. 
// You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(session); - - session->BindThread(true); - auto ret = session->RunGraph(); - if (ret != mindspore::lite::RET_OK) { + auto generate_input_ret = GetInputsByTensorNameAndSetData(model); + if (generate_input_ret != mindspore::kSuccess) { delete model; - delete session; - std::cerr << "Inference error " << ret << std::endl; - return ret; + std::cerr << "Set input data error " << generate_input_ret << std::endl; + return -1; + } + + auto inputs = model->GetInputs(); + auto outputs = model->GetOutputs(); + auto predict_ret = model->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Predict error " << predict_ret << std::endl; + return -1; } - session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(session); + GetOutputsByNodeName(model); - // Delete model buffer. + // Delete model. delete model; - // Delete session buffer. - delete session; return 0; } @@ -372,57 +378,52 @@ int RunResize(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - // Load the .ms model. - auto model = mindspore::lite::Model::Import(model_buf, size); + + // Create and Build MindSpore model. + auto model = CreateAndBuildModel(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; - return -1; - } - // Compile MindSpore Lite model. - auto session = CreateSessionAndCompileByModel(model); - if (session == nullptr) { - delete model; - std::cerr << "Create session failed." << std::endl; + std::cerr << "Create and build model failed." << std::endl; return -1; } // Resize inputs tensor shape. 
- auto ret = ResizeInputsTensorShape(session); - if (ret != mindspore::lite::RET_OK) { + auto resize_ret = ResizeInputsTensorShape(model); + if (resize_ret != mindspore::kSuccess) { delete model; - delete session; - std::cerr << "Resize input tensor shape error." << ret << std::endl; - return ret; + std::cerr << "Resize input tensor shape error." << resize_ret << std::endl; + return -1; } // Set inputs data. // You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(session); - - session->BindThread(true); - ret = session->RunGraph(); - if (ret != mindspore::lite::RET_OK) { + auto generate_input_ret = GetInputsByTensorNameAndSetData(model); + if (generate_input_ret != mindspore::kSuccess) { delete model; - delete session; - std::cerr << "Inference error " << ret << std::endl; - return ret; + std::cerr << "Set input data error " << generate_input_ret << std::endl; + return -1; + } + + auto inputs = model->GetInputs(); + auto outputs = model->GetOutputs(); + auto predict_ret = model->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Predict error " << predict_ret << std::endl; + return -1; } - session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(session); + GetOutputsByNodeName(model); - // Delete model buffer. + // Delete model. delete model; - // Delete session buffer. - delete session; return 0; } -int RunCreateSessionSimplified(const char *model_path) { +int RunCreateModelComplicated(const char *model_path) { size_t size = 0; char *model_buf = ReadFile(model_path, &size); if (model_buf == nullptr) { @@ -430,86 +431,93 @@ int RunCreateSessionSimplified(const char *model_path) { return -1; } - // Compile MindSpore Lite model. 
- auto session = CreateSessionAndCompileByModelBuffer(model_buf, size); - if (session == nullptr) { - std::cerr << "Create session failed." << std::endl; - return -1; - } - - // Set inputs data. - // You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(session); - - session->BindThread(true); - auto ret = session->RunGraph(); - if (ret != mindspore::lite::RET_OK) { - delete session; - std::cerr << "Inference error " << ret << std::endl; - return ret; - } - session->BindThread(false); - - // Get outputs data. - // You can also get output through other methods, - // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(session); - - // Delete session buffer. - delete session; - return 0; -} - -int RunSessionParallel(const char *model_path) { - size_t size = 0; - char *model_buf = ReadFile(model_path, &size); - if (model_buf == nullptr) { - std::cerr << "Read model file failed." << std::endl; - return -1; - } - // Load the .ms model. - auto model = mindspore::lite::Model::Import(model_buf, size); + // Create and Build MindSpore model. + auto model = CreateAndBuildModelComplicated(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; - return -1; - } - // Compile MindSpore Lite model. - auto session1 = CreateSessionAndCompileByModel(model); - if (session1 == nullptr) { - delete model; - std::cerr << "Create session failed." << std::endl; + std::cerr << "Create and build model failed." << std::endl; return -1; } - // Compile MindSpore Lite model. - auto session2 = CreateSessionAndCompileByModel(model); - if (session2 == nullptr) { + // Set inputs data. 
+ // You can also get input through other methods, and you can refer to GetInputsAndSetData() + auto generate_input_ret = GetInputsByTensorNameAndSetData(model); + if (generate_input_ret != mindspore::kSuccess) { delete model; - std::cerr << "Create session failed." << std::endl; + std::cerr << "Set input data error " << generate_input_ret << std::endl; + return -1; + } + + auto inputs = model->GetInputs(); + auto outputs = model->GetOutputs(); + auto predict_ret = model->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Predict error " << predict_ret << std::endl; + return -1; + } + + // Get outputs data. + // You can also get output through other methods, + // and you can refer to GetOutputByTensorName() or GetOutputs(). + GetOutputsByNodeName(model); + + // Delete model. + delete model; + return 0; +} + +int RunModelParallel(const char *model_path) { + size_t size = 0; + char *model_buf = ReadFile(model_path, &size); + if (model_buf == nullptr) { + std::cerr << "Read model file failed." << std::endl; + return -1; + } + + // Create and Build MindSpore model. + auto model1 = CreateAndBuildModel(model_buf, size); + auto model2 = CreateAndBuildModel(model_buf, size); + delete[](model_buf); + if (model1 == nullptr || model2 == nullptr) { + std::cerr << "Create and build model failed." << std::endl; return -1; } - // Note: when use model->Free(), the model can not be compiled again. 
- model->Free(); std::thread thread1([&]() { - GetInputsByTensorNameAndSetData(session1); - auto status = session1->RunGraph(); - if (status != 0) { - std::cerr << "Inference error " << status << std::endl; - return; + auto generate_input_ret = GetInputsByTensorNameAndSetData(model1); + if (generate_input_ret != mindspore::kSuccess) { + std::cerr << "Model1 set input data error " << generate_input_ret << std::endl; + return -1; } - std::cout << "Session1 inference success" << std::endl; + + auto inputs = model1->GetInputs(); + auto outputs = model1->GetOutputs(); + auto predict_ret = model1->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + std::cerr << "Model1 predict error " << predict_ret << std::endl; + return -1; + } + std::cout << "Model1 predict success" << std::endl; + return 0; }); std::thread thread2([&]() { - GetInputsByTensorNameAndSetData(session2); - auto status = session2->RunGraph(); - if (status != 0) { - std::cerr << "Inference error " << status << std::endl; - return; + auto generate_input_ret = GetInputsByTensorNameAndSetData(model2); + if (generate_input_ret != mindspore::kSuccess) { + std::cerr << "Model2 set input data error " << generate_input_ret << std::endl; + return -1; } - std::cout << "Session2 inference success" << std::endl; + + auto inputs = model2->GetInputs(); + auto outputs = model2->GetOutputs(); + auto predict_ret = model2->Predict(inputs, &outputs); + if (predict_ret != mindspore::kSuccess) { + std::cerr << "Model2 predict error " << predict_ret << std::endl; + return -1; + } + std::cout << "Model2 predict success" << std::endl; + return 0; }); thread1.join(); @@ -518,17 +526,12 @@ int RunSessionParallel(const char *model_path) { // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). 
- GetOutputsByNodeName(session1); - GetOutputsByNodeName(session2); + GetOutputsByNodeName(model1); + GetOutputsByNodeName(model2); - // Delete model buffer. - if (model != nullptr) { - delete model; - model = nullptr; - } - // Delete session buffer. - delete session1; - delete session2; + // Delete model. + delete model1; + delete model2; return 0; } @@ -539,93 +542,103 @@ int RunWithSharedMemoryPool(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - auto model = mindspore::lite::Model::Import(model_buf, size); - delete[](model_buf); - if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; - return -1; - } - auto context1 = std::make_shared(); + auto context1 = std::make_shared(); if (context1 == nullptr) { - delete model; - std::cerr << "New context failed while running." << std::endl; + std::cerr << "New context failed." << std::endl; return -1; } - auto session1 = mindspore::session::LiteSession::CreateSession(context1.get()); - if (session1 == nullptr) { - delete model; - std::cerr << "CreateSession failed while running." << std::endl; + auto &device_list1 = context1->MutableDeviceInfo(); + auto device_info1 = CreateCPUDeviceInfo(); + if (device_info1 == nullptr) { + std::cerr << "Create CPUDeviceInfo failed." << std::endl; return -1; } - auto ret = session1->CompileGraph(model); - if (ret != mindspore::lite::RET_OK) { - delete model; - delete session1; - std::cerr << "Compile failed while running." << std::endl; + device_list1.push_back(device_info1); + + auto model1 = new (std::nothrow) mindspore::Model(); + if (model1 == nullptr) { + delete[](model_buf); + std::cerr << "New Model failed." << std::endl; + return -1; + } + auto build_ret = model1->Build(model_buf, size, mindspore::kMindIR, context1); + if (build_ret != mindspore::kSuccess) { + delete[](model_buf); + delete model1; + std::cerr << "Build model failed." 
<< std::endl; return -1; } - auto context2 = std::make_shared(); + auto context2 = std::make_shared(); if (context2 == nullptr) { - delete model; - std::cerr << "New context failed while running." << std::endl; + delete[](model_buf); + delete model1; + std::cerr << "New context failed." << std::endl; + return -1; + } + auto &device_list2 = context2->MutableDeviceInfo(); + auto device_info2 = CreateCPUDeviceInfo(); + if (device_info2 == nullptr) { + delete[](model_buf); + delete model1; + std::cerr << "Create CPUDeviceInfo failed." << std::endl; return -1; } // Use the same allocator to share the memory pool. - context2->allocator = context1->allocator; + device_info2->SetAllocator(device_info1->GetAllocator()); + device_list2.push_back(device_info2); - auto session2 = mindspore::session::LiteSession::CreateSession(context2.get()); - if (session2 == nullptr) { - delete model; - delete session1; - std::cerr << "CreateSession failed while running " << std::endl; + auto model2 = new (std::nothrow) mindspore::Model(); + if (model2 == nullptr) { + delete[](model_buf); + delete model1; + std::cerr << "New Model failed." << std::endl; return -1; } - - ret = session2->CompileGraph(model); - if (ret != mindspore::lite::RET_OK) { - delete model; - delete session1; - delete session2; - std::cerr << "Compile failed while running " << std::endl; + build_ret = model2->Build(model_buf, size, mindspore::kMindIR, context2); + delete[](model_buf); + if (build_ret != mindspore::kSuccess) { + delete model1; + delete model2; + std::cerr << "Build model failed." << std::endl; return -1; } - // Note: when use model->Free(), the model can not be compiled again. - model->Free(); - // Set inputs data. 
// You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(session1); - GetInputsByTensorNameAndSetData(session2); + GetInputsByTensorNameAndSetData(model1); + GetInputsByTensorNameAndSetData(model2); - ret = session1->RunGraph(); - if (ret != mindspore::lite::RET_OK) { - std::cerr << "Inference error " << ret << std::endl; - return ret; + auto inputs1 = model1->GetInputs(); + auto outputs1 = model1->GetOutputs(); + auto predict_ret = model1->Predict(inputs1, &outputs1); + if (predict_ret != mindspore::kSuccess) { + delete model1; + delete model2; + std::cerr << "Inference error " << predict_ret << std::endl; + return -1; } - ret = session2->RunGraph(); - if (ret != mindspore::lite::RET_OK) { - delete model; - delete session1; - delete session2; - std::cerr << "Inference error " << ret << std::endl; - return ret; + auto inputs2 = model2->GetInputs(); + auto outputs2 = model2->GetOutputs(); + predict_ret = model2->Predict(inputs2, &outputs2); + if (predict_ret != mindspore::kSuccess) { + delete model1; + delete model2; + std::cerr << "Inference error " << predict_ret << std::endl; + return -1; } // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(session1); - GetOutputsByNodeName(session2); + GetOutputsByNodeName(model1); + GetOutputsByNodeName(model2); - // Delete model buffer. - delete model; - // Delete session buffer. - delete session1; - delete session2; + // Delete model. + delete model1; + delete model2; return 0; } @@ -636,62 +649,56 @@ int RunCallback(const char *model_path) { std::cerr << "Read model file failed." << std::endl; return -1; } - // Load the .ms model. - auto model = mindspore::lite::Model::Import(model_buf, size); + + // Create and Build MindSpore model. 
+ auto model = CreateAndBuildModel(model_buf, size); delete[](model_buf); if (model == nullptr) { - std::cerr << "Import model file failed." << std::endl; - return -1; - } - // Compile MindSpore Lite model. - auto session = CreateSessionAndCompileByModel(model); - if (session == nullptr) { delete model; - std::cerr << "Create session failed." << std::endl; + std::cerr << "Create model failed." << std::endl; return -1; } - // Note: when use model->Free(), the model can not be compiled again. - model->Free(); - // Set inputs data. // You can also get input through other methods, and you can refer to GetInputsAndSetData() - GetInputsByTensorNameAndSetData(session); + auto generate_input_ret = GetInputsByTensorNameAndSetData(model); + if (generate_input_ret != mindspore::kSuccess) { + delete model; + std::cerr << "Set input data error " << generate_input_ret << std::endl; + return -1; + } // Definition of callback function before forwarding operator. - auto before_call_back = [](const std::vector &before_inputs, - const std::vector &before_outputs, - const mindspore::CallBackParam &call_param) { - std::cout << "Before forwarding " << call_param.node_name << " " << call_param.node_type << std::endl; + auto before_call_back = [](const std::vector &before_inputs, + const std::vector &before_outputs, + const mindspore::MSCallBackParam &call_param) { + std::cout << "Before forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl; return true; }; // Definition of callback function after forwarding operator. 
- auto after_call_back = [](const std::vector &after_inputs, - const std::vector &after_outputs, - const mindspore::CallBackParam &call_param) { - std::cout << "After forwarding " << call_param.node_name << " " << call_param.node_type << std::endl; + auto after_call_back = [](const std::vector &after_inputs, + const std::vector &after_outputs, + const mindspore::MSCallBackParam &call_param) { + std::cout << "After forwarding " << call_param.node_name_ << " " << call_param.node_type_ << std::endl; return true; }; - session->BindThread(true); - auto ret = session->RunGraph(before_call_back, after_call_back); - if (ret != mindspore::lite::RET_OK) { + auto inputs = model->GetInputs(); + auto outputs = model->GetOutputs(); + auto predict_ret = model->Predict(inputs, &outputs, before_call_back, after_call_back); + if (predict_ret != mindspore::kSuccess) { delete model; - delete session; - std::cerr << "Inference error " << ret << std::endl; - return ret; + std::cerr << "Predict error " << predict_ret << std::endl; + return -1; } - session->BindThread(false); // Get outputs data. // You can also get output through other methods, // and you can refer to GetOutputByTensorName() or GetOutputs(). - GetOutputsByNodeName(session); + GetOutputsByNodeName(model); - // Delete model buffer. + // Delete model. delete model; - // Delete session buffer. - delete session; return 0; } @@ -699,16 +706,15 @@ int main(int argc, const char **argv) { if (argc < 3) { std::cerr << "Usage: ./runtime_cpp model_path Option" << std::endl; std::cerr << "Example: ./runtime_cpp ../model/mobilenetv2.ms 0" << std::endl; - std::cerr << "When your Option is 0, you will run MindSpore Lite inference." << std::endl; - std::cerr << "When your Option is 1, you will run MindSpore Lite inference with resize." << std::endl; - std::cerr << "When your Option is 2, you will run MindSpore Lite inference with CreateSession simplified API." 
- << std::endl; - std::cerr << "When your Option is 3, you will run MindSpore Lite inference with session parallel." << std::endl; - std::cerr << "When your Option is 4, you will run MindSpore Lite inference with shared memory pool." << std::endl; - std::cerr << "When your Option is 5, you will run MindSpore Lite inference with callback." << std::endl; + std::cerr << "When your Option is 0, you will run MindSpore Lite predict." << std::endl; + std::cerr << "When your Option is 1, you will run MindSpore Lite predict with resize." << std::endl; + std::cerr << "When your Option is 2, you will run MindSpore Lite predict with complicated API." << std::endl; + std::cerr << "When your Option is 3, you will run MindSpore Lite predict with model parallel." << std::endl; + std::cerr << "When your Option is 4, you will run MindSpore Lite predict with shared memory pool." << std::endl; + std::cerr << "When your Option is 5, you will run MindSpore Lite predict with callback." << std::endl; return -1; } - std::string version = mindspore::lite::Version(); + std::string version = mindspore::Version(); std::cout << "MindSpore Lite Version is " << version << std::endl; auto model_path = RealPath(argv[1]); if (model_path.empty()) { @@ -721,9 +727,9 @@ int main(int argc, const char **argv) { } else if (strcmp(flag, "1") == 0) { return RunResize(model_path.c_str()); } else if (strcmp(flag, "2") == 0) { - return RunCreateSessionSimplified(model_path.c_str()); + return RunCreateModelComplicated(model_path.c_str()); } else if (strcmp(flag, "3") == 0) { - return RunSessionParallel(model_path.c_str()); + return RunModelParallel(model_path.c_str()); } else if (strcmp(flag, "4") == 0) { return RunWithSharedMemoryPool(model_path.c_str()); } else if (strcmp(flag, "5") == 0) { diff --git a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh index b34469175e8..66557812f01 100755 --- 
a/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh +++ b/mindspore/lite/examples/train_lenet_java/prepare_and_run.sh @@ -75,6 +75,10 @@ LD_LIBRARY_PATH=${MSLITE_LINUX}/tools/converter/lib/:${MSLITE_LINUX}/tools/conve EXPORT=${EXPORT} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} CONVERTER=${CONVERTER} ./prepare_model.sh $DOCKER || exit 1 cd ../ +if [ "$TARBALL" != "" ]; then + rm -rf build +fi + cd target || exit 1 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:../lib/ java -Djava.library.path=../lib/ -classpath .:./train_lenet_java.jar:../lib/mindspore-lite-java.jar com.mindspore.lite.train_lenet.Main ../model/lenet_tod.ms $MNIST_DATA_PATH 1 diff --git a/mindspore/lite/include/registry/kernel_interface.h b/mindspore/lite/include/registry/kernel_interface.h index 0988c3f2395..4ca4d05cb74 100644 --- a/mindspore/lite/include/registry/kernel_interface.h +++ b/mindspore/lite/include/registry/kernel_interface.h @@ -27,12 +27,6 @@ namespace mindspore { namespace kernel { -/// \brief CapabilityParam defined performance of op when running. -struct MS_API CapabilityParam { - float exec_time_; /**< op running time argument */ - float power_usage_; /**< op power waste argument */ -}; - /// \brief KernelInterface defined customized op's interface, such as infershape, and so on. class MS_API KernelInterface { public: @@ -50,18 +44,6 @@ class MS_API KernelInterface { const schema::Primitive *primitive) { return 0; } - - /// \brief Method to get performance of an op when running. - /// - /// \param[in] tensor_in Define the input tensors of op. - /// \param[in] primitive Define the attributes of op. - /// \param[in] param Define the contr of performance. - /// - /// \return STATUS as an error code of inferring, STATUS is defined in errorcode.h. - virtual int GetCapability(const std::vector &tensor_in, const schema::Primitive *primitive, - CapabilityParam *param) { - return 0; - } }; /// \brief KernelInterfaceCreator defined a functor to create KernelInterface. 
diff --git a/mindspore/lite/include/registry/register_kernel.h b/mindspore/lite/include/registry/register_kernel.h index 1c521b78352..21289bfd77f 100644 --- a/mindspore/lite/include/registry/register_kernel.h +++ b/mindspore/lite/include/registry/register_kernel.h @@ -29,26 +29,6 @@ namespace mindspore { namespace kernel { -/// \brief KernelDesc defined kernel's basic attribute. -struct MS_API KernelDesc { - TypeId data_type; /**< kernel data type argument */ - int type; /**< op type argument */ - std::string arch; /**< deviceType argument */ - std::string provider; /**< user identification argument */ - - bool operator<(const KernelDesc &dst) const { - if (provider != dst.provider) { - return provider < dst.provider; - } else if (arch != dst.arch) { - return arch < dst.arch; - } else if (data_type != dst.data_type) { - return data_type < dst.data_type; - } else { - return type < dst.type; - } - } -}; - /// \brief CreateKernel Defined a functor to create a kernel. /// /// \param[in] inputs Define input tensors of kernel. @@ -87,14 +67,6 @@ class MS_API RegisterKernel { /// \return STATUS as an error code of registering, STATUS is defined in errorcode.h. static int RegCustomKernel(const std::string &arch, const std::string &provider, TypeId data_type, const std::string &type, CreateKernel creator); - - /// \brief Static methon to get a kernel's create function. - /// - /// \param[in] desc Define kernel's basic attribute. - /// \param[in] primitive Define the attributes of op. - /// - /// \return Function pointer to create a kernel. - static CreateKernel GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc); }; /// \brief KernelReg Defined registration class of kernel. 
diff --git a/mindspore/lite/micro/cmake/file_list.cmake b/mindspore/lite/micro/cmake/file_list.cmake index 4e52a61d853..86543f44cdc 100644 --- a/mindspore/lite/micro/cmake/file_list.cmake +++ b/mindspore/lite/micro/cmake/file_list.cmake @@ -139,6 +139,7 @@ set(LITE_SRC ${LITE_DIR}/src/registry/kernel_interface.cc ${LITE_DIR}/src/registry/kernel_interface_registry.cc ${LITE_DIR}/src/registry/register_kernel.cc + ${LITE_DIR}/src/registry/register_utils.cc ${LITE_DIR}/src/registry/register_kernel_impl.cc ${LITE_DIR}/src/lite_model.cc ${LITE_DIR}/src/ms_tensor.cc diff --git a/mindspore/lite/micro/coder/generator/component/weight_component.cc b/mindspore/lite/micro/coder/generator/component/weight_component.cc index ab8fb428bcf..6b377b540e7 100644 --- a/mindspore/lite/micro/coder/generator/component/weight_component.cc +++ b/mindspore/lite/micro/coder/generator/component/weight_component.cc @@ -158,5 +158,4 @@ void SaveDataToNet(const std::map &saved_weights, const s } net.close(); } - } // namespace mindspore::lite::micro diff --git a/mindspore/lite/micro/coder/graph.cc b/mindspore/lite/micro/coder/graph.cc index 1e7a9c7f21c..4044fc4eb3b 100644 --- a/mindspore/lite/micro/coder/graph.cc +++ b/mindspore/lite/micro/coder/graph.cc @@ -30,8 +30,11 @@ namespace mindspore::lite::micro { CoderGraph::~CoderGraph() { - model_->Free(); - delete model_; + if (model_ != nullptr) { + model_->Free(); + delete model_; + model_ = nullptr; + } for (auto &tensor : all_tensors_) { delete tensor; } diff --git a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc index 11abe860696..1aed5bb1e21 100644 --- a/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/cmsis-nn/int8/conv2d_int8_coder.cc @@ -147,15 +147,16 @@ void Conv2DInt8Coder::CheckSupportOptimize() { } int Conv2DInt8Coder::InitTmpBuffer() { + const size_t kPartial = 2; switch 
(opt_) { case Basic: buffer_size_ = - static_cast(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * + static_cast(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t); break; case Convolve_1_x_n: buffer_size_ = - static_cast(2 * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * + static_cast(kPartial * input_tensor_->Channel() * filter_tensor_->Width() * filter_tensor_->Height()) * sizeof(int16_t); break; case Convolve_1x1_fast: diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc index b6e86dd4af5..79a52ac0d0a 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/addn_fp32_coder.cc @@ -38,10 +38,8 @@ int AddNFP32Coder::DoCode(CoderContext *const context) { }); NNaclFp32Serializer code; code.CodeFunction("ElementAdd", input0, input1, output_tensor_, elements_num); - if (input_tensors_.size() > 2) { - for (size_t i = 2; i < input_tensors_.size(); ++i) { - code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num); - } + for (size_t i = 2; i < input_tensors_.size(); ++i) { + code.CodeFunction("ElementAdd", input_tensors_.at(i), output_tensor_, elements_num); } context->AppendCode(code.str()); return RET_OK; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc index 11725e88b71..55f0a6e5023 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/batchnorm_fp32_coder.cc @@ -53,6 +53,8 @@ int BatchnormFP32Coder::DoCode(CoderContext *const context) { MS_CHECK_TRUE(input_tensors_.size() == DIMENSION_3D, "inputs size is not equal to three"); Tensor *mean_tensor = 
input_tensors_.at(1); Tensor *var_tensor = input_tensors_.at(kInputSize1); + MS_CHECK_PTR(mean_tensor); + MS_CHECK_PTR(var_tensor); Collect(context, { "nnacl/fp32/batchnorm.h", diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc index d179eb0b4d8..b2b689facae 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/biasadd_fp32_coder.cc @@ -55,6 +55,7 @@ int BiasAddFP32Coder::DoCode(CoderContext *ctx) { arithmetic_parameter_->broadcasting_ = false; arithmetic_parameter_->ndim_ = dims.size(); arithmetic_parameter_->activation_type_ = 0; + MS_CHECK_TRUE(dims.size() <= DIMENSION_10D, "dims.size() must not be greater than 10!"); for (size_t i = 0; i < dims.size(); i++) { arithmetic_parameter_->in_shape0_[i] = dims[i]; } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc index 782e1d961f0..3d974db3259 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_depthwise_fp32_coder.cc @@ -23,7 +23,7 @@ namespace mindspore::lite::micro::nnacl { int ConvolutionDepthwiseFP32Coder::Prepare(CoderContext *const context) { - Conv2DBaseCoder::Init(); + MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2DBaseCoder::Init() failed!"); MS_CHECK_RET_CODE(InitWeightBias(), "dwconvolution do init weightbais failed"); conv_param_->thread_num_ = MSMIN(thread_num_, conv_param_->output_h_); return RET_OK; @@ -83,5 +83,4 @@ int ConvolutionDepthwiseFP32Coder::DoCode(CoderContext *const context) { context->AppendCode(code.str()); return RET_OK; } - } // namespace mindspore::lite::micro::nnacl diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc 
b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc index b16b0b402b8..c1c223515db 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/convolution_winograd_fp32_coder.cc @@ -146,8 +146,8 @@ int ConvolutionWinogradFP32Coder::InitWeightBias() { if (input_unit_ == DIMENSION_8D) { coef = 0.5f; } - CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); - + ret = CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); + MS_CHECK_RET_CODE(ret, "CookToomFilter failed!"); auto out_channel_size = static_cast(out_channel); auto weight_data = reinterpret_cast(filter_tensor_->MutableData()); ret = WinogradFilterTransform(weight_data, matrix_g, matrix_gt, oc_block); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc index ed623b01f32..522629049b9 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/full_connection_fp32_coder.cc @@ -28,6 +28,7 @@ int FullConnectionFP32Coder::ReSize() { } params_->row_ = row; params_->col_ = output_tensor_->shape().back(); + MS_CHECK_TRUE(filter_tensor_->shape().size() >= DIMENSION_2D, "filter_tensor_->shape().size() < DIMENSION_2D"); params_->deep_ = filter_tensor_->shape().at(1); return MatMulFP32BaseCoder::ReSize(); } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc index 9e049a31b03..25f044176c6 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/gather_fp32_coder.cc @@ -30,7 +30,8 @@ int 
GatherFP32Coder::Prepare(CoderContext *const context) { return RET_OK; } int GatherFP32Coder::DoCode(CoderContext *context) { Tensor *input0 = input_tensors_.at(0); Tensor *input1 = input_tensors_.at(1); - + MS_CHECK_PTR(input0); + MS_CHECK_PTR(input1); // generate code .h .c Collect(context, { @@ -42,8 +43,9 @@ int GatherFP32Coder::DoCode(CoderContext *context) { NNaclFp32Serializer code; std::vector in_shape = input0->shape(); - int in_rank = in_shape.size(); + int in_rank = static_cast(in_shape.size()); int indices_element_size = input1->ElementsNum(); + MS_CHECK_PTR(parameter_); int axis = (reinterpret_cast(parameter_))->axis_; MS_CHECK_TRUE(static_cast(in_shape.size()) >= axis, "invalid axis in gather parameter"); const int limit = in_shape.at(axis); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc index ce13ba8c52c..13cfc7ac7be 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/matmul_fp32_base_coder.cc @@ -30,7 +30,9 @@ using mindspore::schema::PrimitiveType_MatMul; namespace mindspore::lite::micro::nnacl { int MatMulFP32BaseCoder::ReSize() { ResizeParameter(); + MS_CHECK_TRUE(params_->col_align_ != 0, "params_->col_align_ = 0"); thread_count_ = MSMIN(thread_num_, UP_DIV(params_->col_align_, col_tile_)); + MS_CHECK_TRUE(thread_count_ != 0, "thread_count_ = 0"); thread_stride_ = UP_DIV(UP_DIV(params_->col_align_, col_tile_), thread_count_); // can not call Malloc in DoCode,so move this runtime init to final resize if (!params_->a_const_) { diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc index fbc1adef9ac..c79bc6dad7f 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc +++ 
b/mindspore/lite/micro/coder/opcoders/nnacl/fp32/softmax_fp32_coder.cc @@ -24,7 +24,8 @@ using mindspore::schema::PrimitiveType_Softmax; namespace mindspore::lite::micro::nnacl { int SoftMaxFP32Coder::Prepare(CoderContext *const context) { - SoftmaxBaseCoder::Init(); + auto ret = SoftmaxBaseCoder::Init(); + MS_CHECK_RET_CODE(ret, "SoftmaxBaseCoder::Init() failed!"); // malloc tmp buffer int n_dim = softmax_param_->n_dim_; int32_t axis = softmax_param_->axis_; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc index 076bbf6c492..4df09b2b9d5 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/conv2d_int8_coder.cc @@ -165,7 +165,7 @@ int Conv2DINT8Coder::InitWeightBias(CoderContext *const context) { } int Conv2DINT8Coder::Prepare(CoderContext *const context) { - Conv2DBaseCoder::Init(); + MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed."); CheckSupportOptimize(); MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed!"); MS_CHECK_RET_CODE(InitWeightBias(context), "Init weight bias failed."); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc index ee7a7277f19..00bd0993fa6 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/convolution_depthwise_int8_coder.cc @@ -24,7 +24,7 @@ namespace mindspore::lite::micro { int ConvolutionDepthwiseINT8Coder::Prepare(CoderContext *const context) { - Conv2DBaseCoder::Init(); + MS_CHECK_RET_CODE(Conv2DBaseCoder::Init(), "Conv2d base init failed."); // init sliding window param MS_CHECK_RET_CODE(SetQuantParam(), "Set quant param failed."); MS_CHECK_RET_CODE(InitWeightBias(context), "dwconvolution do init 
weightbais failed"); diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc index fe11a943568..00974b29eaa 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.cc @@ -69,7 +69,7 @@ int ReduceInt8Coder::CalculateQuantArgs() { QuantizeMultiplierSmallerThanOne(prod_multiplier, &qm->multiplier_, &shift); qm->left_shift_ = shift < 0 ? -shift : 0; qm->right_shift_ = shift > 0 ? shift : 0; - mean_multipliers_.push_back(qm); + prod_multipliers_.push_back(qm); } } diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h index 24fc4564168..bd9d05dfb94 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/reduce_int8_coder.h @@ -30,7 +30,21 @@ class ReduceInt8Coder final : public ReduceBaseCoder { const Model::Node *node, size_t node_index, Target target) : ReduceBaseCoder(in_tensors, out_tensors, node, node_index, target) {} - ~ReduceInt8Coder() override { begin_src_data_ = nullptr; } + ~ReduceInt8Coder() override { + begin_src_data_ = nullptr; + for (auto &arg : mean_multipliers_) { + delete arg; + arg = nullptr; + } + for (auto &arg : prod_multipliers_) { + delete arg; + arg = nullptr; + } + for (auto &arg : sum_square_multipliers_) { + delete arg; + arg = nullptr; + } + } int Prepare(CoderContext *const context) override; int DoCode(CoderContext *const context) override; diff --git a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc index 49727fd4d62..764ebbc8cd5 100644 --- a/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc +++ b/mindspore/lite/micro/coder/opcoders/nnacl/int8/softmax_int8_coder.cc @@ 
-29,7 +29,7 @@ using mindspore::schema::PrimitiveType_Softmax; namespace mindspore::lite::micro::nnacl { int SoftMaxInt8Coder::Prepare(CoderContext *const context) { - SoftmaxBaseCoder::Init(); + MS_CHECK_RET_CODE(SoftmaxBaseCoder::Init(), "Softmax base init failed."); std::vector in_quant_args = input_tensor_->quant_params(); quant_params_.in_quant_args_.scale_ = in_quant_args.at(0).scale; quant_params_.in_quant_args_.zp_ = -in_quant_args.at(0).zeroPoint; @@ -59,8 +59,7 @@ int SoftMaxInt8Coder::Prepare(CoderContext *const context) { sum_data_size_ = inner_size * sizeof(int); sum_data_ = static_cast(allocator_->Malloc(kNumberTypeInt32, sum_data_size_, kWorkspace)); MS_CHECK_PTR(sum_data_); - ReSize(); - return RET_OK; + return ReSize(); } int SoftMaxInt8Coder::DoCode(CoderContext *const context) { diff --git a/mindspore/lite/micro/coder/train.cc b/mindspore/lite/micro/coder/train.cc index 16f873e01c2..320efe1b66e 100644 --- a/mindspore/lite/micro/coder/train.cc +++ b/mindspore/lite/micro/coder/train.cc @@ -55,6 +55,10 @@ std::set FindInferenceOpcoders(OperatorCoder *edge) { } int Train::TransformGraphForTrain(CoderContext *context, const std::vector> &op_coders) { + if (context == nullptr) { + MS_LOG(INFO) << "input context invalid"; + return RET_ERROR; + } const std::array loss_types = {schema::PrimitiveType_SparseSoftmaxCrossEntropyWithLogits, schema::PrimitiveType_BinaryCrossEntropy, schema::PrimitiveType_SmoothL1Loss, diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c index bee2c6e35e9..adb59ac25b8 100644 --- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c +++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.c @@ -20,11 +20,12 @@ extern void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, in const int *input_sum, const int *bias); extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, 
int row8, int col8, int deep4, const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, - int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, - size_t peroc); + const int *multiplier, const int *left_shift, const int *right_shift, int row, int col, + int stride, size_t peroc); extern void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, - const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, - int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, + const int *multiplier, const int *left_shift, const int *right_shift, size_t stride, + size_t peroc, const int *filter_zp); #ifdef ENABLE_ARM64 void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, @@ -33,16 +34,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i } void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) { return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); } void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - 
int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, + const int32_t *filter_zp) { return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, stride, per_channel, filter_zp); } diff --git a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h index 40e82acbaba..bc76939aa85 100644 --- a/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h +++ b/mindspore/lite/micro/coder/wrapper/base/optimize_handler_wrapper.h @@ -29,13 +29,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const 
int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, + const int32_t *filter_zp); #endif #endif // MINDSPORE_LITE_MICRO_CODER_OPERATOR_LIBRARY_OPTIMIZE_HANDLER_WRAPPER_H_ diff --git a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c index 959d03a8a34..f4ffc047bf4 100644 --- a/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c +++ b/mindspore/lite/micro/coder/wrapper/int8/conv1x1_init_int8_wrapper.c @@ -35,7 +35,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 memset(packed_weight_, 0, size); RowMajor2Row2x16MajorInt8(src_weight, packed_weight_, output_channel, input_channel); /* bias */ - size = UP_ROUND(output_channel, C2NUM); + size = (size_t)UP_ROUND(output_channel, C2NUM); int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t)); if (bias_data_ == NULL) { free(packed_weight_); @@ -43,7 +43,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 } memset(bias_data_, 0, size * sizeof(int32_t)); if (src_bias != NULL) { - memcpy(bias_data_, src_bias, output_channel * sizeof(int32_t)); + memcpy(bias_data_, src_bias, (size_t)output_channel * sizeof(int32_t)); } #else /* InitWeightBias */ @@ -65,6 +65,7 @@ int Conv1x1Init(int8_t *src_weight, int32_t *src_bias, int32_t *filter_zps, int3 int32_t *bias_data_ = (int32_t *)malloc(size * sizeof(int32_t)); if (bias_data_ == NULL) { free(packed_weight_); + packed_weight_ = NULL; return NNACL_ERR; } memset(bias_data_, 0, size * sizeof(int32_t)); diff --git a/mindspore/lite/minddata/example/CMakeLists.txt b/mindspore/lite/minddata/example/CMakeLists.txt index 70b9129e45b..f4403ea5d05 100644 --- a/mindspore/lite/minddata/example/CMakeLists.txt +++ b/mindspore/lite/minddata/example/CMakeLists.txt @@ -4,8 +4,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -fPIC -std=c++17") 
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare") -set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime") -set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.2.0-linux-x64/runtime/include/dataset") +set(MS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime") +set(LITECV_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.3.0-linux-x64/runtime/include/dataset") include_directories(${MS_DIR} ${LITECV_DIR}) diff --git a/mindspore/lite/minddata/example/testlitecv.cpp b/mindspore/lite/minddata/example/testlitecv.cpp index bb67161485a..cd62148ea1d 100644 --- a/mindspore/lite/minddata/example/testlitecv.cpp +++ b/mindspore/lite/minddata/example/testlitecv.cpp @@ -58,16 +58,26 @@ int main(int argc, char **argv) { auto executor = Execute(decode); executor(image, &image); - LiteMat lite_mat_rgb(image.Shape()[1], image.Shape()[0], image.Shape()[2], const_cast(image.Data().get()), - LDataType::UINT8); + constexpr int32_t image_h = 0; + constexpr int32_t image_w = 1; + constexpr int32_t image_c = 2; + LiteMat lite_mat_rgb(image.Shape()[image_w], image.Shape()[image_h], image.Shape()[image_c], + const_cast(image.Data().get()), LDataType::UINT8); std::cout << "lite_mat_rgb: height=" << lite_mat_rgb.height_ << ", width=" << lite_mat_rgb.width_ << std::endl; - LiteMat lite_mat_resize; - ResizeBilinear(lite_mat_rgb, lite_mat_resize, 256, 256); + LiteMat lite_mat_resize; + constexpr target_size = 256; + ResizeBilinear(lite_mat_rgb, lite_mat_resize, target_size, target_size); std::cout << "lite_mat_resize: height=" << lite_mat_resize.height_ << ", width=" << lite_mat_resize.width_ << std::endl; LiteMat lite_mat_pad; - Pad(lite_mat_resize, lite_mat_pad, 30, 30, 10, 10, PaddBorderType::PADD_BORDER_CONSTANT, 255, 255, 255); + constexpr int32_t pad_top = 30; + constexpr int32_t pad_bottom = 30; + constexpr int32_t pad_left = 10; + constexpr int32_t pad_right = 10; + constexpr int32_t pad_color = 255; + Pad(lite_mat_resize, 
lite_mat_pad, pad_top, pad_bottom, pad_left, pad_right, PaddBorderType::PADD_BORDER_CONSTANT, + pad_color, pad_color, pad_color); std::cout << "lite_mat_pad: height=" << lite_mat_pad.height_ << ", width=" << lite_mat_pad.width_ << std::endl; } diff --git a/mindspore/lite/minddata/wrapper/MDToDApi.cc b/mindspore/lite/minddata/wrapper/MDToDApi.cc index b05007dfd50..cca50a99a64 100644 --- a/mindspore/lite/minddata/wrapper/MDToDApi.cc +++ b/mindspore/lite/minddata/wrapper/MDToDApi.cc @@ -269,10 +269,6 @@ extern "C" int MDToDApi_GetNext(MDToDApi *pMDToDApi, MDToDResult_t *results) { MS_LOG(INFO) << "Start GetNext [1]" << pMDToDApi; // get next row for dataset std::unordered_map> row; - if (pMDToDApi->_iter == nullptr) { - MS_LOG(ERROR) << "GetNext called with no iteratoe. abort"; - return -1; - } // create Execute functions, this replaces Map in Pipeline bool ret = pMDToDApi->_iter->GetNextRow(&row); diff --git a/mindspore/lite/minddata/wrapper/album_op_android.cc b/mindspore/lite/minddata/wrapper/album_op_android.cc index 48d040a96b5..472ce0a1305 100644 --- a/mindspore/lite/minddata/wrapper/album_op_android.cc +++ b/mindspore/lite/minddata/wrapper/album_op_android.cc @@ -177,7 +177,7 @@ bool AlbumOp::IsReadColumn(const std::string &column_name) { return false; } -Status AlbumOp::LoadImageTensor(const std::string &image_file_path, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadImageTensor(const std::string &image_file_path, int32_t col_num, TensorPtr *tensor) { TensorPtr image; TensorPtr rotate_tensor; std::ifstream fs; @@ -257,7 +257,7 @@ int AlbumOp::GetOrientation(const std::string &folder_path) { return code; } -Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { std::vector data = json_obj.get>(); MS_LOG(INFO) << "String array label found: " << data << "."; @@ -265,7 +265,7 @@ Status 
AlbumOp::LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t c return Status::OK(); } -Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { std::string data = json_obj; // now we iterate over the elements in json @@ -275,7 +275,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_nu return Status::OK(); } -Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints if (data_schema_->column(col_num).type() == DataType::DE_INT64) { std::vector data; @@ -302,7 +302,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_ return Status::OK(); } -Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { std::vector data; @@ -329,7 +329,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t co return Status::OK(); } -Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor) { if (data_schema_->column(col_num).type() == DataType::DE_STRING) { RETURN_IF_NOT_OK(Tensor::CreateScalar(file, tensor)); return Status::OK(); @@ -341,7 +341,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, uint32_t col_num, TensorPt return Status::OK(); } -Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) { +Status 
AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) { // hack to get the file name without extension, the 1 is to get rid of the backslash character RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor)); return Status::OK(); @@ -351,7 +351,7 @@ Status AlbumOp::LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor) { // So we actually have to check what type we want to fill the tensor with. // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to // only be float32, seems like a weird limitation to impose -Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { double data = json_obj; MS_LOG(INFO) << "double found: " << json_obj << "."; @@ -365,7 +365,7 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num } // Loads a tensor with int value, we have to cast the value to type specified in the schema. 
-Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor) { +Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { if (data_schema_->column(col_num).type() == DataType::DE_INT64) { int64_t data = json_obj; MS_LOG(INFO) << "int64 found: " << json_obj << "."; diff --git a/mindspore/lite/minddata/wrapper/album_op_android.h b/mindspore/lite/minddata/wrapper/album_op_android.h index 10d74d073ca..226ba66c9a4 100644 --- a/mindspore/lite/minddata/wrapper/album_op_android.h +++ b/mindspore/lite/minddata/wrapper/album_op_android.h @@ -93,62 +93,62 @@ class AlbumOp { /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorPtr *tensor); + Status LoadImageTensor(const std::string &image_file, int32_t col_num, TensorPtr *tensor); /// \brief Load vector of ints to tensor, append tensor to tensor /// \param[in] json_obj Json object containing multi-dimensional label /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load vector of floatss to tensor, append tensor to tensor /// \param[in] json_obj Json object containing array data /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load string array into a tensor, append tensor to tensor /// \param[in] json_obj Json object containing string tensor 
/// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadStringArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load string into a tensor, append tensor to tensor /// \param[in] json_obj Json object containing string tensor /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load float value to tensor /// \param[in] json_obj Json object containing float /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load int value to tensor /// \param[in] json_obj Json object containing int /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorPtr *tensor); + Status LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor); /// \brief Load empty tensor to tensor /// \param[in] col_num Column num in schema /// \param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadEmptyTensor(uint32_t col_num, TensorPtr *tensor); + Status LoadEmptyTensor(int32_t col_num, TensorPtr *tensor); /// \brief Load id from file name to tensor /// \param[in] file The file name to get ID from /// \param[in] col_num Column num in schema /// 
\param[in,out] Tensor to push to /// \return Status The error code returned - Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorPtr *tensor); + Status LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor); /// \brief Load a tensor according to a json file /// \param[in] row_id_type row_id - id for this tensor row diff --git a/mindspore/lite/schema/ops.fbs b/mindspore/lite/schema/ops.fbs index ded169b7171..e1721611ed2 100644 --- a/mindspore/lite/schema/ops.fbs +++ b/mindspore/lite/schema/ops.fbs @@ -220,6 +220,7 @@ union PrimitiveType { Affine, Attention, LSTMGrad, + ScatterNdUpdate, } table Abs { @@ -1212,3 +1213,6 @@ table Affine { table Attention { } + +table ScatterNdUpdate { +} diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 571714c701b..49e33521311 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -76,9 +76,9 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/delegate/delegate.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/inner_allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/infer_manager.cc + ${CMAKE_CURRENT_SOURCE_DIR}/runtime/runtime_pass.cc ${CMAKE_CURRENT_SOURCE_DIR}/tensor.cc ${CMAKE_CURRENT_SOURCE_DIR}/ms_tensor.cc - ${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc ${CMAKE_CURRENT_SOURCE_DIR}/executor.cc ${CMAKE_CURRENT_SOURCE_DIR}/inner_context.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_model.cc @@ -87,14 +87,30 @@ set(LITE_SRC ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_kernel_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_kernel.cc - ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc ${CMAKE_CURRENT_SOURCE_DIR}/scheduler.cc ${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc ${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc ${CMAKE_CURRENT_SOURCE_DIR}/weight_decoder.cc - ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc ${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc ) +if(MSLITE_CONTROL_TENSORLIST) + set(LITE_SRC + ${LITE_SRC} + 
${CMAKE_CURRENT_SOURCE_DIR}/tensorlist.cc + ) +endif() +if(MSLITE_HUFFMAN_DECODE) + set(LITE_SRC + ${LITE_SRC} + ${CMAKE_CURRENT_SOURCE_DIR}/huffman_decode.cc + ) +endif() +if(MSLITE_AUTO_PARALLEL) + set(LITE_SRC + ${LITE_SRC} + ${CMAKE_CURRENT_SOURCE_DIR}/sub_graph_split.cc + ) +endif() file(GLOB KERNEL_REG_SRC ${CMAKE_CURRENT_SOURCE_DIR}/registry/*.cc) set(LITE_SRC ${LITE_SRC} ${KERNEL_REG_SRC}) @@ -133,6 +149,7 @@ set(TRAIN_SRC ${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc ${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc ${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc + ${CMAKE_CURRENT_SOURCE_DIR}/train/opt_allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc ) if(ENABLE_V0) diff --git a/mindspore/lite/src/common/dynamic_library_loader.cc b/mindspore/lite/src/common/dynamic_library_loader.cc index de180f221d3..d27705dfbc2 100644 --- a/mindspore/lite/src/common/dynamic_library_loader.cc +++ b/mindspore/lite/src/common/dynamic_library_loader.cc @@ -28,7 +28,7 @@ namespace mindspore { namespace lite { -int DynamicLibraryLoader::Open(std::string lib_path) { +int DynamicLibraryLoader::Open(const std::string &lib_path) { if (handler_ != nullptr) { return RET_ERROR; } @@ -46,7 +46,7 @@ int DynamicLibraryLoader::Open(std::string lib_path) { return RET_OK; } -void *DynamicLibraryLoader::GetFunc(std::string func_name) { +void *DynamicLibraryLoader::GetFunc(const std::string &func_name) { #ifndef _WIN32 return dlsym(handler_, func_name.c_str()); #else diff --git a/mindspore/lite/src/common/dynamic_library_loader.h b/mindspore/lite/src/common/dynamic_library_loader.h index 2d07dff0fb6..d5771df81f7 100644 --- a/mindspore/lite/src/common/dynamic_library_loader.h +++ b/mindspore/lite/src/common/dynamic_library_loader.h @@ -25,8 +25,8 @@ class DynamicLibraryLoader { public: DynamicLibraryLoader() = default; ~DynamicLibraryLoader(); - int Open(std::string lib_path); - void *GetFunc(std::string func_name); + int Open(const 
std::string &lib_path); + void *GetFunc(const std::string &func_name); int Close(); private: diff --git a/mindspore/lite/src/common/log_adapter.h b/mindspore/lite/src/common/log_adapter.h index 4c773102f18..39c6b9fbefb 100644 --- a/mindspore/lite/src/common/log_adapter.h +++ b/mindspore/lite/src/common/log_adapter.h @@ -16,6 +16,20 @@ #ifndef MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_ #define MINDSPORE_LITE_SRC_COMMON_LOG_ADAPTER_H_ +namespace mindspore { +const char *const unsupport_string_tensor_log = + "This mindspore-lite library does not support string tensors. Set environment variable MSLITE_STRING_KERNEL to on to " + "recompile it."; +const char *const unsupport_control_tensorlist_log = + "This mindspore-lite library does not support control and tensorlist op. Set environment variable " + "MSLITE_CONTROL_TENSORLIST to on to recompile it."; +const char *const unsupport_auto_parallel_log = + "The mindspore-lite library does not support auto parallel. Set environment variable MSLITE_AUTO_PARALLEL to on to " + "recompile it."; +const char *const unsupport_huffman_decode_log = + "The mindspore-lite library does not support huffman decode. 
Set environment variable MSLITE_HUFFMAN_DECODE to on to " + "recompile it."; +} // namespace mindspore #ifdef USE_GLOG #include "utils/log_adapter.h" #else diff --git a/mindspore/lite/src/common/string_util.cc b/mindspore/lite/src/common/string_util.cc index 23a781d2d77..a890c7fd506 100644 --- a/mindspore/lite/src/common/string_util.cc +++ b/mindspore/lite/src/common/string_util.cc @@ -20,6 +20,7 @@ namespace mindspore { namespace lite { +#ifdef ENABLE_STRING_KERNEL std::vector ParseTensorBuffer(Tensor *tensor) { if (tensor == nullptr) { MS_LOG(ERROR) << "tensor is nullptr."; @@ -52,10 +53,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector &string_b MS_LOG(ERROR) << "tensor is nullptr."; return RET_ERROR; } - int32_t num = string_buffer.size(); + size_t num = string_buffer.size(); std::vector offset(num + 1); offset[0] = 4 * (num + 2); - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { offset[i + 1] = offset[i] + string_buffer[i].len; } std::vector shape = {offset[num]}; @@ -71,10 +72,10 @@ int WriteStringsToTensor(Tensor *tensor, const std::vector &string_b char *string_data = reinterpret_cast(data); string_info[0] = num; - for (int i = 0; i <= num; i++) { + for (size_t i = 0; i <= num; i++) { string_info[i + 1] = offset[i]; } - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { memcpy(string_data + offset[i], string_buffer[i].data, string_buffer[i].len); } return RET_OK; @@ -85,11 +86,11 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector offset(num + 1); offset[0] = 4 * (num + 2); std::vector len(num); - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { len[i] = 0; for (int j = 0; j < static_cast(string_buffer[i].size()); j++) { len[i] += string_buffer[i][j].len; @@ -109,10 +110,10 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector(data); string_info[0] = num; - for (int i = 0; i <= num; i++) { + for (size_t i = 0; i <= num; i++) { string_info[i + 1] = 
offset[i]; } - for (int i = 0; i < num; i++) { + for (size_t i = 0; i < num; i++) { auto *dst = string_data + offset[i]; for (auto string_part : string_buffer[i]) { memcpy(dst, string_part.data, string_part.len); @@ -132,32 +133,6 @@ int GetStringCount(Tensor *tensor) { return GetStringCount(tensor->MutableData()); } -int StringsToMSTensor(const std::vector &inputs, tensor::MSTensor *tensor) { - if (tensor == nullptr) { - return RET_PARAM_INVALID; - } - std::vector all_pack; - for (auto &input : inputs) { - StringPack pack = {static_cast(input.length()), input.data()}; - all_pack.push_back(pack); - } - return WriteStringsToTensor(static_cast(tensor), all_pack); -} - -std::vector MSTensorToStrings(const tensor::MSTensor *tensor) { - if (tensor == nullptr) { - return {""}; - } - const void *ptr = static_cast(tensor)->data_c(); - std::vector all_pack = ParseStringBuffer(ptr); - std::vector result(all_pack.size()); - std::transform(all_pack.begin(), all_pack.end(), result.begin(), [](StringPack &pack) { - std::string str(pack.data, pack.len); - return str; - }); - return result; -} - // Some primes between 2^63 and 2^64 namespace { static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; @@ -302,5 +277,41 @@ uint64_t StringHash64(const char *s, size_t len) { return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, HashLen16(v.second, w.second, mul) + x, mul); } +#endif +int StringsToMSTensor(const std::vector &inputs, tensor::MSTensor *tensor) { +#ifdef ENABLE_STRING_KERNEL + if (tensor == nullptr) { + return RET_PARAM_INVALID; + } + std::vector all_pack; + for (auto &input : inputs) { + StringPack pack = {static_cast(input.length()), input.data()}; + all_pack.push_back(pack); + } + return WriteStringsToTensor(static_cast(tensor), all_pack); +#else + MS_LOG(ERROR) << unsupport_string_tensor_log; + return RET_ERROR; +#endif +} + +std::vector MSTensorToStrings(const tensor::MSTensor *tensor) { +#ifdef ENABLE_STRING_KERNEL + if (tensor == nullptr) { + return 
{""}; + } + const void *ptr = static_cast(tensor)->data_c(); + std::vector all_pack = ParseStringBuffer(ptr); + std::vector result(all_pack.size()); + std::transform(all_pack.begin(), all_pack.end(), result.begin(), [](StringPack &pack) { + std::string str(pack.data, pack.len); + return str; + }); + return result; +#else + MS_LOG(ERROR) << unsupport_string_tensor_log; + return {""}; +#endif +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/string_util.h b/mindspore/lite/src/common/string_util.h index 8811ff00e2a..52ea90ca23f 100644 --- a/mindspore/lite/src/common/string_util.h +++ b/mindspore/lite/src/common/string_util.h @@ -16,7 +16,6 @@ #ifndef MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_ #define MINDSPORE_LITE_SRC_COMMON_STRING_UTIL_H_ - #include #include #include @@ -26,6 +25,7 @@ #include "include/errorcode.h" #include "include/lite_utils.h" +#ifdef ENABLE_STRING_KERNEL namespace mindspore { namespace lite { typedef struct StringPack { @@ -47,9 +47,8 @@ int WriteSeperatedStringsToTensor(Tensor *tensor, const std::vector *tensors_in) { if (i == nullptr) { continue; } +#ifdef ENABLE_CONTROL_TENSORLIST if (i->data_type_ == kObjectTypeTensorType) { TensorListC *tensorListC = reinterpret_cast(i); FreeTensorListC(tensorListC); tensorListC = nullptr; } else { +#endif free(i); i = nullptr; +#ifdef ENABLE_CONTROL_TENSORLIST } +#endif } tensors_in->clear(); } -void FreeTensorListC(TensorListC *tensorlist_c) { - MS_ASSERT(tensorlist_c != nullptr); - if (tensorlist_c->tensors_ != nullptr) { - free(tensorlist_c->tensors_); - tensorlist_c->tensors_ = nullptr; - } - free(tensorlist_c); -} - int Tensor2TensorC(const Tensor *src, TensorC *dst) { dst->is_ready_ = src->IsReady(); dst->format_ = src->format(); @@ -115,6 +110,16 @@ void TensorC2Tensor(const TensorC *src, Tensor *dst) { dst->set_shape(std::vector(src->shape_, src->shape_ + src->shape_size_)); } +#ifdef ENABLE_CONTROL_TENSORLIST +void FreeTensorListC(TensorListC *tensorlist_c) { + 
MS_ASSERT(tensorlist_c != nullptr); + if (tensorlist_c->tensors_ != nullptr) { + free(tensorlist_c->tensors_); + tensorlist_c->tensors_ = nullptr; + } + free(tensorlist_c); +} + int TensorList2TensorListC(TensorList *src, TensorListC *dst) { MS_ASSERT(src != nullptr); MS_ASSERT(dst != nullptr); @@ -172,21 +177,23 @@ int TensorListC2TensorList(const TensorListC *src, TensorList *dst) { return RET_OK; } -int GenerateMergeSwitchOutTensorC(const std::vector &inputs, const std::vector &outputs, +int GenerateMergeSwitchOutTensorC(const std::vector &inputs, int outputs_size, std::vector *out_tensor_c) { MS_ASSERT(out_tensor_c != nullptr); int ret = RET_OK; - for (size_t i = 0; i < outputs.size(); i++) { + for (int i = 0; i < outputs_size; i++) { out_tensor_c->push_back(nullptr); } return ret; } +#endif int GenerateOutTensorC(const OpParameter *const parameter, const std::vector &inputs, const std::vector &outputs, std::vector *out_tensor_c) { MS_ASSERT(out_tensor_c != nullptr); MS_ASSERT(parameter != nullptr); int ret = RET_OK; +#ifdef ENABLE_CONTROL_TENSORLIST if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor || parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve || parameter->type_ == mindspore::schema::PrimitiveType_TensorListSetItem) { @@ -199,10 +206,22 @@ int GenerateOutTensorC(const OpParameter *const parameter, const std::vectorpush_back(reinterpret_cast(tensor_list_c)); } else if (parameter->type_ == mindspore::schema::PrimitiveType_Merge || parameter->type_ == mindspore::schema::PrimitiveType_Switch) { - ret = GenerateMergeSwitchOutTensorC(inputs, outputs, out_tensor_c); + ret = GenerateMergeSwitchOutTensorC(inputs, static_cast(outputs.size()), out_tensor_c); } else { ret = OutputTensor2TensorC(outputs, out_tensor_c); } +#else + if (parameter->type_ == mindspore::schema::PrimitiveType_TensorListFromTensor || + parameter->type_ == mindspore::schema::PrimitiveType_TensorListReserve || + parameter->type_ == 
mindspore::schema::PrimitiveType_TensorListSetItem || + parameter->type_ == mindspore::schema::PrimitiveType_Merge || + parameter->type_ == mindspore::schema::PrimitiveType_Switch) { + MS_LOG(ERROR) << unsupport_control_tensorlist_log; + return RET_ERROR; + } else { + ret = OutputTensor2TensorC(outputs, out_tensor_c); + } +#endif return ret; } @@ -212,6 +231,7 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vectordata_type() == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST // Tensor ->TensorList -> TensorListC -> TensorC auto *tensor_list = reinterpret_cast(input); auto *tensor_list_c = reinterpret_cast(malloc(sizeof(TensorListC))); @@ -222,10 +242,15 @@ int GenerateInTensorC(const OpParameter *const parameter, const std::vectortensors_); free(tensor_list_c); return NNACL_ERR; } in_tensor_c->push_back(reinterpret_cast(tensor_list_c)); +#else + MS_LOG(ERROR) << unsupport_control_tensorlist_log; + return RET_NOT_SUPPORT; +#endif } else { // Tensor -> TensorC auto *tensor_c = reinterpret_cast(malloc(sizeof(TensorC))); diff --git a/mindspore/lite/src/common/tensor_util.h b/mindspore/lite/src/common/tensor_util.h index 46c63a2044c..07c3996b693 100644 --- a/mindspore/lite/src/common/tensor_util.h +++ b/mindspore/lite/src/common/tensor_util.h @@ -30,13 +30,15 @@ namespace lite { int InputTensor2TensorC(const std::vector &tensors_in, std::vector *tensors_out); int OutputTensor2TensorC(const std::vector &tensors_in, std::vector *tensors_out); void FreeAllTensorC(std::vector *tensors_in); -void FreeTensorListC(TensorListC *tensorListC); int Tensor2TensorC(const Tensor *src, TensorC *dst); void TensorC2Tensor(const TensorC *src, Tensor *dst); +#ifdef ENABLE_CONTROL_TENSORLIST +void FreeTensorListC(TensorListC *tensorListC); int TensorList2TensorListC(TensorList *src, TensorListC *dst); int TensorListC2TensorList(const TensorListC *src, TensorList *dst); -int GenerateMergeSwitchOutTensorC(const std::vector &inputs, const std::vector 
&outputs, +int GenerateMergeSwitchOutTensorC(const std::vector &inputs, int output_size, std::vector *out_tensor_c); +#endif int GenerateInTensorC(const OpParameter *const parameter, const std::vector &inputs, const std::vector &outputs, std::vector *in_tensor_c); int GenerateOutTensorC(const OpParameter *const parameter, const std::vector &inputs, diff --git a/mindspore/lite/src/common/utils.cc b/mindspore/lite/src/common/utils.cc index c7baee91eb8..6f3d3e11468 100644 --- a/mindspore/lite/src/common/utils.cc +++ b/mindspore/lite/src/common/utils.cc @@ -26,26 +26,6 @@ namespace mindspore { namespace lite { -std::vector StringSplit(std::string str, const std::string &pattern) { - std::vector result; - if (str.empty()) { - return result; - } - std::string::size_type pos; - str += pattern; - auto size = str.size(); - - for (size_t i = 0; i < size; i++) { - pos = str.find(pattern, i); - if (pos < size) { - std::string s = str.substr(i, pos - i); - result.push_back(s); - i = pos + pattern.size() - 1; - } - } - return result; -} - uint64_t GetTimeUs() { #ifdef SUPPORT_MSVC FILETIME ft; @@ -71,18 +51,22 @@ std::string RemoveSubStr(const std::string &from, const std::string &sub_str, Re MS_LOG(ERROR) << "string is empty"; return ""; } + if (sub_str.length() > from.length()) { + MS_LOG(ERROR) << "sub_str is longer than from"; + return ""; + } if (mode == PREFIX) { if (from.substr(0, sub_str.length()) == sub_str) { - result = from.substr(sub_str.size()); + result = from.substr(sub_str.length()); } } else if (mode == SUFFIX) { - if (from.rfind(sub_str) == from.size() - sub_str.size()) { - result = from.substr(0, from.size() - sub_str.size()); + if (from.rfind(sub_str) == from.length() - sub_str.length()) { + result = from.substr(0, from.length() - sub_str.length()); } } else { size_t index; while ((index = result.find(sub_str)) != std::string::npos) { - result = result.erase(index, sub_str.size()); + result = result.erase(index, sub_str.length()); } } @@ -165,6 +149,5 @@ bool 
IsSupportSDot() { #endif return status; } - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/src/common/utils.h b/mindspore/lite/src/common/utils.h index 2881ed2ab70..aae4ce65eb3 100644 --- a/mindspore/lite/src/common/utils.h +++ b/mindspore/lite/src/common/utils.h @@ -37,8 +37,6 @@ enum NodeType { const int USEC = 1000000; const int MSEC = 1000; -std::vector StringSplit(std::string str, const std::string &pattern); - uint64_t GetTimeUs(); bool IsSupportSDot(); @@ -119,7 +117,7 @@ inline std::string GetFileName(const std::string &path) { char delim = '/'; size_t i = path.rfind(delim, path.length()); - if (i != std::string::npos) { + if (i != std::string::npos && i + 1 < path.length()) { return (path.substr(i + 1, path.length() - i)); } diff --git a/mindspore/lite/src/cxx_api/model/model_impl.cc b/mindspore/lite/src/cxx_api/model/model_impl.cc index f22dd3a2f5e..0d69f65649a 100644 --- a/mindspore/lite/src/cxx_api/model/model_impl.cc +++ b/mindspore/lite/src/cxx_api/model/model_impl.cc @@ -210,6 +210,7 @@ Status ModelImpl::Predict(const std::vector &inputs, std::vectordata()); if (input->data_type() == kObjectTypeString) { +#ifdef ENABLE_STRING_KERNEL std::vector shape = TruncateShape(user_input.Shape(), input->data_type(), user_input.DataSize(), false); if (shape.empty() && !(user_input.Shape().empty())) { ResetTensorData(old_data, input_tensors); @@ -218,6 +219,10 @@ Status ModelImpl::Predict(const std::vector &inputs, std::vectorset_shape(shape); input->set_data(user_input.MutableData()); +#else + MS_LOG(ERROR) << unsupport_string_tensor_log; + return kLiteError; +#endif } else { if (user_input.MutableData() != input->data()) { if (input->Size() != user_input.DataSize()) { @@ -260,7 +265,6 @@ std::vector ModelImpl::GetInputs() { } res.resize(inputs.size()); for (size_t i = 0; i < inputs.size(); i++) { - inputs[i]->MutableData(); // prepare data auto impl = std::shared_ptr(new (std::nothrow) MSTensor::Impl(inputs[i])); if (impl == nullptr || 
impl->lite_tensor() == nullptr) { MS_LOG(ERROR) << "Create tensor failed."; diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc index f7f3ff73924..d12ebd02722 100644 --- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc +++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc @@ -57,6 +57,7 @@ std::shared_ptr MSTensor::Impl::CreateTensorImpl(const std::stri std::shared_ptr MSTensor::Impl::StringsToTensorImpl(const std::string &name, const std::vector &str) { +#ifdef ENABLE_STRING_KERNEL auto lite_tensor = new (std::nothrow) lite::Tensor(); if (lite_tensor == nullptr) { MS_LOG(ERROR) << "Failed to allocate lite tensor."; @@ -78,15 +79,24 @@ std::shared_ptr MSTensor::Impl::StringsToTensorImpl(const std::s impl->set_own_data(true); impl->set_from_session(false); return impl; +#else + MS_LOG(ERROR) << unsupport_string_tensor_log; + return nullptr; +#endif } std::vector MSTensor::Impl::TensorImplToStrings(const std::shared_ptr &impl) { std::vector empty; +#ifdef ENABLE_STRING_KERNEL auto lite_tensor = impl->lite_tensor(); if (lite_tensor == nullptr) { MS_LOG(ERROR) << "Invalid tensor impl."; return empty; } return lite::MSTensorToStrings(lite_tensor); +#else + MS_LOG(ERROR) << unsupport_string_tensor_log; + return empty; +#endif } } // namespace mindspore diff --git a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h index da1c1659b51..f2f197b41a3 100644 --- a/mindspore/lite/src/cxx_api/tensor/tensor_impl.h +++ b/mindspore/lite/src/cxx_api/tensor/tensor_impl.h @@ -204,7 +204,7 @@ class MSTensor::Impl { auto lite_quant_params = lite_tensor_->quant_params(); std::vector quant_params; for (size_t i = 0; i < lite_quant_params.size(); i++) { - QuantParam param; + QuantParam param{}; param.bit_num = lite_quant_params[i].bitNum; param.scale = lite_quant_params[i].scale; param.zero_point = lite_quant_params[i].zeroPoint; @@ -220,11 +220,11 @@ class MSTensor::Impl { } 
std::vector lite_quant_params; for (size_t i = 0; i < quant_params.size(); i++) { - lite::LiteQuantParam lite_arg; - lite_arg.bitNum = quant_params[i].bit_num; - lite_arg.scale = quant_params[i].scale; - lite_arg.zeroPoint = quant_params[i].zero_point; - lite_quant_params.push_back(lite_arg); + lite::LiteQuantParam lite_param{}; + lite_param.bitNum = quant_params[i].bit_num; + lite_param.scale = quant_params[i].scale; + lite_param.zeroPoint = quant_params[i].zero_point; + lite_quant_params.push_back(lite_param); } lite_tensor_->set_quant_params(lite_quant_params); } diff --git a/mindspore/lite/src/delegate/npu/npu_delegate.cc b/mindspore/lite/src/delegate/npu/npu_delegate.cc index 97fc4c936b6..0f5a4dd4632 100644 --- a/mindspore/lite/src/delegate/npu/npu_delegate.cc +++ b/mindspore/lite/src/delegate/npu/npu_delegate.cc @@ -206,6 +206,14 @@ int NPUDelegate::Build(DelegateModel *model) { } NPUOp *NPUDelegate::GetOP(kernel::Kernel *kernel, const schema::Primitive *primitive) { + if (primitive == nullptr) { + MS_LOG(ERROR) << "primitive is NULL!"; + return nullptr; + } + if (kernel == nullptr) { + MS_LOG(ERROR) << "kernel is NULL!"; + return nullptr; + } auto name = kernel->name(); NPUOp *npu_op = nullptr; auto node_type = primitive->value_type(); diff --git a/mindspore/lite/src/delegate/npu/npu_graph.cc b/mindspore/lite/src/delegate/npu/npu_graph.cc index 3a81a50c533..4a924fbaf9a 100644 --- a/mindspore/lite/src/delegate/npu/npu_graph.cc +++ b/mindspore/lite/src/delegate/npu/npu_graph.cc @@ -238,7 +238,7 @@ int NPUGraph::CreateSubgraphFromReadyOps(std::queue *valid_in_ops, std: if ((*is_searched)[op]) { continue; } - if (valid_in_ops->empty()) { + if (!valid_in_ops->empty()) { // use BFS to find out connected input ops FindConnectedOps(op, ready_ops, &connected_ops, is_searched); } else { diff --git a/mindspore/lite/src/delegate/npu/npu_manager.cc b/mindspore/lite/src/delegate/npu/npu_manager.cc index d6606d2ed44..413009039c7 100644 --- 
a/mindspore/lite/src/delegate/npu/npu_manager.cc +++ b/mindspore/lite/src/delegate/npu/npu_manager.cc @@ -80,9 +80,9 @@ bool NPUManager::CheckDDKVersion() { auto client = std::make_shared(); if (client->GetVersion() != nullptr) { std::string version = client->GetVersion(); - int ret = CompareVersion(version, "100.320.010.023"); - if (ret < 0) { - MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.010.023"; + int ret = CompareVersion(version, "100.320.011.018"); + if (ret <= 0) { + MS_LOG(WARNING) << "DDK Version " << version << " less than 100.320.011.018"; return false; } } diff --git a/mindspore/lite/src/delegate/npu/op/resize_npu.cc b/mindspore/lite/src/delegate/npu/op/resize_npu.cc index 6b7d0c9a75d..77a4a1bf9e2 100644 --- a/mindspore/lite/src/delegate/npu/op/resize_npu.cc +++ b/mindspore/lite/src/delegate/npu/op/resize_npu.cc @@ -93,6 +93,7 @@ int ResizeNPUOp::Init(const schema::Primitive *primitive, const std::vectorset_attr_align_corners(resize_prim->coordinate_transform_mode() == schema::CoordinateTransformMode_ALIGN_CORNERS); resize_nearest->set_input_size(*out_size_); + resize_ = resize_nearest; } else { MS_LOG(WARNING) << "Unsupported resize method type:" << resize_method_; return RET_ERROR; diff --git a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc index f81e797efcc..a26c09c2e4a 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc @@ -21,6 +21,10 @@ namespace mindspore::lite { int ActivationTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -62,6 
+66,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } activation_layer->setName(op_name_.c_str()); + activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(activation_layer->getOutput(0)); return RET_OK; diff --git a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc index 994980e5b29..b156b125dd4 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc @@ -20,6 +20,10 @@ namespace mindspore::lite { int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() < 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -41,7 +45,6 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "concate_op convert failed"; return RET_ERROR; } - MS_LOG(INFO) << "in tensort size of concate: " << tensorrt_in_tensors_.size(); if (tensorrt_in_tensors_.size() != in_tensors_.size()) { MS_LOG(ERROR) << "concate_op in tensor is invalid"; return RET_ERROR; @@ -64,6 +67,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { concate_layer->setAxis(axis); } concate_layer->setName(op_name_.c_str()); + concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(concate_layer->getOutput(0)); return RET_OK; diff --git a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc index 4cbfbd3f207..649158a5365 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc +++ 
b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc @@ -24,6 +24,10 @@ constexpr int BIAS_INDEX = 2; int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -53,8 +57,12 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + // transpose weight + const mindspore::MSTensor &weight_tensor = in_tensors_[1]; + nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_); + // conv - int nbOutputMaps = conv_op->out_channel(); + int nbOutputMaps = weight_tensor.Shape()[0]; if (nbOutputMaps <= 0) { MS_LOG(ERROR) << "out_channel is invalid"; return RET_ERROR; @@ -67,9 +75,6 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector(kernel_size->begin(), kernel_size->end())); - // transpose weight - nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_); - // bias nvinfer1::Weights biasWeights{}; if (in_tensors_.size() >= INPUT_SIZE3) { @@ -113,7 +118,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - + transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc index 98d62a5eb9b..8b863ba8349 100644 --- 
a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc @@ -23,6 +23,10 @@ namespace mindspore::lite { int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -51,8 +55,12 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + // transpose weight + const mindspore::MSTensor &weight_tensor = in_tensors_[1]; + nvinfer1::Weights kernelWeights = lite::TransposeWeight(weight_tensor, &pack_weight_); + // deconv basic params - int nbOutputMaps = deconv_op->out_channel(); + int nbOutputMaps = weight_tensor.Shape()[0]; if (nbOutputMaps <= 0) { MS_LOG(ERROR) << "out_channel is invalid"; return RET_ERROR; @@ -65,9 +73,6 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } nvinfer1::Dims kernelSize = lite::ConvertCudaDims(std::vector(kernel_size->begin(), kernel_size->end())); - // transpose weight - nvinfer1::Weights kernelWeights = lite::TransposeWeight(in_tensors_[1], &pack_weight_); - // bias nvinfer1::Weights biasWeights{}; if (in_tensors_.size() >= 3) { @@ -111,7 +116,7 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - + transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc 
b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc index 8f0f2fa2894..2b64aad520c 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc @@ -21,6 +21,10 @@ namespace mindspore::lite { int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } std::map element_wise_ops = { {schema::PrimitiveType_AddFusion, nvinfer1::ElementWiseOperation::kSUM}, {schema::PrimitiveType_PowFusion, nvinfer1::ElementWiseOperation::kPOW}, @@ -61,6 +65,13 @@ int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive, MS_LOG(ERROR) << "invalid output tensort size: " << out_tensors.size(); return RET_ERROR; } + + // if constant tensor is scalar, it needs to know another input tensor's shape to broadcast + if (in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) { + MS_LOG(ERROR) << "invalid all input tensor shape unknown for: " << op_name_; + return RET_ERROR; + } + return RET_OK; } @@ -69,23 +80,25 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "network or input tensor size is invalid"; return RET_ERROR; } - // create ITensor from MS scalar - if (this->in_tensors_[1].Shape().size() == 0) { - nvinfer1::ITensor *scalar_input = - lite::ConvertScalarToITensor(network, this->in_tensors_[0].Shape().size(), this->in_tensors_[1].MutableData()); - if (scalar_input == nullptr) { - MS_LOG(ERROR) << "create Itensor from scalar failed"; - return RET_ERROR; - } - this->AddInnerInTensors(scalar_input); - } + first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 
0 : 1; // add elementwise if (this->tensorrt_in_tensors_.size() != 2) { - MS_LOG(ERROR) << "invalid inner in tensors cnt: " << this->tensorrt_in_tensors_.size(); - return RET_ERROR; + // create ITensor from MS constant tensor of index 1 - first_in_tensor_index_ + nvinfer1::ITensor *constant_input = nullptr; + if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) { + constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(), + in_tensors_[1 - first_in_tensor_index_].Data().get()); + } else { + constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]); + } + if (constant_input == nullptr) { + MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_; + return RET_ERROR; + } + this->AddInnerInTensors(constant_input); } - nvinfer1::IElementWiseLayer *cal_layer = - network->addElementWise(*tensorrt_in_tensors_[0], *tensorrt_in_tensors_[1], element_wise_op_); + nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise( + *tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_); if (cal_layer == nullptr) { MS_LOG(ERROR) << "addElementWise failed for TensorRT."; diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h index a370c80ca5f..c927ab074dd 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h @@ -35,8 +35,12 @@ class ElementWiseTensorRT : public TensorRTOp { const std::vector &out_tensors) override; private: - nvinfer1::ElementWiseOperation element_wise_op_; nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor); + + nvinfer1::ElementWiseOperation element_wise_op_; + + // index of first input MSTensor in the trt input tensor vector + size_t 
first_in_tensor_index_ = 0; }; } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_ diff --git a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc index 410854f0e78..6bdbc2ea740 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc @@ -22,6 +22,10 @@ constexpr int AXIS_INDEX = 2; int GatherTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 3) { MS_LOG(ERROR) << "invalid input tensor size: " << in_tensors.size(); return RET_ERROR; @@ -61,6 +65,7 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } gather_layer->setName(op_name_.c_str()); + gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(gather_layer->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc index e56a4f3eec8..07a9cf4c7aa 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc @@ -22,6 +22,10 @@ constexpr int BIAS_INDEX = 2; int MatMulTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -41,16 +45,18 @@ int 
MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_); matmul_layer->setName(op_name_.c_str()); + nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0); - if (in_tensors_.size() == 3) { + if (in_tensors_.size() == BIAS_INDEX + 1) { auto bias = ConvertTensorWithExpandDims(network, in_tensors_[BIAS_INDEX], in_tensors_[0].Shape().size()); auto bias_layer = network->addElementWise(*matmul_layer->getOutput(0), *bias, nvinfer1::ElementWiseOperation::kSUM); auto bias_layer_name = op_name_ + "_bias"; bias_layer->setName(bias_layer_name.c_str()); - this->AddInnerOutTensors(bias_layer->getOutput(0)); - } else { - this->AddInnerOutTensors(matmul_layer->getOutput(0)); + out_tensor = bias_layer->getOutput(0); } + + out_tensor->setName(out_tensors_[0].Name().c_str()); + this->AddInnerOutTensors(out_tensor); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc index d5565765c98..5e1e2e72a66 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc @@ -23,6 +23,10 @@ namespace mindspore::lite { int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 2 && in_tensors.size() != 3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -98,6 +102,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); + transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); 
this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; diff --git a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc index 4263755c2fc..3ade0a4834b 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc @@ -22,6 +22,10 @@ namespace mindspore::lite { int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -89,6 +93,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); + transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc index 8be59ee52d3..3cf38700868 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc @@ -19,6 +19,10 @@ namespace mindspore::lite { int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } auto reduce_op = primitive->value_as_ReduceFusion(); if (reduce_op == nullptr) { MS_LOG(ERROR) << "convert failed"; diff --git a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc index 
b665c65fc7b..f0135bc2ef1 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc @@ -26,6 +26,10 @@ constexpr int POWER_INDEX = 3; int ScaleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 2 && in_tensors.size() != 3 && in_tensors.size() != 4) { MS_LOG(ERROR) << "Unsupported input tensor size, size is: " << in_tensors.size(); return RET_ERROR; diff --git a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc index 4db3722db10..9e006341215 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc @@ -19,6 +19,10 @@ namespace mindspore::lite { int ShapeTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; @@ -41,6 +45,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } shape_layer->setName(op_name_.c_str()); + shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); this->AddInnerOutTensors(shape_layer->getOutput(0)); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc index d5d21cf9270..21b3ae2e66e 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc @@ -16,19 +16,49 @@ #include 
"src/delegate/tensorrt/op/shuffle_tensorrt.h" #include +#include +#include namespace mindspore::lite { int ShuffleTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if ((type_ == schema::PrimitiveType::PrimitiveType_Squeeze || - type_ == schema::PrimitiveType::PrimitiveType_Unsqueeze) && - in_tensors.size() != 1) { - MS_LOG(ERROR) << "invalid input tensort size: " << in_tensors.size(); + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; return RET_ERROR; } - if ((type_ == schema::PrimitiveType::PrimitiveType_Transpose) && in_tensors.size() != 2) { - MS_LOG(ERROR) << "invalid input tensort size: " << in_tensors.size(); - return RET_ERROR; + switch (type_) { + case schema::PrimitiveType_Flatten: + case schema::PrimitiveType_Squeeze: + case schema::PrimitiveType_Unsqueeze: { + if (in_tensors.size() != 1) { + MS_LOG(ERROR) << "Unsupported in_tensors size " << in_tensors.size() << " of " + << schema::EnumNamePrimitiveType(type_); + return RET_ERROR; + } + break; + } + case schema::PrimitiveType_Reshape: { + if (in_tensors.size() != 2) { + MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size(); + return RET_ERROR; + } + break; + } + case schema::PrimitiveType_Transpose: { + if (in_tensors.size() != 2) { + MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size(); + return RET_ERROR; + } + if (in_tensors[1].Data() == nullptr) { + MS_LOG(ERROR) << "Unsupported shape tensor of " << schema::EnumNamePrimitiveType(type_); + return RET_ERROR; + } + break; + } + default: { + MS_LOG(ERROR) << "Unsupported op type:" << schema::EnumNamePrimitiveType(type_); + return RET_ERROR; + } } if (out_tensors.size() != 1) { MS_LOG(ERROR) << "invalid output tensort size: " << out_tensors.size(); @@ -49,7 +79,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } 
shuffle_layer->setName(op_name_.c_str()); - switch (this->type()) { + switch (type_) { case schema::PrimitiveType_Unsqueeze: { int ret = AddUnsqueezeOp(shuffle_layer); if (ret != RET_OK) { @@ -82,6 +112,14 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } break; } + case schema::PrimitiveType_Flatten: { + int ret = AddFlattenOp(shuffle_layer); + if (ret != RET_OK) { + MS_LOG(ERROR) << "AddFlattenOp failed."; + return ret; + } + break; + } default: MS_LOG(ERROR) << "Unsupported op type."; return RET_ERROR; @@ -148,7 +186,6 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) { } nvinfer1::Dims unsqueeze_dims = lite::ConvertCudaDims(unsqueeze_shape); - MS_LOG(INFO) << "AddUnsqueezeOp: " << op_name_ << " unsqueeze_dims.nbDims: " << unsqueeze_dims.nbDims; shuffle_layer->setReshapeDimensions(unsqueeze_dims); return shuffle_layer->getOutput(0) == nullptr ? RET_ERROR : RET_OK; @@ -166,8 +203,8 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) { } // perm mindspore::MSTensor perm_ternsor = in_tensors_[1]; - if (perm_ternsor.Data() == nullptr || perm_ternsor.ElementNum() != tensorrt_in_tensors_[0]->getDimensions().nbDims) { - MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid."; + if (perm_ternsor.Data() == nullptr) { + MS_LOG(ERROR) << "AddTransposeOp perm_ternsor data is invalid: " << op_name_; return RET_ERROR; } int *perm_data = reinterpret_cast(perm_ternsor.MutableData()); @@ -180,26 +217,38 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) { shuffle_layer->setFirstTranspose(perm); return RET_OK; } + int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) { - auto reshape_op = this->op_primitive_->value_as_Reshape(); - if (reshape_op == nullptr) { - MS_LOG(ERROR) << "AddReshapeOp convert failed"; - return RET_ERROR; - } - if (in_tensors_.size() != 2) { - MS_LOG(ERROR) << "AddReshapeOp size of in tensort needs check: " << 
in_tensors_.size(); - return RET_ERROR; - } mindspore::MSTensor &shape_tensor = in_tensors_[1]; - nvinfer1::Dims reshape_dims = ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum()); - int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims); - if (ret != RET_OK) { - MS_LOG(ERROR) << "invalid dims for reshape " << op_name_; - return ret; + if (shape_tensor.Data() != nullptr) { + // static shuffle layer + nvinfer1::Dims reshape_dims = lite::ConvertCudaDims(shape_tensor.Data().get(), shape_tensor.ElementNum()); + int ret = InferReshapeDims(tensorrt_in_tensors_[0]->getDimensions(), &reshape_dims); + if (ret != RET_OK) { + MS_LOG(ERROR) << "invalid dims for reshape " << op_name_; + return ret; + } + shuffle_layer->setReshapeDimensions(reshape_dims); + } else { + if (tensorrt_in_tensors_.size() != 2) { + MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_; + return RET_ERROR; + } + shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]); } - shuffle_layer->setReshapeDimensions(reshape_dims); return RET_OK; } + +int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) { + nvinfer1::Dims flatten_dims; + const std::vector &input_shape = in_tensors_[0].Shape(); + flatten_dims.nbDims = 2; + flatten_dims.d[0] = input_shape[0]; + flatten_dims.d[1] = std::accumulate(input_shape.begin() + 1, input_shape.end(), 1, std::multiplies()); + shuffle_layer->setReshapeDimensions(flatten_dims); + return RET_OK; +} + int ShuffleTensorRT::InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims) { int infer_index = -1; int known_cnt = 1; diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h index 98d90d9ac2c..e799a7dcaee 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.h @@ -39,6 +39,7 @@ class ShuffleTensorRT : public TensorRTOp { int 
AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer); int AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer); int AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer); + int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer); int InferReshapeDims(nvinfer1::Dims input_dims, nvinfer1::Dims *reshape_dims); }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc index 4946fa0b501..a5e172e0dc5 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc @@ -21,7 +21,11 @@ namespace mindspore::lite { int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { - if (in_tensors.size() != 4 && in_tensors.size() != 5) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } + if (in_tensors.size() < STRIDE_INDEX + 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; } @@ -29,8 +33,8 @@ int SliceTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size(); return RET_ERROR; } - if (in_tensors_[1].Data() == nullptr) { - MS_LOG(ERROR) << "invalid pad tensor for: " << op_name_; + if (in_tensors_[BEGIN_INDEX].Data() == nullptr || in_tensors_[STRIDE_INDEX].Data() == nullptr) { + MS_LOG(ERROR) << "invalid pad or stride tensor for: " << op_name_; return RET_ERROR; } return RET_OK; @@ -42,9 +46,8 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "convert StridedSlice failed: " << op_name_; return RET_ERROR; } - const mindspore::MSTensor &begin = in_tensors_[1]; - // mindspore::MSTensor &end = in_tensors_[2]; - const mindspore::MSTensor &stride = in_tensors_[3]; + const 
mindspore::MSTensor &begin = in_tensors_[BEGIN_INDEX]; + const mindspore::MSTensor &stride = in_tensors_[STRIDE_INDEX]; nvinfer1::Dims start_dims = lite::ConvertCudaDims(begin.Data().get(), begin.ElementNum()); nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape()); diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h index 7bedfaf2adf..856f4d50712 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.h @@ -20,6 +20,8 @@ #include "src/delegate/tensorrt/op/tensorrt_op.h" namespace mindspore::lite { +constexpr int BEGIN_INDEX = 1; +constexpr int STRIDE_INDEX = 3; class SliceTensorRT : public TensorRTOp { public: SliceTensorRT(const schema::Primitive *primitive, const std::vector &in_tensors, diff --git a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc index 6f3d418fd34..e65508276f7 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc @@ -19,6 +19,10 @@ namespace mindspore::lite { int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) { with_log_ = true; auto softmax_op = primitive->value_as_LogSoftmax(); diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc index 5acc69ef559..4f7b3ca8164 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc @@ -42,4 +42,15 @@ void TensorRTOp::set_out_ops(const std::vector &out_ops) { 
this->o const std::vector &TensorRTOp::in_ops() const { return this->in_ops_; } const std::vector &TensorRTOp::out_ops() const { return this->out_ops_; } + +bool TensorRTOp::IsShapeKnown() { + if (this->in_tensors_[0].Shape().size() == 0) { + return false; + } else { + if (this->in_tensors_[0].Shape()[0] == -1) { + return false; + } + } + return true; +} } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h index 91e73de901f..9cc77218988 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h @@ -75,6 +75,8 @@ class TensorRTOp { const std::vector &out_ops() const; protected: + bool IsShapeKnown(); + std::vector layers_; const schema::Primitive *op_primitive_; diff --git a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc index 4549a8f5498..c5f59da7825 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc @@ -19,6 +19,10 @@ namespace mindspore::lite { int UnaryTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors, const std::vector &out_tensors) { + if (!IsShapeKnown()) { + MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; + return RET_ERROR; + } if (in_tensors.size() != 1) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); } diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc index 4965a6c1059..e295c34ef3f 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_delegate.cc @@ -69,9 +69,6 @@ int TensorRTDelegate::Init() { op_func_lists_.clear(); op_func_lists_ = { {schema::PrimitiveType_Activation, GetTensorRTOp}, - 
{schema::PrimitiveType_Unsqueeze, GetTensorRTOp}, - {schema::PrimitiveType_Squeeze, GetTensorRTOp}, - {schema::PrimitiveType_Reshape, GetTensorRTOp}, {schema::PrimitiveType_Concat, GetTensorRTOp}, {schema::PrimitiveType_Conv2DFusion, GetTensorRTOp}, {schema::PrimitiveType_Conv2dTransposeFusion, GetTensorRTOp}, @@ -81,14 +78,20 @@ int TensorRTDelegate::Init() { {schema::PrimitiveType_AddFusion, GetTensorRTOp}, {schema::PrimitiveType_MulFusion, GetTensorRTOp}, {schema::PrimitiveType_Eltwise, GetTensorRTOp}, - {schema::PrimitiveType_Transpose, GetTensorRTOp}, - {schema::PrimitiveType_ReduceFusion, GetTensorRTOp}, - {schema::PrimitiveType_Sqrt, GetTensorRTOp}, + {schema::PrimitiveType_Gather, GetTensorRTOp}, {schema::PrimitiveType_MatMul, GetTensorRTOp}, - {schema::PrimitiveType_ScaleFusion, GetTensorRTOp}, - {schema::PrimitiveType_StridedSlice, GetTensorRTOp}, {schema::PrimitiveType_AvgPoolFusion, GetTensorRTOp}, {schema::PrimitiveType_PadFusion, GetTensorRTOp}, + {schema::PrimitiveType_ReduceFusion, GetTensorRTOp}, + {schema::PrimitiveType_ScaleFusion, GetTensorRTOp}, + {schema::PrimitiveType_StridedSlice, GetTensorRTOp}, + {schema::PrimitiveType_Shape, GetTensorRTOp}, + {schema::PrimitiveType_Unsqueeze, GetTensorRTOp}, + {schema::PrimitiveType_Squeeze, GetTensorRTOp}, + {schema::PrimitiveType_Reshape, GetTensorRTOp}, + {schema::PrimitiveType_Transpose, GetTensorRTOp}, + {schema::PrimitiveType_Flatten, GetTensorRTOp}, + {schema::PrimitiveType_Sqrt, GetTensorRTOp}, }; return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc index 1c3ce666941..2be96a83f27 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc @@ -158,6 +158,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() { return RET_ERROR; } trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor); + MS_LOG(INFO) << "auto convert constant 
tensor for: " << cur_op->GetOpName(); cur_op->AddInnerInTensors(trt_tensor); } } else { @@ -178,6 +179,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() { for (size_t index = 0; index < out_op->outputs().size(); index++) { if (out_op->outputs()[index] == out_tensor) { out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str()); + MS_LOG(INFO) << "markOutput for: " << out_tensor.Name(); this->network_->markOutput(*out_op->GetInnerOutTensor()[index]); } } diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h index 80ed386df7d..cd9163112c2 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h @@ -37,8 +37,10 @@ class TensorRTSubGraph : public kernel::Kernel { trt_specific_weight_nodes_ = { schema::PrimitiveType_Conv2DFusion, schema::PrimitiveType_ReduceFusion, schema::PrimitiveType_Transpose, schema::PrimitiveType_Gather, schema::PrimitiveType_Reshape, schema::PrimitiveType_PowFusion, - schema::PrimitiveType_DivFusion, schema::PrimitiveType_MatMul, schema::PrimitiveType_ScaleFusion, - schema::PrimitiveType_MulFusion, schema::PrimitiveType_StridedSlice, schema::PrimitiveType_PadFusion}; + schema::PrimitiveType_AddFusion, schema::PrimitiveType_DivFusion, schema::PrimitiveType_SubFusion, + schema::PrimitiveType_MatMul, schema::PrimitiveType_PowFusion, schema::PrimitiveType_Eltwise, + schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_MulFusion, schema::PrimitiveType_StridedSlice, + schema::PrimitiveType_PadFusion}; } ~TensorRTSubGraph() override; diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc index 230c35c829d..52ea5952adb 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc @@ -108,7 +108,7 @@ nvinfer1::ITensor 
*ConvertConstantTensor(nvinfer1::INetworkDefinition *network, return constant_tensor->getOutput(0); } -nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value) { +nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value) { nvinfer1::Dims dims = ConvertCudaDims(1, shape_size); nvinfer1::Weights weights{nvinfer1::DataType::kFLOAT, value, 1}; nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights); diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h index aacaed8534a..ae0a583faee 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h @@ -51,7 +51,7 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor, size_t expand_shape_size); -nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, void *value); +nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value); nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float **pack_weight); diff --git a/mindspore/lite/src/huffman_decode.h b/mindspore/lite/src/huffman_decode.h index 37c000cf792..be5e6e37431 100644 --- a/mindspore/lite/src/huffman_decode.h +++ b/mindspore/lite/src/huffman_decode.h @@ -76,5 +76,4 @@ class HuffmanDecode { } // namespace lite } // namespace mindspore - #endif // MINDSPORE_LITE_MINDSPORE_LITE_SRC_HUFFMAN_DECODE_H_ diff --git a/mindspore/lite/src/inner_context.cc b/mindspore/lite/src/inner_context.cc index 7d4a1492fbd..b225d6b2970 100644 --- a/mindspore/lite/src/inner_context.cc +++ b/mindspore/lite/src/inner_context.cc @@ -72,16 +72,21 @@ int 
InnerContext::Init() { } if (this->thread_pool_ == nullptr && this->IsCpuEnabled()) { int actor_parallel_thread = this->enable_parallel_ ? kDefaultParallelNum : 1; - thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_); - if (thread_pool_ == nullptr) { - MS_LOG(ERROR) << "Create ThreadPool failed"; - return RET_NULL_PTR; - } + if (this->affinity_core_list_.empty()) { - thread_pool_->SetCpuAffinity( - static_cast(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_)); + auto bind_mode = static_cast(this->device_list_.front().device_info_.cpu_device_info_.cpu_bind_mode_); + thread_pool_ = ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, bind_mode); + if (thread_pool_ == nullptr) { + MS_LOG(ERROR) << "Create ThreadPool failed"; + return RET_NULL_PTR; + } } else { - thread_pool_->SetCpuAffinity(this->affinity_core_list_); + thread_pool_ = + ActorThreadPool::CreateThreadPool(actor_parallel_thread, this->thread_num_, this->affinity_core_list_); + if (thread_pool_ == nullptr) { + MS_LOG(ERROR) << "Create ThreadPool failed"; + return RET_NULL_PTR; + } } } if (this->allocator == nullptr) { @@ -115,7 +120,6 @@ int InnerContext::Init() { InnerContext::~InnerContext() { if (this->thread_pool_ != nullptr) { - thread_pool_->SetCpuAffinity(static_cast(NO_BIND)); delete thread_pool_; this->thread_pool_ = nullptr; } @@ -126,7 +130,7 @@ int InnerContext::IsValid() const { MS_LOG(ERROR) << "Device list is empty."; return RET_NOT_SUPPORT; } - if (this->device_list_.size() > 2) { + if (this->device_list_.size() > kMaxDeviceNums) { MS_LOG(ERROR) << "Not support device list more than 2."; return RET_NOT_SUPPORT; } diff --git a/mindspore/lite/src/inner_kernel.cc b/mindspore/lite/src/inner_kernel.cc index e9473c760fa..7d590d66385 100644 --- a/mindspore/lite/src/inner_kernel.cc +++ b/mindspore/lite/src/inner_kernel.cc @@ -71,7 +71,42 @@ int InnerKernel::PreProcess() { MS_LOG(ERROR) << "MallocData failed"; 
return ret; } + output->ResetRefCount(); } return RET_OK; } + +int InnerKernel::Execute() { + auto ret = PreProcess(); + if (lite::RET_OK != ret) { + MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name(); + return ret; + } + + // Support ZeroShape + size_t zero_shape_num = 0; + for (auto tensor : this->out_tensors()) { + for (size_t i = 0; i < tensor->shape().size(); i++) { + if (tensor->shape()[i] == 0) { + zero_shape_num++; + break; + } + } + } + + if (zero_shape_num != this->out_tensors().size()) { + ret = Run(); + if (lite::RET_OK != ret) { + MS_LOG(ERROR) << "run kernel failed, name: " << this->name(); + return ret; + } + } + + ret = PostProcess(); + if (lite::RET_OK != ret) { + MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name(); + return ret; + } + return lite::RET_OK; +} } // namespace mindspore::kernel diff --git a/mindspore/lite/src/inner_kernel.h b/mindspore/lite/src/inner_kernel.h index 93c490544be..8f41a07b260 100644 --- a/mindspore/lite/src/inner_kernel.h +++ b/mindspore/lite/src/inner_kernel.h @@ -52,39 +52,7 @@ class InnerKernel : public Kernel { } } - int Execute() override { - auto ret = PreProcess(); - if (lite::RET_OK != ret) { - MS_LOG(ERROR) << "run kernel PreProcess failed, name: " << this->name(); - return ret; - } - - // Support ZeroShape - size_t zero_shape_num = 0; - for (auto tensor : this->out_tensors()) { - for (size_t i = 0; i < tensor->shape().size(); i++) { - if (tensor->shape()[i] == 0) { - zero_shape_num++; - break; - } - } - } - - if (zero_shape_num != this->out_tensors().size()) { - auto ret = Run(); - if (lite::RET_OK != ret) { - MS_LOG(ERROR) << "run kernel failed, name: " << this->name(); - return ret; - } - } - - ret = PostProcess(); - if (lite::RET_OK != ret) { - MS_LOG(ERROR) << "run kernel PostProcess failed, name: " << this->name(); - return ret; - } - return lite::RET_OK; - } + int Execute() override; // called while compiling graph int Prepare() override { return 
mindspore::lite::RET_OK; } @@ -94,14 +62,7 @@ class InnerKernel : public Kernel { // called before Run virtual int PreProcess(); // called after Run - virtual int PostProcess() { - for (auto *output : this->out_tensors()) { - MS_ASSERT(output != nullptr); - output->ResetRefCount(); - } - - return FreeInWorkTensor(); - } + virtual int PostProcess() { return FreeInWorkTensor(); } virtual int FreeInWorkTensor() const { for (auto &in_tensor : this->in_tensors()) { @@ -164,14 +125,14 @@ class InnerKernel : public Kernel { void set_in_tensors(const std::vector &in_tensors) { this->in_tensors_ = in_tensors; } - virtual void set_in_tensor(lite::Tensor *in_tensor, int index) { + virtual void set_in_tensor(lite::Tensor *in_tensor, size_t index) { MS_ASSERT(index < in_tensors_.size()); this->in_tensors_[index] = in_tensor; } void set_out_tensors(const std::vector &out_tensors) { this->out_tensors_ = out_tensors; } - virtual void set_out_tensor(lite::Tensor *out_tensor, int index) { + virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) { MS_ASSERT(index < out_tensors_.size()); this->out_tensors_[index] = out_tensor; } diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index 54f4d9799b3..5da2327ceff 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ b/mindspore/lite/src/kernel_registry.cc @@ -18,6 +18,7 @@ #include #include "include/errorcode.h" #include "include/registry/register_kernel.h" +#include "src/registry/register_utils.h" #include "src/ops/populate/populate_register.h" #include "src/common/version_manager.h" #include "nnacl/pooling_parameter.h" @@ -50,21 +51,21 @@ void KernelKeyToKernelDesc(const KernelKey &key, kernel::KernelDesc *desc) { } } // namespace -KernelRegistry *KernelRegistry::GetInstance() { - static KernelRegistry instance; - - std::unique_lock malloc_creator_array(instance.lock_); - if (instance.creator_arrays_ == nullptr) { - instance.creator_arrays_ = reinterpret_cast(malloc(array_size_ * 
sizeof(KernelCreator))); - if (instance.creator_arrays_ == nullptr) { - return nullptr; +void KernelRegistry::CreatorArraysInit() { + std::unique_lock malloc_creator_array(lock_); + if (creator_arrays_ == nullptr) { + creator_arrays_ = reinterpret_cast(malloc(array_size_ * sizeof(KernelCreator))); + if (creator_arrays_ != nullptr) { + memset(creator_arrays_, 0, array_size_ * sizeof(KernelCreator)); } - memset(instance.creator_arrays_, 0, array_size_ * sizeof(KernelCreator)); } - return &instance; + return; } -int KernelRegistry::Init() { return RET_OK; } +KernelRegistry *KernelRegistry::GetInstance() { + static KernelRegistry instance; + return &instance; +} kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { if (desc.provider == kBuiltin) { @@ -74,7 +75,9 @@ kernel::KernelCreator KernelRegistry::GetCreator(const KernelKey &desc) { << desc.type; return nullptr; } - return creator_arrays_[index]; + if (creator_arrays_ != nullptr) { + return creator_arrays_[index]; + } } MS_LOG(ERROR) << "Call wrong interface!provider: " << desc.provider; return nullptr; @@ -89,16 +92,20 @@ int KernelRegistry::GetCreatorFuncIndex(const kernel::KernelKey desc) { } void KernelRegistry::RegKernel(const KernelKey desc, const kernel::KernelCreator creator) { + CreatorArraysInit(); int index = GetCreatorFuncIndex(desc); if (index >= array_size_ || index < 0) { MS_LOG(ERROR) << "invalid kernel key, arch " << desc.arch << ", data_type" << desc.data_type << ",op type " << desc.type; return; } - creator_arrays_[index] = creator; + if (creator_arrays_ != nullptr) { + creator_arrays_[index] = creator; + } } void KernelRegistry::RegKernel(KERNEL_ARCH arch, TypeId data_type, int op_type, kernel::KernelCreator creator) { + CreatorArraysInit(); KernelKey desc = {arch, data_type, op_type}; int index = GetCreatorFuncIndex(desc); if (index >= array_size_ || index < 0) { @@ -106,11 +113,11 @@ void KernelRegistry::RegKernel(KERNEL_ARCH arch, TypeId data_type, int op_type, << 
desc.type; return; } - creator_arrays_[index] = creator; + if (creator_arrays_ != nullptr) { + creator_arrays_[index] = creator; + } } -bool KernelRegistry::Merge(const std::unordered_map &new_creators) { return false; } - KernelRegistry::~KernelRegistry() { KernelRegistry *instance = GetInstance(); std::unique_lock malloc_creator_array(instance->lock_); @@ -132,7 +139,7 @@ int KernelRegistry::GetCustomKernel(const std::vector &in_tensors, con MS_ASSERT(kernel != nullptr); kernel::KernelDesc desc; KernelKeyToKernelDesc(key, &desc); - CreateKernel creator = kernel::RegisterKernel::GetCreator(static_cast(primitive), &desc); + CreateKernel creator = kernel::RegisterUtils::GetCreator(static_cast(primitive), &desc); if (creator == nullptr) { return RET_NOT_SUPPORT; } diff --git a/mindspore/lite/src/kernel_registry.h b/mindspore/lite/src/kernel_registry.h index 9015caf81a4..af480d3b844 100644 --- a/mindspore/lite/src/kernel_registry.h +++ b/mindspore/lite/src/kernel_registry.h @@ -37,12 +37,10 @@ class KernelRegistry { virtual ~KernelRegistry(); static KernelRegistry *GetInstance(); - static int Init(); virtual kernel::KernelCreator GetCreator(const kernel::KernelKey &desc); int GetCreatorFuncIndex(kernel::KernelKey desc); void RegKernel(kernel::KernelKey desc, kernel::KernelCreator creator); void RegKernel(kernel::KERNEL_ARCH arch, TypeId data_type, int type, kernel::KernelCreator creator); - bool Merge(const std::unordered_map &newCreators); bool SupportKernel(const kernel::KernelKey &key); int GetKernel(const std::vector &in_tensors, const std::vector &out_tensors, const InnerContext *ctx, const mindspore::Context *ms_ctx, const kernel::KernelKey &key, @@ -58,6 +56,9 @@ class KernelRegistry { static const int array_size_{device_type_length_ * data_type_length_ * op_type_length_}; kernel::KernelCreator *creator_arrays_ = nullptr; + private: + void CreatorArraysInit(); + private: std::mutex lock_; }; diff --git a/mindspore/lite/src/lite_kernel.cc 
b/mindspore/lite/src/lite_kernel.cc index 926a94f3bfd..db1ad97e1d0 100644 --- a/mindspore/lite/src/lite_kernel.cc +++ b/mindspore/lite/src/lite_kernel.cc @@ -38,15 +38,18 @@ bool LiteKernel::IsReady(const std::vector &scope_tensors) { }); } -void LiteKernel::InitOutTensorInitRefCount() { +void LiteKernel::InitOutTensorInitRefCount(const std::vector *mask_kernels) { for (auto *tensor : this->out_tensors()) { MS_ASSERT(tensor != nullptr); size_t init_ref_count = 0; for (auto *post_kernel : this->out_kernels_) { - auto &post_in_tensors = post_kernel->in_tensors(); - init_ref_count += - std::count_if(post_in_tensors.begin(), post_in_tensors.end(), - [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; }); + if ((mask_kernels == nullptr) || + std::find(mask_kernels->begin(), mask_kernels->end(), post_kernel) != mask_kernels->end()) { + auto &post_in_tensors = post_kernel->in_tensors(); + init_ref_count += std::count_if( + post_in_tensors.begin(), post_in_tensors.end(), + [&tensor](const lite::Tensor *post_kernel_in_tensor) { return post_kernel_in_tensor == tensor; }); + } } tensor->set_init_ref_count(init_ref_count); } diff --git a/mindspore/lite/src/lite_kernel.h b/mindspore/lite/src/lite_kernel.h index 55456c46c05..b539849d81f 100644 --- a/mindspore/lite/src/lite_kernel.h +++ b/mindspore/lite/src/lite_kernel.h @@ -238,7 +238,7 @@ class LiteKernel { } } - void set_in_tensor(lite::Tensor *in_tensor, int index) { + void set_in_tensor(lite::Tensor *in_tensor, size_t index) { MS_ASSERT(kernel_ != nullptr); if (desc_.provider == kBuiltin) { std::static_pointer_cast(kernel_)->set_in_tensor(in_tensor, index); @@ -264,7 +264,7 @@ class LiteKernel { } } - virtual void set_out_tensor(lite::Tensor *out_tensor, int index) { + virtual void set_out_tensor(lite::Tensor *out_tensor, size_t index) { MS_ASSERT(kernel_ != nullptr); if (desc_.provider == kBuiltin) { std::static_pointer_cast(kernel_)->set_out_tensor(out_tensor, index); @@ -327,7 
+327,7 @@ class LiteKernel { virtual bool IsReady(const std::vector &in_tensor); - virtual void InitOutTensorInitRefCount(); + virtual void InitOutTensorInitRefCount(const std::vector *mask_kernels = nullptr); KernelKey desc() const { return desc_; } @@ -353,7 +353,7 @@ class LiteKernel { mutable std::vector mutable_out_tensors_; bool is_model_output_ = false; SubGraphType subgraph_type_ = kNotSubGraph; - const lite::InnerContext *context_; + const lite::InnerContext *context_ = nullptr; }; typedef InnerKernel *(*KernelCreator)(const std::vector &inputs, @@ -378,4 +378,4 @@ kernel::InnerKernel *LiteKernelCreator(const std::vector &inputs } } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_INNER_KERNEL_H_ +#endif // MINDSPORE_LITE_SRC_LITE_KERNEL_H_ diff --git a/mindspore/lite/src/lite_kernel_util.cc b/mindspore/lite/src/lite_kernel_util.cc index d3b2df08187..0fac1ba5903 100644 --- a/mindspore/lite/src/lite_kernel_util.cc +++ b/mindspore/lite/src/lite_kernel_util.cc @@ -190,12 +190,13 @@ int LiteKernelUtil::TopologicalSortKernels(std::vector *ke void LiteKernelUtil::InitTensorInitRefCount(const std::vector &kernels) { for (auto *kernel : kernels) { - kernel->InitOutTensorInitRefCount(); + kernel->InitOutTensorInitRefCount(&kernels); } } int LiteKernelUtil::SetInput(const LiteKernel &kernelMod, const std::vector &inputs) { return -1; } +#ifdef ENABLE_CONTROL_TENSORLIST bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) { if (kernel->desc().delegate != nullptr) { return false; @@ -214,6 +215,7 @@ bool LiteKernelUtil::IsSwitchCall(kernel::LiteKernel *kernel) { return false; } +#endif kernel::LiteKernel *LiteKernelUtil::GetInputsSpecificNode(const kernel::LiteKernel *kernel, const schema::PrimitiveType &primitive_type) { diff --git a/mindspore/lite/src/lite_kernel_util.h b/mindspore/lite/src/lite_kernel_util.h index 74db835b68c..0a8bc2ddde4 100644 --- a/mindspore/lite/src/lite_kernel_util.h +++ b/mindspore/lite/src/lite_kernel_util.h @@ -37,7 
+37,9 @@ class LiteKernelUtil { static int SetInput(const LiteKernel &kernelMod, const std::vector &inputs); +#ifdef ENABLE_CONTROL_TENSORLIST static bool IsSwitchCall(kernel::LiteKernel *kernel); +#endif static kernel::LiteKernel *GetInputsSpecificNode(const kernel::LiteKernel *kernel, const schema::PrimitiveType &primitive_type); diff --git a/mindspore/lite/src/lite_mindrt.cc b/mindspore/lite/src/lite_mindrt.cc index ab1e87f5517..6c7bfffad4c 100644 --- a/mindspore/lite/src/lite_mindrt.cc +++ b/mindspore/lite/src/lite_mindrt.cc @@ -86,6 +86,7 @@ void LiteOpActor::IsolateInputData(std::vector> *ac if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) { old_tensor->set_data_type(kernel_->desc().data_type); } +#ifdef ENABLE_CONTROL_TENSORLIST if (old_tensor->data_type() == kObjectTypeTensorType) { auto old_tensorlist = reinterpret_cast(old_tensor); if (old_tensorlist->tensors_data_type() == kNumberTypeFloat16 || @@ -93,6 +94,8 @@ void LiteOpActor::IsolateInputData(std::vector> *ac old_tensorlist->set_tensors_data_type(kernel_->desc().data_type); } } +#endif + old_tensor->set_allocator(kernel_->Context()->allocator); continue; } @@ -102,10 +105,12 @@ void LiteOpActor::IsolateInputData(std::vector> *ac } Tensor *new_tensor = new Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category()); - new_tensor->set_allocator(old_tensor->allocator()); /* GPU use opencl allocator */ - if (new_tensor->allocator() == nullptr && kernel_->subgraph_type() == kernel::kCpuFP16SubGraph) { + new_tensor->set_allocator(old_tensor->allocator()); + if (new_tensor->allocator() == nullptr && kernel_->Context() != nullptr && + kernel_->desc().arch != kernel::kDelegate) { new_tensor->set_allocator(kernel_->Context()->allocator); } + new_tensor->set_tensor_name(kernel_->name() + "_duplicate_" + old_tensor->tensor_name()); for (LiteQuantParam quant : old_tensor->quant_params()) { new_tensor->AddQuantParam(quant); @@ -187,6 
+192,7 @@ int LiteOpActor::CompileArrowThroughOutputKernels() { return RET_OK; } +#ifdef ENABLE_CONTROL_TENSORLIST int LiteOpActor::CompileArrowThroughPartialCall() { if (kernel_->desc().delegate != nullptr) { MS_LOG(INFO) << "kernel is delegate subgraph kernel."; @@ -225,10 +231,13 @@ int LiteOpActor::CompileArrowThroughPartialCall() { subgraph_kernel->DropNode(call_node_); return RET_OK; } +#endif int LiteOpActor::CompileArrow() { + int ret; output_data_arrows_.clear(); - int ret = CompileArrowThroughPartialCall(); +#ifdef ENABLE_CONTROL_TENSORLIST + ret = CompileArrowThroughPartialCall(); if (ret != RET_OK) { output_data_arrows_.clear(); MS_LOG(ERROR) << "CompileArrowThroughPartialCall failed."; @@ -238,6 +247,7 @@ int LiteOpActor::CompileArrow() { MS_LOG(INFO) << "CompileArrowThroughPartialCall done."; return RET_OK; } +#endif ret = CompileArrowThroughOutputKernels(); if (ret != RET_OK) { output_data_arrows_.clear(); @@ -263,6 +273,87 @@ void LiteOpActor::MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor) { src_tensor->DecRefCount(); } +void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) { + if (src_tensor == dst_tensor) { + MS_LOG(INFO) << "no need to move."; + return; + } + MS_ASSERT(src_tensor->allocator() != nullptr); +#ifdef ENABLE_CONTROL_TENSORLIST + if (src_tensor->data_type() == kObjectTypeTensorType) { + MoveTensorListInputData(reinterpret_cast(dst_tensor), reinterpret_cast(src_tensor)); + } else { + MoveTensorInputData(dst_tensor, src_tensor); + } +#else + MoveTensorInputData(dst_tensor, src_tensor); +#endif + return; +} + +void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) { + dst_tensor->set_data(src_tensor->data()); + dst_tensor->set_own_data(false); +} + +int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) { + int ret = RET_OK; +#ifdef ENABLE_CONTROL_TENSORLIST + if (src->data_type() != kObjectTypeTensorType) { + ret = CastTensorInputData(dst, src); + } else { + ret = 
CastTensorListInputData(reinterpret_cast(dst), reinterpret_cast(src)); + } +#else + ret = CastTensorInputData(dst, src); +#endif + src->DecRefCount(); + return ret; +} + +bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) { + if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType && + dst_tensor->data_type() != src_tensor->data_type()) { + return true; + } +#ifdef ENABLE_CONTROL_TENSORLIST + if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType && + reinterpret_cast(dst_tensor)->tensors_data_type() != + reinterpret_cast(src_tensor)->tensors_data_type()) { + return true; + } +#endif + return false; +} + +int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) { + dst->MallocData(); + dst->ResetRefCount(); +#if defined(ENABLE_ARM) && defined(ENABLE_FP16) + if (dst->shape() != src->shape()) { + MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs " + << "src tensor: " << src->tensor_name() << " shape: " << src->shape(); + return RET_PARAM_INVALID; + } + auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */ + auto src_data = src->MutableData(); + auto src_nums_size = src->ElementsNum(); + auto dst_data_type = static_cast(dst->data_type()); + auto src_data_type = static_cast(src->data_type()); + if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) { + Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); + } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) { + Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); + } else { + MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type; + return RET_NOT_SUPPORT; + } + return RET_OK; +#endif + return RET_ERROR; +} + +#ifdef ENABLE_CONTROL_TENSORLIST void 
LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) { MS_ASSERT(src_tensorlist != nullptr); MS_ASSERT(dst_tensorlist != nullptr); @@ -302,77 +393,6 @@ void LiteOpActor::MoveTensorListInputData(TensorList *dst_tensorlist, TensorList } } -void LiteOpActor::MoveInputData(Tensor *dst_tensor, Tensor *src_tensor) { - if (src_tensor == dst_tensor) { - MS_LOG(INFO) << "no need to move."; - return; - } - MS_ASSERT(src_tensor->allocator() != nullptr); - - if (src_tensor->data_type() == kObjectTypeTensorType) { - MoveTensorListInputData(reinterpret_cast(dst_tensor), reinterpret_cast(src_tensor)); - } else { - MoveTensorInputData(dst_tensor, src_tensor); - } - return; -} - -void LiteOpActor::SetInputData(Tensor *dst_tensor, Tensor *src_tensor) { - dst_tensor->set_data(src_tensor->data()); - dst_tensor->set_own_data(false); -} - -int LiteOpActor::CastInputData(Tensor *dst, Tensor *src) { - int ret = RET_OK; - if (src->data_type() != kObjectTypeTensorType) { - ret = CastTensorInputData(dst, src); - } else { - ret = CastTensorListInputData(reinterpret_cast(dst), reinterpret_cast(src)); - } - src->DecRefCount(); - return ret; -} - -bool LiteOpActor::NeedCastData(Tensor *dst_tensor, Tensor *src_tensor) { - if (dst_tensor->data_type() != kObjectTypeTensorType && src_tensor->data_type() != kObjectTypeTensorType && - dst_tensor->data_type() != src_tensor->data_type()) { - return true; - } - if (dst_tensor->data_type() == kObjectTypeTensorType && src_tensor->data_type() == kObjectTypeTensorType && - reinterpret_cast(dst_tensor)->tensors_data_type() != - reinterpret_cast(src_tensor)->tensors_data_type()) { - return true; - } - return false; -} - -int LiteOpActor::CastTensorInputData(Tensor *dst, Tensor *src) { - dst->MallocData(); - dst->ResetRefCount(); -#if defined(ENABLE_ARM) && defined(ENABLE_FP16) - if (dst->shape() != src->shape()) { - MS_LOG(ERROR) << "dst tensor: " << dst->tensor_name() << " shape: " << dst->shape() << " vs " - << "src 
tensor: " << src->tensor_name() << " shape: " << src->shape(); - return RET_PARAM_INVALID; - } - auto dst_data = dst->MutableData(); /* using MutableData to sync GPU data */ - auto src_data = src->MutableData(); - auto src_nums_size = src->ElementsNum(); - auto dst_data_type = static_cast(dst->data_type()); - auto src_data_type = static_cast(src->data_type()); - if (dst_data_type == kNumberTypeFloat32 && src_data_type == kNumberTypeFloat16) { - Float16ToFloat32_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); - } else if (dst_data_type == kNumberTypeFloat16 && src_data_type == kNumberTypeFloat32) { - Float32ToFloat16_fp16_handler(src_data, dst_data, src_nums_size, support_fp16_); - } else { - MS_LOG(ERROR) << "not support dst_data_type: " << dst_data_type << " src_data_type: " << src_data_type; - return RET_NOT_SUPPORT; - } - return RET_OK; -#endif - return RET_ERROR; -} - int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList *src_tensorlist) { MS_ASSERT(src_tensorlist != nullptr); MS_ASSERT(dst_tensorlist != nullptr); @@ -399,87 +419,6 @@ int LiteOpActor::CastTensorListInputData(TensorList *dst_tensorlist, TensorList return RET_OK; } -void LiteOpActor::SetInputShape() { - for (size_t i = 0; i < inputs_data_.size(); ++i) { - auto &input_tensor = kernel_->in_tensors()[i]; - if (input_tensor->shape() == inputs_data_[i]->shape()) { - continue; - } - MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()[" - << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal."; - MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name(); - - if (input_tensor->data_type() == kObjectTypeTensorType) { - auto input_tensorlist = reinterpret_cast(input_tensor); - auto input_data_tensorlist = reinterpret_cast(inputs_data_[i]); - input_tensorlist->FreeTensorListData(); - input_tensorlist->set_element_shape(input_data_tensorlist->element_shape()); - 
input_tensorlist->set_shape(input_data_tensorlist->shape()); - std::vector> tensor_shape{}; - std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(), - std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); }); - input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape); - } else { - input_tensor->set_shape(inputs_data_[i]->shape()); - input_tensor->set_format(inputs_data_[i]->format()); - } - } -} - -int LiteOpActor::InitInputData() { - SetInputShape(); - - for (size_t i = 0; i < inputs_data_.size(); ++i) { - auto dst_tensor = kernel_->in_tensors()[i]; - auto src_tensor = inputs_data_[i]; - if (dst_tensor->init_ref_count() == 0) { - src_tensor->DecRefCount(); - continue; - } - - if (NeedCastData(dst_tensor, src_tensor)) { - CastInputData(dst_tensor, src_tensor); - continue; - } - - /* same data-type */ - if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) { - // delegate graph kernel output tensor - SetInputData(dst_tensor, src_tensor); - } else { - MoveInputData(dst_tensor, src_tensor); - } - } - return RET_OK; -} - -void LiteOpActor::AsyncOutput(OpContext *context) { - for (size_t i = 0; i < output_data_arrows_.size(); i++) { - auto data = outputs_data_.at(i); - Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor::RunOpData, data.get(), context); - } -} - -void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); } - -void LiteOpActor::SetOutputData(OpContext *context) { - for (auto index : results_index_) { - context->SetResult(index, RET_OK); - } -} - -int LiteOpActor::PrepareOutputData() { - outputs_data_.resize(output_data_arrows_.size()); - for (size_t i = 0; i < output_data_arrows_.size(); i++) { - auto &arrow = output_data_arrows_[i]; - auto data = - std::make_shared>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_), - static_cast(arrow->to_input_index_)); - 
outputs_data_.at(i) = data; - } - return RET_OK; -} - int LiteSwitchOpActor::CompileTrueBranchArrow() { if (true_partial_node_ == nullptr) { MS_LOG(ERROR) << "true_partial_node_ is nullptr."; @@ -719,6 +658,91 @@ void LiteSwitchOpActor::RunOpData(OpData *inputs, OpContext *con } } +#endif + +void LiteOpActor::SetInputShape() { + for (size_t i = 0; i < inputs_data_.size(); ++i) { + auto &input_tensor = kernel_->in_tensors()[i]; + if (input_tensor->shape() == inputs_data_[i]->shape()) { + continue; + } + MS_LOG(DEBUG) << "inputs_data_[" << i << "].shape: " << inputs_data_[i]->shape() << " vs kernel_->in_tensors()[" + << i << "].shape: " << kernel_->in_tensors()[i]->shape() << " are not equal."; + MS_LOG(DEBUG) << "this->kernel_->name(): " << this->kernel_->name(); + + if (input_tensor->data_type() == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST + auto input_tensorlist = reinterpret_cast(input_tensor); + auto input_data_tensorlist = reinterpret_cast(inputs_data_[i]); + input_tensorlist->FreeTensorListData(); + input_tensorlist->set_element_shape(input_data_tensorlist->element_shape()); + input_tensorlist->set_shape(input_data_tensorlist->shape()); + std::vector> tensor_shape{}; + std::transform(input_data_tensorlist->tensors().begin(), input_data_tensorlist->tensors().end(), + std::back_inserter(tensor_shape), [](Tensor *tensor_item) { return tensor_item->shape(); }); + input_tensorlist->MallocTensorListData(input_data_tensorlist->tensors_data_type(), tensor_shape); +#endif + } else { + input_tensor->set_shape(inputs_data_[i]->shape()); + input_tensor->set_format(inputs_data_[i]->format()); + } + } +} + +int LiteOpActor::InitInputData() { + SetInputShape(); + + for (size_t i = 0; i < inputs_data_.size(); ++i) { + auto dst_tensor = kernel_->in_tensors()[i]; + auto src_tensor = inputs_data_[i]; + if (dst_tensor->init_ref_count() == 0) { + src_tensor->DecRefCount(); + continue; + } + + if (NeedCastData(dst_tensor, src_tensor)) { + CastInputData(dst_tensor, 
src_tensor); + continue; + } + + /* same data-type */ + if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) { + // delegate graph kernel output tensor + SetInputData(dst_tensor, src_tensor); + } else { + MoveInputData(dst_tensor, src_tensor); + } + } + return RET_OK; +} + +void LiteOpActor::AsyncOutput(OpContext *context) { + for (size_t i = 0; i < output_data_arrows_.size(); i++) { + auto data = outputs_data_.at(i); + Async(output_data_arrows_[i]->to_op_id_, &mindspore::OpActor::RunOpData, data.get(), context); + } +} + +void LiteOpActor::AddResultIndex(size_t index) { results_index_.push_back(index); } + +void LiteOpActor::SetOutputData(OpContext *context) { + for (auto index : results_index_) { + context->SetResult(index, RET_OK); + } +} + +int LiteOpActor::PrepareOutputData() { + outputs_data_.resize(output_data_arrows_.size()); + for (size_t i = 0; i < output_data_arrows_.size(); i++) { + auto &arrow = output_data_arrows_[i]; + auto data = + std::make_shared>(arrow->to_op_id_, (kernel_->out_tensors()).at(arrow->from_output_index_), + static_cast(arrow->to_input_index_)); + outputs_data_.at(i) = data; + } + return RET_OK; +} + std::vector> CreateOpActor(const std::vector &kernels, const lite::InnerContext *ctx) { std::vector> actors; @@ -730,8 +754,8 @@ std::vector> CreateOpActor(const std::vectorset_name(kernel->name() + to_string(actor_count++)); - + kernel->set_name(kernel->name() + "_" + to_string(actor_count++)); +#ifdef ENABLE_CONTROL_TENSORLIST if ((kernel::LiteKernelUtil::IsSwitchCall(kernel))) { auto switch_actor = std::make_shared(kernel); if (switch_actor == nullptr) { @@ -743,6 +767,7 @@ std::vector> CreateOpActor(const std::vectorGetAID(); actors.push_back(switch_actor); } else { +#endif auto actor = std::make_shared(kernel); if (actor == nullptr) { MS_LOG(ERROR) << "create LiteOpActor failed: " << kernel->name(); @@ -752,7 +777,9 @@ std::vector> CreateOpActor(const std::vectorset_thread_pool(thread_pool); 
subgraph_name_AID_map[kernel] = actor->GetAID(); actors.push_back(actor); +#ifdef ENABLE_CONTROL_TENSORLIST } +#endif } for (auto &actor : actors) { diff --git a/mindspore/lite/src/lite_mindrt.h b/mindspore/lite/src/lite_mindrt.h index 2edd9ce8455..3111015153f 100644 --- a/mindspore/lite/src/lite_mindrt.h +++ b/mindspore/lite/src/lite_mindrt.h @@ -95,13 +95,15 @@ class LiteOpActor : public OpActor { private: void IsolateInputData(std::vector> *actors); void MoveTensorInputData(Tensor *dst_tensor, Tensor *src_tensor); - void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); void MoveInputData(Tensor *dst_tensor, Tensor *src_tensor); void SetInputData(Tensor *dst_tensor, Tensor *src_tensor); int CastInputData(Tensor *dst_tensor, Tensor *src_tensor); bool NeedCastData(Tensor *dst_tensor, Tensor *src_tensor); int CastTensorInputData(Tensor *dst_tensor, Tensor *src_tensor); +#ifdef ENABLE_CONTROL_TENSORLIST + void MoveTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); int CastTensorListInputData(TensorList *dst_tensor, TensorList *src_tensor); +#endif private: kernel::LiteKernel *partial_node_ = nullptr; @@ -111,6 +113,7 @@ class LiteOpActor : public OpActor { #endif }; +#ifdef ENABLE_CONTROL_TENSORLIST class LiteSwitchOpActor : public LiteOpActor { public: explicit LiteSwitchOpActor(kernel::LiteKernel *kernel) : LiteOpActor(kernel) {} @@ -146,6 +149,7 @@ class LiteSwitchOpActor : public LiteOpActor { std::vector> true_branch_outputs_data_; std::vector> false_branch_outputs_data_; }; +#endif int MindrtInit(); void MindrtTerminate(const std::vector> &); diff --git a/mindspore/lite/src/lite_model.cc b/mindspore/lite/src/lite_model.cc index 7471ef92735..3f28ebf6186 100644 --- a/mindspore/lite/src/lite_model.cc +++ b/mindspore/lite/src/lite_model.cc @@ -479,5 +479,4 @@ int Model::Export(Model *model, const char *filename) { return chmod(filename, S_IRUSR); #endif } - } // namespace mindspore::lite diff --git 
a/mindspore/lite/src/lite_session.cc b/mindspore/lite/src/lite_session.cc index 403626ac2a8..f026ffedd20 100644 --- a/mindspore/lite/src/lite_session.cc +++ b/mindspore/lite/src/lite_session.cc @@ -68,11 +68,16 @@ int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) { // huffman code and bit pack are not assumed to be performed at same time STATUS ret = RET_ERROR; if (src_tensor.enableHuffmanCode()) { +#ifdef ENABLE_HUFFMAN_DECODE ret = WeightDecoder::DecodeHuffmanCode(src_tensor, dst_tensor); if (ret != RET_OK && ret != RET_NO_CHANGE) { MS_LOG(ERROR) << "Decode huffman code failed: " << ret; return ret; } +#else + MS_LOG(ERROR) << unsupport_huffman_decode_log; + return RET_ERROR; +#endif } else if (need_bit_unpack) { ret = WeightDecoder::UnPackToInt(src_tensor, dst_tensor); if (ret != RET_OK && ret != RET_NO_CHANGE) { @@ -123,11 +128,16 @@ int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_inde MS_ASSERT(dst_tensor != nullptr); if (src_tensor->data() != nullptr && src_tensor->data()->size() > 0) { if (dst_tensor->data_type() == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST auto tensor_list = reinterpret_cast(dst_tensor); if (tensor_list->Decode(reinterpret_cast(src_tensor->data()->data())) != RET_OK) { MS_LOG(ERROR) << "Decode tensorlist data failed"; return RET_ERROR; } +#else + MS_LOG(ERROR) << unsupport_control_tensorlist_log; + return RET_NOT_SUPPORT; +#endif } else { auto ret = DecompressTensor(*src_tensor, dst_tensor); if (ret == RET_NO_CHANGE) { @@ -159,6 +169,7 @@ lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) { } lite::Tensor *dst_tensor = nullptr; if (TypeId(src_tensor.dataType()) == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST dst_tensor = new (std::nothrow) TensorList(shape, std::vector(), src_category); // set tensor list datatype auto tensor_list = reinterpret_cast(dst_tensor); @@ -166,6 +177,9 @@ lite::Tensor *LiteSession::ConvertTensor(const 
schema::Tensor &src_tensor) { auto tensor_data_type = TypeId(reinterpret_cast(src_tensor.data()->data())[0]); tensor_list->set_tensors_data_type(tensor_data_type); } +#else + MS_LOG(ERROR) << unsupport_control_tensorlist_log; +#endif } else { dst_tensor = new (std::nothrow) Tensor(TypeId(src_tensor.dataType()), shape, static_cast(src_tensor.format()), src_category); @@ -689,12 +703,6 @@ int LiteSession::Init(const Context *context) { return RET_ERROR; } } - ret = KernelRegistry::GetInstance()->Init(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "KernelRegistry Init Failed."; - is_running_.store(false); - return ret; - } ret = InitGPURuntime(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init GPU runtime failed."; diff --git a/mindspore/lite/src/ops/CMakeLists.txt b/mindspore/lite/src/ops/CMakeLists.txt index 465d5296fcc..05b1a731ac9 100644 --- a/mindspore/lite/src/ops/CMakeLists.txt +++ b/mindspore/lite/src/ops/CMakeLists.txt @@ -4,9 +4,45 @@ file(GLOB OPS_SRC ${CMAKE_CURRENT_SOURCE_DIR}/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/populate/*.cc ) +if(MSLITE_STRING_KERNEL) + file(GLOB OPS_SRC_STRING + ${CMAKE_CURRENT_SOURCE_DIR}/populate/string/*.cc + ) + set(OPS_SRC + ${OPS_SRC} + ${OPS_SRC_STRING} + ) +endif() +if(MSLITE_CONTROL_TENSORLIST) + file(GLOB OPS_SRC_CONTROL_TENSORLIST + ${CMAKE_CURRENT_SOURCE_DIR}/populate/control/*.cc + ) + set(OPS_SRC + ${OPS_SRC} + ${OPS_SRC_CONTROL_TENSORLIST} + ) +endif() if(ENABLE_V0) file(GLOB_RECURSE COMPAT_SRC ${CMAKE_CURRENT_SOURCE_DIR}/compat/*.cc) file(GLOB OPS_SRC_V0 ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/*.cc) + if(MSLITE_STRING_KERNEL) + file(GLOB OPS_SRC_STRING_V0 + ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/string/*.cc + ) + set(OPS_SRC_V0 + ${OPS_SRC_V0} + ${OPS_SRC_STRING_V0} + ) + endif() + if(MSLITE_CONTROL_TENSORLIST) + file(GLOB OPS_SRC_CONTROL_TENSORLIST_V0 + ${CMAKE_CURRENT_SOURCE_DIR}/populate/v0/control/*.cc + ) + set(OPS_SRC_V0 + ${OPS_SRC_V0} + ${OPS_SRC_CONTROL_TENSORLIST_V0} + ) + endif() set(OPS_SRC ${OPS_SRC} ${COMPAT_SRC} 
${OPS_SRC_V0}) endif() diff --git a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc index eb97b279922..4232f59f7c5 100644 --- a/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/expand_dims_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferExpandDimsAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { + if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc index efbf3019e86..a2e794e2099 100644 --- a/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/slice_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferSliceAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { + if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc index 69471b4147a..04ce2dc057c 100644 --- a/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/strided_slice_compat_v0.cc @@ -28,7 +28,7 @@ int TransferStridedSliceAttr(Model::Node *node, std::vector *d dst_tensors->clear(); auto prim = reinterpret_cast(node->primitive_); MS_ASSERT(prim != nullptr); - int inputs_size = node->input_indices_.size(); + int 
inputs_size = static_cast(node->input_indices_.size()); auto param = prim->value_as_StridedSlice(); if (param == nullptr) { diff --git a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc index 3785abc2a32..02bb1ce567b 100644 --- a/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc +++ b/mindspore/lite/src/ops/compat/v0/topk_compat_v0.cc @@ -21,7 +21,7 @@ namespace mindspore { namespace lite { int TransferTopkAttr(Model::Node *node, std::vector *dst_tensors, std::vector *const tensor_bufs) { - if (node == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { + if (node == nullptr || node->primitive_ == nullptr || dst_tensors == nullptr || tensor_bufs == nullptr) { MS_LOG(ERROR) << "the parameter of this function is nullptr."; return RET_ERROR; } diff --git a/mindspore/lite/src/ops/ops_def.cc b/mindspore/lite/src/ops/ops_def.cc index b64ca1619fb..46c264b86c9 100644 --- a/mindspore/lite/src/ops/ops_def.cc +++ b/mindspore/lite/src/ops/ops_def.cc @@ -220,6 +220,7 @@ OP_TYPE(TensorArrayWrite) OP_TYPE(Affine) OP_TYPE(Attention) OP_TYPE(LSTMGrad) +OP_TYPE(ScatterNdUpdate) OP_TYPE_DEF_END(PrimitiveType) OP_SCHEMA_DEF(Abs) @@ -1212,3 +1213,6 @@ OP_SCHEMA_DEF_END(Affine) OP_SCHEMA_DEF(Attention) OP_SCHEMA_DEF_END(Attention) + +OP_SCHEMA_DEF(ScatterNdUpdate) +OP_SCHEMA_DEF_END(ScatterNdUpdate) diff --git a/mindspore/lite/src/ops/ops_func_declare.h b/mindspore/lite/src/ops/ops_func_declare.h index a2dee794b4e..da54b2dc899 100644 --- a/mindspore/lite/src/ops/ops_func_declare.h +++ b/mindspore/lite/src/ops/ops_func_declare.h @@ -131,6 +131,7 @@ #include "ops/rsqrt.h" #include "ops/scale.h" #include "ops/scatter_nd.h" +#include "ops/scatter_nd_update.h" #include "ops/select.h" #include "ops/sgd.h" #include "ops/shape.h" @@ -462,6 +463,7 @@ FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayRead) FUNC_MSOP2SCHEMAOP_DECLARE(TensorArrayWrite) FUNC_MSOP2SCHEMAOP_DECLARE(Affine) FUNC_MSOP2SCHEMAOP_DECLARE(Attention) 
+FUNC_MSOP2SCHEMAOP_DECLARE(ScatterNdUpdate) #endif } // namespace mindspore::lite::ops #else diff --git a/mindspore/lite/src/ops/ops_utils.cc b/mindspore/lite/src/ops/ops_utils.cc index 10a23304de7..90f57a89bb5 100644 --- a/mindspore/lite/src/ops/ops_utils.cc +++ b/mindspore/lite/src/ops/ops_utils.cc @@ -809,6 +809,11 @@ std::unique_ptr AttentionPrimitiveCreator(const AnfNodePtr & return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; } +std::unique_ptr ScatterNdUpdatePrimitiveCreator(const AnfNodePtr &node) { + auto ms_primc = GetValueNode>(node); + return ms_primc != nullptr ? ops::MSOp2SchemaOp(ms_primc.get()) : nullptr; +} + RegistryMSOps g_absPrimitiveCreatorRegistry("Abs", AbsPrimitiveCreator); RegistryMSOps g_absGradPrimitiveCreatorRegistry("AbsGrad", AbsGradPrimitiveCreator); RegistryMSOps g_activationPrimitiveCreatorRegistry("Activation", ActivationPrimitiveCreator); @@ -1034,6 +1039,7 @@ RegistryMSOps g_TensorArrayReadCreatorRegistry("TensorArrayRead", TensorArrayRea RegistryMSOps g_TensorArrayWriteCreatorRegistry("TensorArrayWrite", TensorArrayWritePrimitiveCreator); RegistryMSOps g_AffineCreatorRegistry("Affine", AffinePrimitiveCreator); RegistryMSOps g_AttentionCreatorRegistry("Attention", AttentionPrimitiveCreator); +RegistryMSOps g_ScatterNdUpdateCreatorRegistry("ScatterNdUpdate", ScatterNdUpdatePrimitiveCreator); std::unique_ptr CustomPrimitiveCreator(const AnfNodePtr &node) { auto ms_primc = GetValueNode>(node); diff --git a/mindspore/lite/src/ops/populate/adder_populate.cc b/mindspore/lite/src/ops/populate/adder_populate.cc index 284b632448b..5b41e4f5ae7 100644 --- a/mindspore/lite/src/ops/populate/adder_populate.cc +++ b/mindspore/lite/src/ops/populate/adder_populate.cc @@ -53,8 +53,8 @@ OpParameter *PopulateAdderParameter(const void *prim) { param->stride_w_ = static_cast(*(stride->begin() + 1)); param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = 
static_cast(*(pad_list->begin() + 2)); - param->pad_r_ = static_cast(*(pad_list->begin() + 3)); + param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); + param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); param->input_channel_ = static_cast(value->in_channel()); diff --git a/mindspore/lite/src/ops/populate/conv2d_populate.cc b/mindspore/lite/src/ops/populate/conv2d_populate.cc index ab61ea062c8..ceec07cb670 100644 --- a/mindspore/lite/src/ops/populate/conv2d_populate.cc +++ b/mindspore/lite/src/ops/populate/conv2d_populate.cc @@ -20,7 +20,6 @@ using mindspore::schema::PrimitiveType_Conv2DFusion; namespace mindspore { namespace lite { -constexpr auto kMinShapeSize = 2; OpParameter *PopulateConvParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -47,7 +46,8 @@ OpParameter *PopulateConvParameter(const void *prim) { free(param); return nullptr; } - if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) { + if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo || + dilation->size() < kMinShapeSizeTwo) { MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size() << ", stride size: " << stride->size() << ", dilation size: " << dilation->size(); free(param); @@ -68,7 +68,7 @@ OpParameter *PopulateConvParameter(const void *prim) { default: param->pad_mode_ = Pad_pad; } - if (pad_list == nullptr || pad_list->size() < 4) { + if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) { param->pad_u_ = 0; param->pad_d_ = 0; param->pad_l_ = 0; @@ -76,8 +76,8 @@ OpParameter *PopulateConvParameter(const void *prim) { } else { param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = static_cast(*(pad_list->begin() + 2)); 
- param->pad_r_ = static_cast(*(pad_list->begin() + 3)); + param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); + param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); } param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); diff --git a/mindspore/lite/src/ops/populate/deconv2d_populate.cc b/mindspore/lite/src/ops/populate/deconv2d_populate.cc index 1e6a8328f12..f9bd06890e2 100644 --- a/mindspore/lite/src/ops/populate/deconv2d_populate.cc +++ b/mindspore/lite/src/ops/populate/deconv2d_populate.cc @@ -20,7 +20,6 @@ using mindspore::schema::PrimitiveType_Conv2dTransposeFusion; namespace mindspore { namespace lite { -constexpr auto kMinShapeSize = 2; OpParameter *PopulateDeconvParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -48,7 +47,8 @@ OpParameter *PopulateDeconvParameter(const void *prim) { free(param); return nullptr; } - if (kernel_size->size() < kMinShapeSize || stride->size() < kMinShapeSize || dilation->size() < kMinShapeSize) { + if (kernel_size->size() < kMinShapeSizeTwo || stride->size() < kMinShapeSizeTwo || + dilation->size() < kMinShapeSizeTwo) { MS_LOG(ERROR) << "Invalid shape size!kernel_size size: " << kernel_size->size() << ", stride size: " << stride->size() << ", dilation size: " << dilation->size() << ", output_paddings size:" << output_paddings->size(); @@ -72,7 +72,7 @@ OpParameter *PopulateDeconvParameter(const void *prim) { default: param->pad_mode_ = Pad_pad; } - if (pad_list == nullptr || pad_list->size() < 4) { + if (pad_list == nullptr || pad_list->size() < kMinShapeSizeFour) { param->pad_u_ = 0; param->pad_d_ = 0; param->pad_l_ = 0; @@ -80,8 +80,8 @@ OpParameter *PopulateDeconvParameter(const void *prim) { } else { param->pad_u_ = static_cast(*(pad_list->begin())); param->pad_d_ = static_cast(*(pad_list->begin() + 1)); - param->pad_l_ = static_cast(*(pad_list->begin() + 2)); - param->pad_r_ = 
static_cast(*(pad_list->begin() + 3)); + param->pad_l_ = static_cast(*(pad_list->begin() + kOffsetTwo)); + param->pad_r_ = static_cast(*(pad_list->begin() + kOffsetThree)); } param->dilation_h_ = static_cast(*(dilation->begin())); param->dilation_w_ = static_cast(*(dilation->begin() + 1)); diff --git a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc index e8526010db0..9ff37e4082b 100644 --- a/mindspore/lite/src/ops/populate/detection_post_process_populate.cc +++ b/mindspore/lite/src/ops/populate/detection_post_process_populate.cc @@ -19,7 +19,6 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess; namespace mindspore { namespace lite { -constexpr auto kScaleMinSize = 4; OpParameter *PopulateDetectionPostProcessParameter(const void *prim) { auto primitive = static_cast(prim); MS_ASSERT(primitive != nullptr); @@ -43,15 +42,15 @@ OpParameter *PopulateDetectionPostProcessParameter(const void *prim) { free(param); return nullptr; } - if (scale->size() < kScaleMinSize) { + if (scale->size() < kMinShapeSizeFour) { MS_LOG(ERROR) << "Invalid scale shape size " << scale->size(); free(param); return nullptr; } param->h_scale_ = *(scale->begin()); param->w_scale_ = *(scale->begin() + 1); - param->x_scale_ = *(scale->begin() + 2); - param->y_scale_ = *(scale->begin() + 3); + param->x_scale_ = *(scale->begin() + kOffsetTwo); + param->y_scale_ = *(scale->begin() + kOffsetThree); param->nms_iou_threshold_ = value->nms_iou_threshold(); param->nms_score_threshold_ = value->nms_score_threshold(); param->max_detections_ = value->max_detections(); diff --git a/mindspore/lite/src/ops/populate/pooling_populate.cc b/mindspore/lite/src/ops/populate/pooling_populate.cc index 29adecdecaa..8b2933aa85b 100644 --- a/mindspore/lite/src/ops/populate/pooling_populate.cc +++ b/mindspore/lite/src/ops/populate/pooling_populate.cc @@ -20,10 +20,6 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion; 
namespace mindspore { namespace lite { -constexpr size_t kMinShapeSize = 2; -constexpr size_t kMinPadSize = 4; -constexpr int kOffsetTwo = 2; -constexpr int kOffsetThree = 3; OpParameter *PopulateAvgPoolParameter(const void *primitive) { auto pooling_prim = static_cast(primitive); MS_ASSERT(pooling_prim != nullptr); @@ -44,7 +40,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { param->pool_mode_ = PoolMode_AvgPool; param->global_ = value->global(); auto strides = value->strides(); - if (strides == nullptr || strides->size() < kMinShapeSize) { + if (strides == nullptr || strides->size() < kMinShapeSizeTwo) { MS_LOG(ERROR) << "strides is invalid!"; free(param); return nullptr; @@ -52,7 +48,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { param->stride_w_ = static_cast(*(strides->begin() + 1)); param->stride_h_ = static_cast(*(strides->begin())); auto pad = value->pad(); - if (pad != nullptr && pad->size() >= kMinPadSize) { + if (pad != nullptr && pad->size() >= kMinShapeSizeFour) { param->pad_u_ = static_cast(*(pad->begin())); param->pad_d_ = static_cast(*(pad->begin() + 1)); param->pad_l_ = static_cast(*(pad->begin() + kOffsetTwo)); @@ -60,7 +56,7 @@ OpParameter *PopulateAvgPoolParameter(const void *primitive) { } if (!param->global_) { auto kernel_size = value->kernel_size(); - if (kernel_size == nullptr || kernel_size->size() < kMinShapeSize) { + if (kernel_size == nullptr || kernel_size->size() < kMinShapeSizeTwo) { MS_LOG(ERROR) << "kernel_size is invalid"; free(param); return nullptr; @@ -126,8 +122,8 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) { if (!param->global_) { auto kernel_size = value->kernel_size(); auto strides = value->strides(); - if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSize || - strides->size() < kMinShapeSize) { + if (kernel_size == nullptr || strides == nullptr || kernel_size->size() < kMinShapeSizeTwo || + strides->size() < kMinShapeSizeTwo) { 
MS_LOG(ERROR) << "kernel_size or strides is invalid"; free(param); return nullptr; @@ -137,7 +133,7 @@ OpParameter *PopulateMaxPoolParameter(const void *primitive) { param->stride_w_ = static_cast(*(strides->begin() + 1)); param->stride_h_ = static_cast(*(strides->begin())); auto pad = value->pad(); - if (pad != nullptr && pad->size() >= kMinPadSize) { + if (pad != nullptr && pad->size() >= kMinShapeSizeFour) { param->pad_u_ = static_cast(*(pad->begin())); param->pad_d_ = static_cast(*(pad->begin() + 1)); param->pad_l_ = static_cast(*(pad->begin() + kOffsetTwo)); diff --git a/mindspore/lite/src/ops/populate/populate_register.h b/mindspore/lite/src/ops/populate/populate_register.h index 0537156cee2..1f248395f99 100644 --- a/mindspore/lite/src/ops/populate/populate_register.h +++ b/mindspore/lite/src/ops/populate/populate_register.h @@ -27,6 +27,10 @@ namespace mindspore { namespace lite { +constexpr int kOffsetTwo = 2; +constexpr int kOffsetThree = 3; +constexpr size_t kMinShapeSizeTwo = 2; +constexpr size_t kMinShapeSizeFour = 4; typedef OpParameter *(*ParameterGen)(const void *prim); class PopulateRegistry { diff --git a/mindspore/lite/src/ops/populate/prior_box_populate.cc b/mindspore/lite/src/ops/populate/prior_box_populate.cc index c16d21cbe2c..f23ab7364d4 100644 --- a/mindspore/lite/src/ops/populate/prior_box_populate.cc +++ b/mindspore/lite/src/ops/populate/prior_box_populate.cc @@ -47,7 +47,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->min_sizes_size = min_sizes->size(); + param->min_sizes_size = static_cast(min_sizes->size()); memcpy(param->min_sizes, min_sizes->data(), min_sizes->size() * sizeof(int32_t)); auto max_sizes = value->max_sizes(); @@ -61,7 +61,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->max_sizes_size = max_sizes->size(); + param->max_sizes_size = static_cast(max_sizes->size()); memcpy(param->max_sizes, max_sizes->data(), 
max_sizes->size() * sizeof(int32_t)); auto aspect_ratios = value->aspect_ratios(); @@ -76,7 +76,7 @@ OpParameter *PopulatePriorBoxParameter(const void *prim) { free(param); return nullptr; } - param->aspect_ratios_size = aspect_ratios->size(); + param->aspect_ratios_size = static_cast(aspect_ratios->size()); memcpy(param->aspect_ratios, aspect_ratios->data(), aspect_ratios->size() * sizeof(float)); auto variances = value->variances(); diff --git a/mindspore/lite/src/ops/populate/split_populate.cc b/mindspore/lite/src/ops/populate/split_populate.cc index b2f9b9603c3..c93a42f6dcd 100644 --- a/mindspore/lite/src/ops/populate/split_populate.cc +++ b/mindspore/lite/src/ops/populate/split_populate.cc @@ -37,20 +37,20 @@ OpParameter *PopulateSplitParameter(const void *prim) { param->op_parameter_.type_ = primitive->value_type(); param->num_split_ = value->output_num(); - if (param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || param->num_split_ < 0) { - MS_LOG(ERROR) << "The value of param->num_split_ is too big"; + if (param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || param->num_split_ <= 0) { + MS_LOG(ERROR) << "The value of param->num_split_ is not correct"; free(param); return nullptr; } /* free split_sizes_ in split op base */ - param->split_sizes_ = reinterpret_cast(malloc(param->num_split_ * sizeof(int))); + param->split_sizes_ = reinterpret_cast(malloc(static_cast(param->num_split_) * sizeof(int))); if (param->split_sizes_ == nullptr) { MS_LOG(ERROR) << "malloc param split_sizes_ error"; free(param); return nullptr; } - memset(param->split_sizes_, 0, param->num_split_ * sizeof(int)); + memset(param->split_sizes_, 0, static_cast(param->num_split_) * sizeof(int)); auto split_sizes_vector_ = value->size_splits(); if (split_sizes_vector_ != nullptr && split_sizes_vector_->size() <= static_cast(param->num_split_)) { int i = 0; diff --git a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc 
b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc index 3f14f6832d1..d96635c063c 100644 --- a/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc +++ b/mindspore/lite/src/ops/populate/v0/split_populate_v0.cc @@ -37,18 +37,19 @@ OpParameter *PopulateSplitParameter(const void *prim) { memset(split_param, 0, sizeof(SplitParameter)); split_param->op_parameter_.type_ = schema::PrimitiveType_Split; split_param->num_split_ = split_prim->numberSplit(); - if (split_param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int))) { - MS_LOG(ERROR) << "The value of split_param->num_split_ is too big"; + if (split_param->num_split_ > std::numeric_limits::max() / static_cast(sizeof(int)) || + split_param->num_split_ <= 0) { + MS_LOG(ERROR) << "The value of split_param->num_split_ is out of range."; free(split_param); return nullptr; } - int *split_sizes = reinterpret_cast(malloc(split_param->num_split_ * sizeof(int))); + int *split_sizes = reinterpret_cast(malloc(static_cast(split_param->num_split_) * sizeof(int))); if (split_sizes == nullptr) { MS_LOG(ERROR) << "malloc split size of SplitParameter failed."; free(split_param); return nullptr; } - memset(split_sizes, 0, split_param->num_split_ * sizeof(int)); + memset(split_sizes, 0, static_cast(split_param->num_split_) * sizeof(int)); split_param->split_sizes_ = split_sizes; auto split_sizes_vector_ = split_prim->sizeSplits(); if (split_sizes_vector_ != nullptr) { diff --git a/mindspore/lite/src/registry/register_kernel.cc b/mindspore/lite/src/registry/register_kernel.cc index 2bdf48c9249..07743fa677e 100644 --- a/mindspore/lite/src/registry/register_kernel.cc +++ b/mindspore/lite/src/registry/register_kernel.cc @@ -29,9 +29,5 @@ int RegisterKernel::RegKernel(const std::string &arch, const std::string &provid CreateKernel creator) { return lite::RegistryKernelImpl::GetInstance()->RegKernel(arch, provider, data_type, op_type, creator); } - -CreateKernel RegisterKernel::GetCreator(const schema::Primitive 
*primitive, kernel::KernelDesc *desc) { - return lite::RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc); -} } // namespace kernel } // namespace mindspore diff --git a/mindspore/lite/src/registry/register_kernel_impl.h b/mindspore/lite/src/registry/register_kernel_impl.h index 1fbe6c58b66..508ccd6fb6b 100644 --- a/mindspore/lite/src/registry/register_kernel_impl.h +++ b/mindspore/lite/src/registry/register_kernel_impl.h @@ -24,6 +24,7 @@ #include #include #include "include/registry/register_kernel.h" +#include "src/registry/register_utils.h" using mindspore::schema::PrimitiveType_MAX; using mindspore::schema::PrimitiveType_MIN; diff --git a/mindspore/lite/src/registry/register_utils.cc b/mindspore/lite/src/registry/register_utils.cc new file mode 100644 index 00000000000..b6b0231927a --- /dev/null +++ b/mindspore/lite/src/registry/register_utils.cc @@ -0,0 +1,25 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "src/registry/register_utils.h" +#include "src/registry/register_kernel_impl.h" + +namespace mindspore { +namespace kernel { +CreateKernel RegisterUtils::GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc) { + return lite::RegistryKernelImpl::GetInstance()->GetProviderCreator(primitive, desc); +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/lite/src/registry/register_utils.h b/mindspore/lite/src/registry/register_utils.h new file mode 100644 index 00000000000..2a0a9746eca --- /dev/null +++ b/mindspore/lite/src/registry/register_utils.h @@ -0,0 +1,59 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_ +#define MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_ +#include +#include "include/registry/register_kernel.h" +#include "schema/model_generated.h" +#include "ir/dtype/type_id.h" + +namespace mindspore { +namespace kernel { +/// \brief KernelDesc defined kernel's basic attribute. 
+struct KernelDesc { + TypeId data_type; /**< kernel data type argument */ + int type; /**< op type argument */ + std::string arch; /**< deviceType argument */ + std::string provider; /**< user identification argument */ + + bool operator<(const KernelDesc &dst) const { + if (provider != dst.provider) { + return provider < dst.provider; + } else if (arch != dst.arch) { + return arch < dst.arch; + } else if (data_type != dst.data_type) { + return data_type < dst.data_type; + } else { + return type < dst.type; + } + } +}; + +/// \brief RegisterKernel Defined registration of kernel. +class RegisterUtils { + public: + /// \brief Static methon to get a kernel's create function. + /// + /// \param[in] desc Define kernel's basic attribute. + /// \param[in] primitive Define the attributes of op. + /// + /// \return Function pointer to create a kernel. + static CreateKernel GetCreator(const schema::Primitive *primitive, kernel::KernelDesc *desc); +}; +} // namespace kernel +} // namespace mindspore +#endif // MINDSPORE_LITE_SRC_REGISTRY_REGISTER_UTILS_H_ diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc index 3507e3dcb01..dbc917a4d40 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_allocator.cc @@ -108,12 +108,15 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi } if (*image == nullptr) { delete *buffer; + *buffer = nullptr; MS_LOG(ERROR) << "Create OpenCL Image2D failed! 
(ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } if (ret != CL_SUCCESS) { delete *buffer; delete *image; + *buffer = nullptr; + *image = nullptr; MS_LOG(ERROR) << "Create OpenCL Image2D (ERROR CODE: " << mindspore::kernel::CLErrorCode(ret) << ")"; return nullptr; } @@ -125,6 +128,8 @@ void *OpenCLAllocator::CreateImage2D(size_t size, const ImageSize &img_size, voi if (host_ptr == nullptr) { delete *buffer; delete *image; + *buffer = nullptr; + *image = nullptr; MS_LOG(ERROR) << "Map image failed, can not found image :" << *image << ", host_ptr=" << host_ptr; return nullptr; } diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc index c47847c5998..4bac5664132 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.cc @@ -210,6 +210,7 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { #endif if (context_ == nullptr || ret != CL_SUCCESS) { delete device_; + device_ = nullptr; MS_LOG(ERROR) << "Context create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -218,6 +219,8 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { if (default_command_queue_ == nullptr || ret != CL_SUCCESS) { delete device_; delete context_; + device_ = nullptr; + context_ = nullptr; MS_LOG(ERROR) << "Command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -227,6 +230,9 @@ int OpenCLRuntime::InitQueue(std::vector *platforms) { delete device_; delete context_; delete default_command_queue_; + device_ = nullptr; + context_ = nullptr; + default_command_queue_ = nullptr; MS_LOG(ERROR) << "Profiling command Queue create failed: " << CLErrorCode(ret); return RET_ERROR; } @@ -291,6 +297,10 @@ int OpenCLRuntime::Init() { delete context_; delete default_command_queue_; delete profiling_command_queue_; + device_ = nullptr; + context_ = nullptr; + default_command_queue_ = nullptr; + 
profiling_command_queue_ = nullptr; MS_LOG(ERROR) << "Command OpenCL allocator failed!"; return RET_ERROR; } @@ -305,7 +315,9 @@ int OpenCLRuntime::Uninit() { if (init_state_ != InitSuccess) { return RET_OK; } - StoreCache(); + if (StoreCache() != RET_OK) { + MS_LOG(ERROR) << "StoreCache failed!"; + } program_map_.clear(); delete default_command_queue_; delete profiling_command_queue_; @@ -574,12 +586,15 @@ void *OpenCLRuntime::MapBuffer(const cl::Buffer &buffer, int flags, size_t size, int OpenCLRuntime::MapBuffer(void *host_ptr, int flags, size_t size, cl::CommandQueue *command_queue, bool sync) const { if (GetSVMCapabilities() & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) { - return RET_OK; + return RET_ERROR; } if (command_queue == nullptr) { command_queue = default_command_queue_; } - return clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr); + if (clEnqueueSVMMap(command_queue->get(), sync, flags, host_ptr, size, 0, nullptr, nullptr) != CL_SUCCESS) { + return RET_ERROR; + } + return RET_OK; } void *OpenCLRuntime::MapBuffer(const cl::Image2D &buffer, bool sync, int flags, const std::vector ®ion, @@ -720,17 +735,17 @@ void OpenCLRuntime::LoadCache() { MS_LOG(INFO) << "Init opencl cache success"; } -void OpenCLRuntime::StoreCache() { +int OpenCLRuntime::StoreCache() { if (!enable_cache_) { - return; + return RET_OK; } if (!flush_cache_) { - return; + return RET_OK; } auto fbb = std::make_unique(); if (fbb == nullptr) { MS_LOG(ERROR) << "new opencl FlatBufferBuilder fail"; - return; + return RET_ERROR; } std::vector> program_binarys; for (const auto &kv : program_map_) { @@ -753,8 +768,12 @@ void OpenCLRuntime::StoreCache() { auto gpu_cache = schema::CreateGpuCache(*fbb, name, version, data); fbb->Finish(gpu_cache); uint8_t *buf = fbb->GetBufferPointer(); - WriteToBin(cache_path_, reinterpret_cast(buf), fbb->GetSize()); + if (WriteToBin(cache_path_, reinterpret_cast(buf), fbb->GetSize()) != RET_OK) { + MS_LOG(ERROR) << "WriteToBin 
failed."; + return RET_ERROR; + } MS_LOG(INFO) << "store opencl cache ok, size=" << fbb->GetSize(); + return RET_OK; } cl::Buffer *OpenCLRuntime::CreateSharedMemoryBuffer(size_t size, void *host_ptr) { diff --git a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h index 788be5ea97b..024b7b70456 100644 --- a/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h +++ b/mindspore/lite/src/runtime/gpu/opencl/opencl_runtime.h @@ -203,7 +203,7 @@ class OpenCLRuntime { // for cache private: void LoadCache(); - void StoreCache(); + int StoreCache(); #ifdef MS_OPENCL_BINARY_CACHE bool enable_cache_{true}; #else diff --git a/mindspore/lite/src/runtime/infer_manager.cc b/mindspore/lite/src/runtime/infer_manager.cc index 3b10f0b0e8b..bb2720ee651 100644 --- a/mindspore/lite/src/runtime/infer_manager.cc +++ b/mindspore/lite/src/runtime/infer_manager.cc @@ -71,6 +71,12 @@ int KernelInferShape(const std::vector &inputs, const std::vecto MS_LOG(ERROR) << "No input!"; return RET_ERROR; } +#ifndef ENABLE_CONTROL_TENSORLIST + if (parameter->type_ == schema::PrimitiveType_Switch) { + MS_LOG(ERROR) << unsupport_control_tensorlist_log; + return RET_ERROR; + } +#endif std::vector in_tensors; std::vector out_tensors; if (parameter->type_ == schema::PrimitiveType_PartialFusion || parameter->type_ == schema::PrimitiveType_Switch || @@ -101,6 +107,7 @@ int KernelInferShape(const std::vector &inputs, const std::vecto if (out_tensors.at(i) == nullptr) { continue; } +#ifdef ENABLE_CONTROL_TENSORLIST if (reinterpret_cast(out_tensors.at(i))->data_type_ == TypeIdC::kObjectTypeTensorType) { auto *tensor_list_c = reinterpret_cast(out_tensors.at(i)); auto *tensor_list = reinterpret_cast(outputs.at(i)); @@ -112,8 +119,11 @@ int KernelInferShape(const std::vector &inputs, const std::vecto tensor_list->MallocTensorListData(static_cast(tensor_list_c->data_type_), tensor_shape); TensorListC2TensorList(tensor_list_c, tensor_list); } else { 
+#endif TensorC2Tensor(out_tensors.at(i), outputs.at(i)); +#ifdef ENABLE_CONTROL_TENSORLIST } +#endif if (ret == NNACL_INFER_INVALID) { outputs.at(i)->set_shape({-1}); } diff --git a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt index 1d74594c9df..be4c29cf375 100644 --- a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt +++ b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt @@ -4,8 +4,25 @@ file(GLOB KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc - ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc ) +if(MSLITE_STRING_KERNEL) + file(GLOB KERNEL_STRING_SRC + ${CMAKE_CURRENT_SOURCE_DIR}/string/*.cc + ) + set(KERNEL_SRC + ${KERNEL_SRC} + ${KERNEL_STRING_SRC} + ) +endif() +if(MSLITE_CONTROL_TENSORLIST) + file(GLOB KERNEL_CONTROL_TENSORLIST + ${CMAKE_CURRENT_SOURCE_DIR}/control/*.cc + ) + set(KERNEL_SRC + ${KERNEL_SRC} + ${KERNEL_CONTROL_TENSORLIST} + ) +endif() list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc) if(SUPPORT_TRAIN) diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc index 93d4fa2b4dc..fef89f2c486 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.cc @@ -19,6 +19,7 @@ #include "src/tensorlist.h" using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NOT_SUPPORT; using mindspore::lite::RET_OK; namespace mindspore::kernel { @@ -44,9 +45,14 @@ int CarryDataKernel::MoveData(const std::vector::iterator &dst_b MS_LOG(ERROR) << "Carry const data and graph inputs."; } else { if (src_tensor->data_type() == kObjectTypeTensorType && dst_tensor->data_type() == kObjectTypeTensorType) { +#ifdef ENABLE_CONTROL_TENSORLIST MS_LOG(ERROR) << "Carry MoveTensorListData"; ret = MoveTensorListData(reinterpret_cast(dst_tensor), 
reinterpret_cast(src_tensor)); +#else + MS_LOG(ERROR) << unsupport_control_tensorlist_log; + return RET_NOT_SUPPORT; +#endif } else { MS_LOG(ERROR) << "Carry MoveTensorData"; ret = MoveTensorData(dst_tensor, src_tensor); @@ -81,7 +87,7 @@ int CarryDataKernel::MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_ memcpy(dst_tensor->data(), src_tensor->data(), src_tensor->Size()); return RET_OK; } - +#ifdef ENABLE_CONTROL_TENSORLIST int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist) { // shape may change, because tensors.size() can be change in RunGraph if (dst_tensorlist->data_type() != src_tensorlist->data_type() || @@ -126,4 +132,5 @@ int CarryDataKernel::MoveTensorListData(lite::TensorList *dst_tensorlist, lite:: } return RET_OK; } +#endif } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h index 51462939b35..1a5f47fa30e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/carry_data.h @@ -35,7 +35,9 @@ class CarryDataKernel : public InnerKernel { const std::vector::iterator &src_begin, const std::vector::iterator &src_limit); int MoveTensorData(lite::Tensor *dst_tensor, lite::Tensor *src_tensor); +#ifdef ENABLE_CONTROL_TENSORLIST int MoveTensorListData(lite::TensorList *dst_tensorlist, lite::TensorList *src_tensorlist); +#endif }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc index ac5c247c713..3448f500547 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.cc @@ -18,7 +18,6 @@ #include #include "schema/model_generated.h" #include "src/kernel_registry.h" -#include "include/errorcode.h" using 
mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; @@ -47,7 +46,15 @@ void ConvolutionBaseCPUKernel::FreeAlignedData(void **ptr) { } ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() { - if (bias_data_ != nullptr) { + if (addr_map.find(reinterpret_cast(packed_weight_)) != addr_map.end()) { + FreeAlignedData(reinterpret_cast(&packed_weight_)); + } else if (packed_weight_ != nullptr) { + free(packed_weight_); + packed_weight_ = nullptr; + } + if (addr_map.find(reinterpret_cast(bias_data_)) != addr_map.end()) { + FreeAlignedData(reinterpret_cast(&bias_data_)); + } else if (bias_data_ != nullptr) { free(bias_data_); bias_data_ = nullptr; } @@ -110,6 +117,45 @@ int ConvolutionBaseCPUKernel::Init() { return RET_OK; } +int ConvolutionBaseCPUKernel::InitConvWeightBias() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto shape = weight_tensor->shape(); + if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { + MS_LOG(WARNING) << "The shape of weight tensor is not ready, the weight and bias would be inited in runtime."; + return lite::RET_OK; + } + if (MallocWeightBiasData() != RET_OK) { + MS_LOG(ERROR) << "Malloc data for bias and weight failed."; + return lite::RET_ERROR; + } + + if (in_tensors_.size() == kInputSize2) { + memcpy(bias_data_, origin_bias_, in_tensors_.at(kBiasIndex)->Size()); + } else { + MS_ASSERT(in_tensors_.size() == kInputSize1); + } + if (origin_weight_ != nullptr) { + PackWeight(); + } else { + is_repack_ = true; + MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime."; + } + return lite::RET_OK; +} + +int ConvolutionBaseCPUKernel::RepackWeight() { + origin_weight_ = origin_weight_ != nullptr ? 
origin_weight_ : in_tensors_.at(kWeightIndex)->data_c(); + if (packed_weight_ == nullptr && InitConvWeightBias() != RET_OK) { + MS_LOG(ERROR) << "Malloc data for bias and weight failed."; + return lite::RET_ERROR; + } + if (IsRepack() || (IsTrain() && IsTrainable())) { + is_repack_ = (IsTrain() && IsTrainable()) ? IsRepack() : false; + PackWeight(); + } + return RET_OK; +} + int ConvolutionBaseCPUKernel::CheckResizeValid() { // ===============check in channel================= // auto filter_tensor = in_tensors_.at(kWeightIndex); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h index c1908f1d39b..2af15f14667 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/convolution_base.h @@ -31,6 +31,7 @@ #include "include/context.h" #include "src/runtime/kernel/arm/base/layout_transform.h" #include "src/weight_decoder.h" +#include "include/errorcode.h" using mindspore::lite::InnerContext; @@ -38,8 +39,13 @@ namespace mindspore::kernel { class ConvolutionBaseCPUKernel : public InnerKernel { public: ConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, - const std::vector &outputs, const InnerContext *ctx) - : InnerKernel(parameter, inputs, outputs, ctx), ctx_(ctx), thread_count_(op_parameter_->thread_num_) { + const std::vector &outputs, const InnerContext *ctx, void *origin_weight, + void *origin_bias) + : InnerKernel(parameter, inputs, outputs, ctx), + ctx_(ctx), + thread_count_(op_parameter_->thread_num_), + origin_weight_(origin_weight), + origin_bias_(origin_bias) { conv_param_ = reinterpret_cast(op_parameter_); } ~ConvolutionBaseCPUKernel() override; @@ -61,8 +67,14 @@ class ConvolutionBaseCPUKernel : public InnerKernel { void FreeAlignedData(void **ptr); protected: + int InitConvWeightBias(); + int RepackWeight(); + + virtual int MallocWeightBiasData() { return RET_OK; } + virtual void 
PackWeight() {} bool IsRepack() { return is_repack_; } std::unordered_map addr_map; + void *packed_weight_ = nullptr; void *bias_data_ = nullptr; const InnerContext *ctx_ = nullptr; ConvParameter *conv_param_ = nullptr; @@ -70,6 +82,8 @@ class ConvolutionBaseCPUKernel : public InnerKernel { int tile_num_ = 0; int thread_count_ = 1; bool is_repack_ = false; + void *origin_weight_; // do not free + void *origin_bias_; // do not free }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc index 153f50e5ab9..35b1f97596d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.cc @@ -130,6 +130,7 @@ int GroupConvolutionBaseCPUKernel::PreProcess() { MS_LOG(ERROR) << "group conv out tensor malloc data failed."; return ret; } + output->ResetRefCount(); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h index 3d0e065333b..3dc41306d13 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/group_convolution_base.h @@ -31,7 +31,7 @@ class GroupConvolutionBaseCPUKernel : public ConvolutionBaseCPUKernel { GroupConvolutionBaseCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, GroupConvCreator *group_conv_creator, const int group_num) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr), group_conv_creator_(group_conv_creator), group_num_(group_num) {} // opParameter(in channel, out channel) in this kernel has been split to groups, if // you want to get real params, multiply in channel / out channel with 
group num diff --git a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc index cb8cfdb648a..29c0f1066f3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/quant_dtype_cast.cc @@ -83,7 +83,7 @@ int QuantDTypeCastCPUKernel::QuantDTypeCast(int task_id) { (!out_tensors_.front()->quant_params().empty() && out_tensors_.front()->quant_params().front().inited) ? out_tensors_.front()->quant_params().front() : in_tensors_.front()->quant_params().front(); - int ret = RET_OK; + int ret = RET_ERROR; if (src_dtype == TypeId::kNumberTypeInt8 && dst_dtype == TypeId::kNumberTypeFloat32) { ret = DoDequantizeInt8ToFp32(int8_ptr_ + thread_offset, float32_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); @@ -195,6 +195,9 @@ int QuantDTypeCastCPUKernel::Run() { if (float32_ptr_ == nullptr || uint8_ptr_ == nullptr) { return RET_NULL_PTR; } + } else { + MS_LOG(ERROR) << "Not support"; + return RET_ERROR; } auto ret = ParallelLaunch(this->ms_context_, QuantDTypeCastRun, this, thread_n_num_); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc index ec903b96355..5cfa2f1eccd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "src/runtime/kernel/arm/base/reshape_base.h" #include "schema/model_generated.h" #include "src/kernel_registry.h" @@ -31,54 +32,27 @@ using mindspore::schema::PrimitiveType_Squeeze; using mindspore::schema::PrimitiveType_Unsqueeze; namespace mindspore::kernel { -int ReshapeBaseCPUKernel::Init() { return ReSize(); } +int ReshapeBaseCPUKernel::Run() { + auto in_tensor = in_tensors().front(); + auto out_tensor = out_tensors().front(); -int ReshapeBaseCPUKernel::ReSize() { - int in_data_size = in_tensors_.front()->Size(); - int thread_num = op_parameter_->thread_num_; - if (thread_num == 0) { - MS_LOG(ERROR) << "div zero"; - return RET_ERROR; - } - cal_max_num_per_thread_ = UP_DIV(in_data_size, thread_num); - return RET_OK; -} - -int ReshapeBaseCPUKernel::RunImpl(int task_id) { - size_t start_index = task_id * cal_max_num_per_thread_; - if (start_index >= in_tensors_.front()->Size()) { + /* + * in_tensor : CPU-allocator ; out_tensor : GPU-allocator + * out_tensor data_c can not change + * */ + if (in_tensor->allocator() == nullptr || in_tensor->allocator() != out_tensor->allocator() || + op_parameter_->is_train_session_) { + memcpy(out_tensor->data_c(), in_tensor->data_c(), in_tensor->Size()); return RET_OK; } - auto cur_in_ptr = input_ptr_ + start_index; - auto cur_out_ptr = output_ptr_ + start_index; - size_t data_size = in_tensors_.front()->Size() - start_index; - data_size = data_size > cal_max_num_per_thread_ ? 
cal_max_num_per_thread_ : data_size; - memcpy(cur_out_ptr, cur_in_ptr, data_size); - return RET_OK; -} + out_tensor->FreeData(); + out_tensor->ResetRefCount(); -int ReshapeRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { - auto reshape = reinterpret_cast(cdata); - auto ret = reshape->RunImpl(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ReshapeRun error task_id[" << task_id << "] error_code[" << ret << "]"; - return ret; - } - return RET_OK; -} + in_tensor->allocator()->IncRefCount(in_tensor->data(), out_tensor->ref_count()); -int ReshapeBaseCPUKernel::Run() { - input_ptr_ = reinterpret_cast(in_tensors_.at(kInputIndex)->data_c()); - output_ptr_ = reinterpret_cast(out_tensors_.at(kOutputIndex)->data_c()); - if (input_ptr_ == nullptr || output_ptr_ == nullptr) { - return RET_NULL_PTR; - } - auto ret = ParallelLaunch(this->ms_context_, ReshapeRun, this, op_parameter_->thread_num_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Reshape run error error_code[" << ret << "]"; - return ret; - } + out_tensor->set_data(in_tensor->data_c()); + out_tensor->set_own_data(in_tensor->own_data()); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h index 774c8652493..4eb846501f0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h +++ b/mindspore/lite/src/runtime/kernel/arm/base/reshape_base.h @@ -19,6 +19,8 @@ #include #include "src/inner_kernel.h" #include "include/context.h" +#include "include/errorcode.h" +#include "src/runtime/kernel/arm/base/carry_data.h" using mindspore::lite::InnerContext; namespace mindspore::kernel { @@ -28,16 +30,9 @@ class ReshapeBaseCPUKernel : public InnerKernel { const std::vector &outputs, const InnerContext *ctx) : InnerKernel(parameter, inputs, outputs, ctx) {} ~ReshapeBaseCPUKernel() override = default; - - int Init() override; - int ReSize() override; + int Init() override { return lite::RET_OK; }; + int ReSize() 
override { return lite::RET_OK; }; int Run() override; - int RunImpl(int task_id); - - private: - size_t cal_max_num_per_thread_ = 0; - uint8_t *input_ptr_ = nullptr; - uint8_t *output_ptr_ = nullptr; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc index 2b483d03ebe..c2772782962 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/slice_base.cc @@ -39,8 +39,8 @@ int SliceCPUKernel::ReSize() { auto begin_tensor = in_tensors_[1]; auto size_tensor = in_tensors_[2]; - MS_ASSERT(in_tensor->shape().size() == begin_tensor->ElementsNum()); - MS_ASSERT(in_tensor->shape().size() == size_tensor->ElementsNum()); + MS_ASSERT(in_tensor->shape().size() == static_cast(begin_tensor->ElementsNum())); + MS_ASSERT(in_tensor->shape().size() == static_cast(size_tensor->ElementsNum())); MS_ASSERT(in_tensor->shape().size() <= DIMENSION_8D); auto begin = reinterpret_cast(begin_tensor->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc index a24dbf76dcf..49e9e9e4d52 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/softmax_base.cc @@ -29,6 +29,8 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { int SoftmaxBaseCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (softmax_param_ == nullptr) { MS_LOG(ERROR) << "SoftmaxParameter nullptr"; return RET_NULL_PTR; diff --git a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc index 91aa761aab1..c9d6c6ae48c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/base/stack_base.cc @@ -78,7 +78,7 @@ int 
StackBaseCPUKernel::Init() { } int StackBaseCPUKernel::Execute(int task_id) { - auto output_data = reinterpret_cast(out_tensors_.at(0)->data_c()); + auto output_data = reinterpret_cast(out_tensors_.at(0)->data_c()); if (output_data == nullptr) { return RET_NULL_PTR; } @@ -86,7 +86,7 @@ int StackBaseCPUKernel::Execute(int task_id) { auto start = task_id * step; auto end = MSMIN(start + step, outer_size_); auto input_num = in_tensors_.size(); - auto output = output_data + input_num * start * copy_size_; + auto output = reinterpret_cast(output_data) + input_num * start * copy_size_; Stack(all_inputs_, reinterpret_cast(output), input_num, copy_size_, start, end); return RET_OK; } @@ -106,7 +106,7 @@ int StackBaseCPUKernel::Run() { return RET_ERROR; } for (size_t j = 0; j < inputs_num; ++j) { - auto input_data = reinterpret_cast(in_tensors_.at(j)->data_c()); + auto input_data = reinterpret_cast(in_tensors_.at(j)->data_c()); if (input_data == nullptr) { return RET_NULL_PTR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc b/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc index b7a633b45bd..aba1516c09d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc +++ b/mindspore/lite/src/runtime/kernel/arm/control/tensorlist_reserve.cc @@ -42,11 +42,7 @@ int TensorListReserveCPUKernel::Run() { std::vector > tmp_shape(num_elements, std::vector()); output->set_element_shape(std::vector(ele_shape_ptr, ele_shape_ptr + input0->ElementsNum())); output->set_shape(std::vector(1, num_elements)); - auto ret = output->MallocTensorListData(kTypeUnknown, tmp_shape); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Failed to MallocTensorListData"; - return ret; - } + output->MallocTensorListData(kTypeUnknown, tmp_shape); } output->set_tensors_data_type(element_dtype_); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc 
index 712f936fd4b..9460cd26043 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/activation_fp16.cc @@ -35,6 +35,8 @@ using mindspore::schema::PrimitiveType_Activation; namespace mindspore::kernel { int ActivationFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 && type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc index 72a4f7fa082..d75177920e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_compare_fp16.cc @@ -66,6 +66,8 @@ ArithmeticCompareOptFuncFp16 GetOptimizedArithmeticCompareFun(int primitive_type } int ArithmeticCompareFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -162,7 +164,7 @@ int ArithmeticCompareFP16CPUKernel::Run() { input0_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(0), static_cast(this->ms_context_)); input1_fp16_ = ConvertInputFp32toFp16(in_tensors_.at(1), static_cast(this->ms_context_)); - output_fp16_ = reinterpret_cast(output_tensor->MutableData()); + output_fp16_ = reinterpret_cast(output_tensor->data_c()); if (input0_fp16_ == nullptr || input1_fp16_ == nullptr || output_fp16_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; FreeTmpBuffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc index a81bbff7638..7b417ff90a0 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_fp16.cc @@ -21,6 +21,7 @@ using mindspore::kernel::KERNEL_ARCH; using mindspore::lite::KernelRegistrar; using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_AddFusion; @@ -183,8 +184,11 @@ int ArithmeticFP16CPUKernel::Run() { return RET_ERROR; } auto ret = ParallelLaunch(this->ms_context_, ArithmeticsRun, this, op_parameter_->thread_num_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "ArithmeticsRun failed, ret : " << ret; + } if (out_tensors_.at(0)->data_type() == kNumberTypeFloat32) { - Float16ToFloat32(static_cast(output_ptr_), reinterpret_cast(output_tensor->MutableData()), + Float16ToFloat32(static_cast(output_ptr_), reinterpret_cast(output_tensor->data_c()), output_tensor->ElementsNum()); } FreeFp16Buffer(); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc index bcba2c95056..1f75a664e0c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/arithmetic_self_fp16.cc @@ -76,18 +76,28 @@ int ArithmeticSelfFp16CPUKernel::DoExecute(int task_id) { int ArithmeticSelfFp16CPUKernel::Run() { auto input_tensor = in_tensors_.at(0); auto output_tensor = out_tensors_.at(0); - + MS_ASSERT(input_tensor != nullptr); + MS_ASSERT(output_tensor != nullptr); if (input_tensor->data_type() == kNumberTypeFloat32) { - input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast(this->ms_context_)); + input_fp16_ptr_ = ConvertInputFp32toFp16(input_tensor, static_cast(ms_context_)); + if (input_fp16_ptr_ == nullptr) { + return RET_ERROR; + } } else { input_fp16_ptr_ = reinterpret_cast(input_tensor->data_c()); + MS_ASSERT(input_fp16_ptr_ != nullptr); } output_fp16_ptr_ = 
reinterpret_cast(output_tensor->data_c()); + MS_ASSERT(output_fp16_ptr_ != nullptr); - auto ret = ParallelLaunch(this->ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_); + auto ret = ParallelLaunch(ms_context_, ArithmeticSelfRun, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "ArithmeticSelfRun error error_code[" << ret << "]"; } + if (input_tensor->data_type() == kNumberTypeFloat32) { + ms_context_->allocator->Free(input_fp16_ptr_); + input_fp16_ptr_ = nullptr; + } return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc index 35f526afe38..98d6fd5312c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/batchnorm_fp16.cc @@ -38,9 +38,9 @@ int BatchnormFp16CPUKernel::InitConstTensor() { FreeMeanAndVariance(); return RET_ERROR; } - Float32ToFloat16(reinterpret_cast(mean_fp32->MutableData()), reinterpret_cast(mean_), + Float32ToFloat16(reinterpret_cast(mean_fp32->data_c()), reinterpret_cast(mean_), mean_fp32->ElementsNum()); - Float32ToFloat16(reinterpret_cast(variance_fp32->MutableData()), reinterpret_cast(variance_), + Float32ToFloat16(reinterpret_cast(variance_fp32->data_c()), reinterpret_cast(variance_), variance_fp32->ElementsNum()); } else { auto ret = BatchnormCPUKernel::InitConstTensor(); @@ -68,7 +68,7 @@ int BatchnormFp16CPUKernel::Run() { MS_LOG(ERROR) << "BatchnormRun error error_code[" << ret << "]"; } if (is_output_fp32_) { - Float16ToFloat32(output_, reinterpret_cast(output_tensor->MutableData()), output_tensor->ElementsNum()); + Float16ToFloat32(output_, reinterpret_cast(output_tensor->data_c()), output_tensor->ElementsNum()); } FreeInputAndOutput(); return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc index a8da79ef223..58cb9aaa3f2 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/biasadd_fp16.cc @@ -58,8 +58,10 @@ int BiasAddCPUFp16Kernel::Run() { is_repack_ = false; } } - auto in = reinterpret_cast(in_tensors_.at(0)->MutableData()); - auto out = reinterpret_cast(out_tensors_.at(0)->MutableData()); + auto in = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto out = reinterpret_cast(out_tensors_.at(0)->data_c()); + MS_ASSERT(in != nullptr); + MS_ASSERT(out != nullptr); size_t data_size = in_tensors_.at(0)->ElementsNum(); MS_ASSERT(ms_context_->allocator != nullptr); auto tile_in = reinterpret_cast(ms_context_->allocator->Malloc(data_size * sizeof(float16_t))); @@ -93,7 +95,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() { return RET_NULL_PTR; } } - auto bias = reinterpret_cast(bias_tensor_->MutableData()); + auto bias = reinterpret_cast(bias_tensor_->data_c()); if (bias == nullptr) { MS_LOG(ERROR) << "bias is nullptr!"; return RET_NULL_PTR; @@ -102,7 +104,7 @@ int BiasAddCPUFp16Kernel::GetBiasData() { bias_data_[i] = static_cast(bias[i]); } } else { - bias_data_ = reinterpret_cast(bias_tensor_->MutableData()); + bias_data_ = reinterpret_cast(bias_tensor_->data_c()); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "bias_data_ is nullptr"; return RET_NULL_PTR; @@ -112,6 +114,8 @@ int BiasAddCPUFp16Kernel::GetBiasData() { } int BiasAddCPUFp16Kernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); bias_tensor_ = in_tensors_.at(1); MS_ASSERT(bias_tensor_ != nullptr); if (!InferShapeDone()) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc index 0dc3170de08..a17f381f40e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/cast_fp16.cc @@ -37,6 +37,8 @@ int CastFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } // namespace int 
CastFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -55,6 +57,9 @@ int CastFp16CPUKernel::ReSize() { int CastFp16CPUKernel::DoCast(int thread_id) { auto input = in_tensors_.at(0); + MS_ASSERT(input != nullptr); + auto input_data = input->data_c(); + MS_ASSERT(input_data != nullptr); int data_num = MSMIN(stride_, data_num_ - thread_id * stride_); if (data_num <= 0) { return RET_OK; @@ -63,26 +68,27 @@ int CastFp16CPUKernel::DoCast(int thread_id) { auto offset = thread_id * stride_; auto output = out_tensors_.at(0); auto output_data = output->data_c(); + MS_ASSERT(output_data != nullptr); auto input_data_type = input->data_type(); auto output_data_type = output->data_type(); if (input_data_type == kNumberTypeFloat16) { switch (output_data_type) { case kNumberTypeInt64: - Float16ToInt64(reinterpret_cast(input->data_c()) + offset, + Float16ToInt64(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; case kNumberTypeInt32: - Float16ToInt32(reinterpret_cast(input->data_c()) + offset, + Float16ToInt32(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; case kNumberTypeFloat32: - Float16ToFloat32(reinterpret_cast(input->MutableData()) + offset, + Float16ToFloat32(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; case kNumberTypeFloat16: - memcpy(reinterpret_cast(output_data) + offset, - reinterpret_cast(input->data_c()) + offset, data_num * sizeof(float16_t)); + memcpy(reinterpret_cast(output_data) + offset, reinterpret_cast(input_data) + offset, + data_num * sizeof(float16_t)); break; default: MS_LOG(ERROR) << "Unsupported output data type " << output_data_type; @@ -91,19 +97,19 @@ int CastFp16CPUKernel::DoCast(int thread_id) { } else if (input_data_type == kNumberTypeFloat32) { switch (output_data_type) { case kNumberTypeInt64: - 
Float32ToInt64(reinterpret_cast(input->data_c()) + offset, + Float32ToInt64(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; case kNumberTypeInt32: - Float32ToInt32(reinterpret_cast(input->data_c()) + offset, + Float32ToInt32(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; case kNumberTypeFloat32: - memcpy(reinterpret_cast(output_data) + offset, reinterpret_cast(input->data_c()) + offset, + memcpy(reinterpret_cast(output_data) + offset, reinterpret_cast(input_data) + offset, data_num * sizeof(float)); break; case kNumberTypeFloat16: - Float32ToFloat16(reinterpret_cast(input->MutableData()) + offset, + Float32ToFloat16(reinterpret_cast(input_data) + offset, reinterpret_cast(output_data) + offset, data_num); break; default: @@ -113,7 +119,7 @@ int CastFp16CPUKernel::DoCast(int thread_id) { } else if (input_data_type == kNumberTypeInt32) { switch (output_data_type) { case kNumberTypeFloat32: - Int32ToFloat32(static_cast(input->data_c()) + offset, static_cast(output_data) + offset, + Int32ToFloat32(static_cast(input_data) + offset, static_cast(output_data) + offset, data_num); break; default: diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc index 00d9bb92c7d..355ad85f5d6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/concat_fp16.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Concat; namespace mindspore::kernel { int ConcatFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -98,9 +100,11 @@ int ConcatFp16CPUKernel::Run() { const auto in_tensor = in_tensors_.at(i); if (in_tensor->data_type() == kNumberTypeFloat || in_tensor->data_type() == kNumberTypeFloat32) { auto in_tensor_data = 
reinterpret_cast(in_tensor->data_c()); + MS_ASSERT(in_tensor_data != nullptr); Float32ToFloat16(in_tensor_data, fp16_inputs_[i], in_tensor->ElementsNum()); } else { fp16_inputs_[i] = reinterpret_cast(in_tensor->data_c()); + MS_ASSERT(fp16_inputs_[i] != nullptr); } shapes.push_back(in_tensors_[i]->shape()); @@ -111,6 +115,7 @@ int ConcatFp16CPUKernel::Run() { auto output_addr = out_tensors_.at(0)->MutableData(); if (out_tensors_.at(0)->data_type() == kNumberTypeFloat16) { fp16_output_ = reinterpret_cast(out_tensors_.at(0)->data_c()); + MS_ASSERT(fp16_output_ != nullptr); } int dtype_len = in_tensors_.at(0)->data_type() == kNumberTypeInt32 ? sizeof(int32_t) : sizeof(float16_t); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc index f3257b424a7..84d1018efa3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.cc @@ -38,10 +38,6 @@ int Convolution1x1FP16CPUKernel::InitMatmulParam() { Convolution1x1FP16CPUKernel::~Convolution1x1FP16CPUKernel() { FreeTmpBuffer(); - if (weight_ptr_ != nullptr) { - free(weight_ptr_); - weight_ptr_ = nullptr; - } if (matmul_param_ != nullptr) { delete matmul_param_; matmul_param_ = nullptr; @@ -82,14 +78,23 @@ int Convolution1x1FP16CPUKernel::InitConv1x1Param() { return RET_OK; } -int Convolution1x1FP16CPUKernel::InitWeightBias() { +int Convolution1x1FP16CPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto input_channel = weight_tensor->Channel(); auto output_channel = weight_tensor->Batch(); - if (in_tensors_.size() == 3) { - size_t size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); - size_t bias_size = output_channel * sizeof(float16_t); + size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); + if (packed_weight_ == nullptr) { + packed_weight_ = malloc(size); + 
if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!"; + return RET_ERROR; + } + } + memset(reinterpret_cast(packed_weight_), 0, size); + + if (in_tensors_.size() == kInputSize2) { + size = UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); if (bias_data_ == nullptr) { bias_data_ = malloc(size); if (bias_data_ == nullptr) { @@ -97,32 +102,29 @@ int Convolution1x1FP16CPUKernel::InitWeightBias() { return RET_ERROR; } } - void *bias_origin_tmp = IsTrainable() ? in_tensors_.at(kBiasIndex)->data_c() : origin_bias_; - memcpy(bias_data_, bias_origin_tmp, output_channel * sizeof(float16_t)); - memset(reinterpret_cast(bias_data_) + bias_size, 0, size - bias_size); + memset(reinterpret_cast(bias_data_), 0, size); } - - size_t size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float16_t); - size_t down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float16_t); - if (weight_ptr_ == nullptr) { - weight_ptr_ = reinterpret_cast(malloc(size)); - if (weight_ptr_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; - return RET_ERROR; - } - } - void *weight_origin_tmp = IsTrainable() ? weight_tensor->data_c() : origin_weight_; - memset(reinterpret_cast(weight_ptr_) + down_size, 0, size - down_size); -#ifdef ENABLE_ARM64 - RowMajor2Col16MajorFp16Opt(static_cast(weight_origin_tmp), weight_ptr_, output_channel, - input_channel); -#else - ColMajor2Row8MajorFp16(weight_origin_tmp, weight_ptr_, input_channel, output_channel, true); -#endif return RET_OK; } +void Convolution1x1FP16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = weight_tensor->Channel(); + auto output_channel = weight_tensor->Batch(); + void *weight_origin = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(weight_origin != nullptr); +#ifdef ENABLE_ARM64 + RowMajor2Col16MajorFp16Opt(static_cast(weight_origin), + reinterpret_cast(packed_weight_), output_channel, input_channel); +#else + ColMajor2Row8MajorFp16(weight_origin, reinterpret_cast(packed_weight_), input_channel, output_channel, + true); +#endif +} + int Convolution1x1FP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C12NUM; col_tile_ = C16NUM; @@ -135,7 +137,7 @@ int Convolution1x1FP16CPUKernel::Init() { MS_LOG(ERROR) << "Init matmul_param_ failed."; return RET_ERROR; } - int ret = InitWeightBias(); + int ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return ret; @@ -180,11 +182,13 @@ int Convolution1x1FP16CPUKernel::RunOc(int task_id) { auto bias = (bias_data_ == nullptr) ? nullptr : reinterpret_cast(bias_data_) + thread_stride_ * task_id; #ifdef ENABLE_ARM64 - MatMul12x16Fp16Opt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_, + MatMul12x16Fp16Opt(pack_input_, + reinterpret_cast(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_, output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_, matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc); #else - MatMul12x8A32Fp16(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_, + MatMul12x8A32Fp16(pack_input_, + reinterpret_cast(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_, output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_, matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc); #endif @@ -204,13 +208,13 @@ int Convolution1x1FP16CPUKernel::RunHw(int task_id) { float16_t *thread_output_ptr = output_ptr_ + task_id * thread_stride_ * matmul_param_->col_; #ifdef ENABLE_ARM64 - 
MatMul12x16Fp16Opt(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast(bias_data_), - matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_, - OutType_Nhwc); + MatMul12x16Fp16Opt(thread_pack_input, reinterpret_cast(packed_weight_), thread_output_ptr, + reinterpret_cast(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, + matmul_param_->col_, matmul_param_->col_, OutType_Nhwc); #else - MatMul12x8A32Fp16(thread_pack_input, weight_ptr_, thread_output_ptr, reinterpret_cast(bias_data_), - matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, matmul_param_->col_, matmul_param_->col_, - OutType_Nhwc); + MatMul12x8A32Fp16(thread_pack_input, reinterpret_cast(packed_weight_), thread_output_ptr, + reinterpret_cast(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_hw_, + matmul_param_->col_, matmul_param_->col_, OutType_Nhwc); #endif return RET_OK; } @@ -250,14 +254,9 @@ int Convolution1x1FP16CPUKernel::Run() { MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; return RET_MEMORY_FAILED; } - - if (IsTrainable() && (IsTrain() || IsRepack())) { - auto ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h index 822572aba2a..f2420e2fdfa 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_1x1_fp16.h @@ -31,9 +31,7 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel { Convolution1x1FP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const 
std::vector &outputs, const InnerContext *ctx, void *origin_weight, void *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} ~Convolution1x1FP16CPUKernel() override; int Init() override; @@ -49,16 +47,14 @@ class Convolution1x1FP16CPUKernel : public ConvolutionBaseCPUKernel { void FreeTmpBuffer(); int InitConv1x1Param(); int InitMatmulParam(); - int InitWeightBias(); + int MallocWeightBiasData() override; + void PackWeight() override; private: bool pre_trans_input_ = false; bool multi_thread_by_hw_ = false; int thread_count_ = 1; int thread_stride_ = 0; - void *origin_weight_; // do not free - void *origin_bias_; // do not free - float16_t *weight_ptr_ = nullptr; float16_t *input_ptr_ = nullptr; float16_t *pack_input_ = nullptr; float16_t *output_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc index e6e1dfed963..71c79f61139 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.cc @@ -65,8 +65,11 @@ void *ConvolutionDelegateFP16CPUKernel::CopyData(lite::Tensor *tensor) { } int ConvolutionDelegateFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { - origin_weight_ = CopyData(in_tensors_.at(kWeightIndex)); + auto weight_tensor = in_tensors_.at(kWeightIndex); + origin_weight_ = weight_tensor->data_c() != nullptr ? 
CopyData(weight_tensor) : nullptr; need_free_ = need_free_ | WEIGHT_NEED_FREE; if (in_tensors_.size() == 3) { origin_bias_ = CopyData(in_tensors_.at(kBiasIndex)); @@ -75,7 +78,6 @@ int ConvolutionDelegateFP16CPUKernel::Init() { return RET_OK; } origin_weight_ = in_tensors_.at(kWeightIndex)->data_c(); - MS_ASSERT(origin_weight_ != nullptr); if (in_tensors_.size() == 3) { origin_bias_ = in_tensors_.at(kBiasIndex)->data_c(); MS_ASSERT(origin_bias_ != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h index 12018df715f..ed20b68a3d2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_delegate_fp16.h @@ -60,7 +60,7 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel { return fp16_conv_kernel_->SetTrainable(trainable); } - void set_in_tensor(lite::Tensor *in_tensor, int index) override { + void set_in_tensor(lite::Tensor *in_tensor, size_t index) override { MS_ASSERT(index < in_tensors_.size()); this->in_tensors_[index] = in_tensor; if (fp16_conv_kernel_ != nullptr) { @@ -68,7 +68,7 @@ class ConvolutionDelegateFP16CPUKernel : public InnerKernel { } } - void set_out_tensor(lite::Tensor *out_tensor, int index) override { + void set_out_tensor(lite::Tensor *out_tensor, size_t index) override { MS_ASSERT(index < out_tensors_.size()); this->out_tensors_[index] = out_tensor; if (fp16_conv_kernel_ != nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc index 43f86b0f1d6..b5e54dbcb40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.cc @@ -26,51 +26,42 @@ using mindspore::lite::RET_MEMORY_FAILED; using 
mindspore::lite::RET_OK; namespace mindspore::kernel { -ConvolutionDepthwise3x3Fp16CPUKernel::~ConvolutionDepthwise3x3Fp16CPUKernel() { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } +void ConvolutionDepthwise3x3Fp16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int channel = weight_tensor->Batch(); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackWeightConvDw3x3Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + channel); } -int ConvolutionDepthwise3x3Fp16CPUKernel::InitWeightBias() { - // init weight: k, h, w, c; k == group == output_channel, c == 1 - auto weight_tensor = in_tensors_[kWeightIndex]; - auto origin_weight = reinterpret_cast(weight_tensor->MutableData()); +int ConvolutionDepthwise3x3Fp16CPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); int channel = weight_tensor->Batch(); int c8 = UP_ROUND(channel, C8NUM); int pack_weight_size = c8 * C12NUM; - if (packed_weight_ == nullptr) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + packed_weight_ = malloc(pack_weight_size * sizeof(float16_t)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } - PackWeightConvDw3x3Fp16(origin_weight, packed_weight_, channel); - if (bias_data_ == nullptr) { - bias_data_ = reinterpret_cast(malloc(c8 * sizeof(float16_t))); + bias_data_ = malloc(c8 * sizeof(float16_t)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } memset(bias_data_, 0, c8 * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_[kBiasIndex]; - auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float16_t)); - } - return RET_OK; } int 
ConvolutionDepthwise3x3Fp16CPUKernel::Init() { - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise 3x3 fp16 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -92,8 +83,8 @@ int ConvolutionDepthwise3x3Fp16CPUKernel::Execute(int task_id) { int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_); int start_oh = step_oh * task_id; int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_); - ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - start_oh, end_oh); + ConvDw3x3Fp16(output_ptr_, buffer, input_ptr_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, start_oh, end_oh); return RET_OK; } @@ -108,14 +99,11 @@ int ConvDw3x3Fp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) } int ConvolutionDepthwise3x3Fp16CPUKernel::Run() { - if (IsTrainable() && (IsTrain() || IsRepack())) { - auto ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } + int units = UP_DIV(conv_param_->output_w_, C2NUM); // F(2, 3) contains 2 conv units int c8 = UP_ROUND(conv_param_->input_channel_, C8NUM); int buffer_size = units * c8 * C12NUM * conv_param_->thread_num_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h index c6663837369..cc66bb528d0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_3x3_fp16.h @@ -28,19 +28,20 @@ class ConvolutionDepthwise3x3Fp16CPUKernel : public 
ConvolutionBaseCPUKernel { public: ConvolutionDepthwise3x3Fp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~ConvolutionDepthwise3x3Fp16CPUKernel() override; + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + ~ConvolutionDepthwise3x3Fp16CPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: - float16_t *packed_weight_ = nullptr; + void PackWeight() override; + int MallocWeightBiasData() override; float16_t *input_ptr_ = nullptr; float16_t *output_ptr_ = nullptr; float16_t *buffer_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc index af240421dee..fae625bc7b9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.cc @@ -23,50 +23,42 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; namespace mindspore::kernel { -ConvolutionDepthwiseFp16CPUKernel::~ConvolutionDepthwiseFp16CPUKernel() { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } +void ConvolutionDepthwiseFp16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNCHWToNHWCFp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, + weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), 0, 0); } -int ConvolutionDepthwiseFp16CPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 +int ConvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); int channel = weight_tensor->Batch(); int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width(); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); if (packed_weight_ == nullptr) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + packed_weight_ = malloc(pack_weight_size * sizeof(float16_t)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } - PackNCHWToNHWCFp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch(), 0, 0); - if (bias_data_ == nullptr) { - bias_data_ = reinterpret_cast(malloc(channel * sizeof(float16_t))); + bias_data_ = malloc(channel * sizeof(float16_t)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } memset(bias_data_, 0, channel * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->Size()); - } return RET_OK; } int ConvolutionDepthwiseFp16CPUKernel::Init() { - auto ret = InitWeightBias(); + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp16 
InitConvWeightBias failed."; return RET_ERROR; } @@ -94,7 +86,8 @@ int ConvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!"; return RET_ERROR; } - ConvDwFp16(output_ptr, input_ptr, packed_weight_, reinterpret_cast(bias_data_), conv_param_, task_id); + ConvDwFp16(output_ptr, input_ptr, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, task_id); return RET_OK; } @@ -109,13 +102,9 @@ static int ConvDwFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sc } int ConvolutionDepthwiseFp16CPUKernel::Run() { - if (IsTrainable() && (IsTrain() || IsRepack())) { - auto ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } auto ret = ParallelLaunch(this->ms_context_, ConvDwFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h index 4255ff18094..1b37edc0cd3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_fp16.h @@ -36,19 +36,20 @@ class ConvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~ConvolutionDepthwiseFp16CPUKernel() override; + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} + ~ConvolutionDepthwiseFp16CPUKernel() override {} int Init() override; int ReSize() override; int Run() override; int Eval() override; - int InitWeightBias(); int Execute(int task_id); private: - float16_t *packed_weight_ = nullptr; + void PackWeight() override; + int MallocWeightBiasData() override; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc index dcdcc930b6b..294f8a8a404 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.cc @@ -28,10 +28,6 @@ ConvolutionDepthwiseSWFp16CPUKernel::~ConvolutionDepthwiseSWFp16CPUKernel() { delete sliding_; sliding_ = nullptr; } - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } } int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() { @@ -51,58 +47,56 @@ int ConvolutionDepthwiseSWFp16CPUKernel::InitPackedInputOutput() { if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; ms_context_->allocator->Free(packed_input_); + packed_input_ = nullptr; return RET_ERROR; } } return RET_OK; } -int ConvolutionDepthwiseSWFp16CPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 +void ConvolutionDepthwiseSWFp16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); +} + +int ConvolutionDepthwiseSWFp16CPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); - if (packed_weight_ == nullptr) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + packed_weight_ = malloc(pack_weight_size * sizeof(float16_t)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } - PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); - if (bias_data_ == nullptr) { - bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(float16_t))); + bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } } memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->Size()); - } - conv_param_->thread_num_ = MSMIN(thread_count_, OC8); return RET_OK; -} // namespace mindspore::kernel +} int ConvolutionDepthwiseSWFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - auto ret = InitWeightBias(); + auto ret = 
InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp16 InitConvWeightBias failed."; return RET_ERROR; } @@ -122,8 +116,8 @@ int ConvolutionDepthwiseSWFp16CPUKernel::ReSize() { } int ConvolutionDepthwiseSWFp16CPUKernel::Execute(int task_id) { - ConvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - sliding_, task_id); + ConvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } @@ -151,6 +145,7 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { MS_ASSERT(output_ptr != nullptr); if (input_ptr == nullptr || output_ptr == nullptr) { MS_LOG(ERROR) << "Convolution depthwise Fp16 get null tensor data!"; + FreePackedInputOutput(); return RET_ERROR; } @@ -161,14 +156,9 @@ int ConvolutionDepthwiseSWFp16CPUKernel::Run() { packed_input_ = input_ptr; packed_output_ = output_ptr; } - - if (IsTrainable() && (IsTrain() || IsRepack())) { - ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvDwSWFp16Run, this, conv_param_->thread_num_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h index 94a8071bd99..5219c2c8570 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_depthwise_slidewindow_fp16.h @@ -37,7 +37,8 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel { public: 
ConvolutionDepthwiseSWFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~ConvolutionDepthwiseSWFp16CPUKernel() override; int Init() override; @@ -46,13 +47,13 @@ class ConvolutionDepthwiseSWFp16CPUKernel : public ConvolutionBaseCPUKernel { int Eval() override; int InitPackedInputOutput(); - int InitWeightBias(); int Execute(int task_id); private: + void PackWeight() override; + int MallocWeightBiasData() override; void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; - float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; float16_t *packed_output_ = nullptr; bool need_align_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc index e21fca572a1..25ebcebf147 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.cc @@ -27,7 +27,18 @@ using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; namespace mindspore::kernel { -int ConvolutionFP16CPUKernel::InitWeightBias() { +void ConvolutionFP16CPUKernel::PackWeight() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + int in_channel = filter_tensor->Channel(); + int out_channel = filter_tensor->Batch(); + int kernel_plane = filter_tensor->Height() * filter_tensor->Width(); + void *weight_origin = IsTrainable() ? 
filter_tensor->data_c() : origin_weight_; + MS_ASSERT(weight_origin != nullptr); + RowMajor2Col8MajorFp16(weight_origin, reinterpret_cast(packed_weight_), out_channel, + in_channel * kernel_plane, false); +} + +int ConvolutionFP16CPUKernel::MallocWeightBiasData() { auto filter_tensor = in_tensors_.at(kWeightIndex); int in_channel = filter_tensor->Channel(); int out_channel = filter_tensor->Batch(); @@ -39,15 +50,13 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { // init weight if (packed_weight_ == nullptr) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + packed_weight_ = malloc(pack_weight_size * sizeof(float16_t)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "malloc packed_weight_ failed."; return RET_ERROR; } } memset(packed_weight_, 0, pack_weight_size * sizeof(float16_t)); - void *weight_origin_tmp = IsTrainable() ? filter_tensor->data_c() : origin_weight_; - RowMajor2Col8MajorFp16(weight_origin_tmp, packed_weight_, out_channel, in_channel * kernel_plane, false); // init bias if (bias_data_ == nullptr) { @@ -58,11 +67,6 @@ int ConvolutionFP16CPUKernel::InitWeightBias() { } } memset(bias_data_, 0, oc8 * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_.at(kBiasIndex); - void *bias_origin_tmp = IsTrainable() ? 
bias_tensor->data_c() : origin_bias_; - memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t)); - } return RET_OK; } @@ -85,13 +89,15 @@ int ConvolutionFP16CPUKernel::InitTmpBuffer() { } int ConvolutionFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C16NUM; #else row_tile_ = C12NUM; #endif col_tile_ = C8NUM; - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; @@ -129,8 +135,8 @@ int ConvolutionFP16CPUKernel::RunImpl(int task_id) { MS_LOG(ERROR) << "Convolution Fp16 get null tensor data!"; return RET_ERROR; } - ConvFp16(input_ptr, packed_input_, packed_weight_, reinterpret_cast(bias_data_), col_major_input_, - output_ptr, task_id, conv_param_); + ConvFp16(input_ptr, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), col_major_input_, output_ptr, task_id, conv_param_); return RET_OK; } @@ -151,14 +157,9 @@ int ConvolutionFP16CPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - - if (IsTrainable() && (IsTrain() || IsRepack())) { - ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "Convolution 1x1 fp16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvolutionFp16Impl, this, thread_count_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h index 011976a2314..ef08a5dfa2a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_fp16.h @@ -28,26 +28,20 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { ConvolutionFP16CPUKernel(OpParameter *parameter, const 
std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, void *origin_weight, void *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} - ~ConvolutionFP16CPUKernel() override { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } - } + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} + ~ConvolutionFP16CPUKernel() override {} int Init() override; int ReSize() override; int Run() override; int Eval() override; int RunImpl(int task_id); - int InitWeightBias(); int InitTmpBuffer(); void AdjustNumberOfThread(); private: + void PackWeight() override; + int MallocWeightBiasData() override; void FreeTmpBuffer() { if (packed_input_ != nullptr) { ctx_->allocator->Free(packed_input_); @@ -58,10 +52,7 @@ class ConvolutionFP16CPUKernel : public ConvolutionBaseCPUKernel { col_major_input_ = nullptr; } } - void *origin_weight_; // do not free - void *origin_bias_; // do not free float16_t *packed_input_ = nullptr; - float16_t *packed_weight_ = nullptr; float16_t *col_major_input_ = nullptr; int col_tile_; int row_tile_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc index d213679f02c..33ad5e4da68 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.cc @@ -27,11 +27,12 @@ int ConvolutionWinogradFP16CPUKernel::WinogradFilterTransformFp16(const float16_ return RET_ERROR; } - return WinogradWeightTransformFp16(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_, - kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_, true); + return WinogradWeightTransformFp16(weight_data, reinterpret_cast(packed_weight_), matrix_g, matrix_gt, + oc_block, 
input_unit_, kernel_unit_, conv_param_->input_channel_, + conv_param_->output_channel_, true); } -int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { +int ConvolutionWinogradFP16CPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); int in_channel = weight_tensor->Channel(); int out_channel = weight_tensor->Batch(); @@ -39,19 +40,16 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { conv_param_->output_channel_ = out_channel; int oc_block_num = UP_DIV(out_channel, col_tile_); // init weight - // set data auto trans_matrix_data_size = input_unit_ * input_unit_ * in_channel * oc_block_num * col_tile_ * sizeof(float16_t); - if (trans_weight_ == nullptr) { - trans_weight_ = reinterpret_cast(malloc(trans_matrix_data_size)); - if (trans_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc trans_weight_ failed."; + if (packed_weight_ == nullptr) { + packed_weight_ = malloc(trans_matrix_data_size); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc packed_weight_ failed."; return RET_ERROR; } } - memset(trans_weight_, 0, trans_matrix_data_size); + memset(packed_weight_, 0, trans_matrix_data_size); - float matrix_g[64]; - float matrix_gt[64]; float matrix_a[64]; float matrix_at[64]; float matrix_b[64]; @@ -61,19 +59,12 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { coef = 0.5f; } auto ret = - CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); + CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_); if (ret != RET_OK) { MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; return ret; } - void *weight_origin_tmp = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; - ret = WinogradFilterTransformFp16(reinterpret_cast(weight_origin_tmp), matrix_g, matrix_gt, col_tile_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "winograd filter transform failed."; - return ret; - } - // init bias if (bias_data_ == nullptr) { bias_data_ = malloc(oc_block_num * col_tile_ * sizeof(float16_t)); if (bias_data_ == nullptr) { @@ -82,14 +73,16 @@ int ConvolutionWinogradFP16CPUKernel::InitWeightBias() { } } memset(bias_data_, 0, oc_block_num * col_tile_ * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_.at(kBiasIndex); - void *bias_origin_tmp = IsTrainable() ? bias_tensor->data_c() : origin_bias_; - memcpy(bias_data_, bias_origin_tmp, out_channel * sizeof(float16_t)); - } return RET_OK; } +void ConvolutionWinogradFP16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *weight_origin = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(weight_origin != nullptr); + WinogradFilterTransformFp16(reinterpret_cast(weight_origin), matrix_g_, matrix_gt_, col_tile_); +} + int ConvolutionWinogradFP16CPUKernel::InitTmpBuffer() { int channel_out = conv_param_->output_channel_; size_t tile_buffer_size = @@ -143,6 +136,8 @@ int ConvolutionWinogradFP16CPUKernel::ConfigInputOutput() { } int ConvolutionWinogradFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); col_tile_ = C8NUM; #ifdef ENABLE_ARM64 row_tile_ = C16NUM; @@ -154,7 +149,7 @@ int ConvolutionWinogradFP16CPUKernel::Init() { conv_param_->input_unit_ = input_unit_; conv_param_->output_unit_ = output_unit_; - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; @@ -207,8 +202,9 @@ int ConvolutionWinogradFP16CPUKernel::RunImpl(int task_id) { MS_LOG(ERROR) << "Convolution Winograd Fp16 get null tensor data!"; return 
RET_ERROR; } - ConvWinogardFp16(input_ptr, trans_weight_, reinterpret_cast(bias_data_), output_ptr, - tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_); + ConvWinogardFp16(input_ptr, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), output_ptr, tmp_buffer_address_list_, task_id, + conv_param_, in_func_, out_func_); return RET_OK; } @@ -229,13 +225,9 @@ int ConvolutionWinogradFP16CPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - if (IsTrainable() && (IsTrain() || IsRepack())) { - ret = InitWeightBias(); - if (ret != 0) { - MS_LOG(ERROR) << "ConvolutionWinogradFP16 repack weight failure"; - return RET_ERROR; - } - is_repack_ = false; + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradFp16Impl, this, thread_count_); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h index c41cea67f0d..e94191966b0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/convolution_winograd_fp16.h @@ -32,29 +32,22 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel { ConvolutionWinogradFP16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx, int out_unit, void *origin_weight, void *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - output_unit_(out_unit), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} - ~ConvolutionWinogradFP16CPUKernel() override { - if (trans_weight_ != nullptr) { - free(trans_weight_); - trans_weight_ = nullptr; - } - } + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias), output_unit_(out_unit) {} + ~ConvolutionWinogradFP16CPUKernel() override {} int 
Init() override; int ReSize() override; int Run() override; int Eval() override; int RunImpl(int task_id); - int InitWeightBias(); int InitTmpBuffer(); int ConfigInputOutput(); int WinogradFilterTransformFp16(const float16_t *weight_data, float *matrix_g, float *matrix_gt, int oc_block); int AdjustNumberOfThread(); private: + int MallocWeightBiasData() override; + void PackWeight() override; void FreeTmpBuffer() { if (trans_input_ != nullptr) { ctx_->allocator->Free(trans_input_); @@ -76,13 +69,12 @@ class ConvolutionWinogradFP16CPUKernel : public ConvolutionBaseCPUKernel { int kernel_unit_ = 0; int input_unit_ = 0; int output_unit_; - void *origin_weight_; // do not free - void *origin_bias_; // do not free float16_t *tmp_data_ = nullptr; float16_t *trans_input_ = nullptr; float16_t *gemm_out_ = nullptr; - float16_t *trans_weight_ = nullptr; float16_t *col_buffer_ = nullptr; + float matrix_g_[64]; + float matrix_gt_[64]; TmpBufferAddressFp16 tmp_buffer_address_list_[4]; InputTransFp16Func in_func_ = nullptr; OutputTransFp16Func out_func_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc index 7cce484401a..8193a2e667b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/crop_fp16.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Crop; namespace mindspore::kernel { int CropFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -48,7 +50,8 @@ static int CropFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scal int CropFp16CPUKernel::Run() { auto input_tensor = in_tensors_.at(0); auto output_tensor = out_tensors_.at(0); - + MS_ASSERT(input_tensor != nullptr); + MS_ASSERT(output_tensor != nullptr); input_ptr_ = reinterpret_cast(input_tensor->data_c()); output_ptr_ = 
reinterpret_cast(output_tensor->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc index 79459ad8b74..445003fdf6b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.cc @@ -27,10 +27,6 @@ DeconvolutionDepthwiseFp16CPUKernel::~DeconvolutionDepthwiseFp16CPUKernel() { delete sliding_; sliding_ = nullptr; } - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } } int DeconvolutionDepthwiseFp16CPUKernel::InitSlideParam() { @@ -69,48 +65,47 @@ int DeconvolutionDepthwiseFp16CPUKernel::InitPackedInputOutput() { return RET_OK; } -int DeconvolutionDepthwiseFp16CPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 +int DeconvolutionDepthwiseFp16CPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float16_t))); + packed_weight_ = malloc(pack_weight_size * sizeof(float16_t)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - PackNCHWFp16ToNC8HW8Fp16(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); - bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(float16_t))); + bias_data_ = malloc(C8NUM * OC8 * sizeof(float16_t)); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } memset(bias_data_, 0, C8NUM * OC8 * sizeof(float16_t)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = 
in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->Size()); - } - conv_param_->thread_num_ = MSMIN(thread_count_, OC8); return RET_OK; } +void DeconvolutionDepthwiseFp16CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNCHWFp16ToNC8HW8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); +} + int DeconvolutionDepthwiseFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new SlidingWindowParam fail!"; return RET_ERROR; } - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitWeightBias failed."; + MS_LOG(ERROR) << "Deconvolution depthwise fp16 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -133,8 +128,8 @@ int DeconvolutionDepthwiseFp16CPUKernel::ReSize() { } int DeconvolutionDepthwiseFp16CPUKernel::Execute(int task_id) { - DeconvDwC8Fp16(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - sliding_, task_id); + DeconvDwC8Fp16(packed_output_, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } @@ -159,6 +154,10 @@ int DeconvolutionDepthwiseFp16CPUKernel::Run() { FreePackedInputOutput(); return RET_ERROR; } + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h index 6ccb8a8c02d..757a7bb7e94 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_depthwise_fp16.h @@ -38,7 +38,8 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeconvolutionDepthwiseFp16CPUKernel() override; int Init() override; @@ -46,14 +47,14 @@ class DeconvolutionDepthwiseFp16CPUKernel : public ConvolutionBaseCPUKernel { int Run() override; int InitPackedInputOutput(); - int InitWeightBias(); int InitSlideParam(); int Execute(int task_id); private: + int MallocWeightBiasData() override; + void PackWeight() override; void FreePackedInputOutput(); SlidingWindowParam *sliding_ = nullptr; - float16_t *packed_weight_ = nullptr; float16_t *packed_input_ = nullptr; float16_t *packed_output_ = nullptr; bool need_align_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc index 87093a8605d..c80479b2756 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.cc @@ -31,10 +31,6 @@ DeConvolutionFp16CPUKernel::~DeConvolutionFp16CPUKernel() { delete matmul_param_; matmul_param_ = nullptr; } - if (pack_weight_ != nullptr) { - free(pack_weight_); - pack_weight_ = nullptr; - } return; } @@ -52,13 +48,31 @@ int 
DeConvolutionFp16CPUKernel::ReSize() { return RET_OK; } -int DeConvolutionFp16CPUKernel::InitWeightBias() { +void DeConvolutionFp16CPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto input_channel = weight_tensor->Batch(); auto output_channel = weight_tensor->Channel(); auto kernel_h = weight_tensor->Height(); auto kernel_w = weight_tensor->Width(); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + input_channel, kernel_w * kernel_h, output_channel); +} +int DeConvolutionFp16CPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = weight_tensor->Batch(); + auto output_channel = weight_tensor->Channel(); + auto kernel_h = weight_tensor->Height(); + auto kernel_w = weight_tensor->Width(); + size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); + packed_weight_ = malloc(weight_pack_size); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc packed_weight_ error!"; + return RET_ERROR; + } + memset(packed_weight_, 0, weight_pack_size); auto bias_size = UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); bias_data_ = malloc(bias_size); if (bias_data_ == nullptr) { @@ -66,33 +80,6 @@ int DeConvolutionFp16CPUKernel::InitWeightBias() { return RET_ERROR; } memset(bias_data_, 0, UP_ROUND(output_channel, C8NUM) * sizeof(float16_t)); - if (in_tensors_.size() == 3) { - if (in_tensors_.at(kBiasIndex)->data_type() != kNumberTypeFloat16) { - MS_LOG(ERROR) << "DeConv fp16 only support fp16 weight"; - return RET_ERROR; - } - if (in_tensors_.at(kBiasIndex)->shape().size() == 1 && - in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) { - memcpy(bias_data_, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float16_t)); - } else { - 
MS_LOG(ERROR) << "unsupported bias shape for deconv!"; - return RET_ERROR; - } - } - - size_t weight_pack_size = input_channel * kernel_w * kernel_h * UP_ROUND(output_channel, C8NUM) * sizeof(float16_t); - pack_weight_ = reinterpret_cast(malloc(weight_pack_size)); - if (pack_weight_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc pack_weight_ error!"; - return RET_ERROR; - } - memset(pack_weight_, 0, weight_pack_size); - if (in_tensors_.at(1)->data_type() != kNumberTypeFloat16) { - MS_LOG(ERROR) << "deconv fp16 kernel require fp16 weight"; - return RET_ERROR; - } - PackNHWCFp16ToC8HWN8Fp16(reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()), pack_weight_, - input_channel, kernel_w * kernel_h, output_channel); return RET_OK; } @@ -172,7 +159,9 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { } auto tmp_buf = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_16_; - MatMulFp16(pack_input_, pack_weight_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + MatMulFp16(pack_input_, + reinterpret_cast(packed_weight_) + + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, tmp_buf, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_, oc * C8NUM * kernel_plane_, 0, OutType_C8); @@ -183,14 +172,16 @@ int DeConvolutionFp16CPUKernel::DoDeconv(int task_id) { } int DeConvolutionFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); matmul_param_ = new (std::nothrow) MatMulParameter(); if (matmul_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } - int ret = InitWeightBias(); + int ret = InitConvWeightBias(); if (ret != RET_OK) { - MS_LOG(ERROR) << "deconv InitWeightBias error!"; + MS_LOG(ERROR) << "deconv InitConvWeightBias error!"; return ret; } if (!InferShapeDone()) { @@ -200,6 +191,10 @@ int DeConvolutionFp16CPUKernel::Init() { } int DeConvolutionFp16CPUKernel::Run() { + if 
(RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } auto input_ptr = reinterpret_cast(in_tensors_.at(0)->data_c()); auto output_ptr = reinterpret_cast(out_tensors_.at(0)->data_c()); MS_ASSERT(input_ptr != nullptr); @@ -225,6 +220,8 @@ int DeConvolutionFp16CPUKernel::Run() { error_code = ParallelLaunch(this->ms_context_, DeConvFp16Run, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "deconv fp16 run error! error_code[" << error_code << "]"; + FreeRunBuf(); + return error_code; } } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h index da0330a295b..21f286b2998 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_fp16.h @@ -28,7 +28,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeConvolutionFp16CPUKernel() override; int Init() override; int Run() override; @@ -41,7 +42,8 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { int InitRunBuf(); void FreeRunBuf(); int InitParam(); - int InitWeightBias(); + int MallocWeightBiasData() override; + void PackWeight() override; private: MatMulParameter *matmul_param_; @@ -51,7 +53,6 @@ class DeConvolutionFp16CPUKernel : public ConvolutionBaseCPUKernel { int thread_count_; int thread_stride_; float16_t *pack_input_ = nullptr; - float16_t *pack_weight_ = nullptr; float16_t *pack_output_ = nullptr; float16_t *tmp_buffer_ = nullptr; float16_t *batch_input_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc index 13fc716af11..d4e1bb73ce0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.cc @@ -237,7 +237,13 @@ int DeConvWgPostFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca int DeConvWinogradFp16CPUKernel::InitComputeParam() { auto weight_tensor = in_tensors_.at(1); - + auto shape = weight_tensor->shape(); + if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { + MS_LOG(WARNING) << "The shape of weight tensor is invalid."; + valid_weight_shape_ = false; + return RET_OK; + } + valid_weight_shape_ = true; conv_param_->input_channel_ = weight_tensor->Batch(); conv_param_->output_channel_ = weight_tensor->Channel(); conv_param_->kernel_w_ = weight_tensor->Width(); @@ -318,7 +324,11 @@ int DeConvWinogradFp16CPUKernel::InitDataParam() { /* unit data : weight & winograd data */ auto weight_tensor = in_tensors_.at(kWeightIndex); auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); + if (origin_weight == nullptr) { + MS_LOG(WARNING) << "The 
weight data is nullptr, will init data parameter in runtime."; + is_repack_ = true; + return RET_OK; + } for (int i = 0; i < deconv_param_->compute_size_; i++) { DeConvComputeUnit *unit = &deconv_param_->compute_units_[i]; auto ret = PackDeConvWgDataFp16(origin_weight, unit, conv_param_, deconv_param_); @@ -349,6 +359,19 @@ int DeConvWinogradFp16CPUKernel::ReSize() { MS_LOG(ERROR) << "ConvolutionBaseCPUKernel init failed!"; return ret; } + if (!valid_weight_shape_) { + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitComputeParam error!"; + return RET_ERROR; + } else if (!valid_weight_shape_) { + return RET_OK; + } + if (InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + } + ret = InitParameter(); if (ret != RET_OK) { MS_LOG(ERROR) << "InitParameter failed!"; @@ -358,6 +381,8 @@ int DeConvWinogradFp16CPUKernel::ReSize() { } int DeConvWinogradFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); deconv_param_ = new (std::nothrow) DeConvParam(); if (deconv_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; @@ -368,16 +393,14 @@ int DeConvWinogradFp16CPUKernel::Init() { wg.dest_buffer_ = nullptr; wg.middle_buffer_ = nullptr; } - int error_code = InitComputeParam(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code; - return error_code; - } - error_code = InitDataParam(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "InitWeightBias error! 
ret: " << error_code; - return error_code; + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + if (valid_weight_shape_ && InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; } if (!InferShapeDone()) { @@ -397,6 +420,21 @@ int DeConvWinogradFp16CPUKernel::Run() { return RET_ERROR; } + if (!valid_weight_shape_) { + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + if (!valid_weight_shape_ || InitParameter() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + } + if (IsRepack() && InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { nhwc_input_ = input_ptr + batch_index * deconv_param_->input_plane_ * conv_param_->input_channel_; nhwc_output_ = output_ptr + batch_index * deconv_param_->output_plane_ * conv_param_->output_channel_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h index b558c2312a8..c83ee09d84f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/deconvolution_winograd_fp16.h @@ -29,7 +29,8 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvWinogradFp16CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeConvWinogradFp16CPUKernel() override; int Init() override; int Run() override; @@ -56,6 +57,7 @@ class DeConvWinogradFp16CPUKernel : public ConvolutionBaseCPUKernel { float16_t *tile_output_ = nullptr; int thread_num_hw_ = 0; int thread_stride_hw_ = 0; + bool valid_weight_shape_ = true; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP16_DECONVOLUTION_WINOGRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc index 47da33433ef..4cfa3edd456 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fullconnection_fp16.cc @@ -18,6 +18,7 @@ #include "src/kernel_registry.h" using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_FullConnection; @@ -41,6 +42,8 @@ int FullconnectionFP16CPUKernel::ReSize() { } int FullconnectionFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C16NUM; #else diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc index 9e544cd6e30..6f4b7232782 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/fused_batchnorm_fp16.cc @@ -86,6 +86,11 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { ms_context_->allocator->Free(output_fp16); return RET_ERROR; } + MS_ASSERT(input->data_c() != nullptr); + MS_ASSERT(scale->data_c() != nullptr); + MS_ASSERT(offset->data_c() != nullptr); + MS_ASSERT(mean->data_c() != nullptr); + MS_ASSERT(variance->data_c() != nullptr); Float32ToFloat16(reinterpret_cast(input->data_c()), reinterpret_cast(input_fp16), 
input->ElementsNum()); Float32ToFloat16(reinterpret_cast(scale->data_c()), reinterpret_cast(scale_fp16), @@ -116,7 +121,8 @@ int FusedBatchnormFp16CPUKernel::DoExecute(int task_id) { ms_context_->allocator->Free(output_fp16); return RET_OK; } - + MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr); + MS_ASSERT(out_tensors_.at(0)->data_c() != nullptr); if (IsTrain() && IsTrainable() && in_tensors_.size() >= kMaxInIdx) { CalcMeanVar(static_cast(in_tensors_.at(0)->data_c()), static_cast(in_tensors_.at(kInScaleIdx)->data_c()), diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc index e9cbb9d2dd5..f88969604d3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.cc @@ -40,13 +40,17 @@ GatherFp16CPUKernel::~GatherFp16CPUKernel() { } int GatherFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 3); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto input_tensor = in_tensors_.at(0); + MS_ASSERT(input_tensor != nullptr); if (input_tensor->data_type() == kNumberTypeFloat32 && input_tensor->data_c() != nullptr) { const_input_ = true; input_data_ = reinterpret_cast(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); Float32ToFloat16(reinterpret_cast(input_tensor->data_c()), input_data_, input_tensor->ElementsNum()); } + MS_ASSERT(in_tensors_.at(kSecondInput)->data_c() != nullptr); (reinterpret_cast(op_parameter_))->axis_ = *(reinterpret_cast(in_tensors_.at(kSecondInput)->data_c())); if (!InferShapeDone()) { @@ -57,35 +61,6 @@ int GatherFp16CPUKernel::Init() { int GatherFp16CPUKernel::ReSize() { return RET_OK; } -int GatherFp16CPUKernel::PreProcess() { - if (!InferShapeDone()) { - auto ret = lite::KernelInferShape(in_tensors_, out_tensors_, op_parameter_); - if (ret != 0) { - MS_LOG(ERROR) << "InferShape fail!"; - return ret; - } - ret = ReSize(); - if (ret != 0) { - MS_LOG(ERROR) << 
"ReSize fail!ret: " << ret; - return ret; - } - out_tensors_[0]->set_data_type(kNumberTypeFloat16); - } - for (auto *output : out_tensors_) { - MS_ASSERT(output != nullptr); - auto ret = output->MallocData(); - if (output->ElementsNum() >= MAX_MALLOC_SIZE / static_cast(sizeof(int64_t))) { - MS_LOG(ERROR) << "The size of output tensor is too big"; - return RET_ERROR; - } - if (ret != RET_OK) { - MS_LOG(ERROR) << "gather out tensor malloc data failed."; - return ret; - } - } - return RET_OK; -} - int GatherFp16CPUKernel::DoGather(int task_id) { auto input_tensor = in_tensors_.at(0); auto indices_tensor = in_tensors_.at(1); @@ -118,6 +93,8 @@ int GatherFp16CPUKernel::DoGather(int task_id) { return RET_ERROR; } int8_t *int8_out = reinterpret_cast(out_tensor->data_c()); + MS_ASSERT(int8_in != nullptr); + MS_ASSERT(int8_out != nullptr); int data_size = lite::DataTypeSize(kNumberTypeFloat16); int8_in += thread_stride * limit * inner_size * data_size; int8_out += thread_stride * indices_element_size * inner_size * data_size; @@ -156,6 +133,7 @@ int GatherFp16CPUKernel::Run() { } if (!const_input_) { auto input_tensor = in_tensors_.at(0); + MS_ASSERT(input_tensor->data_c() != nullptr); if (input_tensor->data_type() == kNumberTypeFloat32) { input_data_ = reinterpret_cast(ms_context_->allocator->Malloc(input_tensor->ElementsNum() * sizeof(float16_t))); @@ -176,6 +154,7 @@ int GatherFp16CPUKernel::Run() { } int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lite::Tensor *indices_tensor) { + MS_ASSERT(indices_tensor->data_c() != nullptr); if (!isIndicesInt32) { if (indices_num >= std::numeric_limits::max() / static_cast(sizeof(int))) { MS_LOG(ERROR) << "Input indices_num is invalid, indices_num: " << indices_num; @@ -188,18 +167,20 @@ int GatherFp16CPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, } if (indices_tensor->data_type() == kNumberTypeInt64) { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = 
reinterpret_cast(indices_tensor->MutableData())[i]; + indices_data_[i] = reinterpret_cast(indices_tensor->data_c())[i]; } } else if (indices_tensor->data_type() == kNumberTypeFloat16) { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast(indices_tensor->MutableData())[i]; + indices_data_[i] = reinterpret_cast(indices_tensor->data_c())[i]; } } else { MS_LOG(ERROR) << "The data type of indices tensor is wrong"; + ms_context_->allocator->Free(indices_data_); + indices_data_ = nullptr; return RET_ERROR; } } else { - indices_data_ = reinterpret_cast(indices_tensor->MutableData()); + indices_data_ = reinterpret_cast(indices_tensor->data_c()); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h index a1bb9b22e2b..39167c747fd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gather_fp16.h @@ -34,7 +34,6 @@ class GatherFp16CPUKernel : public InnerKernel { int Init() override; int ReSize() override; - int PreProcess() override; int Run() override; int DoGather(int task_id); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc index 80c3751f1b0..9a968988640 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/group_convolution_fp16.cc @@ -83,6 +83,8 @@ int GroupConvolutionFP16CPUKernel::PostConcat(int group_id) { } int GroupConvolutionFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (group_conv_creator_ == nullptr) { return lite::RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc index 9aa8e26a7d0..7be43799813 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/gru_fp16.cc @@ -89,6 +89,7 @@ int GruFp16CPUKernel::InitInputWeightBias() { // result -- row: seq_len * batch; col: hidden_size auto weight_g = in_tensors_.at(1); MS_ASSERT(weight_g != nullptr); + MS_ASSERT(weight_g->data_c() != nullptr); weight_g_ptr_ = reinterpret_cast( malloc(weight_batch_ * gru_param_->input_col_align_ * gru_param_->input_size_ * sizeof(float16_t))); if (weight_g_ptr_ == nullptr) { @@ -109,6 +110,7 @@ int GruFp16CPUKernel::InitInputWeightBias() { // input bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); + MS_ASSERT(bias->data_c() != nullptr); input_bias_ = reinterpret_cast(malloc(weight_batch_ * gru_param_->input_col_align_ * sizeof(float16_t))); if (input_bias_ == nullptr) { MS_LOG(ERROR) << "GruFp16CPUKernel malloc input_bias_ error."; @@ -135,6 +137,7 @@ int GruFp16CPUKernel::InitStateWeightBias() { // result -- row: batch; col: hidden_size auto weight_r = in_tensors_.at(2); MS_ASSERT(weight_r != nullptr); + MS_ASSERT(weight_r->data_c() != nullptr); weight_r_ptr_ = reinterpret_cast( malloc(weight_batch_ * gru_param_->state_col_align_ * gru_param_->hidden_size_ * sizeof(float16_t))); if (weight_r_ptr_ == nullptr) { @@ -167,6 +170,7 @@ int GruFp16CPUKernel::InitStateWeightBias() { // state bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); + MS_ASSERT(bias->data_c() != nullptr); state_bias_ = reinterpret_cast(malloc(weight_batch_ * gru_param_->state_col_align_ * sizeof(float16_t))); if (state_bias_ == nullptr) { MS_LOG(ERROR) << "GruFp16CPUKernel malloc state_bias_ error."; @@ -189,6 +193,8 @@ int GruFp16CPUKernel::InitStateWeightBias() { } int GruFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 5); + CHECK_LESS_RETURN(out_tensors_.size(), 2); if (!InferShapeDone()) { return RET_OK; } @@ -267,10 +273,14 @@ int GruFp16CPUKernel::Run() { auto output_ptr = reinterpret_cast(output->data_c()); 
MS_ASSERT(output_ptr); auto output_hidden_state = out_tensors_[1]; + MS_ASSERT(output_hidden_state->data_c() != nullptr); + MS_ASSERT(hidden_state->data_c() != nullptr); memcpy(output_hidden_state->data_c(), hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t)); int check_seq_len = gru_param_->seq_len_; if (in_tensors_.size() == 6) { - auto seq_len = reinterpret_cast(in_tensors_.at(5)->data_c()); + MS_ASSERT(in_tensors_.at(5) != nullptr); + int *seq_len = reinterpret_cast(in_tensors_.at(5)->data_c()); + MS_ASSERT(seq_len != nullptr); if (!std::equal(seq_len + 1, seq_len + gru_param_->batch_, seq_len)) { MS_LOG(ERROR) << "different batch seq_len is currently not supported"; return RET_ERROR; @@ -281,6 +291,7 @@ int GruFp16CPUKernel::Run() { auto ret = MallocRunBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "GruFp16CPUKernel MallocRunBuffer error."; + FreeRunBuffer(); return RET_ERROR; } MS_ASSERT(weight_g_ptr_ != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc index 9af3129b128..ad4bd8870cc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/instance_norm_fp16.cc @@ -43,7 +43,11 @@ void InstanceNormFp16CPUKernel::FreeTmpBuffer() { } int InstanceNormFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 3); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto gamma = in_tensors_[1]; + MS_ASSERT(gamma != nullptr); + MS_ASSERT(gamma->data_c() != nullptr); if (gamma->data_type() == kNumberTypeFloat32) { gamma_data_ = reinterpret_cast(malloc(gamma->ElementsNum() * sizeof(float16_t))); if (gamma_data_ == nullptr) { @@ -59,6 +63,8 @@ int InstanceNormFp16CPUKernel::Init() { } auto beta = in_tensors_[2]; + MS_ASSERT(beta != nullptr); + MS_ASSERT(beta->data_c() != nullptr); if (beta->data_type() == kNumberTypeFloat32) { beta_data_ = 
reinterpret_cast(malloc(beta->ElementsNum() * sizeof(float16_t))); if (beta_data_ == nullptr) { @@ -108,6 +114,8 @@ int InstanceNormFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_sca int InstanceNormFp16CPUKernel::Run() { src_data_ = reinterpret_cast(in_tensors_[0]->data_c()); dst_data_ = reinterpret_cast(out_tensors_[0]->data_c()); + MS_ASSERT(src_data_ != nullptr); + MS_ASSERT(dst_data_ != nullptr); auto ret = ParallelLaunch(this->ms_context_, InstanceNormFp16Run, this, op_parameter_->thread_num_); if (ret != RET_OK) { MS_LOG(ERROR) << "InstanceNormFp16Run error error_code[" << ret << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc index 786765f2914..7ccdb26f8b1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/lstm_fp16.cc @@ -96,6 +96,7 @@ int LstmFp16CPUKernel::InitInputWeightBias() { // result -- row: seq_len * batch; col: hidden_size auto weight_i = in_tensors_.at(1); MS_ASSERT(weight_i != nullptr); + MS_ASSERT(weight_i->data_c() != nullptr); weight_i_ptr_ = reinterpret_cast( malloc(weight_batch_ * lstm_param_->input_col_align_ * lstm_param_->input_size_ * sizeof(float16_t))); if (weight_i_ptr_ == nullptr) { @@ -116,6 +117,7 @@ int LstmFp16CPUKernel::InitInputWeightBias() { // input bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); + MS_ASSERT(bias->data_c() != nullptr); input_bias_ = reinterpret_cast(malloc(weight_batch_ * lstm_param_->input_col_align_ * sizeof(float16_t))); if (input_bias_ == nullptr) { @@ -143,6 +145,7 @@ int LstmFp16CPUKernel::InitStateWeightBias() { // result -- row: batch; col: hidden_size auto weight_h = in_tensors_.at(2); MS_ASSERT(weight_h != nullptr); + MS_ASSERT(weight_h->data_c() != nullptr); weight_h_ptr_ = reinterpret_cast( malloc(weight_batch_ * lstm_param_->state_col_align_ * lstm_param_->hidden_size_ * sizeof(float16_t))); if (weight_h_ptr_ == 
nullptr) { @@ -175,6 +178,7 @@ int LstmFp16CPUKernel::InitStateWeightBias() { // state bias auto bias = in_tensors_.at(3); MS_ASSERT(bias != nullptr); + MS_ASSERT(bias->data_c() != nullptr); state_bias_ = reinterpret_cast(malloc(weight_batch_ * lstm_param_->state_col_align_ * sizeof(float16_t))); if (state_bias_ == nullptr) { @@ -198,6 +202,8 @@ int LstmFp16CPUKernel::InitStateWeightBias() { } int LstmFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 6); + CHECK_LESS_RETURN(out_tensors_.size(), 3); if (!InferShapeDone()) { return RET_OK; } @@ -286,23 +292,28 @@ int LstmFp16CPUKernel::Run() { MS_ASSERT(input != nullptr); auto hidden_state = in_tensors_.at(4); MS_ASSERT(hidden_state != nullptr); + MS_ASSERT(hidden_state->data_c() != nullptr); auto cell_state = in_tensors_.at(5); MS_ASSERT(cell_state != nullptr); + MS_ASSERT(cell_state->data_c() != nullptr); auto output = out_tensors_.at(0); MS_ASSERT(output != nullptr); auto input_ptr = reinterpret_cast(input->data_c()); - MS_ASSERT(input_ptr); + MS_ASSERT(input_ptr != nullptr); auto output_ptr = reinterpret_cast(output->data_c()); - MS_ASSERT(output_ptr); + MS_ASSERT(output_ptr != nullptr); auto output_hidden_state = out_tensors_[1]; + MS_ASSERT(output_hidden_state->data_c() != nullptr); memcpy(output_hidden_state->data_c(), hidden_state->data_c(), hidden_state->ElementsNum() * sizeof(float16_t)); auto output_cell_state = out_tensors_[2]; + MS_ASSERT(output_cell_state->data_c()); memcpy(output_cell_state->data_c(), cell_state->data_c(), cell_state->ElementsNum() * sizeof(float16_t)); auto ret = MallocRunBuffer(); if (ret != RET_OK) { MS_LOG(ERROR) << "LstmFp16CPUKernel MallocRunBuffer error."; + FreeRunBuffer(); return RET_ERROR; } MS_ASSERT(weight_i_ptr_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc index 03aa5338824..256c598b0be 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc 
+++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_base_fp16.cc @@ -232,11 +232,15 @@ void MatmulBaseFP16CPUKernel::InitMatrixB(void *src_ptr, TypeId src_data_type) { } int MatmulBaseFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); ResizeParameter(); if (params_->a_const_ == true) { if (RET_OK != InitBufferA()) { return RET_ERROR; } + MS_ASSERT(in_tensors_[0] != nullptr); + MS_ASSERT(in_tensors_[0]->data_c() != nullptr); InitMatrixA(reinterpret_cast(in_tensors_[0]->data_c())); } @@ -244,6 +248,8 @@ int MatmulBaseFP16CPUKernel::Init() { /* copy origin b data, pack in resize * pack after a infershape done */ auto b_tensor = in_tensors_[1]; + MS_ASSERT(b_tensor != nullptr); + MS_ASSERT(b_tensor->data_c() != nullptr); src_b_ = reinterpret_cast(malloc(params_->batch * params_->col_ * params_->deep_ * sizeof(float16_t))); if (src_b_ == nullptr) { MS_LOG(ERROR) << "Matmul fp16 malloc src_b_ failed"; @@ -302,6 +308,7 @@ int MatmulBaseFP16CPUKernel::Run() { if (RET_OK != InitBufferA()) { return RET_ERROR; } + MS_ASSERT(in_tensors_.at(0)->data_c() != nullptr); InitMatrixA(in_tensors_.at(0)->data_c()); } if ((params_->b_const_ == false) || IsRepack()) { @@ -309,6 +316,7 @@ int MatmulBaseFP16CPUKernel::Run() { FreeResizeBufA(); return RET_ERROR; } + MS_ASSERT(in_tensors_.at(1)->data_c() != nullptr); InitMatrixB(in_tensors_.at(1)->data_c(), in_tensors_.at(1)->data_type()); InitBias(); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc index c3bb2461107..69583ccfd6e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/matmul_fp16.cc @@ -19,6 +19,7 @@ #include "src/kernel_registry.h" using mindspore::lite::KernelRegistrar; +using mindspore::lite::RET_ERROR; using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_MatMul; @@ -54,6 +55,8 @@ void 
MatmulFP16CPUKernel::InitBShape() { } int MatmulFP16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_ARM64 row_tile_ = C4NUM; #else diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc index c06b46c0c7c..0a35595eebb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pad_fp16.cc @@ -26,7 +26,8 @@ using mindspore::schema::PrimitiveType_PadFusion; namespace mindspore::kernel { namespace { -constexpr size_t kPadMaxInputSize = 2; +constexpr size_t kPadCommonInputSize = 2; +constexpr size_t kPadMaxInputSize = 3; } // namespace int PadFp16CPUKernel::RunImpl(int task_id) { PadFp16(input_, output_, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_); @@ -53,8 +54,14 @@ int PadFp16CPUKernel::RunMirrorPadImpl(int task_id) { for (int b = 0; b < block.size_[1]; b++) { int out_b_index = out_a_index + b * block.out_stride_[1]; for (int c = 0; c < block.size_[2]; ++c) { - int output_index = out_b_index + c * block.out_stride_[2]; - MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); + int out_c_index = out_b_index + c * block.out_stride_[2]; + for (int d = 0; d < block.size_[3]; ++d) { + int out_d_index = out_c_index + d * block.out_stride_[3]; + for (int e = 0; e < block.size_[4]; ++e) { + int output_index = out_d_index + e * block.out_stride_[4]; + MirrorPadFp16(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]); + } + } } } } @@ -84,16 +91,20 @@ int PadFp16CPUKernel::Run() { auto output_tensor = out_tensors_.at(0); input_ = reinterpret_cast(input_tensor->data_c()); output_ = reinterpret_cast(output_tensor->data_c()); - + MS_ASSERT(input_ != nullptr); + MS_ASSERT(output_ != nullptr); int ret = 0; if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) 
{ - if (in_tensors_.size() == kPadMaxInputSize) { + if (in_tensors_.size() >= kPadCommonInputSize) { ret = CopyPaddingFromInput(); if (ret != RET_OK) { MS_LOG(ERROR) << "PadFp16CPUKernel CopyPaddingFromInput failed"; return RET_ERROR; } } + if (in_tensors_.size() == kPadMaxInputSize) { + pad_param_->constant_value_ = reinterpret_cast(in_tensors_.at(2)->data_c())[0]; + } if (pad_param_->constant_value_ - 0.0f < 1e-5) { memset(output_, 0, output_tensor->ElementsNum() * sizeof(float16_t)); } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc index 50c17f0baaf..0ffff245ca8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/pooling_fp16.cc @@ -88,7 +88,8 @@ int PoolingFp16CPUKernel::Run() { fp16_input_ = reinterpret_cast(input_tensor->data_c()); fp16_output_ = reinterpret_cast(output_tensor->data_c()); - + MS_ASSERT(fp16_input_ != nullptr); + MS_ASSERT(fp16_output_ != nullptr); int error_code = ParallelLaunch(this->ms_context_, PoolingFp16Impl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "pooling error error_code[" << error_code << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc index 691afade3c9..ae159e6b9b4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/power_fp16.cc @@ -27,7 +27,8 @@ using mindspore::schema::PrimitiveType_PowFusion; namespace mindspore::kernel { int PowerFp16CPUKernel::Init() { - MS_ASSERT(in_tensors_.size() == 2); + CHECK_LESS_RETURN(in_tensors_.size(), 2); + CHECK_LESS_RETURN(out_tensors_.size(), 1); exp_tensor_ = in_tensors_[1]; MS_ASSERT(exp_tensor_ != nullptr); if (exp_tensor_->IsConst()) { @@ -50,7 +51,7 @@ int PowerFp16CPUKernel::GetExpData() { MS_LOG(ERROR) << "exp_data_ is nullptr"; return RET_NULL_PTR; } - 
auto exp = reinterpret_cast(exp_tensor_->MutableData()); + auto exp = reinterpret_cast(exp_tensor_->data_c()); if (exp == nullptr) { MS_LOG(ERROR) << "exp is nullptr!"; return RET_NULL_PTR; @@ -59,7 +60,7 @@ int PowerFp16CPUKernel::GetExpData() { exp_data_[i] = (float16_t)(exp[i]); } } else { - exp_data_ = reinterpret_cast(exp_tensor_->MutableData()); + exp_data_ = reinterpret_cast(exp_tensor_->data_c()); if (exp_data_ == nullptr) { MS_LOG(ERROR) << "exp_data_ is nullptr"; return RET_NULL_PTR; @@ -95,10 +96,8 @@ int PowerFp16CPUKernel::Run() { } int PowerFp16CPUKernel::RunImpl(int task_id) { - auto x_addr = reinterpret_cast(in_tensors_.at(0)->MutableData()); - MS_ASSERT(x_addr); - auto output_addr = reinterpret_cast(out_tensors_.at(0)->MutableData()); - MS_ASSERT(output_addr); + auto x_addr = reinterpret_cast(in_tensors_.at(0)->data_c()); + auto output_addr = reinterpret_cast(out_tensors_.at(0)->data_c()); auto size = in_tensors_.at(0)->ElementsNum(); int stride = UP_DIV(size, thread_count_); int len = MSMIN(stride, size - stride * task_id); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc index a912c60e786..1df7d4486ac 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/quant_dtype_cast_fp16.cc @@ -30,14 +30,8 @@ using mindspore::schema::PrimitiveType_QuantDTypeCast; namespace mindspore::kernel { int QuantDTypeCastFp16CPUKernel::Init() { - if (in_tensors_.size() != 1) { - MS_LOG(ERROR) << "inputs number should be 1, but " << in_tensors_.size() << " is given."; - return RET_PARAM_INVALID; - } - if (out_tensors_.size() != 1) { - MS_LOG(ERROR) << "outputs number should be 1, but " << out_tensors_.size() << " is given."; - return RET_PARAM_INVALID; - } + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto in_tensor = in_tensors_.front(); auto 
out_tensor = out_tensors_.front(); auto param = reinterpret_cast(op_parameter_); @@ -102,9 +96,9 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) { auto quant_arg = !out_tensors_.front()->quant_params().empty() ? out_tensors_.front()->quant_params().front() : in_tensors_.front()->quant_params().front(); int ret; - MS_ASSERT(float16_ptr_); + MS_ASSERT(float16_ptr_ != nullptr); if (!is_uint8_) { - MS_ASSERT(int8_ptr_); + MS_ASSERT(int8_ptr_ != nullptr); if (int_to_float_) { ret = DoDequantizeInt8ToFp16(int8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); @@ -114,7 +108,7 @@ int QuantDTypeCastFp16CPUKernel::QuantDTypeCast(int task_id) { } } else { // uint8 - MS_ASSERT(uint8_ptr_); + MS_ASSERT(uint8_ptr_ != nullptr); if (int_to_float_) { ret = DoDequantizeUInt8ToFp16(uint8_ptr_ + thread_offset, float16_ptr_ + thread_offset, quant_arg.scale, quant_arg.zeroPoint, num_unit_thread); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc index 5af2c51d44e..9973a53efab 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/reduce_fp16.cc @@ -63,9 +63,8 @@ int ReduceFp16CPUKernel::Init() { } int ReduceFp16CPUKernel::CallReduceUnit(int task_id) { - auto ret = - reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id, op_parameter_->thread_num_); - return ret; + return reducer_(outer_size_, inner_size_, axis_size_, fp16_src_data_, fp16_dst_data_, task_id, + op_parameter_->thread_num_); } static int ReduceFp16Impl(void *cdata, int task_id, float lhs_scale, float rhs_scale) { @@ -86,7 +85,9 @@ int ReduceFp16CPUKernel::Run() { } auto in_tensor = in_tensors_.at(0); - fp16_src_data_ = reinterpret_cast(in_tensor->MutableData()); + MS_ASSERT(in_tensor != nullptr); + fp16_src_data_ = reinterpret_cast(in_tensor->data_c()); + 
MS_ASSERT(fp16_src_data_ != nullptr); for (size_t i = 0; i < data_buffers_.size(); ++i) { fp16_dst_data_ = data_buffers_.at(i); outer_size_ = outer_sizes_.at(i); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc index 139027072a8..be8d4eb0728 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/scale_fp16.cc @@ -48,6 +48,7 @@ int ScaleFp16CPUKernel::Init() { MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given."; return RET_ERROR; } + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; @@ -101,9 +102,12 @@ int ScaleFp16Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int ScaleFp16CPUKernel::Run() { auto input_tensor = in_tensors_.at(0); auto output_tensor = out_tensors_.at(0); - input_ = reinterpret_cast(input_tensor->MutableData()); - output_ = reinterpret_cast(output_tensor->MutableData()); - + MS_ASSERT(input_tensor != nullptr); + MS_ASSERT(output_tensor != nullptr); + input_ = reinterpret_cast(input_tensor->data_c()); + output_ = reinterpret_cast(output_tensor->data_c()); + MS_ASSERT(input_ != nullptr); + MS_ASSERT(output_ != nullptr); auto ret = InitScaleOffset(); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale fp16 InitScaleOffset failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc index 640910814f8..abc10c22e02 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/softmax_fp16.cc @@ -78,8 +78,8 @@ int SoftmaxFp16CPUKernel::DoSoftmaxLastAxis(int task_id) { int end = MSMIN(begin + unit, out_plane_size_); int channel = softmax_param_->input_shape_[softmax_param_->axis_]; int offset = begin * channel; - auto input_ptr = 
reinterpret_cast(in_tensors_.at(kInputIndex)->MutableData()); - auto output_ptr = reinterpret_cast(out_tensors_.at(kOutputIndex)->MutableData()); + auto input_ptr = reinterpret_cast(in_tensors_.at(kInputIndex)->data_c()); + auto output_ptr = reinterpret_cast(out_tensors_.at(kOutputIndex)->data_c()); SoftmaxLastAxisFp16(input_ptr + offset, output_ptr + offset, end - begin, channel); return RET_OK; } @@ -102,14 +102,14 @@ int SoftmaxFp16CPUKernel::Run() { return ret; } else { auto input_tensor = in_tensors_.at(0); - MS_ASSERT(input_tensor); + MS_ASSERT(input_tensor != nullptr); auto output_tensor = out_tensors_.at(0); - MS_ASSERT(output_tensor); + MS_ASSERT(output_tensor != nullptr); input_fp16_ = reinterpret_cast(input_tensor->data_c()); - MS_ASSERT(input_fp16_); + MS_ASSERT(input_fp16_ != nullptr); output_fp16_ = reinterpret_cast(output_tensor->data_c()); - MS_ASSERT(output_fp16_); - MS_ASSERT(sum_data_); + MS_ASSERT(output_fp16_ != nullptr); + MS_ASSERT(sum_data_ != nullptr); SoftmaxFp16(input_fp16_, output_fp16_, sum_data_, softmax_param_); } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc index 63505d35e6c..e310e07518c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16/stack_fp16.cc @@ -73,6 +73,8 @@ void StackFp16CPUKernel::FreeBuffer() { } int StackFp16CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); data_type_size_ = sizeof(float16_t); if (!InferShapeDone()) { return RET_OK; @@ -114,7 +116,9 @@ int StackFp16CPUKernel::Run() { // if output tensor is fp32, we need to transform if (malloc_out_) { auto out_tensor = out_tensors_.at(0); - Float16ToFloat32(out_buffer_, reinterpret_cast(out_tensor->MutableData()), out_tensor->ElementsNum()); + MS_ASSERT(out_tensor != nullptr); + MS_ASSERT(out_tensor->data_c() != nullptr); + 
Float16ToFloat32(out_buffer_, reinterpret_cast(out_tensor->data_c()), out_tensor->ElementsNum()); } FreeBuffer(); return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc index 87b956be941..d49759c3296 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/bn_fp16_grad.cc @@ -34,6 +34,16 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_BatchNormGrad; namespace mindspore::kernel { +namespace { +constexpr int kNumInputDim_0 = 0; +constexpr int kNumInputDim_1 = 1; +constexpr int kNumInputDim_2 = 2; +constexpr int kNumInputDim_3 = 3; +constexpr int kNumInputDim_4 = 4; +constexpr int kNumInputDim_5 = 4; +constexpr int kNumOutputDim_2 = 2; +constexpr int kNumJobs = 4; +} // namespace int BNGradCPUKernelFp16::ReSize() { auto *input_x = in_tensors_.at(1); int channels = input_x->shape().at(kNHWC_C); @@ -52,16 +62,16 @@ int BNGradCPUKernelFp16::Init() { } int BNGradCPUKernelFp16::Execute(int task_id) { - auto *input_yt = in_tensors_.at(0); - auto *input_x = in_tensors_.at(1); - auto *input_scale = in_tensors_.at(2); - auto *input_mean = in_tensors_.at(3); - auto *input_var = in_tensors_.at(4); + auto *input_yt = in_tensors_.at(kNumInputDim_0); + auto *input_x = in_tensors_.at(kNumInputDim_1); + auto *input_scale = in_tensors_.at(kNumInputDim_2); + auto *input_mean = in_tensors_.at(kNumInputDim_3); + auto *input_var = in_tensors_.at(kNumInputDim_4); auto kernel_name = this->name(); if (kernel_name.find("FusedBatchNormGradCPU") != std::string::npos) { - input_mean = in_tensors_.at(4); - input_var = in_tensors_.at(5); + input_mean = in_tensors_.at(kNumInputDim_4); + input_var = in_tensors_.at(kNumInputDim_5); } auto bn_param = reinterpret_cast(op_parameter_); int stage = stage_; @@ -71,7 +81,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) { auto *output_dx = 
out_tensors_.at(0); auto *output_scale = out_tensors_.at(1); - auto *output_bias = out_tensors_.at(2); + auto *output_bias = out_tensors_.at(kNumOutputDim_2); int32_t batch = input_x->Batch(); int32_t channels = input_x->Channel(); int32_t spatial = input_x->Height() * input_x->Width(); @@ -91,7 +101,7 @@ int BNGradCPUKernelFp16::Execute(int task_id) { count = (count < 0) ? 0 : count; switch (stage) { case 0: { - for (int job = task_id; job < 4; job += thread_num) { + for (int job = task_id; job < kNumJobs; job += thread_num) { switch (job) { case 0: var2InvarFp16(save_var, input_var->ElementsNum(), bn_param->epsilon_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc index d9dca4254d9..9c381dd6011 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/dropout_fp16_grad.cc @@ -41,7 +41,6 @@ int DropoutGradCPUKernelFp16::Init() { MS_LOG(ERROR) << "unsupported ratio value - Dropout ratio should be between zero to one"; return RET_ERROR; } - if (ratio >= 1.0f) { scale_ = 1.0f; } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc index dce310d9fb4..441b4b42d42 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/layernorm_fp16_grad.cc @@ -30,6 +30,16 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_LayerNormGrad; namespace mindspore::kernel { +namespace { +constexpr int kNumInputDim_0 = 0; +constexpr int kNumInputDim_1 = 1; +constexpr int kNumInputDim_2 = 2; +constexpr int kNumInputDim_3 = 3; +constexpr int kNumInputDim_4 = 4; +constexpr int kNumOutputDim_0 = 0; +constexpr int kNumOutputDim_1 = 1; +constexpr int kNumOutputDim_2 = 2; +} // namespace int 
LayerNormGradCPUKernelFp16::ReSize() { return RET_OK; } int LayerNormGradCPUKernelFp16::Init() { @@ -63,14 +73,14 @@ int LayerNormGradCPUKernelFp16::Init() { } int LayerNormGradCPUKernelFp16::Execute(int task_id) { - auto input_x = in_tensors_.at(0); - auto input_dy = in_tensors_.at(1); - auto input_var = in_tensors_.at(2); - auto input_mean = in_tensors_.at(3); - auto input_gamma = in_tensors_.at(4); - auto output_dx = out_tensors_.at(0); - auto output_dg = out_tensors_.at(1); - auto output_db = out_tensors_.at(2); + auto input_x = in_tensors_.at(kNumInputDim_0); + auto input_dy = in_tensors_.at(kNumInputDim_1); + auto input_var = in_tensors_.at(kNumInputDim_2); + auto input_mean = in_tensors_.at(kNumInputDim_3); + auto input_gamma = in_tensors_.at(kNumInputDim_4); + auto output_dx = out_tensors_.at(kNumOutputDim_0); + auto output_dg = out_tensors_.at(kNumOutputDim_1); + auto output_db = out_tensors_.at(kNumOutputDim_2); float16_t *x = reinterpret_cast(input_x->data_c()); float16_t *dy = reinterpret_cast(input_dy->data_c()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc index a4d557d84ad..0f016987be8 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/pooling_fp16_grad.cc @@ -29,24 +29,23 @@ using mindspore::schema::PrimitiveType_AvgPoolGrad; using mindspore::schema::PrimitiveType_MaxPoolGrad; namespace mindspore::kernel { +namespace { +constexpr int kNumInputDim_2 = 2; +constexpr int kNumShapeDim_2 = 2; +} // namespace int PoolingGradCPUKernelFp16::ReSize() { PoolingParameter *pool_param = reinterpret_cast(op_parameter_); - auto in_shape = in_tensors_.at(0)->shape(); auto out_shape = in_tensors_.at(1)->shape(); - if (pool_param->pool_mode_ == PoolMode_AvgPool) { - out_shape = in_tensors_.at(2)->shape(); + out_shape = in_tensors_.at(kNumInputDim_2)->shape(); } - int input_h = 
in_shape.at(1); - int input_w = in_shape.at(2); - + int input_w = in_shape.at(kNumShapeDim_2); if (pool_param->global_) { pool_param->window_w_ = input_w; pool_param->window_h_ = input_h; } - pool_param->input_h_ = in_shape[kNHWC_H]; pool_param->input_w_ = in_shape[kNHWC_W]; pool_param->input_batch_ = in_shape[kNHWC_N]; @@ -55,7 +54,6 @@ int PoolingGradCPUKernelFp16::ReSize() { pool_param->output_w_ = out_shape[kNHWC_W]; pool_param->output_batch_ = out_shape[kNHWC_N]; pool_param->output_channel_ = out_shape[kNHWC_C]; - return RET_OK; } @@ -73,11 +71,11 @@ int PoolingGradCPUKernelFp16::Execute(int task_id) { std::fill(output_ptr + task_id * stride * in_batch_size, output_ptr + ((task_id * stride) + count) * in_batch_size, 0.f); if (pool_param->pool_mode_ == PoolMode_MaxPool) { - auto dy_ptr = reinterpret_cast(in_tensors_.at(2)->data_c()); + auto dy_ptr = reinterpret_cast(in_tensors_.at(kNumInputDim_2)->data_c()); MaxPoolingFp16Grad(input_ptr + task_id * stride * in_batch_size, dy_ptr + task_id * stride * out_batch_size, output_ptr + task_id * stride * in_batch_size, count, pool_param); } else { - input_ptr = reinterpret_cast(in_tensors_.at(2)->data_c()); + input_ptr = reinterpret_cast(in_tensors_.at(kNumInputDim_2)->data_c()); AvgPoolingFp16Grad(input_ptr + task_id * stride * out_batch_size, output_ptr + task_id * stride * in_batch_size, count, pool_param); } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc index 74175c9e9b2..6cf30d6820e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp16_grad/resize_fp16_grad.cc @@ -46,7 +46,6 @@ int ResizeGradCPUKernelFp16::ReSize() { param->out_width_ = static_cast(out_tensors_.at(0)->Width()); param->height_scale_ = ScalingFp16(param->out_height_, param->in_height_, align_corners); param->width_scale_ = ScalingFp16(param->out_width_, param->in_width_, 
align_corners); - return RET_OK; } @@ -67,7 +66,6 @@ int ResizeGradCPUKernelFp16::Execute(int task_id) { } auto batch_size = in_tensors_.at(0)->Batch(); auto channel = in_tensors_.at(0)->Channel(); - if (param->method == static_cast(schema::ResizeMethod_NEAREST)) { ResizeNearestNeighborFp16Grad(in_addr, out_addr, batch_size, channel, in_tensors_.at(0)->format(), param); } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc index 436af3d4bd1..c3e00309c34 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/activation_fp32.cc @@ -34,6 +34,8 @@ using mindspore::schema::PrimitiveType_Activation; namespace mindspore::kernel { int ActivationCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (type_ != schema::ActivationType_RELU && type_ != schema::ActivationType_RELU6 && type_ != schema::ActivationType_LEAKY_RELU && type_ != schema::ActivationType_SIGMOID && type_ != schema::ActivationType_TANH && type_ != schema::ActivationType_HSWISH && diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc index bc601c4163f..8bfec5f4507 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.cc @@ -31,6 +31,8 @@ using mindspore::schema::PrimitiveType_AdderFusion; namespace mindspore::kernel { int AdderCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = InitWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; @@ -71,13 +73,13 @@ int AdderCPUKernel::InitWeightBias() { int pack_weight_size = oc_block_num * oc_block * in_channel * kernel_plane; auto origin_weight = reinterpret_cast(filter_tensor->MutableData()); - 
packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); + packed_weight_ = malloc(pack_weight_size * sizeof(float)); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "malloc packed weight failed."; return RET_ERROR; } memset(packed_weight_, 0, pack_weight_size * sizeof(float)); - RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); + RowMajor2Col4Major(origin_weight, reinterpret_cast(packed_weight_), out_channel, in_channel * kernel_plane); bias_data_ = reinterpret_cast(malloc(oc_block_num * oc_block * sizeof(float))); if (bias_data_ == nullptr) { @@ -101,8 +103,8 @@ int AdderCPUKernel::RunImpl(int task_id) { auto ori_input_data = reinterpret_cast(input_tensor->MutableData()); MS_ASSERT(ori_input_data != nullptr); auto output_addr = reinterpret_cast(out_tensors_.at(kOutputIndex)->MutableData()); - AdderFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast(bias_data_), col_major_input_, - output_addr, task_id, conv_param_); + AdderFp32(ori_input_data, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), col_major_input_, output_addr, task_id, conv_param_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h index 57ee60126d9..6966ef3ff69 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/adder_fp32.h @@ -30,7 +30,7 @@ class AdderCPUKernel : public ConvolutionCPUKernel { : ConvolutionCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~AdderCPUKernel() override = default; - int InitWeightBias() override; + int InitWeightBias(); int Init() override; int ReSize() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc index a737ed2e08e..6f389144ebb 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/addn_fp32.cc @@ -37,7 +37,11 @@ int AddNLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } } // namespace -int AddNCPUKernel::Init() { return RET_OK; } +int AddNCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int AddNCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc index 104a754dcca..936927c8f05 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_fp32.cc @@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_Eltwise; namespace mindspore::kernel { int ArithmeticCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto primitive_type = param_->op_parameter_.type_; if (primitive_type == schema::PrimitiveType_Eltwise) { switch (param_->eltwise_mode_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc index 6a0138c4fce..d1ef6c994b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/arithmetic_self_fp32.cc @@ -60,6 +60,8 @@ ArithmeticSelfBoolFunc ArithmeticSelfCPUKernel::GetArithmeticSelfBoolFun(int pri } int ArithmeticSelfCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc index b03b63d9701..361d58eec3a 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batch_to_space_fp32.cc @@ -45,6 +45,8 @@ int BatchToSpaceCPUKernel::Processinput() { } int BatchToSpaceCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); MS_ASSERT(in_tensors_.at(0)->format() == mindspore::NHWC); if (!InferShapeDone()) { return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc index 8142d63c91c..c59bab81c6a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/batchnorm_fp32.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_BatchNorm; namespace mindspore::kernel { int BatchnormCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc index dbf95716557..36579758f66 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/bias_fp32.cc @@ -47,7 +47,7 @@ int BiasCPUKernel::Run() { auto in = reinterpret_cast(in_tensors_.at(0)->MutableData()); auto bias = reinterpret_cast(in_tensors_.at(1)->MutableData()); auto out = reinterpret_cast(out_tensors_.at(0)->MutableData()); - size_t data_size = in_tensors_.at(0)->ElementsNum(); + size_t data_size = static_cast(in_tensors_.at(0)->ElementsNum()); MS_ASSERT(ms_context_->allocator != nullptr); float *tile_in = reinterpret_cast(ms_context_->allocator->Malloc(data_size * sizeof(float))); float *tile_bias = reinterpret_cast(ms_context_->allocator->Malloc(data_size * sizeof(float))); @@ -57,13 +57,15 @@ int BiasCPUKernel::Run() { ms_context_->allocator->Free(tile_bias); return RET_ERROR; 
} - auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, data_size, bias_param_); + auto ret = BroadcastAdd(in, bias, tile_in, tile_bias, out, static_cast(data_size), bias_param_); ms_context_->allocator->Free(tile_in); ms_context_->allocator->Free(tile_bias); return ret; } int BiasCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc index cc845aff567..d747858aa00 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/broadcast_to_fp32.cc @@ -53,6 +53,8 @@ int BroadcastToCPUKernel::ReSize() { } int BroadcastToCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); shape_info_ = reinterpret_cast(malloc(sizeof(BroadcastShapeInfo))); if (shape_info_ == nullptr) { MS_LOG(ERROR) << "Malloc BroadcastShapeInfo failed!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc index d7f5a75e63b..a7dc45c170e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/cast_fp32.cc @@ -36,6 +36,8 @@ int CastRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } // namespace int CastCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc index a90882da439..401ba4f74c9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/concat_fp32.cc @@ -26,6 +26,7 @@ using 
mindspore::schema::PrimitiveType_Concat; namespace mindspore::kernel { int ConcatCPUKernel::Init() { + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc index d39a7bf23b2..7f129a758ed 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.cc @@ -23,10 +23,6 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { Convolution1x1CPUKernel::~Convolution1x1CPUKernel() { FreeTmpBuffer(); - if (weight_ptr_ != nullptr) { - free(weight_ptr_); - weight_ptr_ = nullptr; - } if (matmul_param_ != nullptr) { delete matmul_param_; matmul_param_ = nullptr; @@ -67,49 +63,6 @@ void Convolution1x1CPUKernel::InitConv1x1MatmulParam() { return; } -int Convolution1x1CPUKernel::InitConv1x1BiasWeight() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = filter_tensor->Channel(); - if (input_channel < 0) { - MS_LOG(ERROR) << "get channel failed from filter_tensor"; - return RET_ERROR; - } - auto output_channel = filter_tensor->Batch(); - if (output_channel < 0) { - MS_LOG(ERROR) << "get batch failed from filter_tensor"; - return RET_ERROR; - } - - if (in_tensors_.size() == 3) { - int size = UP_ROUND(output_channel, col_tile_) * sizeof(float); - int weight_size = output_channel * sizeof(float); - bias_data_ = malloc(size); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; - return RET_ERROR; - } - memcpy(bias_data_, origin_bias_, weight_size); - memset(reinterpret_cast(bias_data_) + weight_size, 0, size - weight_size); - } - - int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float); - int down_size = input_channel * DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float); - weight_ptr_ = reinterpret_cast(malloc(size)); - 
if (weight_ptr_ == nullptr) { - MS_LOG(ERROR) << "Conv1x1 Malloc weight_ptr_ error!"; - return RET_ERROR; - } - memset(reinterpret_cast(weight_ptr_) + down_size, 0, size - down_size); -#ifdef ENABLE_AVX - RowMajor2Col16Major(origin_weight_, weight_ptr_, output_channel, input_channel); -#elif defined(ENABLE_ARM32) - RowMajor2Col4Major(origin_weight_, weight_ptr_, output_channel, input_channel); -#else - RowMajor2Col8Major(origin_weight_, weight_ptr_, output_channel, input_channel); -#endif - return RET_OK; -} - int Convolution1x1CPUKernel::InitConv1x1Param() { if ((matmul_param_->row_ > (row_tile_ * op_parameter_->thread_num_)) && (matmul_param_->row_ > matmul_param_->col_)) { multi_thread_by_hw_ = true; @@ -144,6 +97,8 @@ int Convolution1x1CPUKernel::InitConv1x1Param() { } int Convolution1x1CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_AVX row_tile_ = C6NUM; col_tile_ = C16NUM; @@ -162,7 +117,7 @@ int Convolution1x1CPUKernel::Init() { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } - int error_code = InitConv1x1BiasWeight(); + int error_code = InitConvWeightBias(); if (error_code != RET_OK) { MS_LOG(ERROR) << "Convolution1x1 init weight and bias failed."; return error_code; @@ -187,7 +142,7 @@ int Convolution1x1CPUKernel::DoConv1x1(int task_id) { return RET_OK; } auto bias = (bias_data_ == nullptr) ? 
nullptr : reinterpret_cast(bias_data_) + thread_stride_ * task_id; - MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * matmul_param_->deep_, + MatMulOpt(pack_input_, reinterpret_cast(packed_weight_) + task_id * thread_stride_ * matmul_param_->deep_, output_ptr_ + task_id * thread_stride_, bias, matmul_param_->act_type_, matmul_param_->deep_, matmul_param_->row_, cur_oc, matmul_param_->col_, OutType_Nhwc); return RET_OK; @@ -218,9 +173,9 @@ int Convolution1x1CPUKernel::DoConv1x1Hw(int task_id) { for (int i = 0; i < cur_hw_; i += row_tile_) { int cur_rows = (cur_hw_ - i >= row_tile_) ? row_tile_ : (cur_hw_ - i); PackMatmulInput(cur_intput, thread_pack_input, cur_rows, matmul_param_->deep_); - MatMulOpt(thread_pack_input, weight_ptr_, cur_output, reinterpret_cast(bias_data_), - matmul_param_->act_type_, matmul_param_->deep_, cur_rows, matmul_param_->col_, matmul_param_->col_, - OutType_Nhwc); + MatMulOpt(thread_pack_input, reinterpret_cast(packed_weight_), cur_output, + reinterpret_cast(bias_data_), matmul_param_->act_type_, matmul_param_->deep_, cur_rows, + matmul_param_->col_, matmul_param_->col_, OutType_Nhwc); cur_intput += row_tile_ * matmul_param_->deep_; cur_output += row_tile_ * matmul_param_->col_; } @@ -250,8 +205,9 @@ int Convolution1x1CPUKernel::Run() { MS_LOG(ERROR) << "Conv1x1 Malloc pack_input_ error!"; return RET_MEMORY_FAILED; } - if (IsTrain() && IsTrainable()) { - PackWeight(); + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } for (int batch_index = 0; batch_index < conv_param_->input_batch_; batch_index++) { @@ -292,24 +248,49 @@ void Convolution1x1CPUKernel::PackWeight() { return; } auto output_channel = filter_tensor->Batch(); - if (input_channel < 0) { + if (output_channel < 0) { MS_LOG(ERROR) << "get channel failed from filter_tensor."; return; } - int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float); - int down_size = input_channel * 
DOWN_DIV(output_channel, col_tile_) * col_tile_ * sizeof(float); - memset(reinterpret_cast(weight_ptr_) + down_size, 0, size - down_size); - MS_ASSERT(filter_tensor->data_c() != nullptr); + void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); #ifdef ENABLE_AVX - RowMajor2Col16Major(reinterpret_cast(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel); + RowMajor2Col16Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + output_channel, input_channel); #elif defined(ENABLE_ARM32) - RowMajor2Col4Major(reinterpret_cast(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel); + RowMajor2Col4Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + output_channel, input_channel); #else - RowMajor2Col8Major(reinterpret_cast(filter_tensor->data_c()), weight_ptr_, output_channel, input_channel); + RowMajor2Col8Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + output_channel, input_channel); #endif } +int Convolution1x1CPUKernel::MallocWeightBiasData() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = filter_tensor->Channel(); + auto output_channel = filter_tensor->Batch(); + int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float); + packed_weight_ = malloc(size); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!"; + return RET_ERROR; + } + memset(reinterpret_cast(packed_weight_), 0, size); + + if (in_tensors_.size() == 3) { + size = UP_ROUND(output_channel, col_tile_) * sizeof(float); + bias_data_ = malloc(size); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Conv1x1 Malloc bias_ptr_ error!"; + return RET_ERROR; + } + memset(reinterpret_cast(bias_data_), 0, size); + } + return RET_OK; +} + int Convolution1x1CPUKernel::Eval() { auto ret = InnerKernel::Eval(); if (ret != RET_OK) { diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h index 22b054afe4a..19d3d040ec7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h @@ -35,9 +35,7 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { Convolution1x1CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} ~Convolution1x1CPUKernel(); int Init() override; int Run() override; @@ -50,11 +48,11 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { private: int InitConv1x1Param(); - int InitConv1x1BiasWeight(); void InitConv1x1MatmulParam(); + int MallocWeightBiasData() override; + void PackWeight() override; void FreeTmpBuffer(); void PackMatmulInput(const float *src_ptr, float *dst_ptr, int row, int col) const; - void PackWeight(); private: MatMulParameter *matmul_param_ = nullptr; @@ -62,9 +60,6 @@ class Convolution1x1CPUKernel : public ConvolutionBaseCPUKernel { bool multi_thread_by_hw_ = false; int thread_count_ = 0; int thread_stride_ = 0; - float *origin_weight_; // do not free - float *origin_bias_; // do not free - float *weight_ptr_ = nullptr; float *pack_input_ = nullptr; float *input_ptr_ = nullptr; float *output_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc index 72140e99963..27411fbb226 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.cc @@ -73,6 +73,9 @@ int ConvolutionDelegateCPUKernel::GetWeightAndBias() { } int ConvolutionDelegateCPUKernel::GetWeightData() { + if (in_tensors_.at(kWeightIndex)->data_c() == nullptr) { + return RET_OK; + } if (InferShapeDone()) { origin_weight_ = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); MS_ASSERT(origin_weight_ != nullptr); @@ -107,6 +110,8 @@ int ConvolutionDelegateCPUKernel::GetBiasData() { } int ConvolutionDelegateCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = GetWeightAndBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Get weight and bias failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h index d41f0896423..b97e2ba0ec4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_delegate_fp32.h @@ -40,7 +40,7 @@ class ConvolutionDelegateCPUKernel : public InnerKernel { int ReSize() override; int Run() override { return conv_kernel_->Run(); } - void set_in_tensor(lite::Tensor *in_tensor, int index) override { + void set_in_tensor(lite::Tensor *in_tensor, size_t index) override { MS_ASSERT(index < in_tensors_.size()); this->in_tensors_[index] = in_tensor; if (conv_kernel_ != nullptr) { @@ -48,7 +48,7 @@ class ConvolutionDelegateCPUKernel : public InnerKernel { } } - void set_out_tensor(lite::Tensor *out_tensor, int index) override { + void set_out_tensor(lite::Tensor *out_tensor, size_t index) override { MS_ASSERT(index < out_tensors_.size()); this->out_tensors_[index] = out_tensor; if (conv_kernel_ != nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc index 
6973da5b212..d7e090ba343 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.cc @@ -24,51 +24,12 @@ using mindspore::lite::RET_MEMORY_FAILED; using mindspore::lite::RET_OK; namespace mindspore::kernel { -ConvolutionDepthwise3x3CPUKernel::~ConvolutionDepthwise3x3CPUKernel() { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } -} - -int ConvolutionDepthwise3x3CPUKernel::InitWeightBias() { - // init weight: k, h, w, c; k == group == output_channel, c == 1 - auto weight_tensor = in_tensors_[kWeightIndex]; - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - int channel = weight_tensor->Batch(); - int c4 = UP_ROUND(channel, C4NUM); - int pack_weight_size = c4 * C12NUM; - - if (packed_weight_ == nullptr) { - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - } - PackWeightConvDw3x3Fp32(origin_weight, packed_weight_, channel); - - if (bias_data_ == nullptr) { - bias_data_ = reinterpret_cast(malloc(c4 * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - } - memset(bias_data_, 0, c4 * sizeof(float)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_[kBiasIndex]; - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); - } - - return RET_OK; -} - int ConvolutionDepthwise3x3CPUKernel::Init() { - auto ret = InitWeightBias(); + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 
InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -98,8 +59,8 @@ int ConvolutionDepthwise3x3CPUKernel::Execute(int task_id) { int step_oh = UP_DIV(conv_param_->output_h_, conv_param_->thread_num_); int start_oh = step_oh * task_id; int end_oh = MSMIN(start_oh + step_oh, conv_param_->output_h_); - ConvDw3x3(output_ptr_, buffer, input_ptr_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - start_oh, end_oh); + ConvDw3x3(output_ptr_, buffer, input_ptr_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, start_oh, end_oh); return RET_OK; } @@ -122,13 +83,10 @@ int ConvolutionDepthwise3x3CPUKernel::Run() { MS_LOG(ERROR) << "ConvDw3x3Run failed to allocate buffer"; return RET_MEMORY_FAILED; } - - if (IsTrain() && IsTrainable()) { - if (InitWeightBias() != RET_OK) { - ctx_->allocator->Free(buffer_); - MS_LOG(ERROR) << "Convolution depthwise 3x3 run InitWeightBias failed."; - return RET_ERROR; - } + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + ctx_->allocator->Free(buffer_); + return RET_ERROR; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -153,12 +111,44 @@ int ConvolutionDepthwise3x3CPUKernel::Eval() { return ret; } if (IsTrainable()) { - if (InitWeightBias() != RET_OK) { + if (InitConvWeightBias() != RET_OK) { MS_LOG(ERROR) << "Convolution depthwise 3x3 fp32 Eval:InitWeightBias failed."; return RET_ERROR; } } return RET_OK; } + +void ConvolutionDepthwise3x3CPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int channel = weight_tensor->Batch(); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackWeightConvDw3x3Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), channel); +} + +int ConvolutionDepthwise3x3CPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int channel = weight_tensor->Batch(); + int c4 = UP_ROUND(channel, C4NUM); + if (packed_weight_ == nullptr) { + int pack_weight_size = c4 * C12NUM; + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + } + + if (bias_data_ == nullptr) { + bias_data_ = malloc(c4 * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + } + memset(bias_data_, 0, c4 * sizeof(float)); + return RET_OK; +} } // namespace mindspore::kernel #endif diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h index 57baad587d4..bbed4403552 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_3x3_fp32.h @@ -28,19 +28,20 @@ class ConvolutionDepthwise3x3CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwise3x3CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~ConvolutionDepthwise3x3CPUKernel() override; + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} + ~ConvolutionDepthwise3x3CPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: - float *packed_weight_ = nullptr; + int MallocWeightBiasData() override; + void PackWeight() override; float *input_ptr_ = nullptr; float *output_ptr_ = nullptr; float *buffer_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc index 5e4ff8f7270..5f3d171a311 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.cc @@ -22,51 +22,12 @@ using mindspore::lite::RET_INFER_INVALID; using mindspore::lite::RET_OK; namespace mindspore::kernel { -ConvolutionDepthwiseCPUKernel::~ConvolutionDepthwiseCPUKernel() { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } -} - -int ConvolutionDepthwiseCPUKernel::InitWeightBias() { - // init weight: k, h, w, c; k == group == output_channel, c == 1 - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); - int channel = weight_tensor->Batch(); - int pack_weight_size = channel * weight_tensor->Height() * weight_tensor->Width(); - if (pack_weight_size >= std::numeric_limits::max() / static_cast(sizeof(float))) { - MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size; - return RET_ERROR; - } - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), channel); - - bias_data_ = 
reinterpret_cast(malloc(channel * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - - memset(bias_data_, 0, channel * sizeof(float)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_[kBiasIndex]; - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); - } - - return RET_OK; -} - int ConvolutionDepthwiseCPUKernel::Init() { - auto ret = InitWeightBias(); + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -90,8 +51,8 @@ int ConvolutionDepthwiseCPUKernel::ReSize() { } int ConvolutionDepthwiseCPUKernel::Execute(int task_id) { - auto ret = - ConvDw(output_ptr_, input_ptr_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, task_id); + auto ret = ConvDw(output_ptr_, input_ptr_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, task_id); return ret; } @@ -106,8 +67,9 @@ int ConvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } int ConvolutionDepthwiseCPUKernel::Run() { - if (IsTrain() && IsTrainable()) { - PackWeight(); + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -127,11 +89,33 @@ int ConvolutionDepthwiseCPUKernel::Run() { void ConvolutionDepthwiseCPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; MS_ASSERT(origin_weight != nullptr); + PackWeightKHWToHWKFp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); +} - PackWeightKHWToHWKFp32(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); +int ConvolutionDepthwiseCPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int channel = weight_tensor->Batch(); + int pack_weight_size = weight_tensor->Batch() * weight_tensor->Height() * weight_tensor->Width(); + if (pack_weight_size >= std::numeric_limits::max() / static_cast(sizeof(float))) { + MS_LOG(ERROR) << "pack_weight_size is invalid, pack_weight_size: " << pack_weight_size; + return RET_ERROR; + } + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + + bias_data_ = malloc(channel * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(bias_data_, 0, channel * sizeof(float)); + return RET_OK; } int ConvolutionDepthwiseCPUKernel::Eval() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h index 652d87eb798..622fe326136 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h @@ -28,20 +28,20 @@ class ConvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} - ~ConvolutionDepthwiseCPUKernel() override; + : 
ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} + ~ConvolutionDepthwiseCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: - void PackWeight(); - float *packed_weight_ = nullptr; + int MallocWeightBiasData() override; + void PackWeight() override; float *input_ptr_ = nullptr; float *output_ptr_ = nullptr; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc index bbbfb934bec..66ef6c781cb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.cc @@ -23,10 +23,6 @@ using mindspore::lite::RET_OK; namespace mindspore::kernel { ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel() { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } if (zero_ptr_ != nullptr) { free(zero_ptr_); zero_ptr_ = nullptr; @@ -37,60 +33,12 @@ ConvolutionDepthwiseIndirectCPUKernel::~ConvolutionDepthwiseIndirectCPUKernel() } } -int ConvolutionDepthwiseIndirectCPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 - auto weight_tensor = in_tensors_[kWeightIndex]; - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); -#ifdef ENABLE_AVX - int div_flag = C8NUM; -#else - int div_flag = C4NUM; -#endif - int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag); - int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width(); - - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - 
MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } -#ifdef ENABLE_AVX - PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(), - weight_tensor->Batch()); -#else - PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(), - weight_tensor->Batch()); -#endif - - bias_data_ = reinterpret_cast(malloc(batch_flag * div_flag * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_[kBiasIndex]; - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); - } else { - memset(bias_data_, 0, batch_flag * div_flag * sizeof(float)); - } - - // malloc zero ptr - zero_ptr_ = reinterpret_cast(malloc(batch_flag * div_flag * sizeof(float))); - if (zero_ptr_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float)); - return RET_OK; -} - int ConvolutionDepthwiseIndirectCPUKernel::Init() { - auto ret = InitWeightBias(); + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise Indirect fp32 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -137,8 +85,8 @@ int ConvolutionDepthwiseIndirectCPUKernel::ReSize() { } int ConvolutionDepthwiseIndirectCPUKernel::Execute(int task_id) { - ConvDwIndirection(output_ptr_, indirect_buffer_, packed_weight_, reinterpret_cast(bias_data_), zero_ptr_, - conv_param_, task_id); + ConvDwIndirection(output_ptr_, indirect_buffer_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), zero_ptr_, 
conv_param_, task_id); return RET_OK; } @@ -193,11 +141,10 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() { } else { packed_input_ = input_ptr; } - - if (IsTrain() && IsTrainable()) { - PackWeight(); + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } - auto output_tensor = out_tensors_.at(kOutputIndex); output_ptr_ = reinterpret_cast(output_tensor->data_c()); MS_ASSERT(output_ptr_ != nullptr); @@ -215,18 +162,49 @@ int ConvolutionDepthwiseIndirectCPUKernel::Run() { } void ConvolutionDepthwiseIndirectCPUKernel::PackWeight() { - auto weight_tensor = in_tensors_[kWeightIndex]; - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; MS_ASSERT(origin_weight != nullptr); #ifdef ENABLE_AVX - PackDepthwiseIndirectWeightC8Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(), - weight_tensor->Batch()); + PackDepthwiseIndirectWeightC8Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch()); #else - PackDepthwiseIndirectWeightC4Fp32(origin_weight, packed_weight_, weight_tensor->Height(), weight_tensor->Width(), - weight_tensor->Batch()); + PackDepthwiseIndirectWeightC4Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch()); #endif } +int ConvolutionDepthwiseIndirectCPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_[kWeightIndex]; +#ifdef ENABLE_AVX + int div_flag = C8NUM; +#else + int div_flag = C4NUM; +#endif + int batch_flag = UP_DIV(weight_tensor->Batch(), div_flag); + int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width(); + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == 
nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + bias_data_ = malloc(batch_flag * div_flag * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(bias_data_, 0, batch_flag * div_flag * sizeof(float)); + + // malloc zero ptr + zero_ptr_ = reinterpret_cast(malloc(batch_flag * div_flag * sizeof(float))); + if (zero_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(zero_ptr_, 0, batch_flag * div_flag * sizeof(float)); + return RET_OK; +} + int ConvolutionDepthwiseIndirectCPUKernel::Eval() { auto ret = InnerKernel::Eval(); if (ret != RET_OK) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h index 80820456d10..1f404d5c5c4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h @@ -27,26 +27,26 @@ class ConvolutionDepthwiseIndirectCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseIndirectCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} ~ConvolutionDepthwiseIndirectCPUKernel() override; int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: int MallocIndirectBuffer(); int MallocPackedInput(); - void PackWeight(); + int MallocWeightBiasData() override; + void PackWeight() override; int step_w = 0; int step_h = 0; float **indirect_buffer_ = nullptr; float *zero_ptr_ = nullptr; - float *packed_weight_ = nullptr; float *output_ptr_ = nullptr; float *packed_input_ = nullptr; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc index 8c4486f2068..b8f0475f921 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.cc @@ -27,48 +27,6 @@ ConvolutionDepthwiseSWCPUKernel::~ConvolutionDepthwiseSWCPUKernel() { delete sliding_; sliding_ = nullptr; } - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } -} - -int ConvolutionDepthwiseSWCPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); - - int malloc_size = MSMAX(conv_param_->output_channel_, C4NUM * 
OC4); - if (malloc_size <= 0) { - MS_LOG(ERROR) << "malloc size is wrong"; - return RET_ERROR; - } - bias_data_ = reinterpret_cast(malloc(malloc_size * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - - memset(bias_data_, 0, malloc_size * sizeof(float)); - if (in_tensors_.size() == kInputSize2) { - auto bias_tensor = in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); - } - - conv_param_->thread_num_ = MSMIN(thread_count_, OC4); - return RET_OK; } int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() { @@ -94,15 +52,17 @@ int ConvolutionDepthwiseSWCPUKernel::InitPackedInputOutput() { } int ConvolutionDepthwiseSWCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -127,8 +87,8 @@ int ConvolutionDepthwiseSWCPUKernel::ReSize() { } int ConvolutionDepthwiseSWCPUKernel::Execute(int task_id) { - ConvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - sliding_, task_id); + ConvDwSWFp32(packed_output_, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } @@ -149,9 +109,9 @@ int ConvolutionDepthwiseSWCPUKernel::Run() { FreePackedInputOutput(); return RET_ERROR; } - - if (IsTrain() && IsTrainable()) { - PackWeight(); + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight 
failed."; + return RET_ERROR; } auto input_tensor = in_tensors_.at(kInputIndex); @@ -195,10 +155,34 @@ void ConvolutionDepthwiseSWCPUKernel::FreePackedInputOutput() { void ConvolutionDepthwiseSWCPUKernel::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; MS_ASSERT(origin_weight != nullptr); - PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); + PackNCHWToNC4HW4Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, + weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); +} + +int ConvolutionDepthwiseSWCPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); + int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + int malloc_size = MSMAX(conv_param_->output_channel_, C4NUM * OC4); + if (malloc_size <= 0) { + MS_LOG(ERROR) << "malloc size is wrong"; + return RET_ERROR; + } + bias_data_ = malloc(malloc_size * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(bias_data_, 0, malloc_size * sizeof(float)); + conv_param_->thread_num_ = MSMIN(thread_count_, OC4); + return RET_OK; } int ConvolutionDepthwiseSWCPUKernel::Eval() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h index 690096fc113..f5294723bef 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h @@ -27,23 +27,23 @@ class ConvolutionDepthwiseSWCPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~ConvolutionDepthwiseSWCPUKernel() override; int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: int InitPackedInputOutput(); void FreePackedInputOutput(); - void PackWeight(); + int MallocWeightBiasData() override; + void PackWeight() override; SlidingWindowParam *sliding_ = nullptr; - float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; bool need_align_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc index 5f79d7cbcf1..2ffecf8d98c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.cc @@ -28,43 +28,6 @@ ConvolutionDepthwiseSWCPUKernelX86::~ConvolutionDepthwiseSWCPUKernelX86() { delete sliding_; sliding_ = nullptr; } - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } - if (packed_bias_ != nullptr) { - free(packed_bias_); - packed_bias_ = nullptr; - } -} - -int ConvolutionDepthwiseSWCPUKernelX86::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 - auto weight_tensor = in_tensors_.at(kWeightIndex); - origin_weight_ = 
reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight_ != nullptr); - int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); - int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width(); - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc packed_weight_ is failed!"; - return RET_NULL_PTR; - } - PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin, - weight_tensor->Channel(), packed_weight_, origin_weight_); - if (in_tensors_.size() == kInputSize2) { - auto bias_size = oc_algin * oc_tile_; - auto bias_tensor = in_tensors_.at(kBiasIndex); - auto ori_bias = reinterpret_cast(bias_tensor->data_c()); - packed_bias_ = reinterpret_cast(malloc(bias_size * sizeof(float))); - if (packed_bias_ == nullptr) { - MS_LOG(ERROR) << "Malloc bias_data buffer failed."; - return RET_NULL_PTR; - } - memset(packed_bias_, 0, bias_size * sizeof(float)); - memcpy(packed_bias_, ori_bias, bias_tensor->ElementsNum() * sizeof(float)); - } - return RET_OK; } int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() { @@ -94,6 +57,8 @@ int ConvolutionDepthwiseSWCPUKernelX86::InitPackedInputOutput() { } int ConvolutionDepthwiseSWCPUKernelX86::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_AVX oc_tile_ = C8NUM; #endif @@ -103,9 +68,9 @@ int ConvolutionDepthwiseSWCPUKernelX86::Init() { return RET_ERROR; } - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Convolution depthwise fp32 InitWeightBias failed."; + MS_LOG(ERROR) << "Convolution depthwise fp32 InitConvWeightBias failed."; return RET_ERROR; } if (!InferShapeDone()) { @@ -121,8 +86,8 @@ int ConvolutionDepthwiseSWCPUKernelX86::ReSize() { } int ConvolutionDepthwiseSWCPUKernelX86::Execute(int task_id) { - 
DepthwiseSWAvxFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(packed_bias_), - conv_param_, sliding_, task_id); + DepthwiseSWAvxFp32(packed_output_, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } @@ -143,11 +108,10 @@ int ConvolutionDepthwiseSWCPUKernelX86::Run() { FreePackedInputOutput(); return RET_ERROR; } - - if (IsTrain() && IsTrainable()) { - PackWeight(); + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } - auto input_tensor = in_tensors_.at(kInputIndex); auto input_ptr = reinterpret_cast(input_tensor->data_c()); MS_ASSERT(input_ptr != nullptr); @@ -194,8 +158,33 @@ void ConvolutionDepthwiseSWCPUKernelX86::FreePackedInputOutput() { void ConvolutionDepthwiseSWCPUKernelX86::PackWeight() { auto weight_tensor = in_tensors_.at(kWeightIndex); int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); PackNHWCToNXHWCXFp32(weight_tensor->Height(), weight_tensor->Width(), weight_tensor->Batch(), oc_algin, - weight_tensor->Channel(), packed_weight_, origin_weight_); + weight_tensor->Channel(), reinterpret_cast(packed_weight_), + reinterpret_cast(origin_weight)); +} + +int ConvolutionDepthwiseSWCPUKernelX86::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int oc_algin = UP_DIV(weight_tensor->Batch(), oc_tile_); + int pack_weight_size = oc_algin * oc_tile_ * weight_tensor->Height() * weight_tensor->Width(); + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc packed_weight_ is failed!"; + return RET_NULL_PTR; + } + + if (in_tensors_.size() == kInputSize2) { + auto bias_size = oc_algin * oc_tile_; + bias_data_ = malloc(bias_size * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc bias_data buffer failed."; + return RET_NULL_PTR; + } + memset(bias_data_, 0, bias_size * sizeof(float)); + } + return RET_OK; } int ConvolutionDepthwiseSWCPUKernelX86::Eval() { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h index fe060df82a7..c4bc1ffed67 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_x86_fp32.h @@ -27,28 +27,26 @@ class ConvolutionDepthwiseSWCPUKernelX86 : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWCPUKernelX86(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, 
inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~ConvolutionDepthwiseSWCPUKernelX86() override; int Init() override; int ReSize() override; int Run() override; - int InitWeightBias(); int Execute(int task_id); int Eval() override; private: void FreePackedInputOutput(); int InitPackedInputOutput(); - void PackWeight(); + int MallocWeightBiasData() override; + void PackWeight() override; int oc_tile_ = C8NUM; // in x86 avx SlidingWindowParam *sliding_ = nullptr; - float *packed_weight_ = nullptr; - float *packed_bias_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; - float *origin_weight_ = nullptr; bool input_need_align_ = false; bool output_need_align_ = false; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc index 07ad676555c..54cca8410f4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.cc @@ -34,58 +34,6 @@ namespace mindspore::kernel { #else #define OC_BLOCK C8NUM #endif - -int ConvolutionCPUKernel::InitWeightBias() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - int32_t in_channel = filter_tensor->Channel(); - if (in_channel < 0) { - MS_LOG(ERROR) << "get channel from filter_tensor failed."; - return RET_ERROR; - } - int32_t out_channel = filter_tensor->Batch(); - if (out_channel < 0) { - MS_LOG(ERROR) << "get batch from filter_tensor failed."; - return RET_ERROR; - } - conv_param_->input_channel_ = in_channel; - conv_param_->output_channel_ = out_channel; - int32_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); - if (kernel_plane < 0) { - MS_LOG(ERROR) << "get height and width from filter_tensor failed."; - return RET_ERROR; - } - size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); - size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; - - 
packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc packed weight failed."; - return RET_ERROR; - } - memset(packed_weight_, 0, pack_weight_size * sizeof(float)); -#ifdef ENABLE_AVX - RowMajor2Col16Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); -#elif defined(ENABLE_ARM32) - RowMajor2Col4Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); -#else - RowMajor2Col8Major(origin_weight_, packed_weight_, out_channel, in_channel * kernel_plane); -#endif - - bias_data_ = reinterpret_cast(malloc(oc_block_num * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "malloc bias failed."; - return RET_ERROR; - } - memset(bias_data_, 0, oc_block_num * sizeof(float)); - - if (in_tensors_.size() == kInputSize2) { - memcpy(bias_data_, origin_bias_, out_channel * sizeof(float)); - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); - } - return RET_OK; -} - int ConvolutionCPUKernel::InitTmpBuffer() { MS_ASSERT(ctx_->allocator != nullptr); @@ -112,7 +60,9 @@ int ConvolutionCPUKernel::InitTmpBuffer() { } int ConvolutionCPUKernel::Init() { - auto ret = InitWeightBias(); + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; @@ -137,8 +87,8 @@ int ConvolutionCPUKernel::ReSize() { int ConvolutionCPUKernel::RunImpl(int task_id) { auto ori_input_data = reinterpret_cast(in_tensors_.at(kInputIndex)->data_c()); auto output_addr = reinterpret_cast(out_tensors_.at(kOutputIndex)->data_c()); - ConvFp32(ori_input_data, packed_input_, packed_weight_, reinterpret_cast(bias_data_), col_major_input_, - output_addr, task_id, conv_param_); + ConvFp32(ori_input_data, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), col_major_input_, output_addr, task_id, 
conv_param_); return RET_OK; } @@ -159,10 +109,11 @@ int ConvolutionCPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - if (IsTrain() && IsTrainable()) { - PackWeight(); - } + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } ret = ParallelLaunch(this->ms_context_, ConvolutionImpl, this, thread_count_); if (ret != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << ret << "]"; @@ -188,20 +139,45 @@ void ConvolutionCPUKernel::PackWeight() { MS_LOG(ERROR) << "get height and width from filter_tensor failed."; return; } - size_t oc_block_num = UP_ROUND(out_channel, OC_BLOCK); - size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; - - auto origin_weight = reinterpret_cast(filter_tensor->data_c()); - memset(packed_weight_, 0, pack_weight_size * sizeof(float)); + void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); #ifdef ENABLE_AVX - RowMajor2Col16Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); + RowMajor2Col16Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), out_channel, + in_channel * kernel_plane); #elif defined(ENABLE_ARM32) - RowMajor2Col4Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); + RowMajor2Col4Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), out_channel, + in_channel * kernel_plane); #else - RowMajor2Col8Major(origin_weight, packed_weight_, out_channel, in_channel * kernel_plane); + RowMajor2Col8Major(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), out_channel, + in_channel * kernel_plane); #endif } +int ConvolutionCPUKernel::MallocWeightBiasData() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + size_t in_channel = filter_tensor->Channel(); + size_t out_channel = filter_tensor->Batch(); + conv_param_->input_channel_ = in_channel; + conv_param_->output_channel_ = out_channel; + size_t 
oc_block_num = UP_ROUND(out_channel, OC_BLOCK); + size_t kernel_plane = filter_tensor->Height() * filter_tensor->Width(); + size_t pack_weight_size = oc_block_num * in_channel * kernel_plane; + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc packed weight failed."; + return RET_ERROR; + } + memset(packed_weight_, 0, pack_weight_size * sizeof(float)); + + bias_data_ = malloc(oc_block_num * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "malloc bias failed."; + return RET_ERROR; + } + memset(bias_data_, 0, oc_block_num * sizeof(float)); + return RET_OK; +} + int ConvolutionCPUKernel::Eval() { InnerKernel::Eval(); if (IsTrainable()) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h index bf1afb2a7a7..64d070f5ef4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_fp32.h @@ -28,18 +28,10 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { ConvolutionCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} - ~ConvolutionCPUKernel() override { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } - } + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} + ~ConvolutionCPUKernel() override {} int Init() override; - virtual int InitWeightBias(); int InitTmpBuffer(); int ReSize() override; int Run() override; @@ -48,7 +40,8 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { int Eval() override; protected: - void PackWeight(); + int MallocWeightBiasData() override; + void 
PackWeight() override; void FreeTmpBuffer() { if (packed_input_ != nullptr) { ctx_->allocator->Free(packed_input_); @@ -61,9 +54,6 @@ class ConvolutionCPUKernel : public ConvolutionBaseCPUKernel { } protected: - float *origin_weight_; // do not free - float *origin_bias_; // do not free - float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *col_major_input_ = nullptr; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc index b8ce82b5c1e..8e2ab33b3a6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.cc @@ -28,37 +28,6 @@ using mindspore::lite::RET_NULL_PTR; using mindspore::lite::RET_OK; namespace mindspore::kernel { -int ConvolutionSWCPUKernel::InitWeightBias() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - auto input_channel = filter_tensor->Channel(); - auto output_channel = filter_tensor->Batch(); - int kernel_h = filter_tensor->Height(); - int kernel_w = filter_tensor->Width(); - conv_param_->input_channel_ = input_channel; - conv_param_->output_channel_ = output_channel; - int kernel_plane = kernel_h * kernel_w; - int oc_block_num = UP_DIV(output_channel, oc_tile_); - int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane; - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc packed weight failed."; - return RET_NULL_PTR; - } - memset(packed_weight_, 0, pack_weight_size * sizeof(float)); - PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, packed_weight_, - ori_weight_data_); - if (in_tensors_.size() == kInputSize2) { - packed_bias_ = reinterpret_cast(malloc(oc_block_num * oc_tile_ * sizeof(float))); - if (packed_bias_ == nullptr) { - MS_LOG(ERROR) << "malloc 
bias failed."; - return RET_NULL_PTR; - } - memset(packed_bias_, 0, oc_block_num * oc_tile_ * sizeof(float)); - memcpy(packed_bias_, ori_bias_data_, output_channel * sizeof(float)); - } - return RET_OK; -} - int ConvolutionSWCPUKernel::Init() { oc_tile_ = C8NUM; oc_res_ = conv_param_->output_channel_ % oc_tile_; @@ -67,7 +36,7 @@ int ConvolutionSWCPUKernel::Init() { in_tile_ = C8NUM; ic_res_ = conv_param_->input_channel_ % in_tile_; } - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; @@ -108,11 +77,11 @@ int ConvolutionSWCPUKernel::ReSize() { int ConvolutionSWCPUKernel::RunImpl(int task_id) { if (conv_param_->kernel_w_ == 1 && conv_param_->kernel_h_ == 1) { - Conv1x1SWFp32(input_data_, packed_weight_, reinterpret_cast(packed_bias_), output_data_, task_id, - conv_param_, slidingWindow_param_); + Conv1x1SWFp32(input_data_, reinterpret_cast(packed_weight_), reinterpret_cast(bias_data_), + output_data_, task_id, conv_param_, slidingWindow_param_); } else { - ConvSWFp32(input_data_, packed_weight_, reinterpret_cast(packed_bias_), output_data_, task_id, conv_param_, - slidingWindow_param_); + ConvSWFp32(input_data_, reinterpret_cast(packed_weight_), reinterpret_cast(bias_data_), + output_data_, task_id, conv_param_, slidingWindow_param_); } return RET_OK; } @@ -178,6 +147,12 @@ int ConvolutionSWCPUKernel::Run() { FreeTmpBuffer(); return ret; } + + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } + int error_code = ParallelLaunch(this->ms_context_, ConvolutionSWImpl, this, thread_count_); if (error_code != RET_OK) { MS_LOG(ERROR) << "conv error error_code[" << error_code << "]"; @@ -192,5 +167,47 @@ int ConvolutionSWCPUKernel::Run() { FreeTmpBuffer(); return RET_OK; } + +void ConvolutionSWCPUKernel::PackWeight() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = filter_tensor->Channel(); + auto 
output_channel = filter_tensor->Batch(); + int kernel_h = filter_tensor->Height(); + int kernel_w = filter_tensor->Width(); + int oc_block_num = UP_DIV(output_channel, oc_tile_); + void *origin_weight = IsTrainable() ? filter_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNHWCToNXHWCXFp32(kernel_h, kernel_w, output_channel, oc_block_num, input_channel, + reinterpret_cast(packed_weight_), reinterpret_cast(origin_weight)); +} + +int ConvolutionSWCPUKernel::MallocWeightBiasData() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = filter_tensor->Channel(); + auto output_channel = filter_tensor->Batch(); + int kernel_h = filter_tensor->Height(); + int kernel_w = filter_tensor->Width(); + conv_param_->input_channel_ = input_channel; + conv_param_->output_channel_ = output_channel; + int kernel_plane = kernel_h * kernel_w; + int oc_block_num = UP_DIV(output_channel, oc_tile_); + int pack_weight_size = oc_block_num * oc_tile_ * input_channel * kernel_plane; + packed_weight_ = malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc packed weight failed."; + return RET_NULL_PTR; + } + memset(packed_weight_, 0, pack_weight_size * sizeof(float)); + + if (in_tensors_.size() == kInputSize2) { + bias_data_ = malloc(oc_block_num * oc_tile_ * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "malloc bias failed."; + return RET_NULL_PTR; + } + memset(bias_data_, 0, oc_block_num * oc_tile_ * sizeof(float)); + } + return RET_OK; +} } // namespace mindspore::kernel #endif // ENABLE_AVX diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h index a72878e81a2..5112f0dd9e9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_slidewindow_fp32.h @@ -27,19 +27,9 @@ class 
ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { ConvolutionSWCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - ori_weight_data_(origin_weight), - ori_bias_data_(origin_bias) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias) {} ~ConvolutionSWCPUKernel() override { - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } - if (packed_bias_ != nullptr) { - free(packed_bias_); - packed_bias_ = nullptr; - } if (slidingWindow_param_ != nullptr) { delete slidingWindow_param_; slidingWindow_param_ = nullptr; @@ -50,10 +40,11 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { int ReSize() override; int Run() override; int RunImpl(int task_id); - int InitWeightBias(); int InitTmpBuffer(); private: + int MallocWeightBiasData() override; + void PackWeight() override; void FreeTmpBuffer() { if (output_data_ != nullptr && oc_res_ != 0) { ctx_->allocator->Free(output_data_); @@ -68,10 +59,6 @@ class ConvolutionSWCPUKernel : public ConvolutionBaseCPUKernel { int in_tile_ = 0; // input channel algin int oc_res_ = 0; int ic_res_ = 0; - float *ori_weight_data_ = nullptr; - float *ori_bias_data_ = nullptr; - float *packed_weight_ = nullptr; - float *packed_bias_ = nullptr; float *output_data_ = nullptr; float *input_data_ = nullptr; SlidingWindowParam *slidingWindow_param_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc index 3c1bc7da29e..08fb239ff58 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.cc @@ -31,77 +31,9 @@ int 
ConvolutionWinogradCPUKernel::WinogradFilterTransform(const float *weight_da return RET_ERROR; } - return WinogradWeightTransform(weight_data, trans_weight_, matrix_g, matrix_gt, oc_block, input_unit_, kernel_unit_, - conv_param_->input_channel_, conv_param_->output_channel_, true); -} - -int ConvolutionWinogradCPUKernel::InitWeightBias() { - auto filter_tensor = in_tensors_.at(kWeightIndex); - int in_channel = filter_tensor->Channel(); - if (in_channel < 0) { - MS_LOG(ERROR) << "get channel from filter tensor failed."; - return RET_ERROR; - } - int out_channel = filter_tensor->Batch(); - if (out_channel < 0) { - MS_LOG(ERROR) << "get batch from filter tensor failed."; - return RET_ERROR; - } - conv_param_->input_channel_ = in_channel; - conv_param_->output_channel_ = out_channel; - - // set data - auto trans_matrix_data_size = - input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float); - if (trans_weight_ == nullptr) { - trans_weight_ = reinterpret_cast(malloc(trans_matrix_data_size)); - if (trans_weight_ == nullptr) { - MS_LOG(ERROR) << "malloc matrix_buffer failed."; - return RET_MEMORY_FAILED; - } - } - memset(trans_weight_, 0, trans_matrix_data_size); - - float matrix_g[64]; - float matrix_gt[64]; - float matrix_a[64]; - float matrix_at[64]; - float matrix_b[64]; - float matrix_bt[64]; - float coef = 1.0f; - if (input_unit_ == 8) { - coef = 0.5f; - } - auto ret = - CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g, matrix_gt, coef, output_unit_, kernel_unit_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; - return ret; - } - ret = WinogradFilterTransform(origin_weight_, matrix_g, matrix_gt, oc_block_); - if (ret != RET_OK) { - MS_LOG(ERROR) << "winograd filter transform failed."; - return ret; - } - - // init bias - size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float); - if (bias_data_ == nullptr) { - bias_data_ = reinterpret_cast(malloc(new_bias_size)); - 
if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "malloc bias_data_ failed."; - return RET_MEMORY_FAILED; - } - } - if (in_tensors_.size() == kInputSize2) { - size_t origin_size = out_channel * sizeof(float); - memcpy(bias_data_, origin_bias_, origin_size); - memset(reinterpret_cast(bias_data_) + out_channel, 0, new_bias_size - origin_size); - } else { - MS_ASSERT(in_tensors_.size() == kInputSize1); - memset(bias_data_, 0, new_bias_size); - } - return RET_OK; + return WinogradWeightTransform(weight_data, reinterpret_cast(packed_weight_), matrix_g, matrix_gt, oc_block, + input_unit_, kernel_unit_, conv_param_->input_channel_, conv_param_->output_channel_, + true); } int ConvolutionWinogradCPUKernel::InitTmpBuffer() { @@ -158,6 +90,8 @@ int ConvolutionWinogradCPUKernel::ConfigInputOutput() { } int ConvolutionWinogradCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); tile_num_ = C12NUM; #ifdef ENABLE_AVX oc_block_ = C16NUM; @@ -169,7 +103,7 @@ int ConvolutionWinogradCPUKernel::Init() { conv_param_->input_unit_ = input_unit_; conv_param_->output_unit_ = output_unit_; - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; @@ -202,8 +136,9 @@ int ConvolutionWinogradCPUKernel::RunImpl(int task_id) { MS_ASSERT(ori_input_data != nullptr); auto output_data = reinterpret_cast(out_tensors_.front()->data_c()); MS_ASSERT(output_data != nullptr); - ConvWinogardFp32(ori_input_data, trans_weight_, reinterpret_cast(bias_data_), output_data, - tmp_buffer_address_list_, task_id, conv_param_, in_func_, out_func_); + ConvWinogardFp32(ori_input_data, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), output_data, tmp_buffer_address_list_, task_id, + conv_param_, in_func_, out_func_); return RET_OK; } @@ -224,12 +159,9 @@ int ConvolutionWinogradCPUKernel::Run() { FreeTmpBuffer(); return RET_ERROR; } - if (IsTrain() && 
IsTrainable()) { - ret = InitWeightBias(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Init weight bias failed."; - return RET_ERROR; - } + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; } ret = ParallelLaunch(this->ms_context_, ConvolutionWinogradImpl, this, thread_count_); @@ -241,6 +173,68 @@ int ConvolutionWinogradCPUKernel::Run() { return ret; } +int ConvolutionWinogradCPUKernel::MallocWeightBiasData() { + auto filter_tensor = in_tensors_.at(kWeightIndex); + int in_channel = filter_tensor->Channel(); + if (in_channel < 0) { + MS_LOG(ERROR) << "get channel from filter tensor failed."; + return RET_ERROR; + } + int out_channel = filter_tensor->Batch(); + if (out_channel < 0) { + MS_LOG(ERROR) << "get batch from filter tensor failed."; + return RET_ERROR; + } + conv_param_->input_channel_ = in_channel; + conv_param_->output_channel_ = out_channel; + + // set data + auto trans_matrix_data_size = + input_unit_ * input_unit_ * in_channel * UP_ROUND(out_channel, oc_block_) * sizeof(float); + if (packed_weight_ == nullptr) { + packed_weight_ = malloc(trans_matrix_data_size); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "malloc matrix_buffer failed."; + return RET_MEMORY_FAILED; + } + } + memset(packed_weight_, 0, trans_matrix_data_size); + + float matrix_a[64]; + float matrix_at[64]; + float matrix_b[64]; + float matrix_bt[64]; + float coef = 1.0f; + if (input_unit_ == 8) { + coef = 0.5f; + } + auto ret = + CookToomFilter(matrix_a, matrix_at, matrix_b, matrix_bt, matrix_g_, matrix_gt_, coef, output_unit_, kernel_unit_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "get matrix g from CookToomFilter failed."; + return ret; + } + + // init bias + size_t new_bias_size = UP_ROUND(out_channel, C4NUM) * sizeof(float); + if (bias_data_ == nullptr) { + bias_data_ = malloc(new_bias_size); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "malloc bias_data_ failed."; + return RET_MEMORY_FAILED; + } + } + memset(bias_data_, 0, 
new_bias_size); + return RET_OK; +} + +void ConvolutionWinogradCPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + WinogradFilterTransform(reinterpret_cast(origin_weight), matrix_g_, matrix_gt_, oc_block_); +} + int ConvolutionWinogradCPUKernel::Eval() { auto ret = InnerKernel::Eval(); if (ret != RET_OK) { @@ -248,7 +242,7 @@ int ConvolutionWinogradCPUKernel::Eval() { return ret; } if (IsTrainable()) { - ret = InitWeightBias(); + ret = InitConvWeightBias(); if (ret != RET_OK) { MS_LOG(ERROR) << "Init weight bias failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h index 7d5f792a731..9fd402a7bc2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/convolution_winograd_fp32.h @@ -30,27 +30,21 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { ConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx, int output_unit, float *origin_weight, float *origin_bias) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx), - output_unit_(output_unit), - origin_weight_(origin_weight), - origin_bias_(origin_bias) {} - ~ConvolutionWinogradCPUKernel() override { - if (trans_weight_ != nullptr) { - free(trans_weight_); - trans_weight_ = nullptr; - } - }; + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, origin_weight, origin_bias), + output_unit_(output_unit) {} + ~ConvolutionWinogradCPUKernel() override {} int Init() override; int ReSize() override; int Run() override; int Eval() override; int RunImpl(int task_id); - int InitWeightBias(); int InitTmpBuffer(); int ConfigInputOutput(); int 
WinogradFilterTransform(const float *weight_data, float *matrix_g, const float *matrix_gt, int oc_block); private: + int MallocWeightBiasData() override; + void PackWeight() override; void FreeTmpBuffer() { if (trans_input_ != nullptr) { ctx_->allocator->Free(trans_input_); @@ -74,13 +68,12 @@ class ConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { int output_unit_{0}; int oc_block_{0}; int tile_num_{0}; - float *origin_weight_; // do not free - float *origin_bias_; // do not free float *tmp_data_ = nullptr; float *trans_input_ = nullptr; float *gemm_out_ = nullptr; float *col_buffer_ = nullptr; - float *trans_weight_ = nullptr; + float matrix_g_[64]; + float matrix_gt_[64]; TmpBufferAddress tmp_buffer_address_list_[4] = {nullptr}; InputTransFunc in_func_ = nullptr; OutputTransFunc out_func_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc index cd3456ae562..405056a190d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_and_resize_fp32.cc @@ -33,6 +33,8 @@ constexpr size_t kBoxIndex = 1; constexpr size_t kBoxIdIndex = 2; } // namespace int CropAndResizeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc index cda9a8c5525..e81abdd92d1 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/crop_fp32.cc @@ -36,6 +36,8 @@ int CropLaunch(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } // namespace int CropCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } 
diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc index 3d2f184159c..b798512d1ab 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.cc @@ -26,10 +26,6 @@ DeconvolutionDepthwiseCPUKernel::~DeconvolutionDepthwiseCPUKernel() { delete sliding_; sliding_ = nullptr; } - if (packed_weight_ != nullptr) { - free(packed_weight_); - packed_weight_ = nullptr; - } } int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { @@ -45,37 +41,6 @@ int DeconvolutionDepthwiseCPUKernel::InitSlideParam() { return RET_OK; } -int DeconvolutionDepthwiseCPUKernel::InitWeightBias() { - // init weight: o, h, w, i; o == group, i == 1 - auto weight_tensor = in_tensors_.at(kWeightIndex); - auto origin_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(origin_weight != nullptr); - int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); - int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); - - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(float))); - if (packed_weight_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - PackNCHWToNC4HW4Fp32(origin_weight, packed_weight_, 1, weight_tensor->Height() * weight_tensor->Width(), - weight_tensor->Batch()); - - bias_data_ = reinterpret_cast(malloc(C4NUM * OC4 * sizeof(float))); - if (bias_data_ == nullptr) { - MS_LOG(ERROR) << "Malloc buffer failed."; - return RET_ERROR; - } - memset(bias_data_, 0, C4NUM * OC4 * sizeof(float)); - if (in_tensors_.size() == kInputSize2) { - auto ori_bias = reinterpret_cast(in_tensors_.at(kBiasIndex)->data_c()); - memcpy(bias_data_, ori_bias, in_tensors_.at(kBiasIndex)->ElementsNum() * sizeof(float)); - } - - conv_param_->thread_num_ = MSMIN(thread_count_, OC4); - return RET_OK; -} - int 
DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() { if (conv_param_->input_channel_ % C4NUM != 0) { need_align_ = true; @@ -100,15 +65,17 @@ int DeconvolutionDepthwiseCPUKernel::InitPackedInputOutput() { } int DeconvolutionDepthwiseCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); sliding_ = new (std::nothrow) SlidingWindowParam; if (sliding_ == nullptr) { MS_LOG(ERROR) << "new sliding window param failed."; return RET_ERROR; } - auto ret = InitWeightBias(); + auto ret = InitConvWeightBias(); if (ret != 0) { - MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitWeightBias failed.ret: " << ret; + MS_LOG(ERROR) << "Deconvolution depthwise fp32 InitConvWeightBias failed.ret: " << ret; return ret; } if (!InferShapeDone()) { @@ -132,8 +99,8 @@ int DeconvolutionDepthwiseCPUKernel::ReSize() { } int DeconvolutionDepthwiseCPUKernel::Execute(int task_id) { - DeconvDwSWFp32(packed_output_, packed_input_, packed_weight_, reinterpret_cast(bias_data_), conv_param_, - sliding_, task_id); + DeconvDwSWFp32(packed_output_, packed_input_, reinterpret_cast(packed_weight_), + reinterpret_cast(bias_data_), conv_param_, sliding_, task_id); return RET_OK; } @@ -148,6 +115,10 @@ int DeconvDwRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) { } int DeconvolutionDepthwiseCPUKernel::Run() { + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } if (conv_param_->input_channel_ != conv_param_->output_channel_) { MS_LOG(ERROR) << "Only support input channel equals output channel."; return RET_ERROR; @@ -190,6 +161,34 @@ int DeconvolutionDepthwiseCPUKernel::Run() { return ret; } +int DeconvolutionDepthwiseCPUKernel::MallocWeightBiasData() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + int OC4 = UP_DIV(weight_tensor->Batch(), C4NUM); + int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width(); + packed_weight_ = 
malloc(pack_weight_size * sizeof(float)); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + + bias_data_ = malloc(C4NUM * OC4 * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc buffer failed."; + return RET_ERROR; + } + memset(bias_data_, 0, C4NUM * OC4 * sizeof(float)); + conv_param_->thread_num_ = MSMIN(thread_count_, OC4); + return RET_OK; +} + +void DeconvolutionDepthwiseCPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + void *origin_weight = IsTrainable() ? weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); + PackNCHWToNC4HW4Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), 1, + weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); +} + void DeconvolutionDepthwiseCPUKernel::FreePackedInputOutput() { if (need_align_) { ms_context_->allocator->Free(packed_input_); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h index 0f0bf8f2423..33b99251bfe 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_depthwise_fp32.h @@ -27,22 +27,23 @@ class DeconvolutionDepthwiseCPUKernel : public ConvolutionBaseCPUKernel { public: DeconvolutionDepthwiseCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? 
inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeconvolutionDepthwiseCPUKernel() override; int Init() override; int InitSlideParam(); int ReSize() override; int Run() override; - - int InitWeightBias(); int Execute(int task_id); private: int InitPackedInputOutput(); void FreePackedInputOutput(); + int MallocWeightBiasData() override; + void PackWeight() override; + SlidingWindowParam *sliding_ = nullptr; - float *packed_weight_ = nullptr; float *packed_input_ = nullptr; float *packed_output_ = nullptr; bool need_align_ = false; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc index 4095de69bdb..9b118687cfb 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.cc @@ -31,8 +31,6 @@ DeConvolutionCPUKernel::~DeConvolutionCPUKernel() { delete matmul_param_; matmul_param_ = nullptr; } - FreeAlignedData(reinterpret_cast(&weight_ptr_)); - FreeAlignedData(reinterpret_cast(&bias_ptr)); } int DeConvolutionCPUKernel::ReSize() { @@ -50,48 +48,47 @@ int DeConvolutionCPUKernel::ReSize() { return RET_OK; } -int DeConvolutionCPUKernel::InitWeightBias() { +int DeConvolutionCPUKernel::MallocWeightBiasData() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto input_channel = weight_tensor->Batch(); auto output_channel = weight_tensor->Channel(); auto kernel_h_ = weight_tensor->Height(); auto kernel_w_ = weight_tensor->Width(); int output_aligned_size = UP_ROUND(output_channel, C8NUM); - bias_ptr = reinterpret_cast(MallocAlignedData(C32NUM, output_aligned_size * sizeof(float))); - if (bias_ptr == nullptr) { - MS_LOG(ERROR) << "deconv malloc bias_ptr error!"; - return RET_ERROR; - } - memset(bias_ptr, 0, output_aligned_size * sizeof(float)); - if (in_tensors_.size() == DIMENSION_3D) { - if (in_tensors_.at(kBiasIndex)->shape().size() == DIMENSION_1D && - 
in_tensors_.at(kBiasIndex)->DimensionSize(0) == output_channel) { - MS_ASSERT(in_tensors_.at(kBiasIndex)->data_c() != nullptr); - memcpy(bias_ptr, in_tensors_.at(kBiasIndex)->data_c(), output_channel * sizeof(float)); - } else { - MS_LOG(ERROR) << "unsupported bias shape for deconv!"; - return RET_ERROR; - } - } - size_t weight_pack_size = input_channel * kernel_w_ * kernel_h_ * output_aligned_size * sizeof(float); - weight_ptr_ = reinterpret_cast(MallocAlignedData(C32NUM, weight_pack_size)); - if (weight_ptr_ == nullptr) { - MS_LOG(ERROR) << "deconv malloc weight_ptr_ error!"; + packed_weight_ = MallocAlignedData(C32NUM, weight_pack_size); + if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc packed_weight_ error!"; return RET_ERROR; } - memset(weight_ptr_, 0, weight_pack_size); - MS_ASSERT(in_tensors_.at(kWeightIndex)->data_c() != nullptr); -#ifdef ENABLE_AVX - PackNHWCToCXHWNXFp32(reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel, - kernel_w_ * kernel_h_, output_channel); -#else - PackNHWCToC8HWN8Fp32(reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()), weight_ptr_, input_channel, - kernel_w_ * kernel_h_, output_channel); -#endif + memset(packed_weight_, 0, weight_pack_size); + + bias_data_ = MallocAlignedData(C32NUM, output_aligned_size * sizeof(float)); + if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "deconv malloc bias_data_ error!"; + return RET_ERROR; + } + memset(bias_data_, 0, output_aligned_size * sizeof(float)); return RET_OK; } +void DeConvolutionCPUKernel::PackWeight() { + auto weight_tensor = in_tensors_.at(kWeightIndex); + auto input_channel = weight_tensor->Batch(); + auto output_channel = weight_tensor->Channel(); + auto kernel_h = weight_tensor->Height(); + auto kernel_w = weight_tensor->Width(); + void *origin_weight = IsTrainable() ? 
weight_tensor->data_c() : origin_weight_; + MS_ASSERT(origin_weight != nullptr); +#ifdef ENABLE_AVX + PackNHWCToCXHWNXFp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + input_channel, kernel_w * kernel_h, output_channel); +#else + PackNHWCToC8HWN8Fp32(reinterpret_cast(origin_weight), reinterpret_cast(packed_weight_), + input_channel, kernel_w * kernel_h, output_channel); +#endif +} + int DeConvolutionCPUKernel::InitParam() { input_plane_ = conv_param_->input_h_ * conv_param_->input_w_; kernel_plane_ = conv_param_->kernel_w_ * conv_param_->kernel_h_; @@ -133,26 +130,32 @@ int DeConvolutionCPUKernel::DoDeconv(int task_id) { } auto tmp_buffer = tmp_buffer_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->row_align_; #ifdef ENABLE_AVX - DeconvMatmulAvx(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, - kernel_plane_); + DeconvMatmulAvx( + pack_input_, + reinterpret_cast(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, kernel_plane_); #elif ENABLE_SSE - DeconvMatmulFloatSse(pack_input_, - weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_); + DeconvMatmulFloatSse( + pack_input_, + reinterpret_cast(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_); #else - MatMulOpt(pack_input_, weight_ptr_ + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, - tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_, - oc * C8NUM * kernel_plane_, matmul_param_->col_, 
OutType_C8); + MatMulOpt( + pack_input_, + reinterpret_cast(packed_weight_) + task_id * thread_stride_ * C8NUM * kernel_plane_ * matmul_param_->deep_, + tmp_buffer, nullptr, ActType_No, matmul_param_->deep_, matmul_param_->row_align_, oc * C8NUM * kernel_plane_, + matmul_param_->col_, OutType_C8); #endif DeConvPostFp32C8(tmp_buffer, pack_output_ + task_id * thread_stride_ * C8NUM * output_plane_, - reinterpret_cast(bias_ptr) + thread_stride_ * task_id * C8NUM, + reinterpret_cast(bias_data_) + thread_stride_ * task_id * C8NUM, output_ptr_ + task_id * thread_stride_ * C8NUM, oc_res, conv_param_); return RET_OK; } int DeConvolutionCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #if defined(ENABLE_ARM32) || defined(ENABLE_AVX) || defined(ENABLE_SSE) row_tile_ = C4NUM; #else @@ -163,10 +166,15 @@ int DeConvolutionCPUKernel::Init() { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; } - int error_code = InitWeightBias(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "deconv InitWeightBias error!ret: " << error_code; - return error_code; + if (in_tensors_.at(kWeightIndex)->data_c() != nullptr) { + int error_code = InitConvWeightBias(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "deconv InitConvWeightBias error!ret: " << error_code; + return error_code; + } + } else { + is_repack_ = true; + MS_LOG(WARNING) << "The weight is nullptr, will pack in runtime."; } if (!InferShapeDone()) { return RET_OK; @@ -214,6 +222,10 @@ int DeConvolutionCPUKernel::InitRunBuf() { } int DeConvolutionCPUKernel::Run() { + if (RepackWeight() != RET_OK) { + MS_LOG(ERROR) << "Repack weight failed."; + return RET_ERROR; + } float *src_in = reinterpret_cast(in_tensors_[0]->data_c()); float *src_out = reinterpret_cast(out_tensors_[0]->data_c()); MS_ASSERT(src_in != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h 
index 83f10cd2b81..5a1b028ed0d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_fp32.h @@ -32,7 +32,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeConvolutionCPUKernel() override; int Init() override; int Run() override; @@ -45,7 +46,8 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { int InitRunBuf(); void FreeRunBuf(); int InitParam(); - int InitWeightBias(); + int MallocWeightBiasData() override; + void PackWeight() override; private: MatMulParameter *matmul_param_ = nullptr; @@ -55,13 +57,11 @@ class DeConvolutionCPUKernel : public ConvolutionBaseCPUKernel { int thread_count_ = 1; int thread_stride_ = 0; int row_tile_ = 0; - float *weight_ptr_ = nullptr; float *pack_input_ = nullptr; float *pack_output_ = nullptr; float *tmp_buffer_ = nullptr; float *input_ptr_ = nullptr; float *output_ptr_ = nullptr; - float *bias_ptr = nullptr; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc index 8c20867be1d..2677fe41707 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.cc @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include "src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h" using mindspore::lite::RET_ERROR; @@ -192,7 +193,13 @@ int DeConvWgPostFp32Run(void *cdata, int task_id, float lhs_scale, float rhs_sca int DeConvolutionWinogradCPUKernel::InitComputeParam() { auto weight_tensor = in_tensors_[1]; - + auto shape = weight_tensor->shape(); + if (std::find(shape.begin(), shape.end(), -1) != shape.end()) { + MS_LOG(WARNING) << "The shape of weight tensor is invalid."; + valid_weight_shape_ = false; + return RET_OK; + } + valid_weight_shape_ = true; conv_param_->input_channel_ = weight_tensor->Batch(); conv_param_->output_channel_ = weight_tensor->Channel(); conv_param_->kernel_w_ = weight_tensor->Width(); @@ -277,7 +284,11 @@ int DeConvolutionWinogradCPUKernel::InitComputeParam() { int DeConvolutionWinogradCPUKernel::InitDataParam() { auto weight_tensor = in_tensors_.at(kWeightIndex); auto nhwc_weight = reinterpret_cast(weight_tensor->data_c()); - MS_ASSERT(nhwc_weight != nullptr); + if (nhwc_weight == nullptr) { + MS_LOG(WARNING) << "The weight data is nullptr, will init data parameter in runtime."; + is_repack_ = true; + return RET_OK; + } /* unit data : weight & winograd data */ for (int i = 0; i < deconv_param_->compute_size_; i++) { @@ -307,11 +318,30 @@ int DeConvolutionWinogradCPUKernel::InitDataParam() { int DeConvolutionWinogradCPUKernel::ReSize() { FreeResizeBuf(); ConvolutionBaseCPUKernel::Init(); - InitParameter(); + if (!valid_weight_shape_) { + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitComputeParam error!"; + return RET_ERROR; + } else if (!valid_weight_shape_) { + return RET_OK; + } + if (InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + } + + int error_code = InitParameter(); + if (error_code != RET_OK) { + MS_LOG(ERROR) << "InitParameter error! 
ret: " << error_code; + return error_code; + } return RET_OK; } int DeConvolutionWinogradCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); deconv_param_ = new (std::nothrow) DeConvParam(); if (deconv_param_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; @@ -320,16 +350,14 @@ int DeConvolutionWinogradCPUKernel::Init() { for (auto &wg : deconv_param_->a_buffer_) { wg.buf_init_ = false; } - int error_code = InitComputeParam(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "InitComputeParam error! ret: " << error_code; - return error_code; - } - error_code = InitDataParam(); - if (error_code != RET_OK) { - MS_LOG(ERROR) << "InitWeightBias error! ret: " << error_code; - return error_code; + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + if (valid_weight_shape_ && InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; } if (!InferShapeDone()) { @@ -421,6 +449,20 @@ int DeConvolutionWinogradCPUKernel::Run() { return ret; } + if (!valid_weight_shape_) { + if (InitComputeParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + if (!valid_weight_shape_ || InitParameter() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } + } + if (IsRepack() && InitDataParam() != RET_OK) { + MS_LOG(ERROR) << "InitDataParam error!"; + return RET_ERROR; + } float *src_in = reinterpret_cast(in_tensors_[0]->data_c()); float *src_out = reinterpret_cast(out_tensors_[0]->data_c()); MS_ASSERT(src_in != nullptr); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h index b174972d93c..48d4b3a3908 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/deconvolution_winograd_fp32.h @@ 
-32,7 +32,8 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { public: DeConvolutionWinogradCPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const lite::InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, inputs.at(kWeightIndex)->data_c(), + inputs.size() == kInputSize2 ? inputs.at(kBiasIndex)->data_c() : nullptr) {} ~DeConvolutionWinogradCPUKernel() override; int Init() override; int Run() override; @@ -61,6 +62,7 @@ class DeConvolutionWinogradCPUKernel : public ConvolutionBaseCPUKernel { std::mutex lock_; int thread_num_hw_ = 0; int thread_stride_hw_ = 0; + bool valid_weight_shape_ = true; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_DECONVOLUTION_WINOGRAD_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc index 086a1a12356..521f610cd02 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/depth_to_space_fp32.cc @@ -26,6 +26,8 @@ using mindspore::schema::PrimitiveType_DepthToSpace; namespace mindspore::kernel { int DepthToSpaceCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); param_->data_type_size_ = sizeof(float); if (!InferShapeDone()) { return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc index 817b4e6f582..3aef46d91ff 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/detection_post_process_fp32.cc @@ -27,6 +27,7 @@ using mindspore::schema::PrimitiveType_DetectionPostProcess; namespace mindspore::kernel { int 
DetectionPostProcessCPUKernel::GetInputData() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); if ((in_tensors_.at(0)->data_type() != kNumberTypeFloat32 && in_tensors_.at(0)->data_type() != kNumberTypeFloat) || (in_tensors_.at(1)->data_type() != kNumberTypeFloat32 && in_tensors_.at(1)->data_type() != kNumberTypeFloat)) { MS_LOG(ERROR) << "Input data type error"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc index 1be7b7dbcca..53f55492a63 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/elu_fp32.cc @@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_Elu; namespace mindspore::kernel { int EluCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc index 7b13ed938cd..d43edcd5f10 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/embedding_lookup_fp32.cc @@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_EmbeddingLookupFusion; namespace mindspore::kernel { int EmbeddingLookupCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc index 046cc426b43..50a1be42f69 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/exp_fp32.cc @@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_ExpFusion; namespace mindspore::kernel { int ExpCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + 
CHECK_LESS_RETURN(out_tensors_.size(), 1); float log_base = (param_->base_ == -1) ? 1 : logf(param_->base_); param_->in_scale_ = param_->scale_ * log_base; if (param_->shift_ == 0) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc index fc3f37d205a..db9f66ba49b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fill_fp32.cc @@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_Fill; namespace mindspore::kernel { int FillCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc index 63d3f004c16..3729bff5e8a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fullconnection_fp32.cc @@ -25,6 +25,9 @@ using mindspore::schema::PrimitiveType_FullConnection; namespace mindspore::kernel { int FullconnectionCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + MatmulFp32BaseCPUKernel::InitParameter(); if (params_->a_const_) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc index f145b284161..50b682b68fc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/fused_batchnorm_fp32.cc @@ -23,7 +23,12 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_FusedBatchNorm; namespace mindspore::kernel { +namespace { +constexpr int kNumInputSize = 5; +} // namespace int FusedBatchnormCPUKernel::ReSize() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D); + 
CHECK_LESS_RETURN(out_tensors_.size(), 1); FreeMeanAndVariance(); FreeScaleAndOffset(); FillParam(); @@ -66,7 +71,7 @@ int FusedBatchnormCPUKernel::InitConstTensor() { int FusedBatchnormCPUKernel::Run() { auto param = reinterpret_cast(op_parameter_); - if (IsTrain() && IsTrainable() && in_tensors_.size() >= 5) { + if (IsTrain() && IsTrainable() && in_tensors_.size() >= kNumInputSize) { float *in = static_cast(in_tensors_[0]->MutableData()); float *scale = static_cast(in_tensors_[1]->MutableData()); float *offset = static_cast(in_tensors_[2]->MutableData()); diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc index fd454f1b56d..0e42b17f501 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gatherNd_fp32.cc @@ -37,6 +37,8 @@ GatherNdCPUKernel::~GatherNdCPUKernel() { } int GatherNdCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc index e313cd74986..3cedf74e3b3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gather_fp32.cc @@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_Gather; namespace mindspore::kernel { int GatherCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); axis_ = *(reinterpret_cast(in_tensors_.at(2)->data_c())); if (!InferShapeDone()) { return RET_OK; @@ -63,7 +65,7 @@ int GatherCPUKernel::DoGather(int task_id) { int8_t *int8_in = reinterpret_cast(input_tensor->data_c()); int8_t *int8_out = reinterpret_cast(out_tensor->data_c()); - int data_size = lite::DataTypeSize(input_tensor->data_type()); + int data_size = 
static_cast(lite::DataTypeSize(input_tensor->data_type())); int8_in += thread_stride * limit * inner_size * data_size; int8_out += thread_stride * indices_element_size * inner_size * data_size; @@ -119,7 +121,7 @@ int GatherCPUKernel::AssignIndicesData(bool isIndicesInt32, int indices_num, lit } } else { for (int i = 0; i < indices_num; i++) { - indices_data_[i] = reinterpret_cast(indices_tensor->MutableData())[i]; + indices_data_[i] = static_cast(reinterpret_cast(indices_tensor->MutableData())[i]); } } } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc index c56a7088f9e..a8e246a0917 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/glu_fp32.cc @@ -30,6 +30,7 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_GLU; namespace mindspore::kernel { +const int kGluBranchNum = 2; int GluCPUKernel::MallocTmpBuffer() { FreeTmpBuffer(); auto in_tensor = in_tensors_.front(); @@ -115,7 +116,7 @@ int GluCPUKernel::Split(int task_id) { int GluCPUKernel::Sigmoid(int task_id) { auto input_addr = reinterpret_cast(split_ptr_.at(1)); auto output_addr = reinterpret_cast(sigmoid_ptr_); - auto length = in_tensors_.at(0)->ElementsNum() / 2; + auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum; int stride = UP_DIV(length, op_parameter_->thread_num_); int count = MSMIN(stride, length - stride * task_id); @@ -129,7 +130,7 @@ int GluCPUKernel::Mul(int task_id) { auto input_addr0 = reinterpret_cast(split_ptr_.at(0)); auto input_addr1 = reinterpret_cast(sigmoid_ptr_); auto output_addr = reinterpret_cast(out_tensors_.at(0)->data_c()); - auto length = in_tensors_.at(0)->ElementsNum() / 2; + auto length = in_tensors_.at(0)->ElementsNum() / kGluBranchNum; int stride = UP_DIV(length, op_parameter_->thread_num_); int count = MSMIN(stride, length - stride * task_id); diff --git 
a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc index af5a737fa6c..b2e8cb45248 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/group_convolution_fp32.cc @@ -64,6 +64,8 @@ int GroupConvolutionFp32CPUKernel::PostConcat(int group_id) { } int GroupConvolutionFp32CPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (group_conv_creator_ == nullptr) { return lite::RET_ERROR; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc index 90b522ffaee..14e32ba9113 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/gru_fp32.cc @@ -162,6 +162,8 @@ int GruCPUKernel::InitStateWeightBias() { } int GruCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_5D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc index 9fca05f9231..f18162d392b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/instance_norm_fp32.cc @@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_InstanceNorm; namespace mindspore::kernel { int InstanceNormCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc index 79aa1154c13..9899da70145 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc 
+++ b/mindspore/lite/src/runtime/kernel/arm/fp32/invert_permutation_fp32.cc @@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_InvertPermutation; namespace mindspore::kernel { int InvertPermutationCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc index eb0228e7eaa..b48390605b7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/l2_norm_fp32.cc @@ -31,6 +31,8 @@ namespace { const int kMaxThreadNum = 8; } int L2NormCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc index 76d743b9511..d12609e0936 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/layer_norm_fp32.cc @@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_LayerNormFusion; namespace mindspore::kernel { int LayerNormCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc index dd40b54c12c..58167000119 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/local_response_norm_fp32.cc @@ -27,7 +27,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_LRN; namespace mindspore::kernel { -int LocalResponseNormCPUKernel::Init() { return RET_OK; 
} +int LocalResponseNormCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int LocalResponseNormCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc index cb0e4ec6b1c..6988376f45e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/lstm_fp32.cc @@ -212,6 +212,8 @@ int LstmCPUKernel::InitParam() { } int LstmCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_6D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc index cc5e90791f4..82a0a977165 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32.cc @@ -48,6 +48,8 @@ void MatmulCPUKernel::InitShapeB() { } int MatmulCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); MatmulFp32BaseCPUKernel::InitParameter(); if (params_->a_const_ == true) { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc index 0b872af2c7d..f1e6da7ac25 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/matmul_fp32_base.cc @@ -78,7 +78,8 @@ int MatmulFp32BaseCPUKernel::InitBufferA() { if (op_parameter_->is_train_session_) { a_pack_ptr_ = reinterpret_cast(workspace()); } else { - a_pack_ptr_ = reinterpret_cast(ms_context_->allocator->Malloc(matrix_a_pack_size_ * sizeof(float))); + a_pack_ptr_ = + reinterpret_cast(ms_context_->allocator->Malloc(matrix_a_pack_size_ * static_cast(sizeof(float)))); } if (a_pack_ptr_ == 
nullptr) { MS_LOG(ERROR) << "malloc a_pack_ptr_ failed"; @@ -94,7 +95,8 @@ int MatmulFp32BaseCPUKernel::InitBufferB() { if (op_parameter_->is_train_session_) { b_pack_ptr_ = reinterpret_cast(workspace()) + matrix_a_pack_size_; } else { - b_pack_ptr_ = reinterpret_cast(ms_context_->allocator->Malloc(matrix_b_pack_size_ * sizeof(float))); + b_pack_ptr_ = + reinterpret_cast(ms_context_->allocator->Malloc(matrix_b_pack_size_ * static_cast(sizeof(float)))); } if (b_pack_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc b_pack_ptr_ failed"; @@ -128,7 +130,7 @@ int MatmulFp32BaseCPUKernel::InitBiasData() { auto bias_tensor = in_tensors_[2]; int max_bias_data = UP_ROUND(bias_tensor->ElementsNum(), col_tile_); // malloc addr need to aligned to 32 bytes - bias_ptr_ = reinterpret_cast(malloc(max_bias_data * sizeof(float))); + bias_ptr_ = reinterpret_cast(malloc(max_bias_data * static_cast(sizeof(float)))); if (bias_ptr_ == nullptr) { MS_LOG(ERROR) << "malloc bias_ptr_ failed"; return RET_ERROR; @@ -142,8 +144,8 @@ int MatmulFp32BaseCPUKernel::InitBiasData() { bias_ptr_[i] = broadcast_data; } } else { - memset(bias_ptr_, 0, max_bias_data * sizeof(float)); - memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * sizeof(float)); + memset(bias_ptr_, 0, max_bias_data * static_cast(sizeof(float))); + memcpy(bias_ptr_, bias_tensor->data_c(), bias_tensor->ElementsNum() * static_cast(sizeof(float))); } } return RET_OK; @@ -151,7 +153,7 @@ int MatmulFp32BaseCPUKernel::InitBiasData() { int MatmulFp32BaseCPUKernel::InitMatrixA(const float *src_ptr) { if (vec_matmul_) { - memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * sizeof(float)); + memcpy(a_pack_ptr_, src_ptr, params_->batch * params_->deep_ * static_cast(sizeof(float))); return RET_OK; } @@ -176,9 +178,9 @@ int MatmulFp32BaseCPUKernel::InitMatrixB(const float *src_ptr) { #ifdef ENABLE_AVX RowMajor2Col32Major(src_data, dst, params_->deep_, params_->col_); #elif defined(ENABLE_ARM64) - memcpy(dst, src_data, 
params_->col_ * params_->deep_ * sizeof(float)); + memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast(sizeof(float))); #else - memcpy(dst, src_data, params_->col_ * params_->deep_ * sizeof(float)); + memcpy(dst, src_data, params_->col_ * params_->deep_ * static_cast(sizeof(float))); #endif } else { #ifdef ENABLE_AVX @@ -270,6 +272,8 @@ int MatmulFp32BaseCPUKernel::FloatRun(int task_id) const { } int MatmulFp32BaseCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); #ifdef ENABLE_AVX matrix_a_pack_fun_ = params_->a_transpose_ ? RowMajor2Row6Major : RowMajor2Col6Major; matrix_b_pack_fun_ = params_->b_transpose_ ? RowMajor2Col16Major : RowMajor2Row16Major; @@ -317,12 +321,14 @@ int MatmulFp32BaseCPUKernel::Init() { // only copy weight data // resize or run to pack auto b_tensor = in_tensors_.at(1); - src_b_ = reinterpret_cast(malloc(params_->batch * params_->deep_ * params_->col_ * sizeof(float))); + src_b_ = reinterpret_cast( + malloc(params_->batch * params_->deep_ * params_->col_ * static_cast(sizeof(float)))); if (src_b_ == nullptr) { MS_LOG(ERROR) << "matmul fp16 src_b_ is failed!"; return RET_ERROR; } - memcpy(src_b_, b_tensor->data_c(), params_->batch * params_->deep_ * params_->col_ * sizeof(float)); + memcpy(src_b_, b_tensor->data_c(), + params_->batch * params_->deep_ * params_->col_ * static_cast(sizeof(float))); } return RET_OK; } @@ -344,7 +350,7 @@ int MatmulFp32BaseCPUKernel::ReSize() { return RET_ERROR; } if (op_parameter_->is_train_session_) { - set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * sizeof(float)); + set_workspace_size((matrix_a_pack_size_ + matrix_b_pack_size_) * static_cast(sizeof(float))); } if (params_->b_const_ && src_b_ != nullptr) { @@ -380,8 +386,8 @@ int MatmulFp32BaseCPUKernel::InitTmpOutBuffer() { int out_channel = params_->col_; int oc_block_num = UP_DIV(out_channel, col_tile_); MS_ASSERT(ms_context_->allocator != nullptr); - output_data_ 
= reinterpret_cast( - ms_context_->allocator->Malloc(params_->batch * params_->row_ * oc_block_num * col_tile_ * sizeof(float))); + output_data_ = reinterpret_cast(ms_context_->allocator->Malloc( + params_->batch * params_->row_ * oc_block_num * col_tile_ * static_cast(sizeof(float)))); if (output_data_ == nullptr) { MS_LOG(ERROR) << "malloc tmp output data failed."; return RET_NULL_PTR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc index a2bffd2cffa..32c7795d5bc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/non_max_suppression_fp32.cc @@ -47,6 +47,8 @@ constexpr int kBoxPointNum = 4; } // namespace int NonMaxSuppressionCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); // boxes, scores, max_output_boxes, iou_threshold, score_threshold if (in_tensors_.size() < kMinInputsSize || in_tensors_.size() > kMaxInputsSize || out_tensors_.size() != kOutputNum) { MS_LOG(ERROR) << "NonMaxSuppression input size should be in [" << kMinInputsSize << ", " << kMaxInputsSize << "]" @@ -245,7 +247,16 @@ int NonMaxSuppressionCPUKernel::Run() { return RET_ERROR; } - return Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data); + auto ret = Run_Selecte(simple_out, box_num, batch_num, class_num, scores_data, box_data); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Run_Selecte failed"; + return RET_ERROR; + } + + for (auto *output : this->out_tensors()) { + output->ResetRefCount(); + } + return ret; } REG_KERNEL(kCPU, kNumberTypeFloat32, PrimitiveType_NonMaxSuppression, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc index 60f267d1efc..5139aba76f2 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc @@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_NonZero; namespace mindspore::kernel { int NonZeroCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc index 8d01d2fe911..f3b6c1d0295 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.cc @@ -28,9 +28,12 @@ using mindspore::schema::PrimitiveType_PadFusion; namespace mindspore::kernel { namespace { constexpr size_t kMirrorPadInputSize = 2; -constexpr size_t kPadMaxInputSize = 2; +constexpr size_t kPadCommonInputSize = 2; +constexpr size_t kPadMaxInputSize = 3; } // namespace int PadCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -40,30 +43,30 @@ int PadCPUKernel::Init() { int PadCPUKernel::ReSize() { auto input = in_tensors_.at(0); auto rank = input->shape().size(); - if (rank > COMM_SHAPE_SIZE) { - MS_LOG(ERROR) << "Pad input rank should <= " << COMM_SHAPE_SIZE << ", got " << rank; + if (rank > DEFAULT_PAD_NDIMS) { + MS_LOG(ERROR) << "Pad input rank should <= " << DEFAULT_PAD_NDIMS << ", got " << rank; return RET_ERROR; } auto output = out_tensors_.at(0); if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) { - auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input->shape().data(), rank); + auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input->shape().data(), rank); if (ret != RET_OK) { return ret; } - ret = ExtendShape(out_, COMM_SHAPE_SIZE, output->shape().data(), rank); + ret = ExtendShape(out_, DEFAULT_PAD_NDIMS, output->shape().data(), rank); if (ret != RET_OK) { 
return ret; } - if (pad_param_->padding_length < MAX_SHAPE_SIZE) { - int ori_paddings[MAX_SHAPE_SIZE]; + if (pad_param_->padding_length < MAX_PAD_SIZE) { + int ori_paddings[MAX_PAD_SIZE]; for (auto i = 0; i < pad_param_->padding_length; ++i) { ori_paddings[i] = pad_param_->paddings_[i]; } - ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, ori_paddings, pad_param_->padding_length); + ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, ori_paddings, pad_param_->padding_length); if (ret != RET_OK) { return ret; } - pad_param_->padding_length = MAX_SHAPE_SIZE; + pad_param_->padding_length = MAX_PAD_SIZE; } } return RET_OK; @@ -71,19 +74,17 @@ int PadCPUKernel::ReSize() { void PadCPUKernel::InitMirrorPadBlock() { mirror_pad_block_.clear(); - std::vector left_pads(COMM_SHAPE_SIZE); - for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) { + std::vector left_pads(DEFAULT_PAD_NDIMS); + for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) { left_pads[i] = pad_param_->paddings_[2 * i]; } - std::vector input_separate_dims; std::vector output_separate_dims; std::vector separate_offset; - /* init separate dims */ int cur_input = 1; int cur_output = 1; - for (size_t i = 0; i < COMM_SHAPE_SIZE; ++i) { + for (size_t i = 0; i < DEFAULT_PAD_NDIMS; ++i) { if (cur_input > 1) { input_separate_dims.emplace_back(cur_input); output_separate_dims.emplace_back(cur_output); @@ -100,22 +101,18 @@ void PadCPUKernel::InitMirrorPadBlock() { output_separate_dims.emplace_back(cur_output); separate_offset.emplace_back(0); } - /* init separate stride */ std::vector output_separate_stride; output_separate_stride.resize(output_separate_dims.size()); GetStride(output_separate_stride.data(), output_separate_dims.data(), output_separate_dims.size()); - /* init separate stride */ std::vector remain_stride; remain_stride.resize(0); int remain_size = GetStride(remain_stride.data(), output_separate_dims.data(), remain_stride.size()); - std::vector right_pads(separate_offset.size()); for (size_t i = 0; i < 
right_pads.size(); ++i) { right_pads[i] = output_separate_dims[i] - input_separate_dims[i] - separate_offset[i]; } - /* init pad region */ std::vector pad_region; for (size_t i = remain_stride.size(); i < output_separate_stride.size(); ++i) { @@ -129,30 +126,27 @@ void PadCPUKernel::InitMirrorPadBlock() { } pad_region.emplace_back(r); } - std::vector pad_region_stride(pad_region.size()); int region_size = GetStride(pad_region_stride.data(), pad_region.data(), pad_region.size()); - int remain_dim_offset = remain_stride.size(); - + int remain_dim_offset = static_cast(remain_stride.size()); std::vector pad_cord(pad_region.size()); - for (int pos = 0; pos < remain_size; ++pos) { const int dst_basic_offset = 0; - for (int index = 1; index < region_size; ++index) { int dst_offset = dst_basic_offset; - int value = index; for (size_t i = 0; i < pad_region.size() && pad_region_stride[i] != 0; ++i) { pad_cord[i] = value / pad_region_stride[i]; value = value % pad_region_stride[i]; } - MirrorPadBlock block; - const int size_offset = COMM_SHAPE_SIZE - static_cast(pad_region.size()); + const int size_offset = DEFAULT_PAD_NDIMS - static_cast(pad_region.size()); for (size_t i = 0; i < pad_region.size(); ++i) { int di = size_offset + i; int si = remain_dim_offset + i; + if (di > DEFAULT_PAD_NDIMS) { + continue; + } switch (pad_cord[i]) { case 0: dst_offset += separate_offset[si] * output_separate_stride[si]; @@ -182,7 +176,6 @@ void PadCPUKernel::InitMirrorPadBlock() { mirror_pad_block_.push_back(std::move(block)); } } - return; } int PadCPUKernel::ExtendShape(int *shape, int length, const int *ori_shape, int rank) const { @@ -257,7 +250,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { Pad(input_data, output_data, in_, out_, pad_param_->paddings_, task_id, op_parameter_->thread_num_); /* calculate region part */ - for (size_t i = task_id; i < mirror_pad_block_.size(); i += op_parameter_->thread_num_) { + for (size_t i = task_id; i < mirror_pad_block_.size(); i += 
static_cast(op_parameter_->thread_num_)) { auto block = mirror_pad_block_[i]; for (int a = 0; a < block.size_[0]; a++) { @@ -265,8 +258,14 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { for (int b = 0; b < block.size_[1]; b++) { int out_b_index = out_a_index + b * block.out_stride_[1]; for (int c = 0; c < block.size_[2]; ++c) { - int output_index = out_b_index + c * block.out_stride_[2]; - MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[3]); + int out_c_index = out_b_index + c * block.out_stride_[2]; + for (int d = 0; d < block.size_[3]; ++d) { + int out_d_index = out_c_index + d * block.out_stride_[3]; + for (int e = 0; e < block.size_[4]; ++e) { + int output_index = out_d_index + e * block.out_stride_[4]; + MirrorPad(input_data, output_data, in_, pad_param_, output_index, output_index + block.size_[5]); + } + } } } } @@ -282,7 +281,7 @@ int PadCPUKernel::RunMirrorPadImpl(int task_id) { return RET_OK; } -int PadCPUKernel::CheckPaddings(int *paddings, int length, int *input_shape, int mode) { +int PadCPUKernel::CheckPaddings(const int *paddings, int length, const int *input_shape, int mode) { if (paddings == nullptr || input_shape == nullptr) { return RET_NULL_PTR; } @@ -310,8 +309,8 @@ int PadCPUKernel::CheckPaddings(int *paddings, int length, int *input_shape, int } int PadCPUKernel::CopyPaddingFromInput() { - if (in_tensors_.size() != kMirrorPadInputSize) { - MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need 2 inputs, got " << in_tensors_.size(); + if (in_tensors_.size() < kMirrorPadInputSize) { + MS_LOG(ERROR) << "Pad Reflect or Symmetric mode need at least 2 inputs, got " << in_tensors_.size(); return RET_ERROR; } auto padding_tensor = in_tensors_.at(1); @@ -327,28 +326,28 @@ int PadCPUKernel::CopyPaddingFromInput() { return RET_ERROR; } - auto ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank); + auto ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank); if (ret != RET_OK) { 
return ret; } - ret = ExtendPaddings(pad_param_->paddings_, MAX_SHAPE_SIZE, paddings, padding_tensor->ElementsNum()); + ret = ExtendPaddings(pad_param_->paddings_, MAX_PAD_SIZE, paddings, padding_tensor->ElementsNum()); if (ret != RET_OK) { return ret; } - pad_param_->padding_length = MAX_SHAPE_SIZE; + pad_param_->padding_length = MAX_PAD_SIZE; return RET_OK; } void PadCPUKernel::CalculateStrides() { - pad_param_->in_strides[COMM_SHAPE_SIZE - 1] = 1; - for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) { + pad_param_->in_strides[DEFAULT_PAD_NDIMS - 1] = 1; + for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) { pad_param_->in_strides[i] = in_[i + 1] * pad_param_->in_strides[i + 1]; } - for (auto i = 0; i < COMM_SHAPE_SIZE; ++i) { + for (auto i = 0; i < DEFAULT_PAD_NDIMS; ++i) { out_[i] = in_[i] + pad_param_->paddings_[i * 2] + pad_param_->paddings_[i * 2 + 1]; } - pad_param_->out_strides[COMM_SHAPE_SIZE - 1] = 1; - for (auto i = COMM_SHAPE_SIZE - 2; i >= 0; --i) { + pad_param_->out_strides[DEFAULT_PAD_NDIMS - 1] = 1; + for (auto i = DEFAULT_PAD_NDIMS - 2; i >= 0; --i) { pad_param_->out_strides[i] = out_[i + 1] * pad_param_->out_strides[i + 1]; } } @@ -358,7 +357,7 @@ int PadCPUKernel::HandleMirrorPad() { if (in_tensors_.size() == 1) { auto input_shape = in_tensors_.at(0)->shape(); int rank = static_cast(input_shape.size()); - ret = ExtendShape(in_, COMM_SHAPE_SIZE, input_shape.data(), rank); + ret = ExtendShape(in_, DEFAULT_PAD_NDIMS, input_shape.data(), rank); if (ret != RET_OK) { return ret; } @@ -368,7 +367,7 @@ int PadCPUKernel::HandleMirrorPad() { return ret; } } - ret = CheckPaddings(pad_param_->paddings_, COMM_SHAPE_SIZE, in_, pad_param_->pad_mode_); + ret = CheckPaddings(pad_param_->paddings_, DEFAULT_PAD_NDIMS, in_, pad_param_->pad_mode_); if (ret != RET_OK) { return ret; } @@ -391,18 +390,21 @@ int PadCPUKernel::Run() { } int error_code = 0; if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) { - if (in_tensors_.size() == kPadMaxInputSize) 
{ + if (in_tensors_.size() >= kPadCommonInputSize) { error_code = CopyPaddingFromInput(); if (error_code != RET_OK) { MS_LOG(ERROR) << "Pad run error, error_code[" << error_code << "]"; return RET_ERROR; } } + if (in_tensors_.size() == kPadMaxInputSize) { + pad_param_->constant_value_ = reinterpret_cast(in_tensors_.at(2)->data_c())[0]; + } auto output = out_tensors_.at(0); int output_size = output->ElementsNum(); auto output_data = reinterpret_cast(output->data_c()); if (abs(pad_param_->constant_value_ - 0.0f) < 1e-5) { - memset(output_data, 0, output_size * sizeof(float)); + memset(output_data, 0, static_cast(output_size) * sizeof(float)); } else { for (auto i = 0; i < output_size; ++i) { output_data[i] = pad_param_->constant_value_; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h index 97ff8ae7802..aaa5e59cb80 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pad_fp32.h @@ -45,7 +45,7 @@ class PadCPUKernel : public InnerKernel { virtual int RunMirrorPadImpl(int task_id); private: - int CheckPaddings(int *paddings, int length, int *input_shape, int mode); + int CheckPaddings(const int *paddings, int length, const int *input_shape, int mode); void CalculateStrides(); int ExtendShape(int *shape, int length, const int *ori_shape, int rank) const; int ExtendPaddings(int *paddings, int length, const int *ori_paddings, int ori_length) const; @@ -55,8 +55,8 @@ class PadCPUKernel : public InnerKernel { int HandleMirrorPad(); int CopyPaddingFromInput(); PadParameter *pad_param_ = nullptr; - int in_[4] = {0}; - int out_[4] = {0}; + int in_[DEFAULT_PAD_NDIMS] = {0}; + int out_[DEFAULT_PAD_NDIMS] = {0}; std::vector mirror_pad_block_; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc index c9d6819b3ae..4e84593cf88 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/pooling_fp32.cc @@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_MaxPoolFusion; namespace mindspore::kernel { int PoolingCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = PoolingBaseCPUKernel::Init(); if (ret != RET_OK) { MS_LOG(ERROR) << "PoolingBase Init failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc index b02ea7881db..f6666a734bf 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/power_fp32.cc @@ -25,7 +25,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_PowFusion; namespace mindspore::kernel { -int PowerCPUKernel::Init() { return RET_OK; } +int PowerCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int PowerCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc index f9dccae7138..c7e9da9cbf6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/prelu_fp32.cc @@ -37,6 +37,8 @@ static int PReluRun(void *cdata, int task_id, float lhs_scale, float rhs_scale) } int PReluCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (in_tensors_[1]->ElementsNum() == 1) { prelu_param_->channelShared = true; } else { diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc index 7e3da02c0ef..45e1bcf1b24 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc +++ 
b/mindspore/lite/src/runtime/kernel/arm/fp32/range_fp32.cc @@ -28,6 +28,8 @@ using mindspore::schema::PrimitiveType_Range; namespace mindspore::kernel { int RangeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc index 561bde27f72..aef350b3247 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/rank_fp32.cc @@ -27,7 +27,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Rank; namespace mindspore::kernel { -int RankCPUKernel::Init() { return RET_OK; } +int RankCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int RankCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc index c8824e3e57f..fabd47c76ce 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reduce_fp32.cc @@ -40,6 +40,8 @@ using mindspore::schema::ReduceMode_ReduceSumSquare; namespace mindspore::kernel { int ReduceCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = ReduceBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc index cb7dce95b27..087e08ed776 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/relative_position_attention_fp32.cc @@ -687,6 +687,8 @@ void 
RelativePositionAttentionCPUKernel::FreeAllPackData() { } int RelativePositionAttentionCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_11D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = CheckWeights(); if (ret != RET_OK) { MS_LOG(ERROR) << "CheckWeights failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc index fbcd53ba4ad..f2a79f05881 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/resize_fp32.cc @@ -37,6 +37,8 @@ constexpr int kResizeSizeDouble = 2; } // namespace int ResizeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = ResizeBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; @@ -115,29 +117,30 @@ int ResizeCPUKernel::MallocTmpBuffer() { // malloc memory for x, y coordinates { - coordinate_.x_lefts_ = reinterpret_cast(malloc(sizeof(int) * x_len)); + coordinate_.x_lefts_ = reinterpret_cast(malloc(static_cast(sizeof(int)) * x_len)); CHECK_MALLOC_RES(coordinate_.x_lefts_, RET_NULL_PTR) - coordinate_.y_tops_ = reinterpret_cast(malloc(sizeof(int) * y_len)); + coordinate_.y_tops_ = reinterpret_cast(malloc(static_cast(sizeof(int)) * y_len)); CHECK_MALLOC_RES(coordinate_.y_tops_, RET_NULL_PTR) if (method_ == static_cast(schema::ResizeMethod_LINEAR)) { - coordinate_.x_rights_ = reinterpret_cast(malloc(sizeof(int) * x_len)); + coordinate_.x_rights_ = reinterpret_cast(malloc(static_cast(sizeof(int)) * x_len)); CHECK_MALLOC_RES(coordinate_.x_rights_, RET_NULL_PTR) - coordinate_.y_bottoms_ = reinterpret_cast(malloc(sizeof(int) * y_len)); + coordinate_.y_bottoms_ = reinterpret_cast(malloc(static_cast(sizeof(int)) * y_len)); CHECK_MALLOC_RES(coordinate_.y_bottoms_, RET_NULL_PTR) } } // malloc memory for weights of x, y axes { - x_weights_ = reinterpret_cast(malloc(sizeof(float) * x_weight_len)); + x_weights_ = 
reinterpret_cast(malloc(static_cast(sizeof(float)) * x_weight_len)); CHECK_MALLOC_RES(x_weights_, RET_NULL_PTR) - y_weights_ = reinterpret_cast(malloc(sizeof(float) * y_weight_len)); + y_weights_ = reinterpret_cast(malloc(static_cast(sizeof(float)) * y_weight_len)); CHECK_MALLOC_RES(y_weights_, RET_NULL_PTR) } { - line_buffer_ = reinterpret_cast( - malloc(sizeof(float) * x_len * in_tensors_.at(0)->Channel() * kResizeSizeDouble * op_parameter_->thread_num_)); + line_buffer_ = + reinterpret_cast(malloc(static_cast(sizeof(float)) * x_len * in_tensors_.at(0)->Channel() * + kResizeSizeDouble * op_parameter_->thread_num_)); CHECK_MALLOC_RES(line_buffer_, RET_NULL_PTR) } return RET_OK; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc index fe42dac2a97..4c0d98570d7 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_fp32.cc @@ -29,7 +29,7 @@ using mindspore::schema::PrimitiveType_ReverseV2; namespace mindspore::kernel { int ReverseCPUKernel::Stride(int index) { int stride = 1; - for (size_t i = index + 1; i < in_tensors_.at(0)->shape().size(); ++i) { + for (size_t i = static_cast(index) + 1; i < in_tensors_.at(0)->shape().size(); ++i) { stride *= in_tensors_.at(0)->shape().at(i); } return stride; @@ -63,12 +63,12 @@ int ReverseCPUKernel::ReSize() { free(tmp_); tmp_ = nullptr; } - tmp_ = reinterpret_cast(malloc(data_size_ * sizeof(int))); + tmp_ = reinterpret_cast(malloc(data_size_ * static_cast(sizeof(int)))); if (tmp_ == nullptr) { MS_LOG(ERROR) << "Reverse Malloc tmp_ error!"; return RET_ERROR; } - (void)memset(tmp_, 0, data_size_ * sizeof(int)); + (void)memset(tmp_, 0, data_size_ * static_cast(sizeof(int))); for (int i = 0; i < param->num_axis_; i++) { int axis = param->axis_[i]; @@ -98,6 +98,8 @@ int ReverseCPUKernel::ReSize() { } int ReverseCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + 
CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -142,7 +144,7 @@ int ReverseCPUKernel::Run() { void ReverseCPUKernel::UpdateAxisInfo() { auto reverse_param = reinterpret_cast(op_parameter_); - int in_shape_len = in_tensors_.front()->shape().size(); + int in_shape_len = static_cast(in_tensors_.front()->shape().size()); for (int i = 0; i < reverse_param->num_axis_; ++i) { if (reverse_param->axis_[i] < 0) { reverse_param->axis_[i] += in_shape_len; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc index 765c9d362c7..fd211394654 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/reverse_sequence_fp32.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_ReverseSequence; namespace mindspore::kernel { int ReverseSequenceCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc index 4125e856042..4744e78952c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/roi_pooling_fp32.cc @@ -29,6 +29,8 @@ using mindspore::schema::PrimitiveType_ROIPooling; namespace mindspore::kernel { int ROIPoolingCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -42,7 +44,7 @@ int ROIPoolingCPUKernel::ReSize() { } auto in_shape = in_tensors_.front()->shape(); auto out_shape = out_tensors_.front()->shape(); - int ndims = in_shape.size(); + int ndims = static_cast(in_shape.size()); if (ndims < C4NUM) { MS_LOG(ERROR) << "ROIPooling in_shape.size() error ,shape dim 
greater than or equal to 4!"; return RET_ERROR; @@ -67,7 +69,7 @@ int ROIPoolingCPUKernel::ReSize() { param_->out_strides_[i] = out_shape.at(i + 1) * param_->out_strides_[i + 1]; } param_->thread_num_ = MSMIN(param_->op_parameter_.thread_num_, out_shape.at(0)); - max_c_ = reinterpret_cast(malloc(param_->input_c_ * sizeof(float))); + max_c_ = reinterpret_cast(malloc(param_->input_c_ * static_cast(sizeof(float)))); if (max_c_ == nullptr) { MS_LOG(ERROR) << "malloc max_c failed."; return RET_MEMORY_FAILED; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc index 9448ac4fa2c..0542f9b2489 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scale_fp32.cc @@ -117,10 +117,8 @@ int ScaleCPUKernel::CalculateParameter() { } int ScaleCPUKernel::Init() { - if (in_tensors_.size() < 2 || in_tensors_.size() > 3) { - MS_LOG(ERROR) << "inputs to Scale operator should be 2 or 3, but " << in_tensors_.size() << " is given."; - return RET_ERROR; - } + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = InitScaleOffset(); if (ret != RET_OK) { MS_LOG(ERROR) << "Scale fp32 InitScaleOffset failed."; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc index 20304f80dd1..f9afe0a5427 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/scatter_nd_fp32.cc @@ -34,6 +34,8 @@ constexpr int kScatterIndicesIndex = 1; constexpr int kScatterUpdateIndex = 2; } // namespace int ScatterNDCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc 
b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc index 38f8be7ed8c..9392ca914e9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/shape_fp32.cc @@ -26,7 +26,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Shape; namespace mindspore::kernel { -int ShapeCPUKernel::Init() { return RET_OK; } +int ShapeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int ShapeCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc index cd056de6684..25e3d22ecea 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/size_fp32.cc @@ -25,7 +25,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Size; namespace mindspore::kernel { -int SizeCPUKernel::Init() { return RET_OK; } +int SizeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int SizeCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc index 2518347add1..9654b6db0ed 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/softmax_fp32.cc @@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_Softmax; namespace mindspore::kernel { int SoftmaxCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto ret = SoftmaxBaseCPUKernel::Init(); if (ret != RET_OK) { return ret; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc index b76872a21a0..4a1b974a589 
100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_batch_fp32.cc @@ -53,6 +53,8 @@ void SpaceToBatchCPUKernel::ProcessInput() { } int SpaceToBatchCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc index b3f1cfddc68..d895051791b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/space_to_depth_fp32.cc @@ -32,6 +32,8 @@ using mindspore::schema::PrimitiveType_SpaceToDepth; namespace mindspore::kernel { int SpaceToDepthCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); SpaceToDepthParameter *param = reinterpret_cast(op_parameter_); if (param->block_size_ <= 0) { MS_LOG(ERROR) << "Input block_size should > 0!"; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc index 819ae42388a..2f4db92acc9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/sparse_to_dense_fp32.cc @@ -30,6 +30,8 @@ using mindspore::schema::PrimitiveType_SparseToDense; namespace mindspore::kernel { int SparseToDenseCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), DIMENSION_3D); + CHECK_LESS_RETURN(out_tensors_.size(), 1); auto input2 = in_tensors_.at(2); MS_ASSERT(input2); auto input3 = in_tensors_.at(3); @@ -49,7 +51,7 @@ int SparseToDenseCPUKernel::ReSize() { auto output0 = out_tensors_.at(0); std::vector out_shape_tensor = output0->shape(); auto output_shape_tmp = reinterpret_cast(out_shape_tensor.data()); - int output_dim = output0->shape().size(); + int 
output_dim = static_cast(output0->shape().size()); for (int i = 0; i < DIMENSION_4D - output_dim; i++) { output_shape[i] = 1; } @@ -98,12 +100,13 @@ int SparseToDenseCPUKernel::GenerateIndices() { MS_LOG(ERROR) << "Input dim is invalid, dim: " << index_num; return RET_ERROR; } - sparse_indices_vect = reinterpret_cast(ctx_->allocator->Malloc(sizeof(int *) * index_num)); + sparse_indices_vect = + reinterpret_cast(ctx_->allocator->Malloc(sizeof(int *) * static_cast(index_num))); if (sparse_indices_vect == nullptr) { MS_LOG(ERROR) << "Null pointer reference: sparse_indices_vect."; return RET_ERROR; } - index_dim = input0->shape().size(); + index_dim = static_cast(input0->shape().size()); int *sparse_indices = reinterpret_cast(input0->MutableData()); switch (index_dim) { case 0: diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc index 9ecfb9d9743..40402526c03 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/splice_fp32.cc @@ -27,7 +27,11 @@ using mindspore::lite::RET_OK; using mindspore::lite::RET_PARAM_INVALID; using mindspore::schema::PrimitiveType_Splice; namespace mindspore::kernel { -int SpliceCPUKernel::Init() { return RET_OK; } +int SpliceCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int SpliceCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc index 71d4047a310..ad0940f3c1b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/topk_fp32.cc @@ -25,6 +25,8 @@ using mindspore::schema::PrimitiveType_TopKFusion; namespace mindspore::kernel { int TopKCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); 
topk_param_->topk_node_list_ = nullptr; if (!InferShapeDone()) { return RET_OK; @@ -59,7 +61,8 @@ int TopKCPUKernel::Run() { MS_LOG(ERROR) << "The k value is out of the data size range."; return RET_ERROR; } - topk_param_->topk_node_list_ = ms_context_->allocator->Malloc(sizeof(TopkNode) * topk_param_->last_dim_size_); + topk_param_->topk_node_list_ = + ms_context_->allocator->Malloc(static_cast(sizeof(TopkNode)) * topk_param_->last_dim_size_); if (topk_param_->topk_node_list_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc index 5aad5a12b8f..9562d2a4250 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.cc @@ -27,6 +27,8 @@ using mindspore::schema::PrimitiveType_Transpose; namespace mindspore::kernel { int TransposeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), C2NUM); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -113,7 +115,7 @@ int TransposeCPUKernel::TransposeDimGreaterThan6(int task_id) { return RET_OK; } -void TransposeCPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor) { +void TransposeCPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor) { if (in_tensor->shape().size() != 4) { return; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h index 6b3d1ee83c5..64df8a7f17f 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/transpose_fp32.h @@ -48,7 +48,7 @@ class TransposeCPUKernel : public InnerKernel { virtual int TransposeDim2to6(); virtual int TransposeDimGreaterThan6(int task_id); - void GetNHNCTransposeFunc(lite::Tensor 
*in_tensor, lite::Tensor *out_tensor); + void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor); void *in_data_ = nullptr; void *out_data_ = nullptr; int *out_shape_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc index 1e5b39eff27..02d92a71c12 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/uniform_real_fp32.cc @@ -43,6 +43,13 @@ constexpr size_t kThirdDataIndex = 2; constexpr size_t kFourthDataIndex = 3; constexpr size_t kBitWidth = 32; constexpr size_t kPerSegNum = 4; +constexpr size_t kFirstDataStride = 1; +constexpr size_t kSecondDataStride = 2; +constexpr size_t kThirdDataStride = 3; +constexpr size_t kFirstRandNum = 0; +constexpr size_t kSecondRandNum = 1; +constexpr size_t kThirdRandNum = 2; +constexpr size_t kFourthRandNum = 3; } // namespace class PhiloxRandom { @@ -184,10 +191,10 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) { for (size_t i = 1; i < length / kPerSegNum; i++) { philoxRandom.Skip(0); randNum = philoxRandom.operator()(); - data[kPerSegNum * i] = uint32ToFloat(randNum[0]); - data[kPerSegNum * i + 1] = uint32ToFloat(randNum[1]); - data[kPerSegNum * i + 2] = uint32ToFloat(randNum[2]); - data[kPerSegNum * i + 3] = uint32ToFloat(randNum[3]); + data[kPerSegNum * i] = uint32ToFloat(randNum[kFirstRandNum]); + data[kPerSegNum * i + kFirstDataStride] = uint32ToFloat(randNum[kSecondRandNum]); + data[kPerSegNum * i + kSecondDataStride] = uint32ToFloat(randNum[kThirdRandNum]); + data[kPerSegNum * i + kThirdDataStride] = uint32ToFloat(randNum[kFourthRandNum]); } philoxRandom.Skip(0); randNum = philoxRandom.operator()(); @@ -197,7 +204,11 @@ void GetPhiloxRandomFloat(float *data, size_t length, int seed, int seed2) { } } -int UniformRealCPUKernel::Init() { return RET_OK; } +int UniformRealCPUKernel::Init() { + 
CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int UniformRealCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc index f449770b373..024c1c459e3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unique_fp32.cc @@ -23,7 +23,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_Unique; namespace mindspore::kernel { -int UniqueCPUKernel::Init() { return RET_OK; } +int UniqueCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int UniqueCPUKernel::ReSize() { return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc index af8ed3aabb1..ffc7438dd00 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/unstack_fp32.cc @@ -24,6 +24,8 @@ using mindspore::schema::PrimitiveType_Unstack; namespace mindspore::kernel { int UnstackCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); if (!InferShapeDone()) { return RET_OK; } @@ -40,7 +42,7 @@ int UnstackCPUKernel::ReSize() { para->axis_dim_ = 1; para->after_dims_ = 1; if (para->axis_ < 0) { - para->axis_ += shape_size; + para->axis_ += static_cast(shape_size); } for (size_t i = 0; i < shape_size; i++) { @@ -73,7 +75,7 @@ int UnstackCPUKernel::Run() { } MS_ASSERT(output_addr_array_); auto para = reinterpret_cast(op_parameter_); - para->num_ = out_num; + para->num_ = static_cast(out_num); Unstack(input, output_addr_array_, para, sizeof(float)); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc 
b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc index f1f6e19cd47..d3da01c8e34 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/where_fp32.cc @@ -32,6 +32,8 @@ namespace mindspore::kernel { constexpr uint32_t kSingleNum = 1; constexpr uint32_t kTripleNum = 3; int WhereCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); where_param_->op_parameter_.thread_num_ = thread_count_; return RET_OK; } @@ -69,11 +71,12 @@ int WhereCPUKernel::RunWithSingleInput() { MS_ASSERT(input); condition_ = reinterpret_cast(input->data_c()); where_param_->condition_num_ = input->ElementsNum(); - where_param_->rank_ = input->shape().size(); + where_param_->rank_ = static_cast(input->shape().size()); int strides[8]; ComputeStrides(in_tensors_.at(0)->shape().data(), strides, where_param_->rank_); - auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ * sizeof(int32_t)); + auto data = ms_context_->allocator->Malloc(where_param_->condition_num_ * where_param_->rank_ * + static_cast(sizeof(int32_t))); if (data == nullptr) { MS_LOG(ERROR) << "macllov data is error!"; return RET_ERROR; @@ -104,7 +107,7 @@ int WhereCPUKernel::RunWithSingleInput() { MS_LOG(ERROR) << "malloc out tensor failed."; return RET_ERROR; } - memcpy(out_data, result, true_num * where_param_->rank_ * sizeof(int32_t)); + memcpy(out_data, result, true_num * where_param_->rank_ * static_cast(sizeof(int32_t))); ms_context_->allocator->Free(data); return RET_OK; } @@ -159,6 +162,9 @@ int WhereCPUKernel::Run() { if (ret != RET_OK) { MS_LOG(ERROR) << "Where op run failed."; } + for (auto *output : this->out_tensors()) { + output->ResetRefCount(); + } return ret; } diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc index 37e7c8ec376..a21d06b7781 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/zeroslike_fp32.cc @@ -27,7 +27,11 @@ using mindspore::lite::RET_OK; using mindspore::schema::PrimitiveType_ZerosLike; namespace mindspore::kernel { -int ZerosLikeCPUKernel::Init() { return RET_OK; } +int ZerosLikeCPUKernel::Init() { + CHECK_LESS_RETURN(in_tensors_.size(), 1); + CHECK_LESS_RETURN(out_tensors_.size(), 1); + return RET_OK; +} int ZerosLikeCPUKernel::Run() { auto output_data = reinterpret_cast(out_tensors_.at(0)->MutableData()); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h index 7e42b5553d0..79f30b310ca 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/arithmetic_int8.h @@ -42,7 +42,7 @@ class ArithmeticInt8CPUKernel : public InnerKernel { int8_t *tile_data0_{nullptr}; int8_t *tile_data1_{nullptr}; ArithmeticRunInt8 arithmetic_run_{nullptr}; - ArithmeticQuantArg quant_args_; + ArithmeticQuantArg quant_args_ = {}; }; } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_ARITHMETIC_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc index cf4698196c1..c869ba490c9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/batchnorm_int8.cc @@ -48,12 +48,12 @@ int BatchnormInt8CPUKernel::InitConstTensor() { auto mean_ptr = reinterpret_cast(mean->MutableData()); auto var_ptr = reinterpret_cast(variance->MutableData()); - alpha_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); + alpha_addr_ = reinterpret_cast(malloc(static_cast(mean->ElementsNum()) * sizeof(float))); if (alpha_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - beta_addr_ = 
reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); + beta_addr_ = reinterpret_cast(malloc(static_cast(variance->ElementsNum()) * sizeof(float))); if (beta_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -92,12 +92,12 @@ int BatchnormInt8CPUKernel::InitFusedConstTensor() { auto mean_ptr = reinterpret_cast(mean->MutableData()); auto var_ptr = reinterpret_cast(variance->MutableData()); - alpha_addr_ = reinterpret_cast(malloc(mean->ElementsNum() * sizeof(float))); + alpha_addr_ = reinterpret_cast(malloc(static_cast(mean->ElementsNum()) * sizeof(float))); if (alpha_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - beta_addr_ = reinterpret_cast(malloc(variance->ElementsNum() * sizeof(float))); + beta_addr_ = reinterpret_cast(malloc(static_cast(variance->ElementsNum()) * sizeof(float))); if (beta_addr_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc index 017a674168e..e185ff3f9e5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.cc @@ -59,11 +59,12 @@ int ConcatInt8CPUKernel::Init() { } int ConcatInt8CPUKernel::ReSize() { - concat_param_->axis_ = - concat_param_->axis_ >= 0 ? concat_param_->axis_ : in_tensors_.front()->shape().size() + concat_param_->axis_; + concat_param_->axis_ = concat_param_->axis_ >= 0 + ? 
concat_param_->axis_ + : static_cast(in_tensors_.front()->shape().size()) + concat_param_->axis_; auto input_num = in_tensors_.size(); - concat_param_->input_num_ = input_num; + concat_param_->input_num_ = static_cast(input_num); concat_param_->input_shapes_ = reinterpret_cast(malloc(sizeof(int *) * input_num)); if (concat_param_->input_shapes_ == nullptr) { MS_LOG(ERROR) << "malloc concat_param_->input_shapes_ failed."; @@ -97,7 +98,7 @@ int ConcatInt8CPUKernel::ReSize() { memcpy(reinterpret_cast(concat_param_->output_shapes_), output_tensor->shape().data(), sizeof(int) * output_dim); - for (size_t i = concat_param_->axis_ + 1; i < output_dim; i++) { + for (size_t i = static_cast(concat_param_->axis_ + 1); i < output_dim; i++) { after_axis_size *= concat_param_->output_shapes_[i]; } concat_param_->after_axis_size = after_axis_size; @@ -122,21 +123,17 @@ int ConcatInt8CPUKernel::Run() { int ConcatInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto concat = reinterpret_cast(cdata); - auto ret = concat->DoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "ConcatInt8Run task_id " << task_id << " failed."; - return ret; - } + concat->DoExecute(task_id); return lite::RET_OK; } -int ConcatInt8CPUKernel::DoExecute(int task_id) { +void ConcatInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(before_axis_size - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return lite::RET_OK; + return; } Int8Concat(input_data_, output_data_, concat_param_, concat_param_->axis_, real_dst_count, task_id); - return lite::RET_OK; + return; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Concat, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h index aa9f32d2791..8ea19039d86 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/concat_int8.h @@ -57,7 +57,7 @@ 
class ConcatInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int task_id); + void DoExecute(int task_id); private: int64_t before_axis_size = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h index 7938fbdfb34..ab580f81c10 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_1x1_int8.h @@ -33,7 +33,7 @@ class Convolution1x1Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution1x1Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~Convolution1x1Int8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc index 65d46c85614..a1776ef639a 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.cc @@ -25,7 +25,7 @@ namespace mindspore::kernel { namespace { constexpr size_t kUnitBufferMultipler = 4 * 4; } // namespace -int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param) { +int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param) { auto input_channel = conv_param->input_channel_; auto output_channel = conv_param->output_channel_; auto kernel_plane = conv_param->kernel_w_ * conv_param->kernel_h_; @@ -116,7 +116,7 @@ int Convolution3x3Int8CPUKernel::InitWeightBias() { memset(bias_data_, 0, new_bias_size); if (in_tensors_.size() == kInputSize2) { auto ori_bias_addr = 
reinterpret_cast(in_tensors_.at(kBiasIndex)->MutableData()); - memcpy(bias_data_, ori_bias_addr, output_channel * sizeof(int32_t)); + memcpy(bias_data_, ori_bias_addr, static_cast(output_channel) * sizeof(int32_t)); } else { MS_ASSERT(in_tensors_.size() == kInputSize1); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h index 6b3c087de86..60d6307739b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_3x3_int8.h @@ -27,7 +27,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { public: Convolution3x3Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~Convolution3x3Int8CPUKernel() override; int Init() override; @@ -46,7 +46,7 @@ class Convolution3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { int32_t *tmp_dst_buffer_ = nullptr; int8_t *tmp_out_ = nullptr; }; -int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, ConvParameter *conv_param); +int ProcessFilterUint8(const int8_t *origin_weight, int16_t *dst_weight, const ConvParameter *conv_param); } // namespace mindspore::kernel #endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_INT8_CONVOLUTION_3X3_INT8_H_ diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc index 54df66909ea..182b0f859d5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.cc @@ -60,13 +60,13 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { 
PackNCHWToNHWCInt8(origin_weight, tmp_weight, 1, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch()); - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(int16_t))); + packed_weight_ = reinterpret_cast(malloc(static_cast(pack_weight_size) * sizeof(int16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; free(tmp_weight); return RET_ERROR; } - bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; + bool filter_per_channel = static_cast(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL); if (filter_per_channel) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int c = 0; c < channel; c++) { @@ -87,16 +87,16 @@ int ConvolutionDepthwise3x3Int8CPUKernel::InitWeightBias() { } free(tmp_weight); - bias_data_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + bias_data_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - memset(bias_data_, 0, channel * sizeof(int32_t)); + memset(bias_data_, 0, static_cast(channel) * sizeof(int32_t)); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); + memcpy(bias_data_, ori_bias, static_cast(bias_tensor->ElementsNum()) * sizeof(int32_t)); } return RET_OK; } @@ -153,7 +153,8 @@ int ConvDw3x3Int8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) int ConvolutionDepthwise3x3Int8CPUKernel::InitBuffer() { int buffer_size = kConvDepthwise3x3BufferSize * conv_param_->thread_num_; - buffer_ = reinterpret_cast(ms_context_->allocator->Malloc(buffer_size * sizeof(int8_t))); + buffer_ = + reinterpret_cast(ms_context_->allocator->Malloc(static_cast(buffer_size) * sizeof(int8_t))); if (buffer_ == nullptr) 
{ MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h index 58a41e97ec1..93a50ccc0be 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_3x3_int8.h @@ -27,7 +27,7 @@ class ConvolutionDepthwise3x3Int8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwise3x3Int8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~ConvolutionDepthwise3x3Int8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc index e689107940c..803445f12a5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.cc @@ -55,7 +55,7 @@ int ConvolutionDepthwiseInt8CPUKernel::InitWeightBias() { return RET_ERROR; } - bool filter_per_channel = conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL; + bool filter_per_channel = static_cast(conv_param_->conv_quant_arg_.per_channel_ & FILTER_PER_CHANNEL); if (filter_per_channel) { for (int i = 0; i < weight_tensor->Height() * weight_tensor->Width(); i++) { for (int c = 0; c < channel; c++) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h index 5f27cef2b85..ccb22bf2109 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h +++ 
b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_int8.h @@ -27,7 +27,7 @@ class ConvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~ConvolutionDepthwiseInt8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc index 4f5166a7f5b..208a2684bc6 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.cc @@ -42,7 +42,7 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() { auto origin_weight = reinterpret_cast(weight_tensor->MutableData()); int OC8 = UP_DIV(weight_tensor->Batch(), C8NUM); int pack_weight_size = C8NUM * OC8 * weight_tensor->Height() * weight_tensor->Width(); - packed_weight_ = reinterpret_cast(malloc(pack_weight_size * sizeof(int16_t))); + packed_weight_ = reinterpret_cast(malloc(static_cast(pack_weight_size) * sizeof(int16_t))); if (packed_weight_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -50,16 +50,16 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitWeightBias() { PackDepthwiseInt8Weight(origin_weight, packed_weight_, weight_tensor->Height() * weight_tensor->Width(), weight_tensor->Batch(), &(conv_param_->conv_quant_arg_)); - bias_data_ = reinterpret_cast(malloc(C8NUM * OC8 * sizeof(int32_t))); + bias_data_ = reinterpret_cast(malloc(static_cast(C8NUM * OC8) * sizeof(int32_t))); if (bias_data_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; } - memset(bias_data_, 
0, C8NUM * OC8 * sizeof(int32_t)); + memset(bias_data_, 0, static_cast(C8NUM * OC8) * sizeof(int32_t)); if (in_tensors_.size() == kInputSize2) { auto bias_tensor = in_tensors_.at(kBiasIndex); auto ori_bias = reinterpret_cast(bias_tensor->MutableData()); - memcpy(bias_data_, ori_bias, bias_tensor->ElementsNum() * sizeof(int32_t)); + memcpy(bias_data_, ori_bias, static_cast(bias_tensor->ElementsNum()) * sizeof(int32_t)); } conv_param_->thread_num_ = MSMIN(thread_count_, OC8); @@ -72,7 +72,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() { int pack_input_size = conv_param_->input_batch_ * conv_param_->input_h_ * conv_param_->input_w_ * C8NUM * UP_DIV(conv_param_->input_channel_, C8NUM); - packed_input_ = reinterpret_cast(ms_context_->allocator->Malloc(pack_input_size * sizeof(int8_t))); + packed_input_ = + reinterpret_cast(ms_context_->allocator->Malloc(static_cast(pack_input_size) * sizeof(int8_t))); if (packed_input_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -80,7 +81,8 @@ int ConvolutionDepthwiseSWInt8CPUKernel::InitPackedInputOutput() { int pack_output_size = conv_param_->output_batch_ * conv_param_->output_h_ * conv_param_->output_w_ * C8NUM * UP_DIV(conv_param_->output_channel_, C8NUM); - packed_output_ = reinterpret_cast(ms_context_->allocator->Malloc(pack_output_size * sizeof(int8_t))); + packed_output_ = reinterpret_cast( + ms_context_->allocator->Malloc(static_cast(pack_output_size) * sizeof(int8_t))); if (packed_output_ == nullptr) { MS_LOG(ERROR) << "Malloc buffer failed."; return RET_ERROR; @@ -150,10 +152,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { auto input_tensor = in_tensors_.at(kInputIndex); auto channel = conv_param_->input_channel_; - input_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); + input_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); MSLITE_CHECK_PTR(input_scale_); - input_zp_ = reinterpret_cast(malloc(channel * 
sizeof(int8_t))); + input_zp_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int8_t))); MSLITE_CHECK_PTR(input_zp_); if (input_tensor->quant_params().size() == kPerTensor) { @@ -171,10 +173,10 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { } auto output_tensor = out_tensors_.at(kOutputIndex); - output_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); + output_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); MSLITE_CHECK_PTR(output_scale_); - output_zp_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + output_zp_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); MSLITE_CHECK_PTR(output_zp_); if (output_tensor->quant_params().size() == kPerTensor) { @@ -191,25 +193,26 @@ int ConvolutionDepthwiseSWInt8CPUKernel::ReinitQuantParam() { } } - conv_quant_arg_->real_multiplier_ = reinterpret_cast(malloc(channel * sizeof(double))); + conv_quant_arg_->real_multiplier_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(double))); MSLITE_CHECK_PTR(conv_quant_arg_->real_multiplier_); - conv_quant_arg_->left_shift_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + conv_quant_arg_->left_shift_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->left_shift_); - conv_quant_arg_->right_shift_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + conv_quant_arg_->right_shift_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->right_shift_); - conv_quant_arg_->quant_multiplier_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + conv_quant_arg_->quant_multiplier_ = + reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->quant_multiplier_); - conv_quant_arg_->out_act_min_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + conv_quant_arg_->out_act_min_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); 
MSLITE_CHECK_PTR(conv_quant_arg_->out_act_min_); - conv_quant_arg_->out_act_max_ = reinterpret_cast(malloc(channel * sizeof(int32_t))); + conv_quant_arg_->out_act_max_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(int32_t))); MSLITE_CHECK_PTR(conv_quant_arg_->out_act_max_); - weight_scale_ = reinterpret_cast(malloc(channel * sizeof(float))); + weight_scale_ = reinterpret_cast(malloc(static_cast(channel) * sizeof(float))); MSLITE_CHECK_PTR(weight_scale_); auto weight_tensor = in_tensors_.at(kWeightIndex); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h index acbc0835ef7..b11576f43d4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_depthwise_slidewindow_int8.h @@ -28,7 +28,7 @@ class ConvolutionDepthwiseSWInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionDepthwiseSWInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~ConvolutionDepthwiseSWInt8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc index b64a6f8e0e1..1f0c35e4d2e 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.cc @@ -98,12 +98,12 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { memset(bias_data_, 0, bias_size); if (in_tensors_.size() == kInputSize2) { auto ori_bias = reinterpret_cast(in_tensors_.at(kBiasIndex)->data_c()); - memcpy(bias_data_, ori_bias, output_channel * sizeof(int32_t)); + 
memcpy(bias_data_, ori_bias, static_cast(output_channel) * sizeof(int32_t)); } else { MS_ASSERT(in_tensors_.size() == kInputSize1); } auto *bias_data = reinterpret_cast(bias_data_); - bool filter_peroc = conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL; + bool filter_peroc = static_cast(conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL); if (filter_peroc) { filter_zp_ptr_ = reinterpret_cast(malloc(output_channel * sizeof(int32_t))); if (filter_zp_ptr_ == nullptr) { @@ -126,9 +126,9 @@ int ConvolutionInt8CPUKernel::InitWeightBias() { size_t input_sum_size; if (conv_quant_arg_->per_channel_ & FILTER_PER_CHANNEL) { - input_sum_size = up_round_oc * tile_num_ * thread_count_ * sizeof(int32_t); + input_sum_size = static_cast(up_round_oc * tile_num_ * thread_count_) * sizeof(int32_t); } else { - input_sum_size = tile_num_ * thread_count_ * sizeof(int32_t); + input_sum_size = static_cast(tile_num_ * thread_count_) * sizeof(int32_t); } input_sum_ = reinterpret_cast(malloc(input_sum_size)); if (input_sum_ == nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h index 8afc0c2ed14..bdff948a3e9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/convolution_int8.h @@ -28,7 +28,7 @@ class ConvolutionInt8CPUKernel : public ConvolutionBaseCPUKernel { public: ConvolutionInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~ConvolutionInt8CPUKernel() override { FreeQuantParam(); if (packed_weight_ != nullptr) { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc index e1a628ce206..62d110e5cda 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.cc @@ -57,21 +57,16 @@ int CropInt8CPUKernel::Run() { int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto crop = reinterpret_cast(cdata); - auto ret = crop->DoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "CropInt8Run task id " << task_id << " run failed."; - return ret; - } + crop->DoExecute(task_id); return RET_OK; } -int CropInt8CPUKernel::DoExecute(int task_id) { +void CropInt8CPUKernel::DoExecute(int task_id) { auto input_tensor = in_tensors_.at(kInputIndex); auto out_tensor = out_tensors_.at(kOutputIndex); int8_t *input_data = reinterpret_cast(input_tensor->data_c()); int8_t *output_data = reinterpret_cast(out_tensor->data_c()); Int8Crop(input_data, output_data, task_id, crop_para_); - return RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Crop, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h index 788d5207e0b..99f1d7a4078 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/crop_int8.h @@ -36,7 +36,7 @@ class CropInt8CPUKernel : public CropBaseCPUKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int task_id); + void DoExecute(int task_id); }; int CropInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale); diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h index aef09fbab57..f85c3343a73 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_depthwise_int8.h @@ -27,7 +27,7 @@ class DeconvolutionDepthwiseInt8CPUKernel : public ConvolutionBaseCPUKernel { public: 
DeconvolutionDepthwiseInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~DeconvolutionDepthwiseInt8CPUKernel() override; int Init() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h index 97489e36679..b80dd7c67f2 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/deconvolution_int8.h @@ -33,7 +33,7 @@ class DeConvInt8CPUKernel : public ConvolutionBaseCPUKernel { public: DeConvInt8CPUKernel(OpParameter *parameter, const std::vector &inputs, const std::vector &outputs, const InnerContext *ctx) - : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx) {} + : ConvolutionBaseCPUKernel(parameter, inputs, outputs, ctx, nullptr, nullptr) {} ~DeConvInt8CPUKernel() override; int ReSize() override; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc index 2efab7a88a2..d2d8e3b2c37 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.cc @@ -57,7 +57,7 @@ int GatherNdInt8CPUKernel::ReSize() { auto indices_tensor = in_tensors_.at(1); auto indices_shape = indices_tensor->shape(); - int indices_rank = indices_shape.size(); + int indices_rank = static_cast(indices_shape.size()); count_ = 1; for (int i = 0; i < indices_rank - 1; ++i) { count_ *= indices_shape[i]; @@ -66,12 +66,12 @@ int GatherNdInt8CPUKernel::ReSize() { MS_LOG(ERROR) << "count_ is invalid, count_: " << count_; return RET_ERROR; } - in_offset_ = reinterpret_cast(malloc(count_ * sizeof(int))); + in_offset_ = reinterpret_cast(malloc(static_cast(count_) * 
sizeof(int))); if (in_offset_ == nullptr) { MS_LOG(ERROR) << "GatherNdInt8 Malloc in_offset_ error!"; return RET_ERROR; } - (void)memset(in_offset_, 0, count_ * sizeof(int)); + (void)memset(in_offset_, 0, static_cast(count_) * sizeof(int)); thread_sz_count_ = MSMIN(thread_count_, count_); if (thread_sz_count_ == 0) { MS_LOG(ERROR) << "div zero"; @@ -85,9 +85,9 @@ int GatherNdInt8CPUKernel::InitOffset() { auto ind_quant_args = in_tensors_.at(1)->quant_params(); auto indices_tensor = in_tensors_.at(1); auto indices_shape = indices_tensor->shape(); - int indices_rank = indices_shape.size(); + int indices_rank = static_cast(indices_shape.size()); auto in_shape = in_tensors_.front()->shape(); - int in_rank = in_shape.size(); + int in_rank = static_cast(in_shape.size()); if (indices_rank < 1) { MS_LOG(ERROR) << "inex out of bounds"; return RET_ERROR; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h index eba1229ca0c..43d38e00043 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/gatherNd_int8.h @@ -44,7 +44,7 @@ class GatherNdInt8CPUKernel : public InnerKernel { int *in_offset_ = nullptr; int8_t *in_ptr_ = nullptr; int8_t *out_ptr_ = nullptr; - GatherQuantArg param_; + GatherQuantArg param_ = {}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc index ab7a19f7eef..dc624a12ef4 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/group_convolution_int8.cc @@ -29,7 +29,7 @@ int GroupConvolutionInt8CPUKernel::SeparateInput(int group_id) { int8_t *src_ptr = reinterpret_cast(ori_in_data_) + group_id * sub_in_channel; int8_t *dst_ptr = sub_in_data; for (int i = 0; i < in_plane; ++i) { - memcpy(dst_ptr, src_ptr, 
sub_in_channel * sizeof(int8_t)); + memcpy(dst_ptr, src_ptr, static_cast(sub_in_channel) * sizeof(int8_t)); src_ptr += ori_in_channel; dst_ptr += sub_in_channel; } @@ -45,7 +45,7 @@ int GroupConvolutionInt8CPUKernel::PostConcat(int group_id) { int8_t *src_ptr = sub_out_data; int8_t *dst_ptr = reinterpret_cast(ori_out_data_) + group_id * sub_out_channel; for (int i = 0; i < out_plane; ++i) { - memcpy(dst_ptr, src_ptr, sub_out_channel * sizeof(int8_t)); + memcpy(dst_ptr, src_ptr, static_cast(sub_out_channel) * sizeof(int8_t)); src_ptr += sub_out_channel; dst_ptr += ori_out_channel; } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h index 9eaf4883a1f..6d7c057f262 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/hswish_int8.h @@ -37,7 +37,7 @@ class HswishInt8CPUKernel : public InnerKernel { private: int thread_count_; - HswishQuantArg quant_arg_; + HswishQuantArg quant_arg_ = {}; void MultiplierInt32ToInt16(int32_t input, int16_t *output) const; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h index e112d6fa4af..137ebe2d6b0 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/leaky_relu_int8.h @@ -39,7 +39,7 @@ class LeakyReluInt8CPUKernel : public InnerKernel { int DoExecute(int task_id); private: - LeakyReluQuantArg quant_prelu_parm_; + LeakyReluQuantArg quant_prelu_parm_ = {}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc index 46f1b2ddcff..fe8cd176587 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.cc @@ -187,29 +187,21 @@ int 
MulInt8CPUKernel::Run() { int FastHWBroadcastMulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto mul = reinterpret_cast(cdata); - auto ret = mul->FastDoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "FastHWBroadcastMulInt8Run task_id " << task_id << " failed."; - return ret; - } + mul->FastDoExecute(task_id); return lite::RET_OK; } int MulInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto mul = reinterpret_cast(cdata); - auto ret = mul->DoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "MulInt8Run task_id " << task_id << " failed."; - return ret; - } + mul->DoExecute(task_id); return lite::RET_OK; } -int MulInt8CPUKernel::FastDoExecute(int task_id) { +void MulInt8CPUKernel::FastDoExecute(int task_id) { int depth = out_tensors_.front()->Channel(); int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return lite::RET_OK; + return; } int8_t *cur_input0_data = input0_data_; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_ * depth; @@ -219,20 +211,19 @@ int MulInt8CPUKernel::FastDoExecute(int task_id) { cur_input1_data = input0_data_ + task_id * count_unit_ * depth; } FastMul(cur_input0_data, cur_input1_data, cur_output_data, depth, real_dst_count, input1_hw_broadcast_, quant_args_); - return RET_OK; } -int MulInt8CPUKernel::DoExecute(int task_id) { +void MulInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return lite::RET_OK; + return; } int8_t *cur_input0_data = input0_data_ + task_id * count_unit_; int8_t *cur_input1_data = input1_data_ + task_id * count_unit_; int8_t *cur_output_data = output_data_ + task_id * count_unit_; Mul(cur_input0_data, cur_input1_data, cur_output_data, real_dst_count, quant_args_); - return lite::RET_OK; + return; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_MulFusion, LiteKernelCreator) 
diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h index 4a82b301950..1d483f93d4c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/mul_int8.h @@ -39,8 +39,8 @@ class MulInt8CPUKernel : public InnerKernel { void CheckSameShapeSize(std::vector in_tensor0_shape, std::vector in_tensor1_shape); void CheckIfFastImpl(); int Run() override; - int DoExecute(int task_id); - int FastDoExecute(int task_id); + void DoExecute(int task_id); + void FastDoExecute(int task_id); private: const lite::InnerContext *ctx_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc index 7e24d9d7361..31552f5cc76 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc @@ -30,16 +30,17 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i } void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel) { + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel) { return MatmulInt8DpNeon64(a, b, dst, UP_ROUND(row, C8NUM), UP_ROUND(col, C8NUM), deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); } void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t 
*left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, int32_t *filter_zp) { + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, + const int32_t *filter_zp) { return MatmulInt8DpOpt(a, b, dst, row, col, deep_4, input_sum, bias, mini, maxi, output_zp, multiplier, left_shift, right_shift, stride, per_channel, filter_zp); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h index bf60e312410..302268d003c 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.h @@ -25,11 +25,11 @@ extern "C" { void MatMulOptR4Int8Neon64(const int8_t *a, const int8_t *b, int *dst, int row4, int col4, int deep16, const int *input_sum, const int *bias); void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, int row8, int col8, int deep4, const int *a_sums, - const int *bias, int act_min, int act_max, int out_zp, int *multiplier, int *left_shift, - int *right_shift, int row, int col, int stride, size_t peroc); + const int *bias, int act_min, int act_max, int out_zp, const int *multiplier, + const int *left_shift, const int *right_shift, int row, int col, int stride, size_t peroc); void MatmulInt8DpOpt(const int8_t *a, const int8_t *b, int8_t *dst, size_t row8, size_t col8, size_t deep4, - const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, int *multiplier, - int *left_shift, int *right_shift, size_t stride, size_t peroc, int *filter_zp); + const int *a_sums, const int *bias, int act_min, int act_max, int out_zp, const int *multiplier, + const int *left_shift, const int *right_shift, size_t stride, size_t peroc, const int *filter_zp); #ifdef ENABLE_ARM64 
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, size_t ksize, size_t ic4, size_t output_channel, size_t offset, @@ -40,13 +40,14 @@ void MatMulR4Int8_optimize_handler(const int8_t *a, const int8_t *b, int *dst, i const int *input_sum, const int *bias); void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel); + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel); void MatMulDpInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, size_t row, size_t col, size_t deep_4, - size_t stride, const int32_t *input_sum, const int32_t *bias, int32_t *left_shift, - int32_t *right_shift, int32_t *multiplier, int32_t output_zp, int32_t mini, - int32_t maxi, size_t per_channel, int32_t *filter_zp); + size_t stride, const int32_t *input_sum, const int32_t *bias, + const int32_t *left_shift, const int32_t *right_shift, const int32_t *multiplier, + int32_t output_zp, int32_t mini, int32_t maxi, size_t per_channel, + const int32_t *filter_zp); #endif #ifdef __cplusplus diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc index 0b3c8ea1f87..d45afdee830 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/pad_int8.cc @@ -85,7 +85,7 @@ int PadInt8CPUKernel::SetQuantParam() { int PadInt8CPUKernel::InitPadParam() { auto in_dims = in_tensors_.at(0)->shape(); auto out_dims = out_tensors_.at(0)->shape(); - int ndims = in_dims.size(); + int ndims = 
static_cast(in_dims.size()); int in[] = {1, 1, 1, 1}; int out[] = {1, 1, 1, 1}; @@ -267,7 +267,8 @@ int PadInt8CPUKernel::Run() { int error_code; if (pad_param_->pad_mode_ == static_cast(schema::PaddingMode_CONSTANT)) { - memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], out_tensors_[0]->ElementsNum() * sizeof(int8_t)); + memset(out_data_, pad_param_->pad_quant_arg_.constant_value_[0], + static_cast(out_tensors_[0]->ElementsNum()) * sizeof(int8_t)); error_code = ParallelLaunch(this->ms_context_, PadInt8Impl, this, op_parameter_->thread_num_); if (error_code != RET_OK) { MS_LOG(ERROR) << "Resize run error, error_code[" << error_code << "]"; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h index 6f4c0718542..a66943c81fd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reduce_int8.h @@ -93,7 +93,7 @@ class ReduceInt8CPUKernel : public ReduceBaseCPUKernel { bool valid_shape_ = false; bool pattern_impl_ = false; Four_DIMENSION_REDUCE_TEMPLATE pattern_; - QuantMulArg reduce_mean_quant_param_; // used in reduce mean 4D situation + QuantMulArg reduce_mean_quant_param_ = {}; // used in reduce mean 4D situation Reducer reducer_ = nullptr; LastReducer last_reducer_ = nullptr; std::vector mean_multipliers_; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h index fad5a09c0f6..ffc79ac2f24 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/relux_int8.h @@ -37,7 +37,7 @@ class ReluXInt8CPUKernel : public InnerKernel { int Run() override; int DoActivation(int task_id); - ReluXQuantArg quant_arg_; + ReluXQuantArg quant_arg_ = {}; private: int type_{0}; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc 
b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc index aa0362f5528..45fc3a784d9 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.cc @@ -63,18 +63,14 @@ int ReshapeInt8CPUKernel::Run() { int ReshapeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto reshape = reinterpret_cast(cdata); - auto ret = reshape->DoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Reshapeint8Run task_id " << task_id << " failed."; - return ret; - } + reshape->DoExecute(task_id); return lite::RET_OK; } -int ReshapeInt8CPUKernel::DoExecute(int task_id) { +void ReshapeInt8CPUKernel::DoExecute(int task_id) { int64_t real_dst_count = MSMIN(elements_num_ - task_id * count_unit_, count_unit_); if (real_dst_count <= 0) { - return lite::RET_OK; + return; } MS_ASSERT(input_data_); MS_ASSERT(output_data_); @@ -82,7 +78,7 @@ int ReshapeInt8CPUKernel::DoExecute(int task_id) { int8_t *cur_output_data = output_data_ + task_id * count_unit_; Int8Reshape(cur_input0_data, cur_output_data, real_dst_count, reshape_param_->quant_para_); - return lite::RET_OK; + return; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Reshape, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h index b0f5276c425..fa5b18c4f73 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/reshape_int8.h @@ -37,7 +37,7 @@ class ReshapeInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int task_id); + void DoExecute(int task_id); private: int64_t elements_num_ = 0; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc index de1092a72ba..d77fb20b694 100644 --- 
a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.cc @@ -37,20 +37,32 @@ constexpr unsigned int OFFSET_BASE = 10; } // namespace void ResizeInt8CPUKernel::FreeResizeBiLinear() { free(resize_quant_arg_.x_axis_index_); + resize_quant_arg_.x_axis_index_ = nullptr; free(resize_quant_arg_.x_axis_lower_); + resize_quant_arg_.x_axis_lower_ = nullptr; free(resize_quant_arg_.x_axis_upper_); + resize_quant_arg_.x_axis_upper_ = nullptr; free(resize_quant_arg_.y_axis_index_); + resize_quant_arg_.y_axis_index_ = nullptr; free(resize_quant_arg_.y_axis_lower_); + resize_quant_arg_.y_axis_lower_ = nullptr; free(resize_quant_arg_.y_axis_upper_); + resize_quant_arg_.y_axis_upper_ = nullptr; } void ResizeInt8CPUKernel::FreeFloatResizeBiLinear() { free(resize_float_quant_arg_.x_axis_index_); + resize_float_quant_arg_.x_axis_index_ = nullptr; free(resize_float_quant_arg_.x_axis_lower_); + resize_float_quant_arg_.x_axis_lower_ = nullptr; free(resize_float_quant_arg_.x_axis_upper_); + resize_float_quant_arg_.x_axis_upper_ = nullptr; free(resize_float_quant_arg_.y_axis_index_); + resize_float_quant_arg_.y_axis_index_ = nullptr; free(resize_float_quant_arg_.y_axis_lower_); + resize_float_quant_arg_.y_axis_lower_ = nullptr; free(resize_float_quant_arg_.y_axis_upper_); + resize_float_quant_arg_.y_axis_upper_ = nullptr; } ResizeInt8CPUKernel::~ResizeInt8CPUKernel() { diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h index 874267bc9cb..6d5881c57bd 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/resize_int8.h @@ -52,8 +52,8 @@ class ResizeInt8CPUKernel : public ResizeBaseCPUKernel { QuantArg *quant_in_{nullptr}; QuantArg *quant_out_{nullptr}; QuantMulArg *multiplier_{nullptr}; - ResizeQuantArg resize_quant_arg_; - ResizeFloatScaleQuantArg resize_float_quant_arg_; + 
ResizeQuantArg resize_quant_arg_ = {}; + ResizeFloatScaleQuantArg resize_float_quant_arg_ = {}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc index ee42ef26f3c..3ae9295ee40 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/split_int8.cc @@ -42,7 +42,7 @@ int SplitInt8CPUKernel::Init() { auto in_quant_args = in_tensor->quant_params(); param->quant_arg_.in_args_.scale_ = in_quant_args.front().scale; param->quant_arg_.in_args_.zp_ = in_quant_args.front().zeroPoint; - MS_ASSERT(param->num_split_ == this->out_tensors_.size()); + MS_ASSERT(static_cast(param->num_split_) == this->out_tensors_.size()); for (int i = 0; i < param->num_split_; i++) { auto *out_tensor = out_tensors_.at(i); auto out_quant_args = out_tensor->quant_params(); @@ -91,7 +91,7 @@ int SplitInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { int SplitInt8CPUKernel::Run() { auto in_tensor = in_tensors_.at(kInputIndex); input_ptr_ = reinterpret_cast(in_tensor->MutableData()); - MS_ASSERT(param->num_split_ == this->out_tensors_.size()); + MS_ASSERT(static_cast(param->num_split_) == this->out_tensors_.size()); for (int i = 0; i < param->num_split_; i++) { output_ptr_[i] = reinterpret_cast(out_tensors_.at(i)->data_c()); } diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc index 884cd364a13..ed60486fc6d 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.cc @@ -64,7 +64,7 @@ int SqueezeInt8CPUKernel::Init() { auto quant_params = output_tensor->quant_params(); MS_ASSERT(quant_params.size() == 1); quant_squeeze_param_->out_quant_args_ = reinterpret_cast(malloc(sizeof(QuantArg))); - if (quant_squeeze_param_->in_quant_args_ == nullptr) { + if 
(quant_squeeze_param_->out_quant_args_ == nullptr) { MS_LOG(ERROR) << "malloc QuantArg failed"; if (quant_squeeze_param_ != nullptr) { if (quant_squeeze_param_->in_quant_args_ != nullptr) { @@ -97,15 +97,11 @@ int SqueezeInt8CPUKernel::Run() { int SqueezeInt8Run(void *cdata, int task_id, float lhs_scale, float rhs_scale) { auto Squeeze = reinterpret_cast(cdata); - auto ret = Squeeze->DoExecute(task_id); - if (ret != RET_OK) { - MS_LOG(ERROR) << "SqueezeInt8Run task_id " << task_id << " failed."; - return ret; - } + Squeeze->DoExecute(task_id); return RET_OK; } -int SqueezeInt8CPUKernel::DoExecute(int task_id) { +void SqueezeInt8CPUKernel::DoExecute(int task_id) { auto input_tensor = in_tensors_.at(kInputIndex); MS_ASSERT(input_tensor); auto out_tensor = out_tensors_.at(kOutputIndex); @@ -117,7 +113,6 @@ int SqueezeInt8CPUKernel::DoExecute(int task_id) { int num = input_tensor->ElementsNum(); SqueezeInt8(input_data, output_data, quant_squeeze_param_, num, task_id, op_parameter_->thread_num_); - return RET_OK; } REG_KERNEL(kCPU, kNumberTypeInt8, PrimitiveType_Squeeze, LiteKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h index 066f9987c2e..65b3d6b7fb5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/squeeze_int8.h @@ -36,7 +36,7 @@ class SqueezeInt8CPUKernel : public InnerKernel { int Init() override; int ReSize() override; int Run() override; - int DoExecute(int tId); + void DoExecute(int tId); private: SqueezeQuantArg *quant_squeeze_param_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h index 15df0e25cef..5507bc93255 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/tanh_int8.h @@ -46,7 +46,7 @@ class TanhInt8CPUKernel : public InnerKernel { int 
element_size_{0}; int thread_count_{0}; int thread_stride_{0}; - TanhQuantParameter tanh_quant_; + TanhQuantParameter tanh_quant_ = {}; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc index 3442093c104..1f981e90fc5 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc +++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.cc @@ -79,7 +79,7 @@ int TransposeInt8CPUKernel::DoTranspose(int task_id) { return RET_OK; } -void TransposeInt8CPUKernel::GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, +void TransposeInt8CPUKernel::GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor, const TransposeParameter *param) { auto out_shape = out_tensor->shape(); if (in_tensor->shape().size() == DIMENSION_4D && param->perm_[0] == 0 && param->perm_[1] == 2 && diff --git a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h index c8aed254a6e..dbee9ab45c3 100644 --- a/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h +++ b/mindspore/lite/src/runtime/kernel/arm/int8/transpose_int8.h @@ -44,7 +44,8 @@ class TransposeInt8CPUKernel : public InnerKernel { int DoTranspose(int task_id); private: - void GetNHNCTransposeFunc(lite::Tensor *in_tensor, lite::Tensor *out_tensor, const TransposeParameter *param); + void GetNHNCTransposeFunc(const lite::Tensor *in_tensor, const lite::Tensor *out_tensor, + const TransposeParameter *param); TransposeParameter *transpose_param_; TransposeFunc NHNCTransposeFunc_ = nullptr; int8_t *in_ptr_ = nullptr; diff --git a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl index 130e296409f..bbc8a9852f6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl +++ 
b/mindspore/lite/src/runtime/kernel/opencl/cl/pooling2d.cl @@ -1,6 +1,7 @@ #ifdef cl_khr_fp16 #pragma OPENCL EXTENSION cl_khr_fp16 : enable #endif +#define LOCAL_CACHE_THREAD 16 #define divide_no_check(a, b) (a / b) __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape, @@ -34,41 +35,11 @@ __kernel void AvgPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i } } FLT4 result = TO_FLT4(divide_no_check(r, window_size)); - WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), result); -} - -__kernel void AvgPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, - const int4 input_shape, const int4 output_shape, const int2 stride, - const int2 kernel_size, const int2 padding) { - // axis to dst tensor coordinate - int X = get_global_id(2); // N*H - int Y = get_global_id(1); // W - int Z = get_global_id(0); // C4 - int N = X / output_shape.y; - X = X % output_shape.y; - // boundary check - if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { - return; - } - - FLT4 r = (FLT4)(0.0f); - FLT window_size = 0.0f; - int xs = X * stride.x - padding.x; - int ys = Y * stride.y - padding.y; - - for (int ky = 0; ky < kernel_size.y; ++ky) { - int y_c = ys + ky; - bool outside_y = y_c < 0 || y_c >= input_shape.z; - for (int kx = 0; kx < kernel_size.x; ++kx) { - int x_c = xs + kx; - bool outside = outside_y || x_c < 0 || x_c >= input_shape.y; - r += - !outside ? READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c)) : (FLT4)(0.0f); - window_size += !outside ? 
1.0f : 0.0f; - } - } - FLT4 result = TO_FLT4(divide_no_check(r, window_size)); +#ifdef RELU WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(result, (FLT4)(0.f))); +#else + WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), result); +#endif } __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, const int4 input_shape, @@ -98,35 +69,41 @@ __kernel void MaxPooling2d_NHWC4_IMG(__read_only image2d_t input, __write_only i maximum = max(src, maximum); } } +#ifdef RELU + WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f))); +#else WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), maximum); +#endif } -__kernel void MaxPooling2d_ReLU_NHWC4_IMG(__read_only image2d_t input, __write_only image2d_t output, - const int4 input_shape, const int4 output_shape, const int2 stride, - const int2 kernel_size, const int2 padding) { - // axis to dst tensor coordinate - int X = get_global_id(2); // N*H - int Y = get_global_id(1); // W - int Z = get_global_id(0); // C4 - int N = X / output_shape.y; - X = X % output_shape.y; - // boundary check - if (N >= output_shape.x || X >= output_shape.y || Y >= output_shape.z || Z >= output_shape.w) { - return; - } - - FLT4 maximum = (FLT4)(-10000.0f); - int xs = X * stride.x - padding.x; - int ys = Y * stride.y - padding.y; - for (int ky = 0; ky < kernel_size.y; ++ky) { - int y_c = ys + ky; - if (y_c < 0 || y_c >= input_shape.z) continue; - for (int kx = 0; kx < kernel_size.x; ++kx) { - int x_c = xs + kx; - if (x_c < 0 || x_c >= input_shape.y) continue; - FLT4 src = READ_IMAGE(input, smp_zero, (int2)(y_c * input_shape.w + Z, N * input_shape.y + x_c)); - maximum = max(src, maximum); +__kernel void AvgPooling2d_global_NHWC4_IMG(__read_only image2d_t src_data, __write_only image2d_t dst_data, + int4 size) { + int X = get_global_id(0); // C4 + int localy = get_local_id(1); + int 
localz = get_local_id(2); + if (X >= size.z) return; + __local float4 temp[LOCAL_CACHE_THREAD][LOCAL_CACHE_THREAD]; + temp[localy][localz] = (float4)0.f; + for (int h = localy; h < size.x; h += LOCAL_CACHE_THREAD) { + for (int w = localz; w < size.y; w += LOCAL_CACHE_THREAD) { + temp[localy][localz] += convert_float4(READ_IMAGE(src_data, smp_zero, (int2)(w * size.z + X, h))); } } - WRITE_IMAGE(output, (int2)(Y * output_shape.w + Z, N * output_shape.y + X), max(maximum, (FLT4)(0.f))); + barrier(CLK_LOCAL_MEM_FENCE); + if (localz == 0) { + for (int i = 1; i < LOCAL_CACHE_THREAD; i++) { + temp[localy][0] += temp[localy][i]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + float4 result = temp[0][0]; + for (int i = 1; i < LOCAL_CACHE_THREAD; i++) { + result += temp[i][0]; + } + result /= size.x * size.y; +#ifdef RELU + WRITE_IMAGE(dst_data, (int2)(X, 0), max(TO_FLT4(result), (FLT4)(0.f))); +#else + WRITE_IMAGE(dst_data, (int2)(X, 0), TO_FLT4(result)); +#endif } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc index 0ac112b88d3..f7dab80ed41 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.cc @@ -65,37 +65,53 @@ int ActivationOpenCLKernel::CheckSpecs() { int ActivationOpenCLKernel::Prepare() { outShape = GpuTensorInfo(out_tensors_[0]); std::string source = activation_source; - std::string program_name = "Activation"; + const std::string program_name = "Activation"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; } - std::string kernel_name = GetActTypeString(type_); + const std::string kernel_name = GetActTypeString(type_); auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); if (ret != RET_OK) { MS_LOG(ERROR) << 
"Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " init Done!"; return RET_OK; } -void ActivationOpenCLKernel::SetConstArgs() { +int ActivationOpenCLKernel::SetConstArgs() { int arg_idx = 2; cl_int2 image_size = {static_cast(outShape.width), static_cast(outShape.height)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, image_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type_ == ActivationType_LEAKY_RELU) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (type_ == ActivationType_SIGMOID) { int c4 = outShape.Slice; int last_c4 = outShape.C % 4 == 0 ? 4 : outShape.C % 4; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, c4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, last_c4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void ActivationOpenCLKernel::SetGlobalLocal() { @@ -107,8 +123,14 @@ void ActivationOpenCLKernel::SetGlobalLocal() { int ActivationOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ret != RET_OK) { MS_LOG(ERROR) << "Run kernel:" << this->name() << " fail."; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h index 0c47e8955a3..7031a9a8f9e 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/activation.h @@ -35,7 +35,7 @@ class ActivationOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc index 48e0cfe5054..8d7118776a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/utils.h" #include "src/runtime/kernel/opencl/kernel/argminmax.h" @@ -58,19 +59,41 @@ int ArgMinMaxOpenCLKernel::CheckSpecs() { return RET_OK; } -void ArgMinMaxOpenCLKernel::SetConstArgs() { +int ArgMinMaxOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(op_parameter_); cl_int4 in_shape{static_cast(im_in_.N), static_cast(im_in_.H), static_cast(im_in_.W), static_cast(im_in_.C)}; cl_int4 flags = {param->out_value_, param->get_max_, param->axis_, param->topk_}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF); - 
ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, buff_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, ids_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, cus_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, strides_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, flags) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ArgMinMaxOpenCLKernel::SetGlobalLocal() { @@ -134,14 +157,22 @@ int ArgMinMaxOpenCLKernel::InitWeights() { auto allocator = ocl_runtime_->GetAllocator(); int dtype_size = ocl_runtime_->GetFp16Enable() ? 
sizeof(int16_t) : sizeof(float); buff_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * dtype_size, lite::opencl::MemType::BUF); + if (buff_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } ids_ = allocator->Malloc(in_tensors_[0]->ElementsNum() * sizeof(int32_t), lite::opencl::MemType::BUF); + if (ids_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } return RET_OK; } int ArgMinMaxOpenCLKernel::Prepare() { - std::string kernel_name = "argminmax"; + const std::string kernel_name = "argminmax"; std::string source = argminmax_source; - std::string program_name = "argminmax"; + const std::string program_name = "argminmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -162,16 +193,28 @@ int ArgMinMaxOpenCLKernel::Prepare() { InitWeights(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int ArgMinMaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h index ec3b70ce256..220949e3e2c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/argminmax.h @@ -32,7 +32,7 @@ class ArgMinMaxOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int InitWeights() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc index 44ff1a45694..b5afadce8a3 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.cc @@ -98,6 +98,10 @@ int ArithmeticOpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -106,7 +110,7 @@ int ArithmeticOpenCLKernel::InitWeights() { return RET_OK; } -void ArithmeticOpenCLKernel::SetConstArgs() { +int ArithmeticOpenCLKernel::SetConstArgs() { int arg_idx = 3; if (!element_flag_) { cl_int4 in0_shape = {static_cast(in0_shape_.N), static_cast(in0_shape_.H), static_cast(in0_shape_.W), @@ -121,16 +125,38 @@ void ArithmeticOpenCLKernel::SetConstArgs() { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) { broadcastC_flag = 2; // BroadCast C4 in input1 } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_int2 output_shape{static_cast(global_range_[0]), static_cast(global_range_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int ArithmeticOpenCLKernel::Prepare() { @@ -179,7 +205,7 @@ int ArithmeticOpenCLKernel::Prepare() { activation_max_ = 6.f; } - std::string program_name = "Arithmetic"; + const std::string program_name = "Arithmetic"; std::string source = arithmetic_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -196,7 +222,10 @@ int ArithmeticOpenCLKernel::Prepare() { if (type() != PrimitiveType_BiasAdd) { InitWeights(); } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; return RET_OK; } @@ -206,10 +235,22 @@ int ArithmeticOpenCLKernel::Run() { auto input_0_ptr = weight_ptrs_[0] == nullptr ? in_tensors_[0]->data_c() : weight_ptrs_[0]; auto input_1_ptr = weight_ptrs_[1] == nullptr ? 
in_tensors_[1]->data_c() : weight_ptrs_[1]; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h index ff7bfa922b1..e19386cf3b4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic.h @@ -35,7 +35,7 @@ class ArithmeticOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc index 4a30f4c33c6..dbc619ab884 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.cc @@ -86,7 +86,7 @@ int ArithmeticSelfOpenCLKernel::Prepare() { kernel_name += 
std::string(schema::EnumNamePrimitiveType(type())) + "_NHWC4"; } MS_LOG(DEBUG) << "execute kernel name : " << kernel_name; - std::string program_name = "ArithmeticSelf"; + const std::string program_name = "ArithmeticSelf"; if (!ocl_runtime_->LoadSource(program_name, arithmeticself_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -98,15 +98,27 @@ int ArithmeticSelfOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } int ArithmeticSelfOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h index 2419ee40783..4cd9e2ba16a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/arithmetic_self.h @@ -47,7 +47,13 @@ class ArithmeticSelfOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override { ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_); } + int SetConstArgs() override { + if 
(ocl_runtime_->SetKernelArg(kernel_, 2, output_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; + } void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc index c0dbd556b05..105b5abb051 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.cc @@ -55,7 +55,7 @@ int BatchToSpaceNDOpenCLKernel::CheckSpecs() { return RET_OK; } -void BatchToSpaceNDOpenCLKernel::SetConstArgs() { +int BatchToSpaceNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -66,10 +66,23 @@ void BatchToSpaceNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->crops_[0], param->crops_[1], param->crops_[2], param->crops_[3]}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { @@ -82,9 +95,9 @@ 
void BatchToSpaceNDOpenCLKernel::SetGlobalLocal() { } int BatchToSpaceNDOpenCLKernel::Prepare() { - std::string kernel_name = "batch_to_space_nd_NHWC4"; + const std::string kernel_name = "batch_to_space_nd_NHWC4"; std::string source = batch_to_space_nd_source; - std::string program_name = "batch_to_space_nd"; + const std::string program_name = "batch_to_space_nd"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -96,16 +109,28 @@ int BatchToSpaceNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int BatchToSpaceNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h index aeeced68781..df756af6778 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batch_to_space_nd.h @@ -32,7 +32,7 @@ class BatchToSpaceNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int 
CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc index b135ed41c3d..56577306bbe 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.cc @@ -59,15 +59,25 @@ void BatchNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } -void BatchNormOpenCLKernel::SetConstArgs() { +int BatchNormOpenCLKernel::SetConstArgs() { int arg_cn = 6; auto param = reinterpret_cast(this->op_parameter_); auto input0_shape = in_tensors_.at(0)->shape(); cl_int4 input_shape_ = {input0_shape.at(0), input0_shape.at(1), input0_shape.at(2), UP_DIV(input0_shape.at(3), C4NUM)}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param->epsilon_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input0_shape.at(3)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void BatchNormOpenCLKernel::SetGlobalLocal() { @@ -83,6 +93,41 @@ void BatchNormOpenCLKernel::SetGlobalLocal() { OpenCLKernel::AlignGlobalLocal(global_size_, local_size_); } +int BatchNormOpenCLKernel::UnmapBuffer() { + auto allocator = ocl_runtime_->GetAllocator(); + if (allocator->UnmapBuffer(scale_) != RET_OK) { + return RET_ERROR; + } + if (allocator->UnmapBuffer(offset_) != RET_OK) { + return RET_ERROR; + } + if 
(allocator->UnmapBuffer(mean_) != RET_OK) { + return RET_ERROR; + } + if (allocator->UnmapBuffer(variance_) != RET_OK) { + return RET_ERROR; + } + return RET_OK; +} + +int BatchNormOpenCLKernel::MapBuffer() { + auto allocator = ocl_runtime_->GetAllocator(); + if (allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + if (allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true) == nullptr) { + return RET_ERROR; + } + + return RET_OK; +} + int BatchNormOpenCLKernel::Initweight() { auto allocator = ocl_runtime_->GetAllocator(); GpuTensorInfo img_info(in_tensors_.at(1)); @@ -90,15 +135,30 @@ int BatchNormOpenCLKernel::Initweight() { size_t weight_size = img_info.OriginSize; // allocated memory for weight and init value scale_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (scale_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (offset_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } mean_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (mean_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } variance_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (variance_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } - allocator->MapBuffer(scale_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(offset_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(mean_, CL_MAP_WRITE, nullptr, true); - allocator->MapBuffer(variance_, CL_MAP_WRITE, nullptr, true); - + if (MapBuffer() != RET_OK) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(scale_, 1, weight_size); 
memset(offset_, 0x00, weight_size); memset(mean_, 0x00, weight_size); @@ -153,18 +213,18 @@ int BatchNormOpenCLKernel::Initweight() { memcpy(variance_, in_tensors_.at(4)->data_c(), weight_size); } } - allocator->UnmapBuffer(scale_); - allocator->UnmapBuffer(offset_); - allocator->UnmapBuffer(mean_); - allocator->UnmapBuffer(variance_); + if (UnmapBuffer() != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } int BatchNormOpenCLKernel::Prepare() { use_fp16_enable_ = ocl_runtime_->GetFp16Enable(); - std::string kernel_name = "Batch_normalization_NHWC4"; + const std::string kernel_name = "Batch_normalization_NHWC4"; std::string source = batchnorm_source; - std::string program_name = "Batch_normalization"; + const std::string program_name = "Batch_normalization"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -181,7 +241,10 @@ int BatchNormOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Initweight failed "; return RET_ERROR; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -190,13 +253,34 @@ int BatchNormOpenCLKernel::Prepare() { int BatchNormOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF); // scale - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF); // offset - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF); // variance - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, scale_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // scale + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, offset_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // offset + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // mean + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, variance_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // variance + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // out tensor + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h index 80b217febba..7f7b90710d5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/batchnorm.h @@ -32,11 +32,13 @@ class BatchNormOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: int Initweight(); + int UnmapBuffer(); + int MapBuffer(); private: bool use_fp16_enable_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc index b022b270417..08e24d4fd68 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.cc @@ -52,9 +52,13 @@ int CastOpenCLKernel::CheckSpecs() { return RET_OK; } -void CastOpenCLKernel::SetConstArgs() { +int CastOpenCLKernel::SetConstArgs() { cl_int2 shape = {static_cast(shape_.width), static_cast(shape_.height)}; - ocl_runtime_->SetKernelArg(kernel_, 2, shape); + if (ocl_runtime_->SetKernelArg(kernel_, 2, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void CastOpenCLKernel::SetGlobalLocal() { @@ -68,8 +72,8 @@ int CastOpenCLKernel::Prepare() { {kNumberTypeFloat32, "fp32"}, {kNumberTypeFloat16, "fp16"}, }; - std::string program_name = "Cast"; - std::string kernel_name = + const std::string program_name = "Cast"; + const std::string kernel_name = "Cast_" + dtype_names[in_tensors_.front()->data_type()] + "_to_" + dtype_names[out_tensors_.front()->data_type()]; if (!ocl_runtime_->LoadSource(program_name, cast_source)) { MS_LOG(ERROR) << "Load source failed."; @@ -80,16 +84,28 @@ int CastOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() 
!= RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } int CastOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h index 3db1f15a008..68fc43cd6c9 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/cast.h @@ -31,7 +31,7 @@ class CastOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc index 6beebbfbe29..05a986da862 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.cc @@ -38,7 +38,10 @@ int ConcatOpenCLKernel::RunAxis0() { auto *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = weight_ptrs_.at(i) == nullptr ? 
in_tensors_[i]->data_c() : weight_ptrs_.at(i); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; auto *input_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -107,7 +110,7 @@ int ConcatOpenCLKernel::CheckSpecs() { return RET_OK; } -void ConcatOpenCLKernel::SetConstArgs() { +int ConcatOpenCLKernel::SetConstArgs() { GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = ocl_runtime_->GetFp16Enable() ? sizeof(cl_half) : sizeof(cl_float); stride_w = img_info.RowPitch() / dtype; @@ -124,9 +127,15 @@ void ConcatOpenCLKernel::SetConstArgs() { temp.s[j] = in_tensor->shape()[j]; } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); } else { for (auto &in_tensor : in_tensors_) { cl_int4 temp = {}; @@ -135,11 +144,18 @@ void ConcatOpenCLKernel::SetConstArgs() { } Broadcast2GpuShape(in_shape_.s, temp.s, in_tensor->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + return RET_OK; } void ConcatOpenCLKernel::SetGlobalLocal() { @@ -190,6 +206,10 @@ int ConcatOpenCLKernel::ConvertWeightToTensor() { } ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -222,7 +242,7 @@ int ConcatOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = concat_source; - std::string program_name = "Concat"; + const std::string program_name = "Concat"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -234,7 +254,10 @@ int ConcatOpenCLKernel::Prepare() { return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -247,14 +270,27 @@ int ConcatOpenCLKernel::Run() { int arg_cn = 0; for (int i = 0; i < in_tensors_.size(); ++i) { auto input_ptr = weight_ptrs_.at(i) == nullptr ? 
in_tensors_[i]->data_c() : weight_ptrs_.at(i); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (axis_ == 3 && !Align_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h index 9b3ffae6bb4..363888eaf2c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/concat.h @@ -31,7 +31,7 @@ class ConcatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc index 26f77796123..bfed62a5129 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.cc @@ -108,7 +108,10 @@ int Conv2DOpenCLKernel::Prepare() { return ret; } 
SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -142,7 +145,7 @@ void Conv2DOpenCLKernel::InitAttrs() { int Conv2DOpenCLKernel::BuildKernel() { SetBlockSize(); - std::string program_name = "conv2d"; + const std::string program_name = "conv2d"; std::stringstream kernel_name; kernel_name << "Conv2D_H" << block_size_.H << "W" << block_size_.W << "C" << block_size_.C; if (filter_type_ == MemType::IMG) { @@ -245,9 +248,11 @@ void Conv2DOpenCLKernel::SetMaliFp16BlockSize(int task_size_per_cu, bool w_kerne } int Conv2DOpenCLKernel::InitWeights() { - InitFilter(); + if (InitFilter() != RET_OK) { + return RET_ERROR; + } if (has_bias_) { - InitBias(); + return InitBias(); } return RET_OK; } @@ -300,7 +305,7 @@ void ConvertFilter(void *src, void *dst, TypeId src_dtype, TypeId dst_dtype, Fil } } -void Conv2DOpenCLKernel::InitFilter() { +int Conv2DOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -312,9 +317,17 @@ void Conv2DOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { size = UP_DIV(CO_SLICES_, Ogroup) * KH_ * KW_ * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, lite::opencl::MemType::BUF); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } // rearrange filter @@ -333,15 +346,22 @@ void Conv2DOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memcpy(packed_filter_, tmp.data(), size); - allocator->UnmapBuffer(packed_filter_); + if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } FreeStoredData(stored_filter_); + return RET_OK; } -void Conv2DOpenCLKernel::InitBias() { +int Conv2DOpenCLKernel::InitBias() { auto allocator = ocl_runtime_->GetAllocator(); // align bias from C to C4 @@ -349,8 +369,15 @@ void Conv2DOpenCLKernel::InitBias() { void *src_data = stored_bias_ == nullptr ? 
bias_tensor->data_c() : stored_bias_; size_t packed_bias_size = UP_ROUND(CO_SLICES_, block_size_.C) * CO_TILE * sizeof_FLT_; packed_bias_ = allocator->Malloc(packed_bias_size, lite::opencl::MemType::BUF); + if (packed_bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } - allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_bias_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(packed_bias_, 0x00, packed_bias_size); if (bias_tensor->data_type() == kNumberTypeFloat16) { if (use_fp16_) { @@ -375,11 +402,15 @@ void Conv2DOpenCLKernel::InitBias() { memcpy(packed_bias_, src_data, CO_ * sizeof_FLT_); } } - allocator->UnmapBuffer(packed_bias_); + if (allocator->UnmapBuffer(packed_bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); + return RET_OK; } -void Conv2DOpenCLKernel::SetConstArgs() { +int Conv2DOpenCLKernel::SetConstArgs() { cl_int4 input_shape = {batch_size_, IH_, IW_, CI_SLICES_}; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; cl_int4 kernel_stride = {KH_, KW_, param_->stride_h_, param_->stride_w_}; @@ -387,15 +418,43 @@ void Conv2DOpenCLKernel::SetConstArgs() { cl_int2 dilation = {param_->dilation_h_, param_->dilation_w_}; int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_); + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, kernel_stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, dilation) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, param_->act_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void Conv2DOpenCLKernel::SetGlobalLocal() { @@ -429,9 +488,18 @@ void Conv2DOpenCLKernel::SetGlobalLocal() { int Conv2DOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, 
out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h index f12ec7124f7..751b960774a 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d.h @@ -53,7 +53,7 @@ class Conv2DOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -78,8 +78,8 @@ class Conv2DOpenCLKernel : public OpenCLKernel { protected: void InitAttrs(); virtual int BuildKernel(); - virtual void InitFilter(); - void InitBias(); + virtual int InitFilter(); + int InitBias(); bool use_fp16_{false}; size_t sizeof_FLT_{4}; ConvParameter *param_{nullptr}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc index c3a5d528ecb..16bd63384c5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.cc @@ -55,10 +55,10 @@ int Conv2dTransposeOpenCLKernel::CheckSpecs() { } int Conv2dTransposeOpenCLKernel::Prepare() { - std::string kernel_name = "conv2d_transpose"; + const std::string kernel_name = "conv2d_transpose"; enable_fp16_ = ocl_runtime_->GetFp16Enable(); std::string source = GetActDefines() + conv2d_transpose_source; - std::string program_name = "conv2d_transpose"; + const std::string program_name = "conv2d_transpose"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source 
failed."; return RET_ERROR; @@ -74,7 +74,10 @@ int Conv2dTransposeOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -94,7 +97,7 @@ void Conv2dTransposeOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void Conv2dTransposeOpenCLKernel::SetConstArgs() { +int Conv2dTransposeOpenCLKernel::SetConstArgs() { int arg_cnt = 2; auto *param = reinterpret_cast(op_parameter_); int ci = in_tensors_[0]->shape()[3]; @@ -115,14 +118,39 @@ void Conv2dTransposeOpenCLKernel::SetConstArgs() { cl_int2 padding = {pad_h, pad_w}; cl_int4 src_size = {h, w, UP_DIV(ci, C4NUM), n}; cl_int4 dst_size = {oh, ow, UP_DIV(co, C4NUM), n}; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, static_cast(param->act_type_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int Conv2dTransposeOpenCLKernel::InitWeights() { @@ -147,7 +175,15 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { // IHWO to OHWI4(I)4(O)(converter format is IHWO) // init padWeight_(buffer mem) padWeight_ = allocator->Malloc(div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(padWeight_, 0x00, div_ci * div_co * C4NUM * C4NUM * kh * kw * data_size); auto origin_weight = stored_weight_ == nullptr ? 
in_tensors_.at(kWeightIndex)->data_c() : stored_weight_; auto weight_dtype = in_tensors_.at(kWeightIndex)->data_type(); @@ -188,7 +224,10 @@ int Conv2dTransposeOpenCLKernel::InitFilter() { } } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return RET_OK; } @@ -208,7 +247,15 @@ int Conv2dTransposeOpenCLKernel::InitBias() { } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, div_co * C4NUM * data_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -225,7 +272,10 @@ int Conv2dTransposeOpenCLKernel::InitBias() { memcpy(bias_, src_data, co * data_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -233,9 +283,18 @@ int Conv2dTransposeOpenCLKernel::InitBias() { int Conv2dTransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_cnt = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << 
"SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h index 70caeb50ced..b709dee59b0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/conv2d_transpose.h @@ -34,7 +34,7 @@ class Conv2dTransposeOpenCLKernel : public OpenCLKernel { int InitWeights() override; int InitFilter(); int InitBias(); - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc index 7e9f7f7b572..73733bafd20 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.cc @@ -73,7 +73,7 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { } else { block_size_.C = block_size_.H = block_size_.W = 1; } - std::string program_name = "DepthwiseConv2d"; + const std::string program_name = "DepthwiseConv2d"; std::string source = depthwise_conv2d_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -94,7 +94,10 @@ int DepthwiseConv2dOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done! mem type=" << static_cast(out_mem_type_); return RET_OK; } @@ -153,10 +156,12 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() { size_t img_dtype = ocl_runtime_->GetFp16Enable() ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype}; packed_weight_ = allocator->Malloc(img_size, temp_filter.data()); + } else { packed_weight_ = allocator->Malloc(pack_weight_size, temp_filter.data()); } if (packed_weight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } FreeStoredData(stored_weight_); @@ -199,13 +204,15 @@ int DepthwiseConv2dOpenCLKernel::InitBias() { } bias_data_ = allocator->Malloc(bias_size, temp_bias.data()); if (bias_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; return RET_ERROR; } + FreeStoredData(stored_bias_); return RET_OK; } -void DepthwiseConv2dOpenCLKernel::SetConstArgs() { +int DepthwiseConv2dOpenCLKernel::SetConstArgs() { auto parameter = reinterpret_cast(op_parameter_); auto in_info = GpuTensorInfo(in_tensors_[0]); auto out_info = GpuTensorInfo(out_tensors_[0]); @@ -222,16 +229,47 @@ void DepthwiseConv2dOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {(cl_int)out_info.W, (cl_int)out_info.H, (cl_int)CO4, (cl_int)out_info.N}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, packed_weight_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, bias_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dilation) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].first) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, relu_clips[parameter->act_type_].second) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void DepthwiseConv2dOpenCLKernel::SetGlobalLocal() { @@ -286,9 +324,18 @@ int DepthwiseConv2dOpenCLKernel::StoreConstData() { int DepthwiseConv2dOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + 
return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h index 8fdbed9d1bd..91626bb9606 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/depthwise_conv2d.h @@ -41,7 +41,7 @@ class DepthwiseConv2dOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int InitWeights() override; int InitBias(); - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc index a42d0f9b9d1..dac1c248bcf 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.cc @@ -35,7 +35,10 @@ int FillOpenCLKernel::RunFill() { cl_int4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto src_data = out_tensors_[0]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -59,7 +62,7 @@ int FillOpenCLKernel::RunShape() { return RET_OK; } -void FillOpenCLKernel::SetConstArgs() {} +int FillOpenCLKernel::SetConstArgs() { 
return RET_OK; } void FillOpenCLKernel::SetGlobalLocal() {} diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h index e60da1d447a..0828414c7b6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fill.h @@ -31,7 +31,7 @@ class FillOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc index 00971e0b5fa..f86b979bf9c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.cc @@ -98,7 +98,7 @@ int FullConnectionOpenCLKernel::Prepare() { kernel_name = "FullConnectionWeightVar"; } std::string source = fullconnection_source; - std::string program_name = "FullConnection"; + const std::string program_name = "FullConnection"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,7 +113,10 @@ int FullConnectionOpenCLKernel::Prepare() { if (ret != RET_OK) { return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -137,7 +140,15 @@ int FullConnectionOpenCLKernel::InitFilter() { size_t dtype_size = enable_fp16_ ? 
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size); @@ -183,7 +194,10 @@ int FullConnectionOpenCLKernel::InitFilter() { } } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return RET_OK; } @@ -202,7 +216,15 @@ int FullConnectionOpenCLKernel::InitBias() { } ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? 
in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -218,7 +240,10 @@ int FullConnectionOpenCLKernel::InitBias() { memcpy(bias_, src_data, CO_ * dtype_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -231,22 +256,44 @@ void FullConnectionOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void FullConnectionOpenCLKernel::SetConstArgs() { +int FullConnectionOpenCLKernel::SetConstArgs() { if (!weight_var_) { - ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, 2, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } int arg_count = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, N_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto intensor_shape = GpuTensorInfo(in_tensors_[0]); int CI4 = CI_remainder_ * intensor_shape.Slice; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, CI4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, UP_DIV(CO_, C4NUM)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto in_shape_info = GpuTensorInfo(in_tensors_[0]); cl_int2 in_img_shape = {static_cast(in_shape_info.height), static_cast(in_shape_info.width)}; - ocl_runtime_->SetKernelArg(kernel_, 
arg_count++, in_img_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_img_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } auto *param = reinterpret_cast(op_parameter_); - ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count, static_cast(param->act_type_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int FullConnectionOpenCLKernel::StoreConstData() { @@ -270,12 +317,24 @@ int FullConnectionOpenCLKernel::StoreConstData() { int FullConnectionOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_count = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - if (weight_var_) { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (weight_var_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h index be830de30ee..09bc05d2f74 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fullconnection.h @@ -31,7 +31,7 @@ class FullConnectionOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int StoreConstData() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc index f96d4583eb1..faaa7e81a00 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc @@ -164,8 +164,8 @@ bool IsEltwiseAndOperatorSupported(LiteKernel *node) { int FusionEltwiseOpenCLKernel::Prepare() { std::string source = Codegen(); - std::string program_name = "FusionEltwise\n" + source; - std::string kernel_name = "FusionEltwise"; + const std::string program_name = "FusionEltwise\n" + source; + const std::string kernel_name = "FusionEltwise"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -183,7 +183,10 @@ int FusionEltwiseOpenCLKernel::Prepare() { } InitWeights(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -217,7 +220,14 @@ int FusionEltwiseOpenCLKernel::InitWeights() { size_t num = tensor_info.ElementsNum; size_t size = tensor_info.Image2DSize; void *buffer = allocator->Malloc(size, lite::opencl::MemType::BUF); - allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true); + if (buffer == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(buffer, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return 
RET_ERROR; + } memset(buffer, 0x00, size); if (tensor->data_type() == kNumberTypeFloat16) { if (use_fp16) { @@ -232,7 +242,10 @@ int FusionEltwiseOpenCLKernel::InitWeights() { CopyNumber(buffer, tensor->data_c(), num); } } - allocator->UnmapBuffer(buffer); + if (allocator->UnmapBuffer(buffer) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } buffer_weights_.push_back(buffer); } } @@ -247,7 +260,7 @@ void FusionEltwiseOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void FusionEltwiseOpenCLKernel::SetConstArgs() { +int FusionEltwiseOpenCLKernel::SetConstArgs() { auto output = GpuTensorInfo(out_tensors_.front()); cl_int4 output_shape = {static_cast(output.N), static_cast(output.H), static_cast(output.W), static_cast(output.C)}; @@ -260,18 +273,32 @@ void FusionEltwiseOpenCLKernel::SetConstArgs() { if (IsScalar(in_tensor->shape())) { if (ocl_runtime_->GetFp16Enable()) { auto value = static_cast(scalar_weights_[scalar_idx++]); - ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, *(reinterpret_cast(&value))) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, scalar_weights_[scalar_idx++]) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, buffer_weights_[buffer_idx++], lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } arg_idx++; // for act input } arg_idx++; // for output - ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, output_shape) != 
CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int FusionEltwiseOpenCLKernel::Run() { @@ -279,12 +306,21 @@ int FusionEltwiseOpenCLKernel::Run() { int arg_idx = 0; for (auto *in_tensor : in_tensors_) { if (!in_tensor->IsConst()) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, in_tensor->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } arg_idx++; } - ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h index 800c1aa4c0a..b585273cfad 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.h @@ -162,7 +162,7 @@ class FusionEltwiseOpenCLKernel : public OpenCLKernel { int Prepare() override; int InitWeights() override; void SetGlobalLocal() override; - void SetConstArgs() override; + int SetConstArgs() override; int Run() override; void ClearParameter() { op_parameter_ = nullptr; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc index 251c0df94c1..68dbaf98b4b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.cc @@ -81,7 +81,7 @@ int 
GatherOpenCLKernel::CheckSpecs() { } } -void GatherOpenCLKernel::SetConstArgs() { +int GatherOpenCLKernel::SetConstArgs() { auto input = GpuTensorInfo(in_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front()); int indices_num = in_tensors_.at(1)->ElementsNum(); @@ -90,10 +90,23 @@ void GatherOpenCLKernel::SetConstArgs() { cl_int4 dst_size = {static_cast(output.W), static_cast(output.H), static_cast(output.Slice), static_cast(output.N)}; int arg_cnt = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, indices_num) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt, axis_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void GatherOpenCLKernel::SetGlobalLocal() { @@ -104,11 +117,11 @@ void GatherOpenCLKernel::SetGlobalLocal() { } int GatherOpenCLKernel::Prepare() { - std::string kernel_name = "gather"; + const std::string kernel_name = "gather"; if (in_tensors_.at(0)->shape().size() == 1 && axis_ == 0) { axis_ = 3; } - std::string program_name = "gather"; + const std::string program_name = "gather"; if (!ocl_runtime_->LoadSource(program_name, gather_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -127,7 +140,10 @@ int GatherOpenCLKernel::Prepare() { } } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + 
return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -135,11 +151,21 @@ int GatherOpenCLKernel::Prepare() { int GatherOpenCLKernel::ConvertTensorToweight() { auto allocator = ocl_runtime_->GetAllocator(); auto indices_tensor = in_tensors_.at(1); - allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(indices_tensor->data_c(), CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto indices_num = indices_tensor->ElementsNum(); indices_data_ = reinterpret_cast(allocator->Malloc(sizeof(int32_t) * indices_num, lite::opencl::MemType::BUF)); - allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true); + if (indices_data_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(indices_data_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } if (indices_data_ == nullptr) { MS_LOG(ERROR) << "Memory allocation failed"; return RET_ERROR; @@ -155,8 +181,14 @@ int GatherOpenCLKernel::ConvertTensorToweight() { << " But Your type is :" << data_type; return RET_ERROR; } - allocator->UnmapBuffer(indices_data_); - allocator->UnmapBuffer(indices_tensor->data_c()); + if (allocator->UnmapBuffer(indices_data_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } + if (allocator->UnmapBuffer(indices_tensor->data_c()) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -197,7 +229,10 @@ int GatherOpenCLKernel::PreProcess() { if (!InferShapeDone()) { auto indices_tensor = in_tensors_[1]; if (!indices_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } indices_tensor->MutableData(); } } @@ -209,10 +244,22 @@ int GatherOpenCLKernel::Run() { if 
(intensor1_is_tensor) { ConvertTensorToweight(); } - ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 2, indices_data_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h index 5ec2047f2d0..78f3e2d531b 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/gather.h @@ -34,7 +34,7 @@ class GatherOpenCLKernel : public OpenCLKernel { int PreProcess() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int ConvertTensorToweight(); diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc index b803bae593e..74504b8e983 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.cc @@ -98,6 +98,10 @@ int 
ArithmeticInt8OpenCLKernel::InitWeights() { size_t dtype = fp16_enable ? CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{in_shape.width, in_shape.height, dtype}; auto weight_ptr_ = allocator->Malloc(img_size, weight.data()); + if (weight_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } weight_ptrs_.push_back(weight_ptr_); } else { weight_ptrs_.push_back(nullptr); @@ -106,7 +110,7 @@ int ArithmeticInt8OpenCLKernel::InitWeights() { return RET_OK; } -void ArithmeticInt8OpenCLKernel::SetConstArgs() { +int ArithmeticInt8OpenCLKernel::SetConstArgs() { int arg_idx = 3; if (!element_flag_) { cl_int4 in0_shape = {static_cast(in0_shape_.N), static_cast(in0_shape_.H), static_cast(in0_shape_.W), @@ -121,16 +125,37 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() { } else if (in0_shape_.C != 1 && in1_shape_.C == 1) { broadcastC_flag = 2; // BroadCast C4 in input1 } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in0_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in1_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, broadcastC_flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_int2 output_shape{static_cast(global_range_[0]), static_cast(global_range_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) 
<< "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_min_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, activation_max_); // set quantization parameter. auto input0_quant_param = in_tensors_[0]->quant_params().front(); @@ -141,8 +166,15 @@ void ArithmeticInt8OpenCLKernel::SetConstArgs() { cl_char4 zero_point = {static_cast(input0_quant_param.zeroPoint), static_cast(input1_quant_param.zeroPoint), static_cast(output_quant_param.zeroPoint), 0}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); // scale - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point); // zero_point + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // scale + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, zero_point) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // zero_point + return RET_OK; } int ArithmeticInt8OpenCLKernel::Prepare() { @@ -191,7 +223,7 @@ int ArithmeticInt8OpenCLKernel::Prepare() { activation_max_ = 6.f; } - std::string program_name = "Arithmetic"; + const std::string program_name = "Arithmetic"; std::string source = arithmetic_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -207,7 +239,10 @@ int ArithmeticInt8OpenCLKernel::Prepare() { if (type() != PrimitiveType_BiasAdd) { InitWeights(); } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name_ << " Init Done!"; return RET_OK; } @@ -218,10 +253,22 @@ int 
ArithmeticInt8OpenCLKernel::Run() { auto input_1_ptr = weight_ptrs_[1] == nullptr ? in_tensors_[1]->data_c() : weight_ptrs_[1]; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_0_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_1_ptr) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h index 667ea8f4763..3f8feb78749 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/int8/arithmetic_int8.h @@ -33,7 +33,7 @@ class ArithmeticInt8OpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc index 08f552c8d34..ea3599de657 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.cc @@ 
-67,15 +67,31 @@ void LayerNormGetWorkGroup(const std::vector &global, std::vectorpush_back(z); } -void LayerNormOpenCLKernel::SetConstArgs() { +int LayerNormOpenCLKernel::SetConstArgs() { int arg_cn = 6; GpuTensorInfo img_info(in_tensors_.at(0)); in_shape_.s[0] = img_info.N, in_shape_.s[1] = img_info.H, in_shape_.s[2] = img_info.W, in_shape_.s[3] = img_info.C; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_); - ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_); - ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, epsilon_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, normalized_axis_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 3, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, 4, normalized_shape_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void AlignMeanVarGlobalLocal(const std::vector &global, const std::vector &local, cl::NDRange *global_range, @@ -106,9 +122,23 @@ int LayerNormOpenCLKernel::Initweight() { size_t weight_size = img_info.Image2DSize; // allocated memory for weight and init value gamma_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); + if (gamma_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } beta_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(gamma_, CL_MAP_WRITE, 
nullptr, true); - allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true); + if (beta_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(gamma_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(beta_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(gamma_, 0x01, weight_size); memset(beta_, 0x00, weight_size); @@ -143,8 +173,14 @@ int LayerNormOpenCLKernel::Initweight() { memcpy(beta_, in_tensors_.at(2)->data_c(), weight_size); } } - allocator->UnmapBuffer(gamma_); - allocator->UnmapBuffer(beta_); + if (allocator->UnmapBuffer(gamma_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } + if (allocator->UnmapBuffer(beta_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -164,11 +200,19 @@ int LayerNormOpenCLKernel::Prepare() { size_t size_dtype = use_fp16_enable_ ? 
sizeof(float16_t) : sizeof(float); mean_size *= size_dtype; mean_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); + if (mean_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } var_ = allocator->Malloc(mean_size, lite::opencl::MemType::BUF); - std::string kernel_name = "LayerNormalization_NHWC4"; + if (var_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + const std::string kernel_name = "LayerNormalization_NHWC4"; std::string kernel_name_mean_var = "ComputeMeanVar"; std::string source = layer_norm_source; - std::string program_name = "LayerNormalization"; + const std::string program_name = "LayerNormalization"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -182,7 +226,10 @@ int LayerNormOpenCLKernel::Prepare() { kernel_name_mean_var += "Axis" + std::to_string(normalized_axis_) + "NHWC4"; ocl_runtime_->BuildKernel(kernel_mean_var_, program_name, kernel_name_mean_var, build_options_ext); MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -191,21 +238,48 @@ int LayerNormOpenCLKernel::Prepare() { int LayerNormOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; int arg1_cn = 0; - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF); // mean_ - ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF); // var_ return RET_OK; + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, mean_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_mean_var_, arg1_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } ocl_runtime_->RunKernel(kernel_mean_var_, global_mean_var_, local_mean_var_, nullptr, &event_); int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); // input tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); // out tensor - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, lite::opencl::MemType::BUF); // mean_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF); // var_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF); // gamma_ - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF); // beta_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // input tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // out tensor + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, mean_, 
lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // mean_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, var_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // var_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, gamma_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // gamma_ + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, beta_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } // beta_ ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; -} +} // namespace mindspore::kernel REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_LayerNormFusion, OpenCLKernelCreator) REG_KERNEL(kGPU, kNumberTypeFloat16, PrimitiveType_LayerNormFusion, OpenCLKernelCreator) diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h index 67f40e01ad0..ca432abca14 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/layer_norm.h @@ -31,7 +31,7 @@ class LayerNormOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc index 3815743c0c4..dc5b5b6cd51 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.cc @@ -84,7 +84,7 @@ int MatMulOpenCLKernel::Prepare() { std::map dims2str = {{2, "_2d"}, {3, "_4d"}, {4, "_4d"}}; kernel_name += dims2str[dims]; std::string source = matmul_source; - std::string 
program_name = "MatMul"; + const std::string program_name = "MatMul"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -95,13 +95,16 @@ int MatMulOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int co) { +int MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int co) { auto allocator = ocl_runtime_->GetAllocator(); int a = weight_shape_4d[0]; int b = weight_shape_4d[1]; @@ -109,7 +112,15 @@ void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int int co4 = UP_DIV(co, C4NUM); size_t dtype_size = enable_fp16_ ? sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, a * b * ci4 * co4 * C4NUM * C4NUM * dtype_size); @@ -157,6 +168,7 @@ void MatMulOpenCLKernel::PadWeight(std::vector weight_shape_4d, int ci, int } } } + return RET_OK; } int MatMulOpenCLKernel::InitWeights() { @@ -185,7 +197,10 @@ int MatMulOpenCLKernel::InitWeights() { PadWeight(weight_shape_4d, ci, CO_); - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_weight_); return InitBias(); } @@ -204,7 +219,15 @@ int 
MatMulOpenCLKernel::InitBias() { } lite::opencl::ImageSize img_size{im_dst_x, im_dst_y, img_dtype}; bias_ = allocator->Malloc(img_size); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } bias_ = allocator->MapBuffer(bias_, CL_MAP_WRITE, nullptr, true); + if (bias_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(bias_, 0x00, co4 * C4NUM * dtype_size); if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) { void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data_c() : stored_bias_; @@ -220,7 +243,10 @@ int MatMulOpenCLKernel::InitBias() { memcpy(bias_, src_data, CO_ * dtype_size); } } - allocator->UnmapBuffer(bias_); + if (allocator->UnmapBuffer(bias_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } FreeStoredData(stored_bias_); return RET_OK; } @@ -235,29 +261,54 @@ void MatMulOpenCLKernel::SetGlobalLocal() { AlignGlobalLocal(global_size_, local_size_); } -void MatMulOpenCLKernel::SetConstArgs() { +int MatMulOpenCLKernel::SetConstArgs() { int arg_count = 2; cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; if (act_weight_) { arg_count++; } else { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, padWeight_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, bias_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int MatMulOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_count = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()); - if (act_weight_) { - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (act_weight_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_tensors_[1]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h index 54aee868ba4..02c62986c18 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/matmul.h @@ -32,7 +32,7 @@ class MatMulOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override { return lite::RET_OK; } int InitBias(); @@ -54,7 +54,7 @@ class MatMulOpenCLKernel : public OpenCLKernel { 
std::vector outShape{std::vector(MAX_DIMS, 1)}; private: - void PadWeight(std::vector weight_shape_4d, int ci, int co); + int PadWeight(std::vector weight_shape_4d, int ci, int co); }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc index f6f231c1605..fe128cf5c49 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.cc @@ -48,7 +48,7 @@ int OneHotOpenCLKernel::Prepare() { kernel_name += "Axis" + std::to_string(axis_); } std::string source = one_hot_source; - std::string program_name = "OneHot"; + const std::string program_name = "OneHot"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -65,7 +65,10 @@ int OneHotOpenCLKernel::Prepare() { return ret; } InitWeights(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -87,18 +90,40 @@ int OneHotOpenCLKernel::InitWeights() { return RET_OK; } -void OneHotOpenCLKernel::SetConstArgs() { +int OneHotOpenCLKernel::SetConstArgs() { cl_int2 cl_in_image2d_shape = {static_cast(in_shape_.width), static_cast(in_shape_.height)}; cl_int4 cl_out_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), static_cast(out_shape_.Slice)}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(out_shape_.C)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx, 
static_cast(param_->support_neg_index_)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_image2d_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, depth_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, on_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, off_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(out_shape_.C)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, static_cast(param_->support_neg_index_)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void OneHotOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -108,9 +133,18 @@ void OneHotOpenCLKernel::SetGlobalLocal() { int OneHotOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + 
MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h index 7efcc4e556f..add5beaf7bd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/one_hot.h @@ -33,7 +33,7 @@ class OneHotOpenCLKernel : public OpenCLKernel { int Prepare() override; int InitWeights() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc index fee30266b16..3cd6fdd054f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.cc @@ -81,11 +81,14 @@ int PadOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } -void PadOpenCLKernel::SetConstArgs() { +int PadOpenCLKernel::SetConstArgs() { auto input = GpuTensorInfo(in_tensors_.front()); auto output = GpuTensorInfo(out_tensors_.front()); cl_int4 input_shape = {static_cast(input.N), static_cast(input.H), static_cast(input.W), @@ -105,20 +108,45 @@ void PadOpenCLKernel::SetConstArgs() { Broadcast2GpuShape(pad_before.s, pad_before_ori.data(), ndim, 0); int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg 
failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, pad_before) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, param_->constant_value_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } local_size_ = {8, 4, 1}; global_size_ = {output.N * output.H, output.W, output.Slice}; AlignGlobalLocal(global_size_, local_size_); + return RET_OK; } int PadOpenCLKernel::Run() { - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h index 4464241d1d6..3752982727d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pad.h @@ -35,7 +35,7 @@ class PadOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; - void SetConstArgs() override; + int SetConstArgs() 
override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc index 668863226b8..9f1fd5c8763 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.cc @@ -53,18 +53,25 @@ int PoolingOpenCLKernel::CheckSpecs() { return RET_OK; } -int PoolingOpenCLKernel::Prepare() { +int PoolingOpenCLKernel::BuildKernel() { std::string kernel_name; if (parameter_->pool_mode_ == PoolMode_MaxPool) { kernel_name = "MaxPooling2d"; } else if (parameter_->pool_mode_ == PoolMode_AvgPool) { kernel_name = "AvgPooling2d"; } + + if (parameter_->global_ && + (parameter_->window_h_ >= LOCAL_CACHE_THREAD || parameter_->window_w_ >= LOCAL_CACHE_THREAD)) { + kernel_name += "_global"; + is_use_local_ = true; + } + auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); switch (parameter_->act_type_) { case ActType_No: break; case ActType_Relu: - kernel_name += "_ReLU"; + build_options_ext.emplace_back("-DRELU"); break; default: MS_LOG(ERROR) << "Unsupported activation type " << parameter_->act_type_; @@ -73,34 +80,49 @@ int PoolingOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; kernel_name += "_IMG"; std::string source = pooling2d_source; - std::string program_name = "Pooling2d"; + const std::string program_name = "Pooling2d"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; } - auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); auto ret = ocl_runtime_->BuildKernel(kernel_, program_name, kernel_name, build_options_ext); if (ret != RET_OK) { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); - SetGlobalLocal(); - MS_LOG(DEBUG) << kernel_name << " Init Done!"; + return RET_OK; +} +int PoolingOpenCLKernel::Prepare() { + input_tensor_ = GpuTensorInfo(in_tensors_[0]); + if 
(BuildKernel() != RET_OK) { + MS_LOG(ERROR) << "BuildKernel failed."; + return RET_ERROR; + } + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } + SetGlobalLocal(); return RET_OK; } void PoolingOpenCLKernel::SetGlobalLocal() { - const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0]; - const size_t global_y = out_tensors_[0]->shape()[2]; - const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); - global_size_ = {global_z, global_y, global_x}; - local_size_ = {}; - AlignGlobalLocal(global_size_, local_size_); + if (is_use_local_) { + local_size_ = {1, LOCAL_CACHE_THREAD, LOCAL_CACHE_THREAD}; + global_size_ = {static_cast(input_tensor_.Slice), 1, 1}; + AlignGlobalLocal(global_size_, local_size_); + } else { + const size_t global_x = out_tensors_[0]->shape()[1] * out_tensors_[0]->shape()[0]; + const size_t global_y = out_tensors_[0]->shape()[2]; + const size_t global_z = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); + global_size_ = {global_z, global_y, global_x}; + local_size_ = {}; + AlignGlobalLocal(global_size_, local_size_); + } } -void PoolingOpenCLKernel::SetConstArgs() { +int PoolingOpenCLKernel::SetGlobalConstArgs() { int slices = UP_DIV(out_tensors_[0]->shape()[3], C4NUM); cl_int4 input_shape = {in_tensors_[0]->shape()[0], in_tensors_[0]->shape()[1], in_tensors_[0]->shape()[2], slices}; cl_int4 output_shape = {out_tensors_[0]->shape()[0], out_tensors_[0]->shape()[1], out_tensors_[0]->shape()[2], @@ -109,19 +131,73 @@ void PoolingOpenCLKernel::SetConstArgs() { cl_int2 kernel_size = {parameter_->window_h_, parameter_->window_w_}; cl_int2 padding = {parameter_->pad_u_, parameter_->pad_l_}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size); - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, stride) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, kernel_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, padding) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; +} + +int PoolingOpenCLKernel::SetLocalConstArgs() { + int h = input_tensor_.H; + int w = input_tensor_.W; + int c = input_tensor_.C; + int c4 = UP_DIV(c, C4NUM); + cl_int4 size = {h, w, c4, c}; + int arg_idx = 2; + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; +} + +int PoolingOpenCLKernel::SetConstArgs() { + if (is_use_local_) { + return SetLocalConstArgs(); + } else { + return SetGlobalConstArgs(); + } +} + +int PoolingOpenCLKernel::Tune() { + if (is_use_local_) { + return RET_OK; + } + return OpenCLKernel::Tune(); } int PoolingOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 
arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h index e47b34b1bf0..1bc0cb86440 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/pooling2d.h @@ -32,11 +32,20 @@ class PoolingOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; + int Tune() override; + + private: + int BuildKernel(); + int SetGlobalConstArgs(); + int SetLocalConstArgs(); private: PoolingParameter *parameter_; + bool is_use_local_ = false; + static const size_t LOCAL_CACHE_THREAD{16}; + GpuTensorInfo input_tensor_; }; } // namespace mindspore::kernel diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc index 817c6aaeeaf..b9d8890fb5c 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.cc @@ -63,15 +63,21 @@ void PowerGetWorkGroup(const std::vector &global, std::vector *l local->push_back(z); } -void PowerOpenCLKernel::SetConstArgs() { +int PowerOpenCLKernel::SetConstArgs() { float unalign_w = static_cast(out_shape_.s[3]); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); int arg_cn = 2; if (!broadcast_) { arg_cn++; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - 
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } if (use_fp16_enable_) { auto x = static_cast(power_); @@ -80,11 +86,18 @@ void PowerOpenCLKernel::SetConstArgs() { auto w = static_cast(unalign_w); cl_half4 parameter = {*(reinterpret_cast(&x)), *(reinterpret_cast(&y)), *(reinterpret_cast(&z)), *(reinterpret_cast(&w))}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { cl_float4 parameter = {power_, shift_, scale_, unalign_w}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, parameter) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void PowerOpenCLKernel::SetGlobalLocal() { @@ -111,7 +124,7 @@ int PowerOpenCLKernel::Prepare() { auto param = reinterpret_cast(this->op_parameter_); std::string kernel_name = "power"; std::string source = power_source; - std::string program_name = "power"; + const std::string program_name = "power"; if (broadcast_) { power_ = param->power_; kernel_name += "_broadcast"; @@ -130,7 +143,10 @@ int PowerOpenCLKernel::Prepare() { } MS_LOG(DEBUG) << kernel_name << " Init Done!"; SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } return RET_OK; } @@ -138,13 +154,28 @@ int PowerOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; int arg_cn = 0; if (broadcast_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(1)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(0)->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h index 71934bd7b92..ea36486b0a5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/power.h @@ -30,7 +30,7 @@ class PowerOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc index 9e7f08a1510..2784f06b708 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc 
+++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.cc @@ -46,7 +46,14 @@ int PReluOpenCLKernel::InitWeights() { auto sizeof_FLT = enable_fp16_ ? sizeof(float16_t) : sizeof(float); size_t weight_size = UP_ROUND(C_, C4NUM) * sizeof_FLT; weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); + if (weight_vector_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(weight_vector_, 0x00, weight_size); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (enable_fp16_) { @@ -69,7 +76,10 @@ int PReluOpenCLKernel::InitWeights() { memcpy(weight_vector_, weight_tensor->data_c(), C_ * sizeof_FLT); } } - allocator->UnmapBuffer(weight_vector_); + if (allocator->UnmapBuffer(weight_vector_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } return RET_OK; } @@ -95,11 +105,18 @@ int PReluOpenCLKernel::CheckSpecs() { return RET_OK; } -void PReluOpenCLKernel::SetConstArgs() { +int PReluOpenCLKernel::SetConstArgs() { int arg_idx = 3; out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 2) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void PReluOpenCLKernel::SetGlobalLocal() { @@ -126,8 +143,8 @@ int PReluOpenCLKernel::Prepare() { weight_is_scalar = param->channelShared; enable_fp16_ = ocl_runtime_->GetFp16Enable(); std::string source = prelu_source; - std::string program_name = "PRelu"; - std::string 
kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); + const std::string program_name = "PRelu"; + const std::string kernel_name = "PRelu_" + std::string(weight_is_scalar ? "scalar" : "vector"); if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -141,7 +158,10 @@ int PReluOpenCLKernel::Prepare() { InitWeights(); MS_LOG(DEBUG) << program_name << " init Done!"; MS_LOG(DEBUG) << "kernel_name=: " << kernel_name << " init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -149,12 +169,24 @@ int PReluOpenCLKernel::Prepare() { int PReluOpenCLKernel::Run() { MS_LOG(DEBUG) << op_parameter_->name_ << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (weight_is_scalar) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_scalar_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } auto ret = ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); if (ret != mindspore::lite::RET_OK) { diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h index 739149eee49..b6e6d3de247 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/prelu.h @@ -31,7 +31,7 @@ class PReluOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; int InitWeights() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc index 237820dc37f..4186f6911c7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "include/errorcode.h" #include "src/kernel_registry.h" #include "src/runtime/kernel/opencl/kernel/reduce.h" @@ -179,7 +180,7 @@ int ReduceOpenCLKernel::Prepare() { } kernel_name += GetReduceTypeStr(reduce_param->mode_); std::string source = reduce_source; - std::string program_name = "Reduce"; + const std::string program_name = "Reduce"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -190,22 +191,32 @@ int ReduceOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void ReduceOpenCLKernel::SetConstArgs() { +int ReduceOpenCLKernel::SetConstArgs() { int h = inShape.H; int w = inShape.W; int c = inShape.C; int c4 = UP_DIV(c, C4NUM); cl_int4 size = {h, w, c4, c}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size); - if (wc_reduce_ || c_reduce_) { - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } + if (wc_reduce_ || c_reduce_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, GenC4Mask()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + return RET_OK; } void ReduceOpenCLKernel::SetGlobalLocal() { int h = inShape.H; @@ -235,9 +246,18 @@ int ReduceOpenCLKernel::Tune() { int ReduceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h index 2d359a19ee7..ae70347aaa0 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reduce.h @@ -32,7 +32,7 @@ class ReduceOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc 
b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc index 79116366827..b343ecc5ed2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.cc @@ -53,15 +53,22 @@ int ReshapeOpenCLKernel::CheckSpecs() { return RET_OK; } -void ReshapeOpenCLKernel::SetConstArgs() { +int ReshapeOpenCLKernel::SetConstArgs() { auto in = GpuTensorInfo(in_tensors_.front()); auto out = GpuTensorInfo(out_tensors_.front()); cl_int4 src_size = {cl_int(in.C), cl_int(in.W), cl_int(in.H), cl_int(in.N)}; cl_int4 dst_size = {cl_int(out.width), cl_int(out.height), cl_int(out.C), cl_int(out.C * out.W)}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ReshapeOpenCLKernel::SetGlobalLocal() { @@ -72,9 +79,9 @@ void ReshapeOpenCLKernel::SetGlobalLocal() { } int ReshapeOpenCLKernel::Prepare() { - std::string kernel_name = "reshape_NHWC4"; + const std::string kernel_name = "reshape_NHWC4"; std::string source = reshape_source; - std::string program_name = "reshape"; + const std::string program_name = "reshape"; auto build_options_ext = CreateBuildOptionsExtByDType(this->registry_data_type_); if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -87,16 +94,28 @@ int ReshapeOpenCLKernel::Prepare() { } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } int ReshapeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; - 
ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } @@ -104,7 +123,10 @@ int ReshapeOpenCLKernel::PreProcess() { if (type() == PrimitiveType_Reshape && !InferShapeDone()) { auto shape_tensor = in_tensors_[1]; if (!shape_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } shape_tensor->MutableData(); } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h index 149e50ab96c..7b9025b5866 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/reshape.h @@ -30,7 +30,7 @@ class ReshapeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int PreProcess() override; }; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc index 8d4156db470..cf91a167f4f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.cc @@ -64,7 +64,7 @@ int ResizeOpenCLKernel::Prepare() { } 
kernel_name += "_NHWC4"; std::string source = resize_source; - std::string program_name = "Resize"; + const std::string program_name = "Resize"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -75,7 +75,10 @@ int ResizeOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; @@ -87,7 +90,7 @@ float ResizeOpenCLKernel::getResizeScaleFactor(int input_size, int output_size) : static_cast(input_size) / static_cast(output_size); } -void ResizeOpenCLKernel::SetConstArgs() { +int ResizeOpenCLKernel::SetConstArgs() { auto in_shape = in_tensors_[0]->shape(); auto out_shape = out_tensors_[0]->shape(); int n = out_shape[0]; @@ -101,9 +104,19 @@ void ResizeOpenCLKernel::SetConstArgs() { cl_int4 out_size = {n, h, w, c4}; cl_float2 scale = {scale_h, scale_w}; int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ResizeOpenCLKernel::SetGlobalLocal() { @@ -116,9 +129,18 @@ void ResizeOpenCLKernel::SetGlobalLocal() { int ResizeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } @@ -126,7 +148,10 @@ int ResizeOpenCLKernel::PreProcess() { if (type() == PrimitiveType_Resize && !InferShapeDone() && in_tensors_.size() == INPUT_TENSOR_SIZE_2) { auto shape_tensor = in_tensors_[1]; if (!shape_tensor->IsConst()) { - ocl_runtime_->SyncCommandQueue(); + if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + return RET_ERROR; + } shape_tensor->MutableData(); } } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h index 38b5eee6d9e..ea73e0b10a7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/resize.h @@ -31,7 +31,7 @@ class ResizeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int PreProcess() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc index f298fff5958..14c83e0a780 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.cc @@ -98,14 +98,30 @@ int ScaleOpenCLKernel::InitWeights() { 
img_size.height = 1; img_size.width = UP_DIV(scale_tensor->shape()[0], C4NUM); scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } return RET_OK; } if (in_tensor->format() == scale_tensor->format()) { if (in_tensor->data_type() == scale_tensor->data_type()) { scale_ptr_ = allocator->Malloc(img_size, scale_tensor->data_c()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset_tensor->data_c()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -121,7 +137,15 @@ int ScaleOpenCLKernel::InitWeights() { PackNHWCToNHWC4(scale_tensor->data_c(), scale.data(), src_is_fp16, fp16_enable, image2d_info); PackNHWCToNHWC4(offset_tensor->data_c(), offset.data(), src_is_fp16, fp16_enable, image2d_info); scale_ptr_ = allocator->Malloc(img_size, scale.data()); + if (scale_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } offset_ptr_ = allocator->Malloc(img_size, offset.data()); + if (offset_ptr_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { MS_LOG(ERROR) << "Unsupported data type transpose from " << scale_tensor->data_type() << "to " << in_tensor->data_type(); @@ -175,7 +199,7 @@ int ScaleOpenCLKernel::Prepare() { } else { kernel_name += "_BUF"; } - std::string program_name = "Scale"; + const std::string program_name = "Scale"; std::string source = GetActDefines() + scale_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -193,44 +217,86 @@ 
int ScaleOpenCLKernel::Prepare() { return RET_OK; } -int ScaleOpenCLKernel::Run() { - MS_LOG(DEBUG) << this->name() << " Running!"; - auto *param = reinterpret_cast(op_parameter_); +int ScaleOpenCLKernel::SetKernelArg(int *idx) { int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + return RET_ERROR; + } if (weight_vector_flag_) { void *scale = scale_ptr_ == nullptr ? in_tensors_[1]->data_c() : scale_ptr_; void *offset = offset_ptr_ == nullptr ? in_tensors_[2]->data_c() : offset_ptr_; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { + return RET_ERROR; + } } else { if (in_tensors_[1]->data_type() == kNumberTypeFloat32) { float scale = static_cast(in_tensors_[1]->data_c())[0]; float offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, scale) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, offset) != CL_SUCCESS) { + return RET_ERROR; + } } else if (in_tensors_[1]->data_type() == kNumberTypeFloat16) { float16_t scale = static_cast(in_tensors_[1]->data_c())[0]; float16_t offset = static_cast(in_tensors_[2]->data_c())[0]; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(scale)) != CL_SUCCESS) { + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, static_cast(offset)) != CL_SUCCESS) { + return RET_ERROR; + } } 
else { MS_LOG(ERROR) << "Unsupported data type " << in_tensors_[1]->data_type(); return RET_ERROR; } } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + return RET_ERROR; + } cl_int2 output_shape{static_cast(global_size_[0]), static_cast(global_size_[1])}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, output_shape) != CL_SUCCESS) { + return RET_ERROR; + } + *idx = arg_idx; + return RET_OK; +} + +int ScaleOpenCLKernel::Run() { + MS_LOG(DEBUG) << this->name() << " Running!"; + auto *param = reinterpret_cast(op_parameter_); + int arg_idx = 0; + + if (SetKernelArg(&arg_idx) != RET_OK) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (weight_vector_flag_ && broadcast_flag_) { if (broadcast_H_flag_) { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[1]->shape()[0]) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, UP_DIV(in_tensors_[1]->shape()[0], C4NUM)) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } } - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->activation_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h index 755bdc1db28..f1abc693ff7 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/scale.h @@ -34,7 +34,7 @@ class ScaleOpenCLKernel : public OpenCLKernel { private: void Image2dGetWorkGroupSize(); - + int SetKernelArg(int *idx); bool weight_vector_flag_{true}; bool broadcast_flag_{false}; bool broadcast_H_flag_{false}; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc index 2491f59036c..9f8fb994a90 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.cc @@ -75,7 +75,7 @@ int SoftmaxOpenCLKernel::Prepare() { kernel_name += "Axis" + std::to_string(axis_); } kernel_name += "_NHWC4"; - std::string program_name = "Softmax"; + const std::string program_name = "Softmax"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -93,7 +93,10 @@ int SoftmaxOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return lite::RET_OK; @@ -131,24 +134,40 @@ int SoftmaxOpenCLKernel::Tune() { return OpenCLKernel::Tune(); } -void SoftmaxOpenCLKernel::SetConstArgs() { +int SoftmaxOpenCLKernel::SetConstArgs() { int arg_idx = 2; int channel = out_shape_.C; int c4 = out_shape_.Slice; auto mask_ = GetMaskForLastChannel(channel); cl_float4 mask = {mask_[0], mask_[1], mask_[2], mask_[3]}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, mask) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + 
} cl_int4 input_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), c4}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } int SoftmaxOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return lite::RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h index da0b75b29e0..504e1e8715f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/softmax.h @@ -30,7 +30,7 @@ class SoftmaxOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Tune() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc index 6b6da404602..09f6cc70871 100644 --- 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.cc @@ -61,7 +61,7 @@ int SpaceToBatchNDOpenCLKernel::CheckSpecs() { return RET_OK; } -void SpaceToBatchNDOpenCLKernel::SetConstArgs() { +int SpaceToBatchNDOpenCLKernel::SetConstArgs() { auto param = reinterpret_cast(this->op_parameter_); size_t CO4 = UP_DIV(out_tensors_[0]->Channel(), C4NUM); size_t CI4 = UP_DIV(in_tensors_[0]->Channel(), C4NUM); @@ -71,10 +71,23 @@ void SpaceToBatchNDOpenCLKernel::SetConstArgs() { cl_int4 paddings = {param->paddings_[0], param->paddings_[1], param->paddings_[2], param->paddings_[3]}; int arg_cnt = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size); - ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, src_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, dst_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, block_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cnt++, paddings) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { @@ -87,9 +100,9 @@ void SpaceToBatchNDOpenCLKernel::SetGlobalLocal() { } int SpaceToBatchNDOpenCLKernel::Prepare() { - std::string kernel_name = "space_to_batch_nd_NHWC4"; + const std::string kernel_name = "space_to_batch_nd_NHWC4"; std::string source = space_to_batch_nd_source; - std::string program_name = "space_to_batch_nd"; + const std::string program_name = "space_to_batch_nd"; if 
(!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -101,7 +114,10 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { return ret; } SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -109,9 +125,18 @@ int SpaceToBatchNDOpenCLKernel::Prepare() { int SpaceToBatchNDOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h index 30df823c059..e545c68b2a4 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_batch_nd.h @@ -32,7 +32,7 @@ class SpaceToBatchNDOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc index 0303ea31bdb..0e69cd3ef23 
100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.cc @@ -51,7 +51,7 @@ int SpaceToDepthOpenCLKernel::Prepare() { kernel_name += "Align"; } std::string source = space_to_depth_source; - std::string program_name = "SpaceToDepth"; + const std::string program_name = "SpaceToDepth"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -63,28 +63,47 @@ int SpaceToDepthOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void SpaceToDepthOpenCLKernel::SetConstArgs() { +int SpaceToDepthOpenCLKernel::SetConstArgs() { cl_int4 cl_in_shape = {static_cast(in_shape_.N), static_cast(in_shape_.H), static_cast(in_shape_.W), static_cast(in_shape_.Slice)}; cl_int4 cl_out_shape = {static_cast(out_shape_.N), static_cast(out_shape_.H), static_cast(out_shape_.W), static_cast(out_shape_.Slice)}; auto param = reinterpret_cast(op_parameter_); int arg_idx = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_in_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, cl_out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, param->block_size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type() == PrimitiveType_DepthToSpace) { int co_size = out_shape_.C; - 
ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, co_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { int ci_size = in_shape_.C; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, ci_size) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void SpaceToDepthOpenCLKernel::SetGlobalLocal() { local_size_ = {}; @@ -95,9 +114,18 @@ void SpaceToDepthOpenCLKernel::SetGlobalLocal() { int SpaceToDepthOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h index 3576e26d616..75ee5d1d1b6 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/space_to_depth.h @@ -32,7 +32,7 @@ class SpaceToDepthOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void 
SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc index f3f6c8c084f..dc532bbbb92 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.cc @@ -37,7 +37,10 @@ int SparseToDenseOpenCLKernel::InitOutputToDefault() { cl_float4 fill_value = {}; fill_value.s[0] = fill_value.s[1] = fill_value.s[2] = fill_value.s[3] = default_; auto src_data = out_tensors_[0]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -62,7 +65,14 @@ int SparseToDenseOpenCLKernel::InitWeights() { auto sizeof_FLT = enable_fp16_ ? 
sizeof(float16_t) : sizeof(float); size_t weight_size = UP_ROUND(size, C4NUM) * sizeof_FLT; weight_vector_ = allocator->Malloc(weight_size, lite::opencl::MemType::BUF); - allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true); + if (weight_vector_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + if (allocator->MapBuffer(weight_vector_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memset(weight_vector_, 0x00, weight_size); if (weight_tensor->data_type() == kNumberTypeFloat16) { if (enable_fp16_) { @@ -85,7 +95,10 @@ int SparseToDenseOpenCLKernel::InitWeights() { memcpy(weight_vector_, weight_tensor->data_c(), size * sizeof_FLT); } } - allocator->UnmapBuffer(weight_vector_); + if (allocator->UnmapBuffer(weight_vector_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } return RET_OK; } @@ -115,7 +128,7 @@ int SparseToDenseOpenCLKernel::CheckSpecs() { return RET_OK; } -void SparseToDenseOpenCLKernel::SetConstArgs() { +int SparseToDenseOpenCLKernel::SetConstArgs() { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); GpuTensorInfo img_info(out_tensors_[0]); size_t dtype = enable_fp16_ ? 
sizeof(cl_half) : sizeof(cl_float); @@ -124,11 +137,27 @@ void SparseToDenseOpenCLKernel::SetConstArgs() { auto out_shape_temp = out_tensors_[0]->shape(); cl_int4 out_shape = {out_n_, out_h_, out_w_, UP_DIV(out_c_, C4NUM)}; int arg_cn = 3; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, default_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, inshapeindex1_dim) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void SparseToDenseOpenCLKernel::SetGlobalLocal() { @@ -144,9 +173,9 @@ int SparseToDenseOpenCLKernel::Prepare() { input_dim_ = in_tensors_[0]->shape().size(); inshapeindex1_dim = in_tensors_[0]->shape()[1]; weight_scalar_ = in_tensors_[2]->IsScalar(); - std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? "Scalar" : "Vector"); + const std::string kernel_name = "SparseToDense" + std::string(weight_scalar_ ? 
"Scalar" : "Vector"); std::string source = sparse_to_dense_source; - std::string program_name = "SparseToDense"; + const std::string program_name = "SparseToDense"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -174,7 +203,10 @@ int SparseToDenseOpenCLKernel::Prepare() { InitWeights(); InferShapeTo4D(); SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -212,14 +244,30 @@ int SparseToDenseOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! "; InitOutputToDefault(); int arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); - if (!weight_scalar_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF); - } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (!weight_scalar_) { + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_vector_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } else { + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, weight_scalar_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - 
ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h index 0ffc6359f98..f98dc6f0265 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/sparse_to_dense.h @@ -31,7 +31,7 @@ class SparseToDenseOpenCLKernel : public OpenCLKernel { int Prepare() override; int Run() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int CheckSpecs() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc index 862d4f2dba1..206bbffbf33 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.cc @@ -41,7 +41,10 @@ int SplitOpenCLKernel::RunAxis0() { for (int i = 0; i < out_tensors_.size(); i++) { auto dst_data = out_tensors_[i]->data_c(); ImageSize img_size; - allocator_->GetImageSize(dst_data, &img_size); + if (allocator_->GetImageSize(dst_data, &img_size) != RET_OK) { + MS_LOG(ERROR) << "GetImageSize failed."; + return RET_ERROR; + } auto dst_area = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); @@ -93,23 +96,32 @@ int SplitOpenCLKernel::CheckSpecs() { return RET_OK; } -void SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { +int SplitOpenCLKernel::AlignSplitSizes(SplitParameter *param, const std::vector &in_shape) { auto allocator = ocl_runtime_->GetAllocator(); int shape_dim = in_shape.at(param->split_dim_); if (num_split_ == 1) { size_t num_split = UP_DIV(shape_dim, param->split_sizes_[0]); split_sizes_ = 
reinterpret_cast(allocator->Malloc(num_split * sizeof(int), lite::opencl::MemType::BUF)); + if (split_sizes_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } for (int i = 0; i < num_split - 1; ++i) { split_sizes_[i] = (i + 1) * param->split_sizes_[0]; } } else { int sum = 0; split_sizes_ = reinterpret_cast(allocator->Malloc(num_split_ * sizeof(int), lite::opencl::MemType::BUF)); + if (split_sizes_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } for (int i = 0; i < num_split_ - 1; ++i) { sum += param->split_sizes_[i]; split_sizes_[i] = sum; } } + return RET_OK; } int SplitOpenCLKernel::Prepare() { @@ -129,7 +141,10 @@ int SplitOpenCLKernel::Prepare() { } } } - AlignSplitSizes(param, in_shape); + if (AlignSplitSizes(param, in_shape) != RET_OK) { + MS_LOG(ERROR) << "AlignSplitSizes failed."; + return RET_ERROR; + } std::string kernel_name = "split_out"; kernel_name += std::to_string(num_split_); kernel_name += "_axis" + std::to_string(split_dim_); @@ -138,7 +153,7 @@ int SplitOpenCLKernel::Prepare() { } MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = split_source; - std::string program_name = "split"; + const std::string program_name = "split"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -151,12 +166,15 @@ int SplitOpenCLKernel::Prepare() { return ret; } MS_LOG(DEBUG) << kernel_name << " Init Done!"; - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } -void SplitOpenCLKernel::SetConstArgs() { +int SplitOpenCLKernel::SetConstArgs() { int arg_cn = out_tensors_.size() + 2; cl_int4 shape = {}; for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { @@ -166,7 +184,10 @@ void SplitOpenCLKernel::SetConstArgs() { if (Align_) { in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, 
in_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } for (int i = 0; i < out_tensors_.size(); ++i) { cl_int4 temp = {}; @@ -177,13 +198,21 @@ void SplitOpenCLKernel::SetConstArgs() { if (Align_) { out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } - GpuTensorInfo img_info(in_tensors_.at(0)); - size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); - stride_w = img_info.RowPitch() / dtype; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); - return; + if (!Align_) { + GpuTensorInfo img_info(in_tensors_.at(0)); + size_t dtype = enable_fp16_ ? sizeof(cl_half) : sizeof(cl_float); + stride_w = img_info.RowPitch() / dtype; + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + return RET_OK; } void SplitOpenCLKernel::SetGlobalLocal() { @@ -205,15 +234,31 @@ int SplitOpenCLKernel::Run() { } int arg_cn = 0; if (Align_) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_.at(0)->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } for (int i = 0; i < out_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_.at(i)->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, 
out_tensors_.at(i)->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, split_sizes_, lite::opencl::MemType::BUF); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h index c8be6a244da..b7e25a93996 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/split.h @@ -31,12 +31,12 @@ class SplitOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; private: - void AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); + int AlignSplitSizes(SplitParameter *param, const std::vector &in_shape); int RunAxis0(); private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc index 819c2ab8b7c..5b08fbb3245 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.cc @@ -36,7 +36,10 @@ int StackOpenCLKernel::RunAxis0() { cl::Image2D *out_image = reinterpret_cast(allocator_->GetImage(dst_data)); for (int i = 0; i < in_tensors_.size(); i++) { auto src_data = in_tensors_[i]->data_c(); - allocator_->GetImageSize(src_data, &img_size); + if (allocator_->GetImageSize(src_data, &img_size) != RET_OK) { + MS_LOG(ERROR) 
<< "GetImageSize failed."; + return RET_ERROR; + } auto src_origin = cl::array{0, 0, 0}; auto region = cl::array{img_size.width, img_size.height, 1}; cl::Image2D *input_image = reinterpret_cast(allocator_->GetImage(src_data)); @@ -95,7 +98,7 @@ int StackOpenCLKernel::CheckSpecs() { return RET_OK; } -void StackOpenCLKernel::SetConstArgs() { +int StackOpenCLKernel::SetConstArgs() { int arg_cn = in_tensors_.size() + 1; cl_int4 inshape_tmp = {}, outshape_tmp = {}; for (int i = 0; i < in_tensors_[0]->shape().size(); ++i) { @@ -108,8 +111,14 @@ void StackOpenCLKernel::SetConstArgs() { Broadcast2GpuShape(out_shape_.s, outshape_tmp.s, out_tensors_[0]->shape().size(), 1); in_shape_.s[3] = UP_DIV(in_shape_.s[3], C4NUM); out_shape_.s[3] = UP_DIV(out_shape_.s[3], C4NUM); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (buffer_button_) { GpuTensorInfo img_info_out(out_tensors_[0]); GpuTensorInfo img_info_in(in_tensors_[0]); @@ -117,8 +126,12 @@ void StackOpenCLKernel::SetConstArgs() { stride_w_out = img_info_out.RowPitch() / dtype; stride_w_in = img_info_in.RowPitch() / dtype; cl_int2 stride_w = {stride_w_out, stride_w_in}; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_w) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void StackOpenCLKernel::SetGlobalLocal() { @@ -162,7 +175,7 @@ int StackOpenCLKernel::Prepare() { MS_LOG(DEBUG) << "kernel_name=: " << kernel_name; std::string source = stack_source; - std::string program_name = "stack"; + const std::string program_name = "stack"; if 
(!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -174,7 +187,10 @@ int StackOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; @@ -188,16 +204,33 @@ int StackOpenCLKernel::Run() { int arg_cn = 0; if (buffer_button_) { for (int i = 0; i < in_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF) != + CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c(), lite::opencl::MemType::BUF); } else { for (int i = 0; i < in_tensors_.size(); ++i) { - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, in_tensors_[i]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, out_tensors_[0]->data_c()); } - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } REG_KERNEL(kGPU, kNumberTypeFloat32, PrimitiveType_Stack, OpenCLKernelCreator); diff --git 
a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h index a41bc0ff7ee..1585fae341d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/stack.h @@ -29,7 +29,7 @@ class StackOpenCLKernel : public OpenCLKernel { ~StackOpenCLKernel() override{}; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc index 59df111e2a8..bd21ab17886 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.cc @@ -27,9 +27,9 @@ using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { int StrassenOpenCLKernel::Prepare() { - std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; + const std::string kernel_name = "MatMul_Strassen_NHWC4_2d"; std::string source = strassen_source; - std::string program_name = "MatMul"; + const std::string program_name = "MatMul"; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -50,13 +50,16 @@ int StrassenOpenCLKernel::Prepare() { if (ret != RET_OK) { return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { +int StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { auto allocator = ocl_runtime_->GetAllocator(); size_t img_dtype = enable_fp16_ ? 
CL_HALF_FLOAT : CL_FLOAT; ImageSize img_size{static_cast(UP_DIV(NumA, C4NUM)), static_cast(NumA), img_dtype}; @@ -64,15 +67,52 @@ void StrassenOpenCLKernel::AllocatorMemoryForStrassen(int NumA, int NumB) { size_t memB = NumB * NumB * dtype_size; for (int depth = 0; depth < MAXDEPTH; depth++) { B_temp[depth] = allocator->Malloc(memB, lite::opencl::MemType::BUF); + if (B_temp[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } A_temp[depth] = allocator->Malloc(img_size); + if (A_temp[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M1[depth] = allocator->Malloc(img_size); + if (M1[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M2[depth] = allocator->Malloc(img_size); + if (M2[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M3[depth] = allocator->Malloc(img_size); + if (M3[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M4[depth] = allocator->Malloc(img_size); + if (M4[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M5[depth] = allocator->Malloc(img_size); + if (M5[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M6[depth] = allocator->Malloc(img_size); + if (M6[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } M7[depth] = allocator->Malloc(img_size); + if (M7[depth] == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } + return RET_OK; } int StrassenOpenCLKernel::InitWeights() { @@ -82,14 +122,25 @@ int StrassenOpenCLKernel::InitWeights() { int NumB = in_tensors_[1]->shape()[0]; size_t dtype_size = enable_fp16_ ? 
sizeof(uint16_t) : sizeof(float); padWeight_ = allocator->Malloc(NumA * NumB * dtype_size, lite::opencl::MemType::BUF); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } padWeight_ = allocator->MapBuffer(padWeight_, CL_MAP_WRITE, nullptr, true); + if (padWeight_ == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } auto padWeightFp32 = reinterpret_cast(padWeight_); auto padWeightFp16 = reinterpret_cast(padWeight_); memset(padWeight_, 0x00, NumA * NumB * dtype_size); auto originWeightFp32 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); auto originWeightFp16 = reinterpret_cast(in_tensors_.at(kWeightIndex)->data_c()); bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16; - AllocatorMemoryForStrassen(NumA / 2, NumB / 2); + if (AllocatorMemoryForStrassen(NumA / 2, NumB / 2) != RET_OK) { + MS_LOG(ERROR) << "AllocatorMemoryForStrassen failed."; + return RET_ERROR; + } size_t size = NumA * NumB * dtype_size; if (isModelFp16) { if (enable_fp16_) { @@ -108,7 +159,10 @@ int StrassenOpenCLKernel::InitWeights() { memcpy(padWeightFp32, originWeightFp32, size); } } - allocator->UnmapBuffer(padWeight_); + if (allocator->UnmapBuffer(padWeight_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } return RET_OK; } @@ -120,7 +174,7 @@ void AlignStrassenGlobalLocal(const std::vector &global, const std::vect } // 0 : global_size_, 1: global_size_add_sub -void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { +int StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type_flag) { size_t strassen_size_C4 = UP_DIV(strassen_size, C4NUM); local_size_add_sub = {16, 1, 16}; if (type_flag == 0) { @@ -130,6 +184,7 @@ void StrassenOpenCLKernel::StrassenSetGlobalLocal(size_t strassen_size, int type global_size_add_sub = {strassen_size_C4, 1, strassen_size}; AlignStrassenGlobalLocal(global_size_add_sub, 
local_size_add_sub, &global_add_sub_, &local_add_sub_); } + return RET_OK; } void StrassenOpenCLKernel::SetGlobalLocal() { @@ -142,111 +197,188 @@ void StrassenOpenCLKernel::SetGlobalLocal() { StrassenSetGlobalLocal(strassen_size, 2); // set global_size_weights } -void StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, - bool is_matmul_kernel) { +int StrassenOpenCLKernel::StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, + bool is_matmul_kernel) { cl_int4 shape; if (is_matmul_kernel) { shape = {1, 1, strassen_size, strassen_size}; } else { shape = {strassen_size, 1, 1, UP_DIV(strassen_size, C4NUM)}; } - ocl_runtime_->SetKernelArg(*kernel, index, shape); + if (ocl_runtime_->SetKernelArg(*kernel, index, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::SetConstArgs() { - int arg_count = 2; - cl_int4 in_shape = {inShape[0], inShape[1], inShape[2], inShape[3]}; - cl_int4 out_shape = {outShape[0], outShape[1], outShape[2], outShape[3]}; - cl_int4 shape_offset = {0, 0, 0, 0}; +int StrassenOpenCLKernel::SetConstArgs() { int strassen_size = inShape[3] / 2; - out_shape.s[2] = in_shape.s[2] = in_shape.s[2] / 2; - out_shape.s[3] = in_shape.s[3] = in_shape.s[3] / 2; StrassenSetConstArgs(&kernel_IMG_add_sub_2, 3, strassen_size, false); StrassenSetConstArgs(&kernel_BUF_add_sub_2, 2, strassen_size, false); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, in_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, out_shape); - ocl_runtime_->SetKernelArg(kernel_, arg_count++, shape_offset); + return RET_OK; } -void StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, - cl_int2 offset, lite::opencl::MemType mem_type) { +int StrassenOpenCLKernel::StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, + cl_int2 offset, lite::opencl::MemType mem_type) { if 
(input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenDataFilled input or output can not nullptr"; - return; + return RET_ERROR; } if (mem_type == lite::opencl::MemType::IMG) { - ocl_runtime_->SetKernelArg(*kernel, 0, input); - ocl_runtime_->SetKernelArg(*kernel, 1, output); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } StrassenSetConstArgs(kernel, 2, size, false); - ocl_runtime_->SetKernelArg(*kernel, 3, offset); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, - int flag, lite::opencl::MemType mem_type) { +int StrassenOpenCLKernel::StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, + int flag, lite::opencl::MemType mem_type) { if (input == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenAddSub input or output can 
not nullptr"; - return; + return RET_ERROR; } if (mem_type == lite::opencl::MemType::IMG) { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::IMG) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::IMG) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } else { - ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF); - ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF); + if (ocl_runtime_->SetKernelArg(*kernel, 0, input, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, output, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } StrassenSetConstArgs(kernel, 2, size, false); - ocl_runtime_->SetKernelArg(*kernel, 3, offset); - ocl_runtime_->SetKernelArg(*kernel, 4, flag); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(*kernel, 3, offset) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 4, flag) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, - void *input4, void *input5, void *input6, void *input7, void *output, - const int size) { +int 
StrassenOpenCLKernel::StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, + void *input5, void *input6, void *input7, void *output, const int size) { if (input1 == nullptr || input2 == nullptr || input3 == nullptr || input4 == nullptr || input5 == nullptr || input6 == nullptr || input7 == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenBackResult input or output can not nullptr"; - return; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 0, input1) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 1, input2) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 2, input3) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 3, input4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 4, input5) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 5, input6) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 6, input7) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(*kernel, 7, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(*kernel, 0, input1); - ocl_runtime_->SetKernelArg(*kernel, 1, input2); - ocl_runtime_->SetKernelArg(*kernel, 2, input3); - ocl_runtime_->SetKernelArg(*kernel, 3, input4); - ocl_runtime_->SetKernelArg(*kernel, 4, input5); - ocl_runtime_->SetKernelArg(*kernel, 5, input6); - ocl_runtime_->SetKernelArg(*kernel, 6, input7); - ocl_runtime_->SetKernelArg(*kernel, 7, output); StrassenSetConstArgs(kernel, 8, 
size, false); - ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_); + if (ocl_runtime_->RunKernel(*kernel, global_add_sub_, local_add_sub_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { +int StrassenOpenCLKernel::StrassenRunMmatmul(void *input, void *weight, void *output, const int size) { if (input == nullptr || weight == nullptr || output == nullptr) { MS_LOG(ERROR) << "StrassenRunMmatmul input ,weight or output can not nullptr"; - return; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 0, input) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, output) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; } - ocl_runtime_->SetKernelArg(kernel_, 0, input); - ocl_runtime_->SetKernelArg(kernel_, 1, output); - ocl_runtime_->SetKernelArg(kernel_, 2, weight, lite::opencl::MemType::BUF); StrassenSetConstArgs(&kernel_, 3, size, true); StrassenSetConstArgs(&kernel_, 4, size, true); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } + return RET_OK; } -void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, - const int threshold) { +int StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, const int size, const int depth, + const int threshold) { const int size_2 = size / 2; int C4 = UP_DIV(size_2, C4NUM); if (size <= 
threshold) { // run matmul; StrassenSetGlobalLocal(size, 0); StrassenRunMmatmul(data, weight, result, size); - return; + return RET_OK; } // flag = 0 : add otherwise flag = 1 : sub // M1 = A11 * ( B12- B22) @@ -307,6 +439,7 @@ void StrassenOpenCLKernel::DoStrassen(void *data, void *weight, void *result, co StrassenSetGlobalLocal(size_2, 1); StrassenBackResult(&kernel_back_result, M1[depth + 1], M2[depth + 1], M3[depth + 1], M4[depth + 1], M5[depth + 1], M6[depth + 1], M7[depth + 1], result, size_2); + return RET_OK; } int StrassenOpenCLKernel::Run() { diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h index 808cddd6d18..48596a3ebd2 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strassen.h @@ -33,22 +33,22 @@ class StrassenOpenCLKernel : public MatMulOpenCLKernel { int Run() override; int Prepare() override; int InitWeights() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; // strassen private: - void AllocatorMemoryForStrassen(int NumA, int NumB); - void DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); - void StrassenSetGlobalLocal(size_t strassen_size, int type_flag); - void StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); - void StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, - lite::opencl::MemType mem_type); - void StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, - lite::opencl::MemType mem_type); - void StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, - void *input6, void *input7, void *output, const int size); - void StrassenRunMmatmul(void *input, void *weight, void *output, const 
int size); + int AllocatorMemoryForStrassen(int NumA, int NumB); + int DoStrassen(void *data, void *weight, void *result, const int size, const int depth, const int threshold); + int StrassenSetGlobalLocal(size_t strassen_size, int type_flag); + int StrassenSetConstArgs(cl::Kernel *kernel, int index, int strassen_size, bool is_matmul_kernel); + int StrassenDataFilled(cl::Kernel *kernel, void *input, void *output, const int size, cl_int2 offset, + lite::opencl::MemType mem_type); + int StrassenAddSub(cl::Kernel *kernel, void *input, void *output, const int size, cl_int4 offset, int flag, + lite::opencl::MemType mem_type); + int StrassenBackResult(cl::Kernel *kernel, void *input1, void *input2, void *input3, void *input4, void *input5, + void *input6, void *input7, void *output, const int size); + int StrassenRunMmatmul(void *input, void *weight, void *output, const int size); cl::Kernel kernel_IMG_add_sub_2; cl::Kernel MatMul_StrassenBUFFilled; cl::Kernel MatMul_StrassenIMGFilled; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc index b1d7fa9b762..9d00ac7a4dd 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.cc @@ -85,7 +85,7 @@ int StridedSliceOpenCLKernel::CheckSpecs() { } int StridedSliceOpenCLKernel::Prepare() { - std::string program_name = "strided_slice"; + const std::string program_name = "strided_slice"; if (!ocl_runtime_->LoadSource(program_name, strided_slice_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -96,7 +96,10 @@ int StridedSliceOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); return RET_OK; } @@ -187,14 +190,33 @@ int StridedSliceOpenCLKernel::InitConstArgs() { return 
RET_OK; } -void StridedSliceOpenCLKernel::SetConstArgs() { +int StridedSliceOpenCLKernel::SetConstArgs() { int arg_cn = 2; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, input_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, output_shape_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, io_slices_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, begin_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, stride_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, size_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void StridedSliceOpenCLKernel::SetGlobalLocal() { @@ -214,9 +236,18 @@ void StridedSliceOpenCLKernel::SetGlobalLocal() { int StridedSliceOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running! 
"; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h index 87e2638dc49..3ce6b991ee5 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/strided_slice.h @@ -31,7 +31,7 @@ class StridedSliceOpenCLKernel : public OpenCLKernel { int CheckSpecs() override; int Prepare() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc index 5380f461462..0d6ff88d36d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc @@ -42,11 +42,18 @@ int ToFormatOpenCLKernel::CheckSpecs() { return RET_OK; } -void ToFormatOpenCLKernel::SetConstArgs() { +int ToFormatOpenCLKernel::SetConstArgs() { cl_int4 shape{(cl_int)N_, (cl_int)H_, (cl_int)W_, (cl_int)C_}; cl_int4 gsize{(cl_int)(N_ * H_), (cl_int)W_, (cl_int)UP_DIV(C_, C4NUM), 1}; - ocl_runtime_->SetKernelArg(kernel_, 2, gsize); - ocl_runtime_->SetKernelArg(kernel_, 3, 
shape); + if (ocl_runtime_->SetKernelArg(kernel_, 2, gsize) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 3, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void ToFormatOpenCLKernel::SetGlobalLocal() { @@ -70,7 +77,7 @@ int ToFormatOpenCLKernel::Prepare() { kernel_name += dtype_str[in_tensor->data_type()] + "_" + dtype_str[out_tensor->data_type()]; this->set_name(kernel_name); - std::string program_name = "to_format"; + const std::string program_name = "to_format"; std::string source = to_format_source; if (!ocl_runtime_->LoadSource(program_name, source)) { MS_LOG(ERROR) << "Load source failed."; @@ -89,7 +96,10 @@ int ToFormatOpenCLKernel::Prepare() { C_ = output.C; SetGlobalLocal(); - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } @@ -98,9 +108,18 @@ int ToFormatOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; auto src_mem_type = (out_mem_type_ == MemType::IMG) ? 
lite::opencl::MemType::BUF : lite::opencl::MemType::IMG; auto dst_mem_type = out_mem_type_; - ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type); - ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, 0, in_tensors_.front()->data_c(), src_mem_type) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, 1, out_tensors_.front()->data_c(), dst_mem_type) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h index d600519e3c4..0e1989d157f 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.h @@ -35,7 +35,7 @@ class ToFormatOpenCLKernel : public OpenCLKernel { int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int InferShape() override; diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc index 6841867de66..9c7cbea7c29 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.cc @@ -101,7 +101,7 @@ int TransposeOpenCLKernel::Prepare() { kernel_name += "_NHWC4"; std::string source = transpose_source; - std::string program_name = "transpose"; + const std::string program_name = "transpose"; if (!ocl_runtime_->LoadSource(program_name, 
source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -113,32 +113,45 @@ int TransposeOpenCLKernel::Prepare() { MS_LOG(ERROR) << "Build kernel failed."; return ret; } - SetConstArgs(); + if (SetConstArgs() != RET_OK) { + MS_LOG(ERROR) << "SeConstArgs failed."; + return RET_ERROR; + } SetGlobalLocal(); MS_LOG(DEBUG) << kernel_name << " Init Done!"; return RET_OK; } -void TransposeOpenCLKernel::SetConstArgs() { +int TransposeOpenCLKernel::SetConstArgs() { size_t n = tensor_size_.N; size_t h = tensor_size_.H; size_t w = tensor_size_.W; size_t c = tensor_size_.C; int arg_idx = 2; cl_int4 shape = {static_cast(n), static_cast(h), static_cast(w), static_cast(c)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } if (type_ == TransposeType::GENERAL) { int de_perm[4]; // output to input perm for (int i = 0; i < 4; i++) { de_perm[perm_4d_[i]] = i; } cl_int4 de_perm_cl = {de_perm[0], de_perm[1], de_perm[2], de_perm[3]}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, de_perm_cl) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } GpuTensorInfo in_shape = GpuTensorInfo(in_tensors_[0]); cl_int4 in_shape_int4 = {static_cast(in_shape.N), static_cast(in_shape.H), static_cast(in_shape.W), static_cast(in_shape.C)}; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_shape_int4) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } } + return RET_OK; } void TransposeOpenCLKernel::SetGlobalLocal() { @@ -161,9 +174,18 @@ void TransposeOpenCLKernel::SetGlobalLocal() { int TransposeOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " Running!"; int arg_idx = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, 
in_tensors_[0]->data_c()); - ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()); - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, in_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_idx++, out_tensors_[0]->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h index 54edb3fd011..5daaf10cd35 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/transpose.h @@ -33,7 +33,7 @@ class TransposeOpenCLKernel : public OpenCLKernel { int Run() override; int Prepare() override; int CheckSpecs() override; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; private: diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc index 7b52015c617..8e51bcaaaed 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.cc @@ -78,7 +78,7 @@ std::vector GenerateWinogradFilter(void *src, TypeId dtype, size_t CO, si } // namespace int WinogradOpenCLKernel::BuildKernel() { - std::string program_name = "winograd"; + const std::string program_name = "winograd"; if (!ocl_runtime_->LoadSource(program_name, GetActDefines() + winograd_source)) { MS_LOG(ERROR) << "Load source failed."; return RET_ERROR; @@ -103,7 +103,7 @@ int WinogradOpenCLKernel::BuildKernel() { return 
RET_OK; } -void WinogradOpenCLKernel::InitFilter() { +int WinogradOpenCLKernel::InitFilter() { auto allocator = ocl_runtime_->GetAllocator(); // allocate opencl memory: buffer or image2d @@ -115,9 +115,17 @@ void WinogradOpenCLKernel::InitFilter() { size_t dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size = width * height * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc({width, height, dtype}); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } else { size = UP_DIV(CO_SLICES_, Ogroup) * 6 * 6 * CI_SLICES_ * Ogroup * CI_TILE * CO_TILE * sizeof_FLT_; packed_filter_ = allocator->Malloc(size, MemType::BUF); + if (packed_filter_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } } // rearrange filter @@ -128,6 +136,10 @@ void WinogradOpenCLKernel::InitFilter() { void *src_data = winograd_filter.data(); #else auto winograd_filter = std::make_unique(CO_ * 6 * 6 * CI_); + if (winograd_filter == nullptr) { + MS_LOG(ERROR) << "new winograd_filter failed."; + return RET_ERROR; + } WinogradWeightTransform(reinterpret_cast(src_filter_data), reinterpret_cast(winograd_filter.get()), nullptr, Gt, 1, 6, 3, CI_, CO_, false); @@ -147,53 +159,121 @@ void WinogradOpenCLKernel::InitFilter() { if (filter_type_ == MemType::IMG) { ocl_runtime_->WriteImage(packed_filter_, tmp.data()); } else { - allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true); + if (allocator->MapBuffer(packed_filter_, CL_MAP_WRITE, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + return RET_ERROR; + } memcpy(packed_filter_, tmp.data(), size); - allocator->UnmapBuffer(packed_filter_); + if (allocator->UnmapBuffer(packed_filter_) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + return RET_ERROR; + } } FreeStoredData(stored_filter_); + return RET_OK; } -void WinogradOpenCLKernel::AllocateMemory() { +int WinogradOpenCLKernel::AllocateMemory() { auto allocator = ocl_runtime_->GetAllocator(); size_t 
img_dtype = use_fp16_ ? CL_HALF_FLOAT : CL_FLOAT; size_t width = TILE_HW_; size_t height = CI_SLICES_ * 36; winograd_mem0_ = allocator->Malloc({width, height, img_dtype}); + if (winograd_mem0_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } width = TILE_HW_; height = CO_SLICES_ * 36; winograd_mem1_ = allocator->Malloc({width, height, img_dtype}); + if (winograd_mem1_ == nullptr) { + MS_LOG(ERROR) << "Malloc failed."; + return RET_ERROR; + } + return RET_OK; } -void WinogradOpenCLKernel::SetConstArgs() { +int WinogradOpenCLKernel::SetConstArgs() { AllocateMemory(); int arg_cn = 1; cl_int4 input_shape = {batch_size_, OH_, OW_, CI_SLICES_}; // maybe pad=0, so use OH/OW - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_); - ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_); + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, input_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn++, param_->pad_u_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, arg_cn, param_->pad_l_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } arg_cn = 0; - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_); - 
ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_); - ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_); + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem0_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, winograd_mem1_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, packed_filter_, filter_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn++, CI_SLICES_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_, arg_cn, CO_SLICES_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } arg_cn = 2; cl_int4 output_shape = {batch_size_, OH_, OW_, CO_SLICES_}; - ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_); - ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_); + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 0, winograd_mem1_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, packed_bias_, MemType::BUF) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if 
(ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, output_shape) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, TILE_HW_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn++, param_->act_type_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, arg_cn, alpha_) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + return RET_OK; } void WinogradOpenCLKernel::SetGlobalLocal() { @@ -205,15 +285,30 @@ void WinogradOpenCLKernel::SetGlobalLocal() { int WinogradOpenCLKernel::Run() { MS_LOG(DEBUG) << this->name() << " winograd Running!"; MS_LOG(DEBUG) << "winograd kernel0 Running!"; - ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_); + if (ocl_runtime_->SetKernelArg(kernel_4x4to36_, 0, in_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_4x4to36_, global_4x4to36_, local_4x4to36_, nullptr, &event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << "winograd kernel1 Running!"; - ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_); + if (ocl_runtime_->RunKernel(kernel_, global_range_, local_range_, nullptr, &kernel2_event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } MS_LOG(DEBUG) << "winograd kernel2 Running!"; - ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, out_tensors_.front()->data_c()); - ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_); + if (ocl_runtime_->SetKernelArg(kernel_36to4x4_, 1, 
out_tensors_.front()->data_c()) != CL_SUCCESS) { + MS_LOG(ERROR) << "SetKernelArg failed."; + return RET_ERROR; + } + if (ocl_runtime_->RunKernel(kernel_36to4x4_, global_36to4x4_, local_36to4x4_, nullptr, &kernel3_event_) != RET_OK) { + MS_LOG(ERROR) << "RunKernel failed."; + return RET_ERROR; + } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h index 7ed7050a2d0..9f3da53f780 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h +++ b/mindspore/lite/src/runtime/kernel/opencl/kernel/winograd.h @@ -32,7 +32,7 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { ~WinogradOpenCLKernel() override = default; - void SetConstArgs() override; + int SetConstArgs() override; void SetGlobalLocal() override; int Run() override; @@ -42,8 +42,8 @@ class WinogradOpenCLKernel : public Conv2DOpenCLKernel { private: int BuildKernel() override; - void InitFilter() override; - void AllocateMemory(); + int InitFilter() override; + int AllocateMemory(); cl::Kernel kernel_4x4to36_; cl::Kernel kernel_36to4x4_; diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc index 78e6a6842da..bdab2eb6599 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.cc @@ -24,7 +24,7 @@ using mindspore::lite::RET_OK; using mindspore::lite::opencl::ImageSize; namespace mindspore::kernel { -int OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std::vector &local) { +void OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std::vector &local) { std::vector internal_global_ws = global; for (size_t i = 0; i < local.size(); ++i) { internal_global_ws.at(i) = UP_ROUND(global.at(i), local.at(i)); @@ -50,16 +50,12 @@ int OpenCLKernel::AlignGlobalLocal(const std::vector &global, const std: if (!local.empty()) { 
local_range_ = cl::NDRange(local.at(0), local.at(1)); } - } else if (global.size() == 3) { + } else if (global.size() >= 3) { global_range_ = cl::NDRange(internal_global_ws.at(0), internal_global_ws.at(1), internal_global_ws.at(2)); if (!local.empty()) { local_range_ = cl::NDRange(local.at(0), local.at(1), local.at(2)); } - } else { - MS_LOG(ERROR) << "Not supported NDRange!"; - return RET_ERROR; } - return RET_OK; } int OpenCLKernel::GetImageSize(size_t idx, lite::opencl::ImageSize *img_size) { @@ -112,11 +108,17 @@ void OpenCLKernel::PrintOutput(int print_num, const std::string &out_file) { auto runtime_wrapper = lite::opencl::OpenCLRuntimeWrapper(); auto runtime = runtime_wrapper.GetInstance(); auto allocator = runtime->GetAllocator(); - runtime->SyncCommandQueue(); + if (!runtime->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; + } if (mem_type == lite::opencl::MemType::BUF) { - allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true); + if (allocator->MapBuffer(tensor->data_c(), CL_MAP_READ, nullptr, true) == nullptr) { + MS_LOG(ERROR) << "Map Buffer failed."; + } memcpy(data.data(), tensor->data_c(), img_info.OriginSize); - allocator->UnmapBuffer(tensor->data_c()); + if (allocator->UnmapBuffer(tensor->data_c()) != RET_OK) { + MS_LOG(ERROR) << "UnmapBuffer failed."; + } } else { runtime->ReadImage(tensor->data_c(), data.data()); } @@ -181,6 +183,7 @@ int OpenCLKernel::PreProcess() { } } output->set_allocator(allocator); + output->ResetRefCount(); } return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h index 24f10a7aa16..4e17512a38d 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_kernel.h @@ -185,7 +185,7 @@ class OpenCLKernel : public InnerKernel { ocl_runtime_ = ocl_runtime_wrap_.GetInstance(); } ~OpenCLKernel() override = default; - int AlignGlobalLocal(const 
std::vector &global, const std::vector &local); + void AlignGlobalLocal(const std::vector &global, const std::vector &local); int Prepare() override { return RET_OK; } int PreProcess() override; @@ -194,7 +194,7 @@ class OpenCLKernel : public InnerKernel { virtual int CheckSpecs(); virtual int InitWeights() { return RET_OK; } - virtual void SetConstArgs() {} + virtual int SetConstArgs() { return RET_OK; } virtual void SetGlobalLocal() {} virtual int GetGlobalSize(size_t idx, std::vector *global_size) { return RET_ERROR; } virtual int GetLocalSize(size_t idx, const std::vector &global_size, std::vector *local_size) { diff --git a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc index 957d89a77db..e1c52e51949 100644 --- a/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc +++ b/mindspore/lite/src/runtime/kernel/opencl/opencl_subgraph.cc @@ -420,6 +420,7 @@ int OpenCLSubGraph::Execute() { return ret; } if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK; @@ -449,6 +450,7 @@ int OpenCLSubGraph::Execute(const KernelCallBack &before, const KernelCallBack & return ret; } if (!ocl_runtime_->SyncCommandQueue()) { + MS_LOG(ERROR) << "SyncCommandQueue failed."; return RET_ERROR; } return RET_OK; diff --git a/mindspore/lite/src/runtime/runtime_pass.cc b/mindspore/lite/src/runtime/runtime_pass.cc index 0954c178d9f..8bb988e3338 100644 --- a/mindspore/lite/src/runtime/runtime_pass.cc +++ b/mindspore/lite/src/runtime/runtime_pass.cc @@ -20,60 +20,30 @@ namespace mindspore::lite { void Nc4hw4PassReplace(std::vector *kernels, std::vector *tensors, size_t index) { kernel::LiteKernel *conv_kernel = kernels->at(index); - kernel::LiteKernel *transpose_kernel = conv_kernel->out_kernels().front(); - kernel::LiteKernel *c4_kernel = transpose_kernel->out_kernels().front(); - kernel::LiteKernel *transpose2_kernel = 
c4_kernel->out_kernels().front(); - std::vector end_kernels = transpose2_kernel->out_kernels(); + kernel::LiteKernel *traspose_kernel = conv_kernel->out_kernels().front(); + kernel::LiteKernel *c4_kernel = traspose_kernel->out_kernels().front(); /* tensor */ - { - /* transpose_kernel */ - Tensor *transpose_param_tensor = transpose_kernel->in_tensors().at(1); - VectorSetNull(tensors, transpose_param_tensor); - delete transpose_param_tensor; - transpose_param_tensor = nullptr; + Tensor *transpose_param_tensor = traspose_kernel->in_tensors().at(1); + VectorErase(tensors, transpose_param_tensor); + delete transpose_param_tensor; + transpose_param_tensor = nullptr; - Tensor *conv_out_tensor = conv_kernel->out_tensors().front(); - conv_out_tensor->set_format(NC4HW4); - Tensor *c4_input_tensor = c4_kernel->in_tensors().front(); - c4_kernel->set_in_tensor(conv_out_tensor, 0); - VectorSetNull(tensors, c4_input_tensor); - delete c4_input_tensor; - c4_input_tensor = nullptr; - } - { - /* transpose2_kernel */ - Tensor *transpose_param_tensor = transpose2_kernel->in_tensors().at(1); - VectorSetNull(tensors, transpose_param_tensor); - delete transpose_param_tensor; - transpose_param_tensor = nullptr; - - Tensor *nwhc_tensor = c4_kernel->out_tensors().front(); - nwhc_tensor->set_format(NHWC); - for (auto end : end_kernels) { - end->set_in_tensor(nwhc_tensor, 0); - } - Tensor *trans_out = transpose2_kernel->out_tensors().front(); - VectorSetNull(tensors, trans_out); - delete trans_out; - trans_out = nullptr; - } + Tensor *conv_out_tensor = conv_kernel->out_tensors().front(); + conv_out_tensor->set_format(NC4HW4); + Tensor *c4_input_tensor = c4_kernel->in_tensors().front(); + c4_kernel->set_in_tensor(conv_out_tensor, 0); + VectorErase(tensors, c4_input_tensor); + delete c4_input_tensor; + c4_input_tensor = nullptr; /* kernel */ - VectorErase(kernels, transpose_kernel); - delete transpose_kernel; - transpose_kernel = nullptr; + VectorErase(kernels, traspose_kernel); + delete 
traspose_kernel; + traspose_kernel = nullptr; conv_kernel->set_out_kernels({c4_kernel}); c4_kernel->set_in_kernels({conv_kernel}); - c4_kernel->set_out_kernels(transpose2_kernel->out_kernels()); - for (auto end : end_kernels) { - end->set_in_kernels({c4_kernel}); - } - VectorErase(kernels, transpose2_kernel); - delete transpose2_kernel; - transpose2_kernel = nullptr; - return; } @@ -90,38 +60,27 @@ bool Nc4hw4PassMatch(std::vector *kernels, size_t index) { return false; } - kernel::LiteKernel *traspose_nhwc2nchw_kernel = start_kernel->out_kernels().front(); - if (traspose_nhwc2nchw_kernel->type() != Nc4hw4FormatTransposeOp) { + kernel::LiteKernel *traspose_kernel = start_kernel->out_kernels().front(); + if (start_kernel->type() != Nc4hw4FormatTransposeOp) { return false; } - if (traspose_nhwc2nchw_kernel->out_kernels().size() != 1) { + if (traspose_kernel->out_kernels().size() != 1) { return false; } - kernel::LiteKernel *end_kernel = traspose_nhwc2nchw_kernel->out_kernels().front(); + kernel::LiteKernel *end_kernel = traspose_kernel->out_kernels().front(); if (IsContain(Nc4hw4FormatInOpList, end_kernel->type()) == false) { return false; } - if (end_kernel->out_kernels().size() != 1) { - return false; - } - - kernel::LiteKernel *transpose_nchw2nhwc_kernel = end_kernel->out_kernels().front(); - if (transpose_nchw2nhwc_kernel->type() != Nc4hw4FormatTransposeOp) { - return false; - } /* double check ops topological sorted in kernel-list */ auto start_iter = find(kernels->begin(), kernels->end(), start_kernel); auto start_index = std::distance(kernels->begin(), start_iter); - auto traspose_nhwc2nchw_iter = find(kernels->begin(), kernels->end(), traspose_nhwc2nchw_kernel); - auto traspose_nhwc2nchw_index = std::distance(kernels->begin(), traspose_nhwc2nchw_iter); + auto transpose_iter = find(kernels->begin(), kernels->end(), traspose_kernel); + auto transpose_index = std::distance(kernels->begin(), transpose_iter); auto end_iter = find(kernels->begin(), kernels->end(), 
end_kernel); auto end_index = std::distance(kernels->begin(), end_iter); - auto transpose_nchw2nhwc_iter = find(kernels->begin(), kernels->end(), transpose_nchw2nhwc_kernel); - auto transpose_nchw2nhwc_index = std::distance(kernels->begin(), transpose_nchw2nhwc_iter); - if (start_index > traspose_nhwc2nchw_index || traspose_nhwc2nchw_index > end_index || - end_index > transpose_nchw2nhwc_index) { + if (start_index > transpose_index || transpose_index > end_index) { return false; } @@ -129,31 +88,31 @@ bool Nc4hw4PassMatch(std::vector *kernels, size_t index) { } bool Nc4hw4PassValid(const InnerContext *context, std::vector *kernels) { + return false; + if (context->IsGpuEnabled() || context->IsNpuEnabled()) { return false; } for (auto kernel : *kernels) { - if (kernel->op_parameter() != nullptr) { - if (kernel->op_parameter()->quant_type_ == schema::QuantType_AwareTraining || - kernel->op_parameter()->quant_type_ == schema::QuantType_PostTraining) { - return false; - } + if (kernel->op_parameter()->quant_type_ == schema::QuantType_AwareTraining || + kernel->op_parameter()->quant_type_ == schema::QuantType_PostTraining) { + return false; } } - return false; + return true; } -void Nc4hw4PassAct(std::vector *kernels, std::vector *tensors) { +void Nc4hw4Pass(std::vector *kernels, std::vector *tensors) { size_t kernel_size = kernels->size(); size_t index = 0; - for (; index + 3 < kernel_size; index++) { + for (; index < kernel_size - 2; index++) { kernel::LiteKernel *kernel = kernels->at(index); if (kernel->subgraph_type() != kernel::kNotSubGraph) { kernel::SubGraphKernel *subgraph = reinterpret_cast(kernel); std::vector &particial_nodes = subgraph->nodes(); - Nc4hw4PassAct(&particial_nodes, tensors); + Nc4hw4Pass(&particial_nodes, tensors); } if (Nc4hw4PassMatch(kernels, index)) { @@ -164,11 +123,4 @@ void Nc4hw4PassAct(std::vector *kernels, std::vector *kernels, - std::vector *tensors) { - if (Nc4hw4PassValid(context, kernels)) { - Nc4hw4PassAct(kernels, tensors); - } 
-} } // namespace mindspore::lite diff --git a/mindspore/lite/src/runtime/runtime_pass.h b/mindspore/lite/src/runtime/runtime_pass.h index a12d050461c..141c7d8e3c4 100644 --- a/mindspore/lite/src/runtime/runtime_pass.h +++ b/mindspore/lite/src/runtime/runtime_pass.h @@ -17,7 +17,6 @@ #ifndef MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_ #define MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_ -#ifndef RUNTIME_PASS_CLIP #include #include "src/lite_kernel.h" #include "src/sub_graph_kernel.h" @@ -27,15 +26,16 @@ namespace mindspore::lite { /* Nc4hw4 PASS - * before : --(nhwc)-- CONV --(nhwc)-- TRANSPOSE --(nchw)-- IN --(nchw)-- TRANSPOSE --(nhwc)-- - * after : --(nhwc)-- CONV --(nc4hw4)-- IN --(nhwc)-- + * before : CONV --(nhwc)-- TRANSPOSE --(nhwc)-- OP + * after : CONV --(nc4hw4)-- OP * */ static const schema::PrimitiveType Nc4hw4FormatTransposeOp = schema::PrimitiveType_Transpose; static const std::vector Nc4hw4FormatOutOpList = {schema::PrimitiveType_Conv2DFusion}; -static const std::vector Nc4hw4FormatInOpList = {schema::PrimitiveType_InstanceNorm}; -void Nc4hw4Pass(const InnerContext *context, std::vector *kernels, - std::vector *tensors); +static const std::vector Nc4hw4FormatInOpList = {schema::PrimitiveType_InstanceNorm, + schema::PrimitiveType_PadFusion}; +bool Nc4hw4PassValid(const InnerContext *context, std::vector *kernels); +void Nc4hw4Pass(std::vector *kernels, std::vector *tensors); } // namespace mindspore::lite -#endif + #endif // MINDSPORE_LITE_SRC_RUNTIME_RUNTIME_PASS_H_ diff --git a/mindspore/lite/src/scheduler.cc b/mindspore/lite/src/scheduler.cc index e55b112dcc6..6b6793d2d0c 100644 --- a/mindspore/lite/src/scheduler.cc +++ b/mindspore/lite/src/scheduler.cc @@ -34,6 +34,7 @@ #include "src/common/prim_util.h" #include "src/common/tensor_util.h" #include "src/runtime/infer_manager.h" +#include "src/runtime/runtime_pass.h" #include "src/sub_graph_split.h" #include "src/weight_decoder.h" #include "src/runtime/kernel/arm/fp16/fp16_op_handler.h" @@ -61,15 
+62,6 @@ kernel::SubGraphKernel *CreateCustomSubGraph(std::vector & } } // namespace -void Scheduler::SetSubgraphForPartialNode() { - for (auto &pair : partial_kernel_subgraph_index_map_) { - auto &partial_kernel = pair.first; - auto &subgraph_index = pair.second; - static_cast(partial_kernel->kernel()) - ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index)); - } -} - int Scheduler::InitKernels(std::vector dst_kernels) { if (is_train_session_) { return RET_OK; @@ -117,9 +109,14 @@ int Scheduler::Schedule(std::vector *dst_kernels) { } if (context_->enable_parallel_ && infershape_ret != RET_INFER_INVALID) { +#ifdef ENABLE_AUTO_PARALLEL auto search_sub_graph = SearchSubGraph(context_, src_model_, src_tensors_, &op_parameters_, &graph_output_node_indexes_); search_sub_graph.SubGraphSplit(); +#else + MS_LOG(ERROR) << unsupport_auto_parallel_log; + return RET_NOT_SUPPORT; +#endif } int ret = ScheduleGraphToKernels(dst_kernels); @@ -129,7 +126,9 @@ int Scheduler::Schedule(std::vector *dst_kernels) { return ret; } +#ifdef ENABLE_CONTROL_TENSORLIST SetSubgraphForPartialNode(); +#endif if (delegate_ != nullptr) { ret = ReplaceDelegateKernels(dst_kernels); if (ret != RET_OK) { @@ -137,8 +136,13 @@ int Scheduler::Schedule(std::vector *dst_kernels) { return ret; } } - FindAllInoutKernels(*dst_kernels); + if (Nc4hw4PassValid(context_, dst_kernels)) { + Nc4hw4Pass(dst_kernels, src_tensors_); + } + + FindAllInoutKernels(*dst_kernels); +#ifdef ENABLE_CONTROL_TENSORLIST if (IsControlFlowParttern(*dst_kernels)) { ret = ConstructControlFlowMainGraph(dst_kernels); if (ret != RET_OK) { @@ -146,6 +150,7 @@ int Scheduler::Schedule(std::vector *dst_kernels) { return ret; } } else { +#endif auto src_kernel = *dst_kernels; dst_kernels->clear(); std::map is_kernel_finish; @@ -154,7 +159,9 @@ int Scheduler::Schedule(std::vector *dst_kernels) { MS_LOG(ERROR) << "ConstructSubGraphs failed."; return ret; } +#ifdef ENABLE_CONTROL_TENSORLIST } +#endif ret = 
InitKernels(*dst_kernels); if (ret != RET_OK) { @@ -332,19 +339,6 @@ int Scheduler::RestoreSubGraphInput(const lite::Model::Node *partial_node) { return RET_OK; } -void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) { - dst_tensor->set_data_type(src_tensor->data_type()); - dst_tensor->set_format(src_tensor->format()); - dst_tensor->set_element_shape(src_tensor->element_shape()); - dst_tensor->set_shape(src_tensor->shape()); - std::vector cpy_tensors{}; - for (auto &tensor : src_tensor->tensors()) { - auto new_tensor = Tensor::CopyTensor(*tensor, false); - cpy_tensors.push_back(new_tensor); - } - dst_tensor->set_tensors(cpy_tensors); -} - void CopyCommonTensor(Tensor *dst_tensor, Tensor *src_tensor) { dst_tensor->set_data_type(src_tensor->data_type()); dst_tensor->set_shape(src_tensor->shape()); @@ -396,36 +390,6 @@ int Scheduler::InferPartialShape(const lite::Model::Node *node) { return ret; } -int Scheduler::InferSwitchShape(const lite::Model::Node *switch_node) { - MS_ASSERT(src_model_ != nullptr); - MS_ASSERT(switch_node != nullptr); - if (!IsSwitchNode(switch_node->primitive_)) { - MS_LOG(ERROR) << "Node is not a switch"; - return RET_PARAM_INVALID; - } - std::deque partial_cnode_to_infer{}; - auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch); - auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch); - for (auto &node : src_model_->all_nodes_) { - if ((IsContain(node->output_indices_, true_branch_output_index) || - IsContain(node->output_indices_, false_branch_output_index)) && - IsPartialNode(node->primitive_) && partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) { - partial_cnode_inferred_.insert(node); - partial_cnode_to_infer.push_back(node); - } - } - - while (!partial_cnode_to_infer.empty()) { - auto &node = partial_cnode_to_infer.front(); - partial_cnode_to_infer.pop_front(); - int ret = InferPartialShape(node); - if (ret != RET_OK) { - MS_LOG(WARNING) << 
"partial infer not ok, ret: " << ret; - } - } - return RET_OK; -} - Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) { MS_ASSERT(src_model_ != nullptr); MS_ASSERT(node != nullptr); @@ -441,21 +405,6 @@ Model::Node *Scheduler::NodeInputIsPartial(const lite::Model::Node *node) { return nullptr; } -Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) { - MS_ASSERT(src_model_ != nullptr); - MS_ASSERT(node != nullptr); - for (auto &iter : src_model_->all_nodes_) { - if (iter->output_indices_ == node->input_indices_) { - if (IsSwitchNode(iter->primitive_)) { - return iter; - } else { - return nullptr; - } - } - } - return nullptr; -} - int Scheduler::InferCallShape(const lite::Model::Node *node) { MS_ASSERT(src_model_ != nullptr); MS_ASSERT(node != nullptr); @@ -468,11 +417,12 @@ int Scheduler::InferCallShape(const lite::Model::Node *node) { if (partial_input) { return InferPartialShape(partial_input); } - +#ifdef ENABLE_CONTROL_TENSORLIST auto switch_input = NodeInputIsSwitch(node); if (switch_input) { return InferSwitchShape(switch_input); } +#endif MS_LOG(ERROR) << "call input is not partial and also not switch."; return RET_ERROR; @@ -1090,12 +1040,6 @@ kernel::LiteKernel *Scheduler::ScheduleNodeToKernel(const lite::Model::Node *src return kernel; } -bool Scheduler::SubGraphHasScheduled(const int &index) { - return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end(); -} - -void Scheduler::SubGraphMarkScheduled(const int &index) { scheduled_subgraph_index_.insert(index); } - bool Scheduler::IsControlFlowPattern(const lite::Model::Node &partial_node) { lite::Model::Node *partial_node_output = nullptr; for (auto output_index : partial_node.output_indices_) { @@ -1147,6 +1091,7 @@ int Scheduler::ScheduleSubGraphToKernels(size_t subgraph_index, std::vector &in_ten if (dtype == kObjectTypeString) { return kNumberTypeFloat32; } +#ifdef ENABLE_CONTROL_TENSORLIST if (dtype == kObjectTypeTensorType) { auto 
tensor_list = reinterpret_cast(tensor); auto tensor_list_dtype = tensor_list->tensors_data_type(); @@ -1292,6 +1242,7 @@ TypeId Scheduler::GetFirstFp32Fp16OrInt8Type(const std::vector &in_ten return tensor_list_dtype; } } +#endif if (dtype == kNumberTypeFloat32 || dtype == kNumberTypeFloat16 || dtype == kNumberTypeInt8 || dtype == kNumberTypeInt32 || dtype == kNumberTypeBool) { return dtype; @@ -1366,6 +1317,80 @@ kernel::SubGraphType Scheduler::PartialSubGraphType(const std::vectorprimitive_)) { + MS_LOG(ERROR) << "Node is not a switch"; + return RET_PARAM_INVALID; + } + std::deque partial_cnode_to_infer{}; + auto true_branch_output_index = switch_node->input_indices_.at(kSwitchTrueBranch); + auto false_branch_output_index = switch_node->input_indices_.at(kSwitchFalseBranch); + for (auto &node : src_model_->all_nodes_) { + if ((IsContain(node->output_indices_, true_branch_output_index) || + IsContain(node->output_indices_, false_branch_output_index)) && + IsPartialNode(node->primitive_) && partial_cnode_inferred_.find(node) == partial_cnode_inferred_.end()) { + partial_cnode_inferred_.insert(node); + partial_cnode_to_infer.push_back(node); + } + } + + while (!partial_cnode_to_infer.empty()) { + auto &node = partial_cnode_to_infer.front(); + partial_cnode_to_infer.pop_front(); + int ret = InferPartialShape(node); + if (ret != RET_OK) { + MS_LOG(WARNING) << "partial infer not ok, ret: " << ret; + } + } + return RET_OK; +} + +Model::Node *Scheduler::NodeInputIsSwitch(const lite::Model::Node *node) { + MS_ASSERT(src_model_ != nullptr); + MS_ASSERT(node != nullptr); + for (auto &iter : src_model_->all_nodes_) { + if (iter->output_indices_ == node->input_indices_) { + if (IsSwitchNode(iter->primitive_)) { + return iter; + } else { + return nullptr; + } + } + } + return nullptr; +} + +bool Scheduler::SubGraphHasScheduled(const int &index) { + return scheduled_subgraph_index_.find(index) != scheduled_subgraph_index_.end(); +} + +void Scheduler::SubGraphMarkScheduled(const 
int &index) { scheduled_subgraph_index_.insert(index); } + +void Scheduler::SetSubgraphForPartialNode() { + for (auto &pair : partial_kernel_subgraph_index_map_) { + auto &partial_kernel = pair.first; + auto &subgraph_index = pair.second; + static_cast(partial_kernel->kernel()) + ->set_subgraph_kernel(subgraph_index_subgraph_kernel_map_.at(subgraph_index)); + } +} + +void CopyTensorList(TensorList *dst_tensor, TensorList *src_tensor) { + dst_tensor->set_data_type(src_tensor->data_type()); + dst_tensor->set_format(src_tensor->format()); + dst_tensor->set_element_shape(src_tensor->element_shape()); + dst_tensor->set_shape(src_tensor->shape()); + std::vector cpy_tensors{}; + for (auto &tensor : src_tensor->tensors()) { + auto new_tensor = Tensor::CopyTensor(*tensor, false); + cpy_tensors.push_back(new_tensor); + } + dst_tensor->set_tensors(cpy_tensors); +} + bool Scheduler::IsControlFlowParttern(const std::vector &kernels) { if (std::any_of(kernels.begin(), kernels.end(), [](kernel::LiteKernel *item) { if (item->op_parameter()) { @@ -1398,4 +1423,5 @@ int Scheduler::ConstructControlFlowMainGraph(std::vector * kernels->insert(kernels->begin(), subgraph_kernel); return RET_OK; } +#endif } // namespace mindspore::lite diff --git a/mindspore/lite/src/scheduler.h b/mindspore/lite/src/scheduler.h index 3ef86742667..077e1d65836 100644 --- a/mindspore/lite/src/scheduler.h +++ b/mindspore/lite/src/scheduler.h @@ -56,8 +56,6 @@ class Scheduler { void FindNodeInoutTensors(const Model::Node &node, std::vector *inputs, std::vector *outputs); Model::Node *NodeInputIsPartial(const Model::Node *node); int InferPartialShape(const Model::Node *node); - Model::Node *NodeInputIsSwitch(const Model::Node *node); - int InferSwitchShape(const Model::Node *node); int InferCallShape(const Model::Node *node); int InferNodeShape(const Model::Node *node); int InferSubGraphShape(size_t subgraph_index); @@ -95,19 +93,24 @@ class Scheduler { std::vector ScheduleMainSubGraphToKernels(); 
kernel::LiteKernel *SchedulePartialToSubGraphKernel(const int &subgraph_index); kernel::SubGraphType PartialSubGraphType(const std::vector &kernels); - bool IsControlFlowParttern(const std::vector &kernels); - int ConstructControlFlowMainGraph(std::vector *kernels); // other methods static TypeId GetFirstFp32Fp16OrInt8Type(const std::vector &in_tensors); static void SetKernelTensorDataType(kernel::LiteKernel *kernel); int CopyPartialShapeToSubGraph(const lite::Model::Node *partial_node); int RestoreSubGraphInput(const lite::Model::Node *partial_node); + + bool IsControlFlowPattern(const lite::Model::Node &partial_node); + int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type); +#ifdef ENABLE_CONTROL_TENSORLIST + int InferSwitchShape(const Model::Node *node); + Model::Node *NodeInputIsSwitch(const Model::Node *node); bool SubGraphHasScheduled(const int &index); void SubGraphMarkScheduled(const int &index); void SetSubgraphForPartialNode(); - bool IsControlFlowPattern(const lite::Model::Node &partial_node); - int SubGraphPreferDataType(const int &subgraph_index, TypeId *prefer_data_type); + bool IsControlFlowParttern(const std::vector &kernels); + int ConstructControlFlowMainGraph(std::vector *kernels); +#endif protected: const InnerContext *context_ = nullptr; @@ -124,11 +127,13 @@ class Scheduler { std::unique_ptr sched_cb_; std::map primitives_; std::shared_ptr delegate_ = nullptr; - std::set scheduled_subgraph_index_{}; std::deque subgraphs_to_schedule_{}; - std::unordered_map partial_kernel_subgraph_index_map_{}; std::unordered_map subgraph_index_subgraph_kernel_map_{}; +#ifdef ENABLE_CONTROL_TENSORLIST + std::set scheduled_subgraph_index_{}; + std::unordered_map partial_kernel_subgraph_index_map_{}; std::set partial_cnode_inferred_{}; +#endif }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/sub_graph_kernel.cc b/mindspore/lite/src/sub_graph_kernel.cc index c75b955fea9..4e8b7637238 100644 --- 
a/mindspore/lite/src/sub_graph_kernel.cc +++ b/mindspore/lite/src/sub_graph_kernel.cc @@ -144,9 +144,9 @@ void SubGraphKernel::InitInputTensorInitRefCount() { } } -void SubGraphKernel::InitOutTensorInitRefCount() { +void SubGraphKernel::InitOutTensorInitRefCount(const std::vector *mask_kernels) { for (auto *node : nodes_) { - node->InitOutTensorInitRefCount(); + node->InitOutTensorInitRefCount(mask_kernels); } } @@ -221,14 +221,6 @@ int CpuSubGraph::Prepare() { int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) { MS_ASSERT(this->Context()->allocator.get() != nullptr); -#ifdef SUPPORT_GPU - // In heterogeneous scenarios of CPU and GPU, call MutableData to MapBuffer(synchronize data). - if (this->Context()->IsGpuEnabled()) { - for (auto tensor : this->in_tensors()) { - tensor->MutableData(); - } - } -#endif for (auto *kernel : nodes_) { MS_ASSERT(kernel != nullptr); diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index 0200b2ebd8b..647c1a075ef 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -14,8 +14,8 @@ * limitations under the License. 
*/ -#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_H -#define MINDSPORE_LITE_SRC_SUB_GRAPH_H +#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ +#define MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ #include #include @@ -101,7 +101,7 @@ class SubGraphKernel : public LiteKernel { // called after Run int ReSize() override; - void InitOutTensorInitRefCount() override; + void InitOutTensorInitRefCount(const std::vector *mask_kernels) override; void InitInputTensorInitRefCount(); @@ -109,7 +109,7 @@ class SubGraphKernel : public LiteKernel { std::string ToString() const override; - std::vector nodes() { return this->nodes_; } + std::vector &nodes() { return this->nodes_; } void DropNode(LiteKernel *node); @@ -226,4 +226,4 @@ class CustomSubGraph : public SubGraphKernel { int Execute(const KernelCallBack &before, const KernelCallBack &after) override; }; } // namespace mindspore::kernel -#endif // MINDSPORE_LITE_SRC_SUB_GRAPH_H +#endif // MINDSPORE_LITE_SRC_SUB_GRAPH_KERNEL_H_ diff --git a/mindspore/lite/src/tensor.cc b/mindspore/lite/src/tensor.cc index 93822eb96e3..8dc10b2e0af 100644 --- a/mindspore/lite/src/tensor.cc +++ b/mindspore/lite/src/tensor.cc @@ -316,7 +316,9 @@ void Tensor::FreeData() { this->data_ = nullptr; } else { allocator_->Free(this->data_); - this->data_ = nullptr; + if (!IS_STATIC_ALLOCATOR(allocator_) || (allocator_->RefCount(this->data_) != 0)) { + this->data_ = nullptr; + } } } diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index 1933aeec957..86cdd64c305 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -34,17 +34,20 @@ namespace mindspore { namespace lite { + +#define STATIC_ALLOCATION -271964 +#define IS_STATIC_ALLOCATOR(allocator) ((allocator != nullptr) && (allocator->RefCount(nullptr) == STATIC_ALLOCATION)) struct LiteQuantParam { double scale; int32_t zeroPoint; float var_corr{1}; float mean_corr{0}; - bool inited; + bool inited{false}; std::vector clusters{}; - int bitNum; - int roundType; - int multiplier; 
- int dstDtype; + int bitNum{8}; + int roundType{1}; + int multiplier{1}; + int dstDtype{32}; }; class Tensor : public mindspore::tensor::MSTensor { @@ -133,7 +136,6 @@ class Tensor : public mindspore::tensor::MSTensor { void set_format(mindspore::Format format) override { this->format_ = format; } mindspore::Format format() const override { return this->format_; } - virtual int ref_count() const { return ref_count_; } virtual int init_ref_count() const { return this->init_ref_count_; } diff --git a/mindspore/lite/src/tensorlist.h b/mindspore/lite/src/tensorlist.h index d03ee57bd2d..e2474eb4d76 100644 --- a/mindspore/lite/src/tensorlist.h +++ b/mindspore/lite/src/tensorlist.h @@ -24,7 +24,7 @@ #include "src/common/log_adapter.h" #include "schema/model_generated.h" #include "src/tensor.h" - +#ifdef ENABLE_CONTROL_TENSORLIST namespace mindspore::lite { /** * Tensorlist is a container of vector, in which each element is a tensor object. @@ -177,5 +177,5 @@ class TensorList : public Tensor { int max_elements_num_ = -1; }; } // namespace mindspore::lite - +#endif #endif // MINDSPORE_LITE_SRC_TENSORLIST_H_ diff --git a/mindspore/lite/src/train/train_session.cc b/mindspore/lite/src/train/train_session.cc index 8a5514be5a7..d6601eafcde 100644 --- a/mindspore/lite/src/train/train_session.cc +++ b/mindspore/lite/src/train/train_session.cc @@ -24,22 +24,25 @@ #include #include #include "include/errorcode.h" -#include "src/common/utils.h" -#include "src/tensor.h" -#include "src/lite_model.h" -#include "src/train/loss_kernel.h" -#include "src/train/optimizer_kernel.h" -#include "src/sub_graph_kernel.h" -#include "src/train/train_populate_parameter.h" -#include "src/train/train_populate_parameter_v0.h" #include "src/executor.h" +#include "src/lite_model.h" +#include "src/lite_kernel_util.h" +#include "src/sub_graph_kernel.h" +#include "src/tensor.h" #include "src/kernel_registry.h" +#include "src/common/prim_util.h" +#include "src/common/tensor_util.h" +#include 
"src/common/utils.h" #include "src/runtime/kernel/arm/fp32_grad/convolution.h" #include "src/runtime/kernel/arm/fp32/batchnorm_fp32.h" -#include "src/common/tensor_util.h" +#include "src/train/loss_kernel.h" +#include "src/train/optimizer_kernel.h" #include "src/train/train_utils.h" #include "src/train/train_export.h" -#include "src/common/prim_util.h" +#include "src/train/opt_allocator.h" +#include "src/train/static_allocator.h" +#include "src/train/train_populate_parameter.h" +#include "src/train/train_populate_parameter_v0.h" namespace mindspore { namespace lite { @@ -67,6 +70,7 @@ int TrainSession::Init(const Context *context, const TrainCfg *train_cfg) { } cfg_ = *train_cfg; } + allocator_ = context->allocator; return lite::LiteSession::Init(context); } @@ -158,6 +162,51 @@ int TrainSession::InitCallBack() { return RET_OK; } +int TrainSession::AllocTensors(const std::vector &kernels) { + if (!IS_STATIC_ALLOCATOR(allocator_)) return RET_OK; + OptAllocator allocator; + std::unordered_map ref_count; + std::unordered_map offset_map; + for (auto kernel : kernels) { + for (auto tensor : kernel->out_tensors()) { + size_t size = tensor->Size(); + size_t offset = allocator.Malloc(size); + offset_map[tensor] = offset; + ref_count[tensor] = tensor->init_ref_count(); + } + for (auto tensor : kernel->in_tensors()) { + if (tensor->category() == lite::Tensor::VAR) { + int count = ref_count[tensor] - 1; + ref_count[tensor] = count; + if (count == 0) { + allocator.Free(offset_map[tensor]); + } + } + } + } + // Set Tensor data + if (tensors_data_ == nullptr) { + auto size = allocator.total_size(); + auto buf = malloc(size); + if (buf == nullptr) { + MS_LOG(ERROR) << "cannot allocate buffer size" << size; + return RET_ERROR; + } + StaticAllocator *alloc = reinterpret_cast(allocator_.get()); + alloc->SetContex(buf, size); + tensors_data_ = buf; + } + for (auto kernel : train_kernels_) { + for (auto tensor : kernel->out_tensors()) { + auto it = offset_map.find(tensor); + if (it != 
offset_map.end()) { + tensor->set_data(reinterpret_cast(reinterpret_cast(tensors_data_) + it->second)); + } + } + } + return RET_OK; +} + int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; } int TrainSession::CompileTrainGraph(std::shared_ptr model) { @@ -193,10 +242,21 @@ int TrainSession::CompileTrainGraph(std::shared_ptr model) { MS_LOG(ERROR) << "failed to allocate space"; return RET_ERROR; } + ret = AllocTensors(train_kernels_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "failed to allocate space"; + return RET_ERROR; + } return RET_OK; } -TrainSession::~TrainSession() { FreeWorkSpace(); } +TrainSession::~TrainSession() { + FreeWorkSpace(); + if (tensors_data_ != nullptr) { + free(tensors_data_); + tensors_data_ = nullptr; + } +} int TrainSession::ExecKernels(const KernelCallBack &before, const KernelCallBack &after, const std::vector &run_kernels) { @@ -412,6 +472,19 @@ int TrainSession::Train() { output_node_map_ = train_output_node_map_; output_tensor_map_ = train_output_tensor_map_; output_tensor_names_ = train_output_tensor_names_; + kernel::LiteKernelUtil::InitTensorInitRefCount(train_kernels_); + for (auto &ms_tensors : eval_output_node_map_) { // Allow to look at prediction also during training + for (auto &ms_tensor : ms_tensors.second) { + lite::Tensor *lite_tensor = static_cast(ms_tensor); + lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1); + } + } + // allocate tensors + auto ret = AllocTensors(train_kernels_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "failed to allocate tensor space"; + return RET_ERROR; + } return RET_OK; } @@ -431,6 +504,18 @@ int TrainSession::Eval() { output_node_map_ = eval_output_node_map_; output_tensor_map_ = eval_output_tensor_map_; output_tensor_names_ = eval_output_tensor_names_; + kernel::LiteKernelUtil::InitTensorInitRefCount(inference_kernels_); + for (auto &ms_tensors : eval_output_node_map_) { + for (auto &ms_tensor : ms_tensors.second) { + lite::Tensor *lite_tensor = 
static_cast(ms_tensor); + lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1); + } + } + auto ret = AllocTensors(inference_kernels_); + if (ret != RET_OK) { + MS_LOG(ERROR) << "failed to allocate space"; + return RET_ERROR; + } return RET_OK; } @@ -766,7 +851,12 @@ session::LiteSession *session::TrainSession::CreateTrainSession(const std::strin MS_LOG(ERROR) << "create session failed"; return nullptr; } - + if (context->allocator == nullptr) { + const_cast(context)->allocator = std::shared_ptr(new (std::nothrow) StaticAllocator()); + if (context->allocator == nullptr) { + MS_LOG(ERROR) << " cannot convert to static allocation"; + } + } auto ret = session->Init(context, cfg); if (ret != mindspore::lite::RET_OK) { MS_LOG(ERROR) << "init session failed"; diff --git a/mindspore/lite/src/train/train_session.h b/mindspore/lite/src/train/train_session.h index a21ab9f07f3..257d29180cc 100644 --- a/mindspore/lite/src/train/train_session.h +++ b/mindspore/lite/src/train/train_session.h @@ -147,6 +147,7 @@ class TrainSession : virtual public lite::LiteSession { void FreeRestoreTensors(); bool AllInputsNeedScale(kernel::LiteKernel *kernel); void FreeWorkSpace(); + int AllocTensors(const std::vector &kernels); std::map restored_origin_tensors_; int virtual_batch_idx_ = 0; @@ -155,6 +156,8 @@ class TrainSession : virtual public lite::LiteSession { void *workspace_ = nullptr; SchedCallBack sched_mix_precision_callback_; bool train_mode_ = false; + void *tensors_data_ = nullptr; + std::shared_ptr allocator_; }; } // namespace lite diff --git a/mindspore/lite/src/weight_decoder.cc b/mindspore/lite/src/weight_decoder.cc index aca7b1ca5a3..589d2284454 100644 --- a/mindspore/lite/src/weight_decoder.cc +++ b/mindspore/lite/src/weight_decoder.cc @@ -20,11 +20,13 @@ #include "src/huffman_decode.h" namespace mindspore::lite { +constexpr int kBit8 = 8; +constexpr int kBit32 = 32; std::vector StringToBitVector(const std::string &str) { - std::vector vec(str.size() * 8); + 
std::vector vec(str.size() * kBit8); size_t index = 0; for (auto ch : str) { - for (size_t shift = 8; shift > 0; shift--) { + for (size_t shift = kBit8; shift > 0; shift--) { vec[index++] = (ch >> (shift - 1)) & 0x1; } } @@ -47,7 +49,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) if (unique_value_cnt == 0) { unique_value_cnt = 1 << bit_num; } - // parse unique_value_set; + // parse unique_value_set std::vector unique_values; for (size_t i = 0; i < unique_value_cnt; i++) { int unique_value = 0; @@ -81,7 +83,7 @@ STATUS IndexingDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) return RET_NULL_PTR; } auto dst_data = dst_tensor->data_c(); - if (bit_num <= 8) { + if (bit_num <= kBit8) { ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); } else { ret = UnIndexTensorData(unique_values, unique_value_index_vec, dst_data, dst_tensor->Size()); @@ -102,15 +104,15 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { size_t index = 0; // parse coor_best_bit size_t coor_best_bit = 0; - for (size_t i = 0; i < 8; i++) { + for (size_t i = 0; i < kBit8; i++) { bool bit = bit_vec[index++]; - coor_best_bit |= bit << (8 - i - 1); + coor_best_bit |= bit << (kBit8 - i - 1); } // parse nz_cnt size_t nz_cnt = 0; - for (size_t i = 0; i < 32; i++) { + for (size_t i = 0; i < kBit32; i++) { bool bit = bit_vec[index++]; - nz_cnt |= bit << (32 - i - 1); + nz_cnt |= bit << (kBit32 - i - 1); } // parse unique_value cnt size_t unique_value_cnt = 0; @@ -167,7 +169,7 @@ STATUS SparseDecompress(const schema::Tensor &src_tensor, Tensor *dst_tensor) { } auto dst_data = dst_tensor->data_c(); - if (bit_num <= 8) { + if (bit_num <= kBit8) { ret = UnSparseTensorData(unique_values, unique_value_index_vec, coor_vec, src_tensor.quantParams(), elem_cnt, coor_best_bit, dst_data, dst_tensor->Size()); } else { @@ -233,6 +235,7 @@ int WeightDecoder::DequantWeight(lite::Tensor *input_tensor, 
bool channel_first, return RET_OK; } +#ifdef ENABLE_HUFFMAN_DECODE int WeightDecoder::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { MS_ASSERT(dst_tensor != nullptr); if (!dst_tensor->IsConst() || !src_tensor.enableHuffmanCode()) { @@ -262,6 +265,7 @@ int WeightDecoder::DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Ten } return RET_OK; } +#endif int WeightDecoder::UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor) { MS_ASSERT(dst_tensor != nullptr); diff --git a/mindspore/lite/src/weight_decoder.h b/mindspore/lite/src/weight_decoder.h index e8fd3c96454..0d4097f62a9 100644 --- a/mindspore/lite/src/weight_decoder.h +++ b/mindspore/lite/src/weight_decoder.h @@ -128,7 +128,9 @@ class WeightDecoder { static int UnPackToInt(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); +#ifdef ENABLE_HUFFMAN_DECODE static int DecodeHuffmanCode(const schema::Tensor &src_tensor, lite::Tensor *dst_tensor); +#endif static int DequantNode(OpParameter *op_parameter, const std::vector &in_tensors, TypeId dst_data_type); diff --git a/mindspore/lite/test/CMakeLists.txt b/mindspore/lite/test/CMakeLists.txt index 1f78a5cb6f9..ef6612e9e97 100644 --- a/mindspore/lite/test/CMakeLists.txt +++ b/mindspore/lite/test/CMakeLists.txt @@ -100,6 +100,7 @@ set(TEST_LITE_SRC ${KERNEL_OP_SRC} ${LITE_DIR}/src/runtime/inner_allocator.cc ${LITE_DIR}/src/runtime/infer_manager.cc + ${LITE_DIR}/src/runtime/runtime_pass.cc ${LITE_DIR}/src/tensor.cc ${LITE_DIR}/src/ms_tensor.cc ${LITE_DIR}/src/tensorlist.cc @@ -208,7 +209,6 @@ if(MSLITE_ENABLE_CONVERTER) ${LITE_DIR}/tools/optimizer/common/gllo_utils.cc ${LITE_DIR}/tools/optimizer/common/format_utils.cc ${LITE_DIR}/tools/optimizer/common/multiple_pattern_process_pass.cc - ${LITE_DIR}/tools/optimizer/format/conv_weight_format.cc ${LITE_DIR}/tools/optimizer/format/delete_redundant_transpose.cc ${LITE_DIR}/tools/optimizer/format/to_format_base.cc 
${LITE_DIR}/tools/optimizer/format/to_nchw_format.cc @@ -301,6 +301,7 @@ file(GLOB_RECURSE TEST_CASE_KERNEL_SRC ${TEST_DIR}/ut/src/runtime/kernel/arm/fp32/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/int8/*.cc ${TEST_DIR}/ut/src/runtime/kernel/arm/string/*.cc + ${TEST_DIR}/ut/src/runtime/runtime_pass_tests.cc ${TEST_DIR}/ut/nnacl/infer/*.cc ) diff --git a/mindspore/lite/test/config/models_ms_train.cfg b/mindspore/lite/test/config/models_ms_train.cfg index 7ec90491da0..b8d65866245 100644 --- a/mindspore/lite/test/config/models_ms_train.cfg +++ b/mindspore/lite/test/config/models_ms_train.cfg @@ -40,4 +40,7 @@ mobilenetv1 vb 0.5 mobilenetv2 vb 0.5 mobilenetv3 vb 0.5 emnist transfer +unified_api code_example +train_lenet code_example +train_lenet_java code_example # LAST diff --git a/mindspore/lite/test/config/models_npu.cfg b/mindspore/lite/test/config/models_npu.cfg index f1d90c4e9b6..fff1d7aaf65 100644 --- a/mindspore/lite/test/config/models_npu.cfg +++ b/mindspore/lite/test/config/models_npu.cfg @@ -88,3 +88,7 @@ ml_video_edit_hair_dyeing_segmodel_v2 0.5 ml_video_edit_makeup_mobilenetv203.onnx 2 ml_video_edit_hairline_segmentation;3 0.5 ml_video_edit_hair_dyeing_migrate_v2.onnx;4 0.5 +ml_audio_kit_encoder_v5.pb;6;1,32:1,32:1,32:1,32:1:1 +fsr_270_mindspore.pb 1 +fsr_360_mindspore.pb 1 +fsr_720_mindspore.pb 1 diff --git a/mindspore/lite/test/config/models_onnx.cfg b/mindspore/lite/test/config/models_onnx.cfg index e86513bc650..bd303c24687 100644 --- a/mindspore/lite/test/config/models_onnx.cfg +++ b/mindspore/lite/test/config/models_onnx.cfg @@ -100,3 +100,8 @@ gender_lstm_vad.onnx gender_resnet34_lzl.onnx # cur acc for tiny-yolov3-11 is 2.5% because the Unsqueeze_concat_7:0's output of the last op has very small numbers. 
tiny-yolov3-11.onnx;2;1,224,224,3:1,2 3 +# cur acc for ml_video_edit_art_transfer is 2+% +ml_video_edit_art_transfer.onnx;3 +ssd-10.onnx;;;;calib_only +Q888_CV_face_recognition_self.onnx +ml_video_edit_dimming_tech_model_styleGan.onnx;2 diff --git a/mindspore/lite/test/config/models_onnx_fp16.cfg b/mindspore/lite/test/config/models_onnx_fp16.cfg index 4c23284359c..d9ec6e16a45 100644 --- a/mindspore/lite/test/config/models_onnx_fp16.cfg +++ b/mindspore/lite/test/config/models_onnx_fp16.cfg @@ -102,3 +102,4 @@ ml_asr_decoder_202103.onnx;2;1,64,512:1,64 0.5 ml_video_edit_makeup_mobilenetv203.onnx 4 # The input of ml_video_edit_hair_dyeing_migrate_v2.onnx should be between [0, 1] ml_video_edit_hair_dyeing_migrate_v2.onnx;4 2.5 +Q888_CV_face_recognition_self.onnx 3.5 diff --git a/mindspore/lite/test/config/models_posttraining.cfg b/mindspore/lite/test/config/models_posttraining.cfg index 5c997283099..f684576a709 100644 --- a/mindspore/lite/test/config/models_posttraining.cfg +++ b/mindspore/lite/test/config/models_posttraining.cfg @@ -1,5 +1,5 @@ ml_face_mnet 105 ml_face_landmark_2 2 mobilenet.tflite 0.5 -#transformer_20200831_encoder_fp32.tflite;36 70 -#transformer_20200831_decoder_fp32.tflite;11 35 +transformer_20200831_encoder_fp32.tflite;36 70 +transformer_20200831_decoder_fp32.tflite;11 35 diff --git a/mindspore/lite/test/config/models_tf.cfg b/mindspore/lite/test/config/models_tf.cfg index 3aa1a4e24a8..999fb519b56 100644 --- a/mindspore/lite/test/config/models_tf.cfg +++ b/mindspore/lite/test/config/models_tf.cfg @@ -104,4 +104,6 @@ hiai_nlu_model_v1.pb;3;1,16:1,16:1,16 2.0 hiai_nlu_model_v2.pb;7;1,5:1,6:1,174:1,98:1,5:1,5:1,5 hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32 hiai_nlu_model_single.pb;3;1,32:1,32:1,32 - +fsr_270_mindspore.pb +fsr_360_mindspore.pb +fsr_720_mindspore.pb diff --git a/mindspore/lite/test/config/models_tf_fp16.cfg b/mindspore/lite/test/config/models_tf_fp16.cfg index 3ce1b398e3b..3196d0697e3 100644 --- 
a/mindspore/lite/test/config/models_tf_fp16.cfg +++ b/mindspore/lite/test/config/models_tf_fp16.cfg @@ -88,3 +88,6 @@ hiai_transformer_encoder.pb;15 4 decoder_step_nocumsum_v5.pb;13;1:1,512:1,1429,2:1,127:1,127:1,127:1,127,320:1,80:1,512:1,512:1,512:1,512:1,512 1.2 hiai_nlu_model_multi.pb;6;1,32:1,32:1,6:1,11:1,74:1,32 25 hiai_nlu_model_single.pb;3;1,32:1,32:1,32 2470 +fsr_270_mindspore.pb 6.0 +fsr_360_mindspore.pb 6.5 +fsr_720_mindspore.pb 2.0 diff --git a/mindspore/lite/test/runtest.sh b/mindspore/lite/test/runtest.sh index 4bd7a81d1b1..91a33f61883 100644 --- a/mindspore/lite/test/runtest.sh +++ b/mindspore/lite/test/runtest.sh @@ -84,3 +84,6 @@ echo 'run mindrt parallel ut test' echo 'user set output tensors st test' ./lite-test --gtest_filter="GraphTest.UserSetGraphOutput*" + +echo 'runtime pass' +./lite-test --gtest_filter="RuntimePass.*" diff --git a/mindspore/lite/test/st/run_benchmark_nets.sh b/mindspore/lite/test/st/run_benchmark_nets.sh index 3a5de9cbfaa..7a6db1a6e40 100644 --- a/mindspore/lite/test/st/run_benchmark_nets.sh +++ b/mindspore/lite/test/st/run_benchmark_nets.sh @@ -119,11 +119,10 @@ if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86" || $backen fi if [[ $backend == "all" || $backend == "arm32_3516D" ]]; then - exit 0 -# sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend -# hi3516_status=$? -# if [[ $hi3516_status -ne 0 ]]; then -# echo "Run nnie hi3516 failed" -# exit 1 -# fi + sh $cur_path/scripts/nnie/run_converter_nnie.sh -r $release_path -m $models_path -d $device_id -e $backend + hi3516_status=$? 
+ if [[ $hi3516_status -ne 0 ]]; then + echo "Run nnie hi3516 failed" + exit 1 + fi fi diff --git a/mindspore/lite/test/st/scripts/base_functions.sh b/mindspore/lite/test/st/scripts/base_functions.sh index 1cc37e0e18f..480009512fe 100644 --- a/mindspore/lite/test/st/scripts/base_functions.sh +++ b/mindspore/lite/test/st/scripts/base_functions.sh @@ -146,18 +146,21 @@ function Run_Benchmark() { if [[ $6 == "arm64" && $7 == "CPU" && ! ${cfg_file_name} =~ "fp16" ]]; then benchmark_mode="calib+loop" fi - # adjust file name - infix="" + # adjust precision mode mode="fp32" if [[ ${cfg_file_name} =~ "fp16" ]]; then mode="fp16" - elif [[ ${cfg_file_name} =~ "bit" ]]; then + fi + # adjust file name + infix="" + if [[ ${cfg_file_name} =~ "bit" ]]; then infix="_${cfg_file##*_}" infix=${infix%.*} elif [[ ${cfg_file_name} =~ "_train" ]]; then infix="_train" elif [[ ${cfg_file_name} =~ "_weightquant" ]]; then infix="_weightquant" + benchmark_mode="calib" elif [[ ${cfg_file_name} =~ "_posttraining" ]]; then model_name=${model_name}"_posttraining" elif [[ ${cfg_file_name} =~ "_process_only" ]]; then @@ -198,6 +201,9 @@ function Run_Benchmark() { if [[ ${mode} == "fp16" ]]; then enableFp16="true" fi + if [[ ${extra_info} =~ "calib_only" ]]; then + benchmark_mode="calib" + fi # start running benchmark echo "---------------------------------------------------------" >> "$4" if [[ ${benchmark_mode} = "calib" || ${benchmark_mode} = "calib+loop" ]]; then diff --git a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh index 85659162783..96b47a91080 100755 --- a/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh +++ b/mindspore/lite/test/st/scripts/nnie/run_converter_nnie.sh @@ -64,8 +64,8 @@ function Run_Hi3516() { # cp files to nfs shared folder echo "start push files to hi3516" echo ${device_ip} - sshpass -p "mindspore@123" scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1 - 
sshpass -p "mindspore@123" ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh" + scp ${benchmark_test_path}/* root@${device_ip}:/user/nnie/benchmark_test/ || exit 1 + ssh root@${device_ip} "cd /user/nnie/benchmark_test; sh run_benchmark_nnie.sh" if [ $? = 0 ]; then run_result='hi3516: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}; else diff --git a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh index e020b15f03c..e1b807887c0 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_arm64.sh @@ -15,9 +15,9 @@ function Run_Converter() { mkdir -p ${ms_models_path} # Prepare the config file list local fp32_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config") + "$models_weightquant_9bit_config" "$models_process_only_config") local fp16_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config") # Convert models: @@ -34,9 +34,9 @@ function Run_Converter() { function Run_arm64() { # Prepare the config file list local arm64_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_compatibility_config" \ 
"$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config" "$models_compatibility_config") + "$models_weightquant_9bit_config" "$models_process_only_config" "$models_process_only_fp16_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${arm64_cfg_file_list[*]}" . '/data/local/tmp' $run_arm64_fp32_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id @@ -44,7 +44,8 @@ function Run_arm64() { # Run on arm64-fp16 platform: function Run_arm64_fp16() { - local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config") + local arm64_cfg_file_list=("$models_onnx_fp16_config" "$models_caffe_fp16_config" "$models_tflite_fp16_config" "$models_tf_fp16_config" \ + "$models_process_only_fp16_config") # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${arm64_cfg_file_list[*]}" . 
'/data/local/tmp' $run_arm64_fp16_log_file $run_benchmark_result_file 'arm64' 'CPU' $device_id } @@ -90,8 +91,7 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg models_tf_config=${basepath}/../config/models_tf.cfg models_caffe_config=${basepath}/../config/models_caffe.cfg models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg -models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg -models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg +models_posttraining_config=${basepath}/../config/models_posttraining.cfg models_onnx_config=${basepath}/../config/models_onnx.cfg models_onnx_fp16_config=${basepath}/../config/models_onnx_fp16.cfg models_caffe_fp16_config=${basepath}/../config/models_caffe_fp16.cfg @@ -103,7 +103,8 @@ models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg models_weightquant_config=${basepath}/../config/models_weightquant.cfg models_compatibility_config=${basepath}/../config/models_compatibility.cfg -models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg +models_process_only_config=${basepath}/../config/models_process_only.cfg +models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg ms_models_path=${basepath}/ms_models diff --git a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh index e1e137e8b1e..dd61c255e70 100644 --- a/mindspore/lite/test/st/scripts/run_benchmark_x86.sh +++ b/mindspore/lite/test/st/scripts/run_benchmark_x86.sh @@ -51,9 +51,9 @@ function Run_Converter() { # Prepare the config file list local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" 
"$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config") + "$models_weightquant_9bit_config") # Convert models: # $1:cfgFileList; $2:inModelPath; $3:outModelPath; $4:logFile; $5:resultFile; Convert "${x86_cfg_file_list[*]}" $models_path $ms_models_path $run_converter_log_file $run_converter_result_file @@ -102,9 +102,9 @@ function Run_x86() { # Prepare the config file list local x86_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config") + "$models_weightquant_9bit_config" "$models_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${x86_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -120,9 +120,9 @@ function Run_x86_sse() { # Prepare the config file list local sse_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" 
"$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config") + "$models_weightquant_9bit_config" "$models_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; Run_Benchmark "${sse_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_sse_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -138,9 +138,9 @@ function Run_x86_avx() { # Prepare the config file list local avx_cfg_file_list=("$models_tf_config" "$models_tflite_config" "$models_caffe_config" "$models_onnx_config" "$models_mindspore_config" \ - "$models_mindspore_train_config" "$models_tflite_posttraining_config" "$models_caffe_posttraining_config" \ + "$models_mindspore_train_config" "$models_posttraining_config" "$models_process_only_fp16_config" \ "$models_tflite_awaretraining_config" "$models_weightquant_config" "$models_weightquant_7bit_config" \ - "$models_weightquant_9bit_config" "$models_for_process_only_config") + "$models_weightquant_9bit_config" "$models_process_only_config") # Run converted models: # $1:cfgFileList; $2:modelPath; $3:dataPath; $4:logFile; $5:resultFile; $6:platform; $7:processor; $8:phoneId; $9:benchmark_mode Run_Benchmark "${avx_cfg_file_list[*]}" $ms_models_path $models_path $run_x86_avx_log_file $run_benchmark_result_file 'x86' 'CPU' '' @@ -219,15 +219,15 @@ models_tflite_config=${basepath}/../config/models_tflite.cfg models_tf_config=${basepath}/../config/models_tf.cfg models_caffe_config=${basepath}/../config/models_caffe.cfg models_tflite_awaretraining_config=${basepath}/../config/models_tflite_awaretraining.cfg -models_tflite_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg -models_caffe_posttraining_config=${basepath}/../config/models_caffe_posttraining.cfg +models_posttraining_config=${basepath}/../config/models_tflite_posttraining.cfg models_onnx_config=${basepath}/../config/models_onnx.cfg 
models_mindspore_config=${basepath}/../config/models_mindspore.cfg models_mindspore_train_config=${basepath}/../config/models_mindspore_train.cfg models_weightquant_7bit_config=${basepath}/../config/models_weightquant_7bit.cfg models_weightquant_9bit_config=${basepath}/../config/models_weightquant_9bit.cfg models_weightquant_config=${basepath}/../config/models_weightquant.cfg -models_for_process_only_config=${basepath}/../config/models_for_process_only.cfg +models_process_only_config=${basepath}/../config/models_process_only.cfg +models_process_only_fp16_config=${basepath}/../config/models_process_only_fp16.cfg ms_models_path=${basepath}/ms_models diff --git a/mindspore/lite/test/st/scripts/run_net_train.sh b/mindspore/lite/test/st/scripts/run_net_train.sh index 86121ab8283..cde5ff984d4 100755 --- a/mindspore/lite/test/st/scripts/run_net_train.sh +++ b/mindspore/lite/test/st/scripts/run_net_train.sh @@ -49,8 +49,8 @@ function Run_Converter() { # Convert mindspore train models: while read line; do LFS=" " read -r -a line_array <<< ${line} - parse_line convert local model_prefix=${line_array[0]}_train + parse_line convert if [[ "$?" 
== "1" ]]; then continue; fi if [[ $model_name == \#* ]]; then continue @@ -93,6 +93,23 @@ function Run_Converter() { return ${fail} } +function should_run_example() { + ret=0 + while read line; do + LFS=" " read -r -a line_array <<< ${line} + model_name=${line_array[0]} + if [[ $model_name == \#* ]]; then + continue + fi + if [[ $model_name == "$1" ]]; then + if [[ ${line_array[1]} == "code_example" ]]; then + ret=1 + fi + fi + done < ${models_ms_train_config} + return $ret +} + function parse_line() { i=1 loss_name= @@ -138,6 +155,9 @@ function parse_line() { fi check_convert=1 ;; + "code_example") + ret=1 + ;; *) check=`echo "${line_array[i]}" | grep -E '^\-?[0-9]*\.?[0-9]+$'` if [ "${check}" != "" ] ; then @@ -208,9 +228,9 @@ function Run_x86() { --virtualBatch=${virtual_batch} \ --lossName=${loss_name} >> "${run_x86_log_file}" if [ $? = 0 ]; then - run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file} + run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' pass'; echo ${run_result} >> ${run_benchmark_train_result_file} else - run_result='x86_'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file} + run_result='x86'${log_suffix}': '${model_name}''${suffix_print}' failed'; echo ${run_result} >> ${run_benchmark_train_result_file} fail=1 fi done < ${models_ms_train_config} @@ -351,7 +371,6 @@ ENDM } function Run_CodeExamples() { - ls ${basepath}/../../ fail=0 target="x86" tarball_path=${x86_path}/mindspore-lite-${version}-linux-x64.tar.gz @@ -360,10 +379,13 @@ function Run_CodeExamples() { tarball_path=${arm64_path}/mindspore-lite-${version_arm64}-android-aarch64.tar.gz export ANDROID_SERIAL=${device_id} fi + should_run_example "train_lenet_java" + should_run=$? 
+ export PATH=${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/converter/:$PATH export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/lib/:${x86_path}/mindspore-lite-${version}-linux-x64/tools/converter/third_party/glog/lib - if [[ $backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java" ]]; then + if [[ "$should_run" == "1" && ($backend == "all" || $backend == "x86-all" || $backend == "x86_train" || $backend == "x86-java") ]]; then cd ${basepath}/../../examples/train_lenet_java || exit 1 chmod 777 ./prepare_and_run.sh ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -m ${models_path}/code_example.mindir >> ${run_code_examples_log_file} @@ -378,37 +400,46 @@ function Run_CodeExamples() { fi if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" || $backend == "arm64_train" ]]; then - cd ${basepath}/../../examples/unified_api || exit 1 - chmod 777 ./prepare_and_run.sh - chmod 777 ./*/*.sh - ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} - accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') - if [ $accurate -eq 1 ]; then - echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file} - echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file} - else - echo "Unified API demo failure" >> ${run_code_examples_log_file} - echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file} - fail=1 - fi - rm -rf package*/dataset - cd - - cd ${basepath}/../../examples/train_lenet || exit 1 - chmod 777 ./prepare_and_run.sh - chmod 777 ./*/*.sh - ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> 
${run_code_examples_log_file} - accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') - if [ $accurate -eq 1 ]; then - echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file} - echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file} - else - echo "Train Lenet demo failure" >> ${run_code_examples_log_file} - echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file} - fail=1 + should_run_example "unified_api" + should_run=$? + if [[ "$should_run" == "1" ]]; then + cd ${basepath}/../../examples/unified_api || exit 1 + chmod 777 ./prepare_and_run.sh + chmod 777 ./*/*.sh + ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} + accurate=$(tail -20 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') + if [ $accurate -eq 1 ]; then + echo "Unified API Trained and reached accuracy" >> ${run_code_examples_log_file} + echo 'code_examples: unified_api pass' >> ${run_benchmark_train_result_file} + else + echo "Unified API demo failure" >> ${run_code_examples_log_file} + echo 'code_examples: unified_api failed' >> ${run_benchmark_train_result_file} + fail=1 + fi + rm -rf package*/dataset + cd - + fi + + should_run_example "train_lenet" + should_run=$? 
+ if [[ "$should_run" == "1" ]]; then + cd ${basepath}/../../examples/train_lenet || exit 1 + chmod 777 ./prepare_and_run.sh + chmod 777 ./*/*.sh + ./prepare_and_run.sh -D ${datasets_path}/mnist -r ${tarball_path} -t ${target} -m ${models_path}/code_example.mindir -e 1 >> ${run_code_examples_log_file} + accurate=$(tail -10 ${run_code_examples_log_file} | awk 'NF==3 && /Accuracy is/ { sum += $3} END { print (sum > 1.6) }') + if [ $accurate -eq 1 ]; then + echo "Lenet Trained and reached accuracy" >> ${run_code_examples_log_file} + echo 'code_examples: train_lenet pass' >> ${run_benchmark_train_result_file} + else + echo "Train Lenet demo failure" >> ${run_code_examples_log_file} + echo 'code_examples: train_lenet failed' >> ${run_benchmark_train_result_file} + fail=1 + fi + rm -rf package*/dataset + cd - fi - rm -rf package*/dataset - cd - fi return ${fail} } @@ -596,7 +627,7 @@ echo "Push files to benchmark_train_test folder and run benchmark_train" benchmark_train_test_path=${basepath}/benchmark_train_test rm -rf ${benchmark_train_test_path} mkdir -p ${benchmark_train_test_path} -cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path} || exit 1 +cp -a ${ms_models_path}/*.ms ${benchmark_train_test_path} isFailed=0 if [[ $backend == "all" || $backend == "train" || $backend == "x86_train" || $backend == "codegen&train" ]]; then diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc index 3062d4f59d6..cd1ded3f5fd 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_extract_features_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/custom_extract_features_infer.h" +#include "nnacl/infer/string/custom_extract_features_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc index 9b932f28492..1c84fdd7215 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_normalize_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/custom_normalize_infer.h" +#include "nnacl/infer/string/custom_normalize_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc index 62cf10fa8aa..b908aa7a344 100644 --- a/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/custom_predict_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/custom_predict_infer.h" +#include "nnacl/infer/string/custom_predict_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc index b6dbf4b6085..4768bedf7e4 100644 --- a/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/hashtable_lookup_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/hashtable_lookup_infer.h" +#include "nnacl/infer/string/hashtable_lookup_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc index 9b27f538cbd..33717760b18 100644 --- a/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/lsh_projection_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/lsh_projection_infer.h" +#include "nnacl/infer/string/lsh_projection_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc index 469b0934498..ef7adebb898 100644 --- a/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/skip_gram_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/skip_gram_infer.h" +#include "nnacl/infer/string/skip_gram_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc index eeefae7073a..6c03371fe05 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_fromtensor_infer_test.cc @@ -15,7 +15,7 @@ */ #include "common/common_test.h" #include "src/common/tensor_util.h" -#include "nnacl/infer/tensorlist_fromtensor_infer.h" +#include "nnacl/infer/control/tensorlist_fromtensor_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc index d92851cd325..05872d6b741 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_getitem_infer_test.cc @@ -15,7 +15,7 @@ */ #include "common/common_test.h" #include "src/common/tensor_util.h" -#include "nnacl/infer/tensorlist_getitem_infer.h" +#include "nnacl/infer/control/tensorlist_getitem_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc index 37f93257529..a8c877b72f6 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_reserve_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/tensorlist_reserve_infer.h" +#include "nnacl/infer/control/tensorlist_reserve_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc index 5626e5b9719..9c43909aef9 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_setitem_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. */ #include "common/common_test.h" -#include "nnacl/infer/tensorlist_setitem_infer.h" +#include "nnacl/infer/control/tensorlist_setitem_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc index bf020b5e5d1..e7e4a27b30b 100644 --- a/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc +++ b/mindspore/lite/test/ut/nnacl/infer/tensorlist_stack_infer_test.cc @@ -14,7 +14,7 @@ * limitations under the License. 
*/ #include "common/common_test.h" -#include "nnacl/infer/tensorlist_stack_infer.h" +#include "nnacl/infer/control/tensorlist_stack_infer.h" namespace mindspore { diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc index 200b9f49f19..7bbc852b1b3 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/fp32/skip_gram_fp32.cc @@ -15,7 +15,7 @@ */ #include -#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h" +#include "src/runtime/kernel/arm/string/skip_gram.h" #include "nnacl/skip_gram_parameter.h" #include "src/common/file_utils.h" #include "common/common_test.h" diff --git a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc index 1c86a856853..202ae5a2f87 100644 --- a/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc +++ b/mindspore/lite/test/ut/src/runtime/kernel/arm/string/normalize.cc @@ -15,7 +15,7 @@ */ #include -#include "src/runtime/kernel/arm/fp32/skip_gram_fp32.h" +#include "src/runtime/kernel/arm/string/skip_gram.h" #include "src/runtime/kernel/arm/string/normalize.h" #include "mindspore/lite/src/kernel_registry.h" #include "nnacl/skip_gram_parameter.h" diff --git a/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc b/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc index 0f05beb6208..ab961bb8b87 100644 --- a/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc +++ b/mindspore/lite/test/ut/src/runtime/runtime_pass_tests.cc @@ -59,52 +59,23 @@ void Nc4hw4PassConstruct(std::vector *kernels, std::vector transpose_param, &transpose_kernel, nullptr); kernels->push_back(transpose_kernel); - lite::Tensor *in_param_tensor = new lite::Tensor(); - tensors->push_back(in_param_tensor); - lite::Tensor *in_out_tensor = new lite::Tensor(); - tensors->push_back(in_out_tensor); 
- OpParameter *in_param = new OpParameter(); - kernel::KernelKey in_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_InstanceNorm}; - kernel::LiteKernel *in_kernel = nullptr; - std::vector in_in = {transpose_out_tensor, in_param_tensor}; - std::vector in_out = {in_out_tensor}; - lite::KernelRegistry::GetInstance()->GetKernel(in_in, in_out, ctx, nullptr, in_desc, in_param, &in_kernel, nullptr); - kernels->push_back(in_kernel); - - lite::Tensor *transpose2_param_tensor = new lite::Tensor(); - tensors->push_back(transpose_param_tensor); - lite::Tensor *transpose2_out_tensor = new lite::Tensor(); - tensors->push_back(transpose_param_tensor); - OpParameter *transpose2_param = new OpParameter(); - kernel::KernelKey transpose2_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Transpose}; - kernel::LiteKernel *transpose2_kernel = nullptr; - std::vector transpose2_in = {in_out_tensor, transpose2_param_tensor}; - std::vector transpose2_out = {transpose2_out_tensor}; - lite::KernelRegistry::GetInstance()->GetKernel(transpose2_in, transpose2_out, ctx, nullptr, transpose2_desc, - transpose2_param, &transpose2_kernel, nullptr); - kernels->push_back(transpose2_kernel); - - lite::Tensor *conv2_weight = new lite::Tensor(); - tensors->push_back(conv2_weight); - lite::Tensor *conv2_out_tensor = new lite::Tensor(); - tensors->push_back(conv2_out_tensor); - std::vector conv2_in = {transpose2_out_tensor, conv_weight}; - std::vector conv2_out = {conv2_out_tensor}; - OpParameter *conv2_param = new OpParameter(); - kernel::KernelKey conv2_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_Conv2DFusion}; - kernel::LiteKernel *conv2_kernel = nullptr; - lite::KernelRegistry::GetInstance()->GetKernel(conv2_in, conv2_out, ctx, nullptr, conv2_desc, conv2_param, - &conv2_kernel, nullptr); - kernels->push_back(conv2_kernel); + lite::Tensor *pad_param_tensor = new lite::Tensor(); + tensors->push_back(pad_param_tensor); + lite::Tensor *pad_out_tensor = new 
lite::Tensor(); + tensors->push_back(pad_out_tensor); + OpParameter *pad_param = new OpParameter(); + kernel::KernelKey pad_desc{kernel::kCPU, kNumberTypeFloat32, schema::PrimitiveType_PadFusion}; + kernel::LiteKernel *pad_kernel = nullptr; + std::vector pad_in = {transpose_out_tensor, pad_param_tensor}; + std::vector pad_out = {pad_out_tensor}; + lite::KernelRegistry::GetInstance()->GetKernel(pad_in, pad_out, ctx, nullptr, pad_desc, pad_param, &pad_kernel, + nullptr); + kernels->push_back(pad_kernel); conv_kernel->set_out_kernels({transpose_kernel}); transpose_kernel->set_in_kernels({conv_kernel}); - transpose_kernel->set_out_kernels({in_kernel}); - in_kernel->set_in_kernels({transpose_kernel}); - in_kernel->set_out_kernels({transpose2_kernel}); - transpose2_kernel->set_in_kernels({in_kernel}); - transpose2_kernel->set_out_kernels({conv2_kernel}); - conv2_kernel->set_in_kernels({transpose2_kernel}); + transpose_kernel->set_out_kernels({pad_kernel}); + pad_kernel->set_in_kernels({transpose_kernel}); return; } @@ -114,12 +85,11 @@ TEST_F(RuntimePass, Nc4hw4Pass1) { std::vector tensors; Nc4hw4PassConstruct(&kernels, &tensors, ctx.get()); - ASSERT_EQ(kernels.size(), 5); - /* runtime pass */ lite::Nc4hw4PassReplace(&kernels, &tensors, 0); - ASSERT_EQ(kernels.size(), 3); + ASSERT_EQ(kernels.size(), 2); + ASSERT_EQ(tensors.size(), 5); for (auto tensor : tensors) { delete tensor; diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc index 77c5004bc35..abb7f1deada 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.cc +++ b/mindspore/lite/tools/benchmark/benchmark_base.cc @@ -208,10 +208,10 @@ void BenchmarkFlags::InitInputDataList() { void BenchmarkFlags::InitResizeDimsList() { std::string content = this->resize_dims_in_; std::vector shape; - auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); + auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); for (const auto &shape_str : 
shape_strs) { shape.clear(); - auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); + auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); std::cout << "Resize Dims: "; for (const auto &dim_str : dim_strs) { std::cout << dim_str << " "; diff --git a/mindspore/lite/tools/benchmark_train/net_train.cc b/mindspore/lite/tools/benchmark_train/net_train.cc index c06e39505bd..76164f076a3 100644 --- a/mindspore/lite/tools/benchmark_train/net_train.cc +++ b/mindspore/lite/tools/benchmark_train/net_train.cc @@ -603,7 +603,7 @@ int NetTrain::InitCallbackParameter() { } op_call_times_total_++; op_begin_ = GetTimeUs(); - if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign")) { + if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign") || callParam.node_type == "SGD") { for (auto tensor : before_outputs) { std::fill(reinterpret_cast(tensor->MutableData()), reinterpret_cast(tensor->MutableData()) + tensor->Size(), 0); @@ -646,10 +646,10 @@ int NetTrain::InitCallbackParameter() { void NetTrainFlags::InitResizeDimsList() { std::string content = this->resize_dims_in_; std::vector shape; - auto shape_strs = StringSplit(content, std::string(DELIM_COLON)); + auto shape_strs = StrSplit(content, std::string(DELIM_COLON)); for (const auto &shape_str : shape_strs) { shape.clear(); - auto dim_strs = StringSplit(shape_str, std::string(DELIM_COMMA)); + auto dim_strs = StrSplit(shape_str, std::string(DELIM_COMMA)); std::cout << "Resize Dims: "; for (const auto &dim_str : dim_strs) { std::cout << dim_str << " "; diff --git a/mindspore/lite/tools/common/flag_parser.cc b/mindspore/lite/tools/common/flag_parser.cc index 1c4ed26b791..58fc4d139a3 100644 --- a/mindspore/lite/tools/common/flag_parser.cc +++ b/mindspore/lite/tools/common/flag_parser.cc @@ -24,6 +24,10 @@ Option FlagParser::ParseFlags(int argc, const char *const *argv, bo bool supportDuplicate) { MS_ASSERT(argv != nullptr); const int FLAG_PREFIX_LEN = 2; + if (argc <= 0) { + 
MS_LOG(ERROR) << "The arguments number is out of range"; + return Option("Failed: flags is not valid"); + } binName = GetFileName(argv[0]); std::multimap> keyValues; diff --git a/mindspore/lite/tools/common/flag_parser.h b/mindspore/lite/tools/common/flag_parser.h index 7a69333ee5f..26d881b3257 100644 --- a/mindspore/lite/tools/common/flag_parser.h +++ b/mindspore/lite/tools/common/flag_parser.h @@ -280,8 +280,11 @@ void FlagParser::AddFlag(Option Flags::*t, const std::string &flagName, const ConstructFlag(t, flagName, helpInfo, &flagItem); flagItem.isRequired = false; flagItem.parse = [t](FlagParser *base, const std::string &value) -> Option { + if (base == nullptr) { + return Option(Nothing()); + } auto *flag = dynamic_cast(base); - if (base != nullptr) { + if (flag != nullptr) { Option ret = Option(GenericParseValue(value)); if (ret.IsNone()) { return Option(None()); diff --git a/mindspore/lite/tools/common/func_graph_subgraph.cc b/mindspore/lite/tools/common/func_graph_subgraph.cc index a507353a63b..79d900fa277 100644 --- a/mindspore/lite/tools/common/func_graph_subgraph.cc +++ b/mindspore/lite/tools/common/func_graph_subgraph.cc @@ -482,9 +482,7 @@ void SubGraph::CreateCNodeForPartialSubGraph( // move cnode from belong_graph to subgraph for (auto &node : this->GetNodes()) { sub_graph->AddNode(node); - if (!utils::isa(node)) { - node->set_func_graph(sub_graph); - } + node->set_func_graph(sub_graph); for (size_t i = 0; i < node->inputs().size(); i++) { if (node == nullptr || node->inputs().at(i)) { continue; diff --git a/mindspore/lite/tools/common/graph_util.cc b/mindspore/lite/tools/common/graph_util.cc index 9e9c1ba552c..2e6407a63cf 100644 --- a/mindspore/lite/tools/common/graph_util.cc +++ b/mindspore/lite/tools/common/graph_util.cc @@ -26,6 +26,7 @@ #include "tools/common/node_util.h" #include "src/common/log_adapter.h" #include "src/common/utils.h" +#include "tools/converter/ops/ops_def.h" namespace mindspore { namespace lite { @@ -33,6 +34,29 @@ namespace 
{ enum QuantBitNum { QuantBitNum_INT8 = 8, QuantBitNum_INT16 = 16 }; const int kZeroPointGap = 128; } // namespace +int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector &outputs) { + if (graph == nullptr || outputs.empty()) { + MS_LOG(DEBUG) << "Input graph is nullptr or outputs is empty"; + return RET_INPUT_PARAM_INVALID; + } + if (outputs.size() == 1) { + graph->set_output(outputs.front(), false); + return RET_OK; + } + auto make_tuple_prim_ptr = std::make_shared(); + if (make_tuple_prim_ptr == nullptr) { + MS_LOG(DEBUG) << "new MakeTuple failed"; + return lite::RET_NULL_PTR; + } + auto make_tuple_cnode = graph->NewCNode(make_tuple_prim_ptr, outputs); + if (make_tuple_cnode == nullptr) { /* check the cnode just created; the primitive was already validated above */ + MS_LOG(DEBUG) << "new cnode failed"; + return lite::RET_NULL_PTR; + } + make_tuple_cnode->set_fullname_with_scope("return tuple"); + graph->set_output(make_tuple_cnode, false); + return RET_OK; +} OpDefCopyer GetSimpleOpCopyer() { return [](CNodeT *inCNode) -> std::unique_ptr { diff --git a/mindspore/lite/tools/common/graph_util.h b/mindspore/lite/tools/common/graph_util.h index 1fc3f60dbf0..720b9111085 100644 --- a/mindspore/lite/tools/common/graph_util.h +++ b/mindspore/lite/tools/common/graph_util.h @@ -46,6 +46,8 @@ using OpDefCopyer = std::function(schema::CNodeT OpDefCopyer GetSimpleOpCopyer(); +int SetFuncGraphOutput(const FuncGraphPtr &graph, const std::vector &outputs); + std::vector GetInputNodeIdx(const schema::MetaGraphT &graphT, const size_t &nodeIdx, int inputIndexIdx = -1); std::vector GetInputNodeIdx(const schema::MetaGraphT &graphT, const schema::CNodeT &node, diff --git a/mindspore/lite/tools/common/node_util.cc b/mindspore/lite/tools/common/node_util.cc index 57ec131fd7d..65d6a8659e9 100644 --- a/mindspore/lite/tools/common/node_util.cc +++ b/mindspore/lite/tools/common/node_util.cc @@ -28,147 +28,19 @@ namespace mindspore { namespace lite { constexpr size_t kInitialSize = 1024; - -static const std::vector nhwcOpList =
{schema::PrimitiveType_Conv2DBackpropFilterFusion, - schema::PrimitiveType_Conv2DBackpropInputFusion, - schema::PrimitiveType_AvgPoolGrad, - schema::PrimitiveType_MaxPoolGrad, - schema::PrimitiveType_BiasAddGrad, - schema::PrimitiveType_BatchNormGrad, - schema::PrimitiveType_ApplyMomentum, - schema::PrimitiveType_SGD, - schema::PrimitiveType_Adam, - schema::PrimitiveType_ResizeGrad, - schema::PrimitiveType_AvgPoolFusion, - schema::PrimitiveType_MaxPoolFusion, - schema::PrimitiveType_Conv2DFusion, - schema::PrimitiveType_Conv2dTransposeFusion, - schema::PrimitiveType_LRN, - schema::PrimitiveType_Resize, - schema::PrimitiveType_BatchNorm, - schema::PrimitiveType_FusedBatchNorm, - schema::PrimitiveType_PReLUFusion, - schema::PrimitiveType_BiasAdd, - schema::PrimitiveType_SpaceToDepth, - schema::PrimitiveType_DepthToSpace, - schema::PrimitiveType_TopKFusion, - schema::PrimitiveType_BatchToSpace, - schema::PrimitiveType_SpaceToBatch, - schema::PrimitiveType_SpaceToBatchND}; - -static const std::vector nchwOpList = {schema::PrimitiveType_InstanceNorm}; - -static const std::vector nhwcOpAllInputList = { - schema::PrimitiveType_AvgPoolGrad, schema::PrimitiveType_MaxPoolGrad, - schema::PrimitiveType_ActivationGrad, schema::PrimitiveType_Conv2DBackpropFilterFusion, - schema::PrimitiveType_BatchNormGrad, schema::PrimitiveType_ResizeGrad}; - -// index {} mean all inputs need insert -static std::unordered_map> extNhwcInsertIndex = { - {schema::PrimitiveType_BatchNormGrad, {0, 1}}, - {schema::PrimitiveType_Conv2DBackpropFilterFusion, {0, 1}}, - {schema::PrimitiveType_ApplyMomentum, {3}}, - {schema::PrimitiveType_SGD, {1}}, - {schema::PrimitiveType_Adam, {9}}}; - -static const std::vector fp32FullOpList = { - schema::PrimitiveType_Concat, schema::PrimitiveType_AddFusion, - schema::PrimitiveType_Floor}; // fp32 ops support C4 and nhwc in fp32 - -static const std::vector int8NeedNhwcOpList = {}; - -static const std::vector int8OpList = {schema::PrimitiveType_Conv2DFusion, - 
schema::PrimitiveType_Conv2dTransposeFusion, - schema::PrimitiveType_AddFusion, - schema::PrimitiveType_Transpose, - schema::PrimitiveType_AvgPoolFusion, - schema::PrimitiveType_MaxPoolFusion, - schema::PrimitiveType_Concat, - schema::PrimitiveType_Softmax, - schema::PrimitiveType_Reshape, - schema::PrimitiveType_Activation, - schema::PrimitiveType_Resize, - schema::PrimitiveType_FullConnection, - schema::PrimitiveType_ArgMaxFusion, - schema::PrimitiveType_ArgMinFusion, - schema::PrimitiveType_BatchNorm, - schema::PrimitiveType_FusedBatchNorm, - schema::PrimitiveType_BiasAdd, - schema::PrimitiveType_DivFusion, - schema::PrimitiveType_MulFusion, - schema::PrimitiveType_SliceFusion, - schema::PrimitiveType_Split, - schema::PrimitiveType_Squeeze, - schema::PrimitiveType_SubFusion, - schema::PrimitiveType_StridedSlice, - schema::PrimitiveType_TopKFusion, - schema::PrimitiveType_Unsqueeze, - schema::PrimitiveType_MatMul, - schema::PrimitiveType_PadFusion, - schema::PrimitiveType_ScaleFusion, - schema::PrimitiveType_Cast, - schema::PrimitiveType_Shape, - schema::PrimitiveType_ExpandDims, - schema::PrimitiveType_BatchToSpace, - schema::PrimitiveType_BatchToSpaceND, - schema::PrimitiveType_ReduceFusion, - schema::PrimitiveType_Round, - schema::PrimitiveType_Floor, - schema::PrimitiveType_Ceil, - schema::PrimitiveType_Abs, - schema::PrimitiveType_Sin, - schema::PrimitiveType_Cos, - schema::PrimitiveType_Log, - schema::PrimitiveType_Sqrt, - schema::PrimitiveType_Rsqrt, - schema::PrimitiveType_Square, - schema::PrimitiveType_LogicalNot, - schema::PrimitiveType_SpaceToBatch, - schema::PrimitiveType_SpaceToBatchND, - schema::PrimitiveType_DepthToSpace, - schema::PrimitiveType_PowFusion, - schema::PrimitiveType_GatherNd, - schema::PrimitiveType_LeakyRelu, - schema::PrimitiveType_Gather, - schema::PrimitiveType_Equal, - schema::PrimitiveType_NotEqual, - schema::PrimitiveType_LessEqual, - schema::PrimitiveType_Greater, - schema::PrimitiveType_GreaterEqual, - 
schema::PrimitiveType_Eltwise, - schema::PrimitiveType_DetectionPostProcess, - schema::PrimitiveType_Crop, - schema::PrimitiveType_PriorBox, - schema::PrimitiveType_QuantDTypeCast, - schema::PrimitiveType_LayerNormFusion, - schema::PrimitiveType_L2NormalizeFusion}; - -static const std::vector needInsertOpList = { - schema::PrimitiveType_Eltwise, schema::PrimitiveType_Activation, schema::PrimitiveType_Concat, - schema::PrimitiveType_PowFusion, schema::PrimitiveType_StridedSlice, schema::PrimitiveType_AddFusion, - schema::PrimitiveType_AddN, schema::PrimitiveType_Split, schema::PrimitiveType_SliceFusion, - schema::PrimitiveType_Crop, schema::PrimitiveType_MulFusion, schema::PrimitiveType_Maximum, - schema::PrimitiveType_ActivationGrad}; - -static const std::unordered_map nc2NhAxisMap = {{0, 0}, {1, -1}, {2, 1}, {3, 2}}; - -std::unordered_map GetNc2NhAxisMap() { return nc2NhAxisMap; } - -std::vector GetInsertOpList() { return needInsertOpList; } - -std::vector Getfp32FullOpList() { return fp32FullOpList; } - -std::vector GetNhwcOpList() { return nhwcOpList; } - -std::vector GetNchwOpList() { return nchwOpList; } - -std::unordered_map> GetExtNhwcIndexes() { return extNhwcInsertIndex; } - -std::vector GetNhwcAllInputOpList() { return nhwcOpAllInputList; } - -std::vector GetUint8NhwcOpList() { return int8NeedNhwcOpList; } - -std::vector GetInt8OpList() { return int8OpList; } +std::vector GetInputCNode(const CNodePtr &cnode) { + if (cnode == nullptr) { + return {}; + } + std::vector inputs; + for (const auto &input : cnode->inputs()) { + if (input == nullptr || !utils::isa(input)) { + continue; + } + inputs.emplace_back(utils::cast(input)); + } + return inputs; +} const schema::Primitive *ConvertToPrimitive(schema::PrimitiveT *primitive_t, flatbuffers::FlatBufferBuilder *fbb) { if (primitive_t == nullptr || fbb == nullptr) { @@ -463,6 +335,5 @@ size_t GetCNodeOutputsSize(const std::shared_ptr &anf_node, bool train_ return 1; } } - } // namespace lite } // namespace 
mindspore diff --git a/mindspore/lite/tools/common/node_util.h b/mindspore/lite/tools/common/node_util.h index 7fcba451927..6a2f1a560ae 100644 --- a/mindspore/lite/tools/common/node_util.h +++ b/mindspore/lite/tools/common/node_util.h @@ -31,6 +31,8 @@ namespace mindspore { namespace lite { +std::vector GetInputCNode(const CNodePtr &cnode); + template int CreateOperator(const std::unique_ptr &primitive, schema::PrimitiveType type) { auto attr = std::make_unique(); diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 1d33d5dc863..52e3e50abe3 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -27,6 +27,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../common/tensor_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/string_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/protobuf_utils.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../common/func_graph_subgraph.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc ${CMAKE_CURRENT_SOURCE_DIR}/../common/storage.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../src/ir/primitive_t_value.cc @@ -47,7 +48,6 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../optimizer/common/gllo_utils.cc ../optimizer/common/format_utils.cc ../optimizer/common/multiple_pattern_process_pass.cc - ../optimizer/format/conv_weight_format.cc ../optimizer/format/delete_redundant_transpose.cc ../optimizer/format/to_format_base.cc ../optimizer/format/to_nchw_format.cc @@ -113,6 +113,7 @@ file(GLOB_RECURSE CONVERTER_SRC RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ../optimizer/graph/transpose_strategy.cc ../optimizer/graph/reduce_same_act_pass.cc ../optimizer/graph/split_one_pass.cc + ../optimizer/graph/find_const_subgraph_pass.cc ) add_subdirectory(../anf_exporter anf_exporter) @@ -136,11 +137,14 @@ set(LITE_SRC ${SRC_DIR}/common/tensor_util.cc ${SRC_DIR}/runtime/inner_allocator.cc 
${SRC_DIR}/runtime/infer_manager.cc + ${SRC_DIR}/runtime/runtime_pass.cc ${SRC_DIR}/inner_context.cc ${SRC_DIR}/tensor.cc ${SRC_DIR}/ms_tensor.cc ${SRC_DIR}/tensorlist.cc ${SRC_DIR}/registry/kernel_interface_registry.cc + ${SRC_DIR}/registry/register_utils.cc + ${SRC_DIR}/registry/register_kernel_impl.cc ${SRC_DIR}/registry/kernel_interface.cc ${SRC_DIR}/kernel_registry.cc ${SRC_DIR}/inner_kernel.cc diff --git a/mindspore/lite/tools/converter/anf_transform.cc b/mindspore/lite/tools/converter/anf_transform.cc index 02c0aef1a7f..65b50868f20 100644 --- a/mindspore/lite/tools/converter/anf_transform.cc +++ b/mindspore/lite/tools/converter/anf_transform.cc @@ -72,7 +72,6 @@ #include "tools/optimizer/format/delete_redundant_transpose.h" #include "tools/optimizer/format/to_nchw_format.h" #include "tools/optimizer/format/to_nhwc_format.h" -#include "tools/optimizer/format/conv_weight_format.h" using std::string; namespace mindspore::lite { @@ -389,8 +388,6 @@ FuncGraphPtr AnfTransform::TransformFuncGraph(const FuncGraphPtr &old_graph, con void AnfTransform::AppendPassToStoreRoom(const converter::Flags *config) { auto fmk = config->fmk; auto is_train = config->trainModel; - opt::PassRegistry("ConvWeightToKHWC", std::make_shared()); - opt::PassRegistry("ConvWeightToKCHW", std::make_shared()); opt::PassRegistry("DecreaseTransposeAlgo", std::make_shared(fmk, is_train)); opt::PassRegistry("DeleteRedundantTranspose", std::make_shared()); opt::PassRegistry("InferShapePass", std::make_shared(fmk, is_train)); diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc index d97136bdf45..c17fe9a2814 100644 --- a/mindspore/lite/tools/converter/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_flags.cc @@ -32,6 +32,7 @@ namespace converter { namespace { constexpr int kBase = 10; constexpr int kQuantBitNumInt16 = 16; +constexpr int kPathLengthUpperLimit = 1024; } // namespace Flags::Flags() { AddFlag(&Flags::fmkIn, 
"fmk", "Input model framework type. TF | TFLITE | CAFFE | MINDIR | ONNX", ""); @@ -211,10 +212,10 @@ int Flags::InitTrainModel() { int Flags::InitInTensorShape() { std::string content = this->inTensorShape; std::vector shape; - auto shape_strs = StringSplit(content, std::string(";")); + auto shape_strs = StrSplit(content, std::string(";")); for (const auto &shape_str : shape_strs) { shape.clear(); - auto string_split = StringSplit(shape_str, std::string(":")); + auto string_split = StrSplit(shape_str, std::string(":")); auto name = string_split[0]; if (name.empty()) { MS_LOG(ERROR) << "input tensor name is empty"; @@ -223,7 +224,7 @@ int Flags::InitInTensorShape() { if (dim_strs.empty()) { MS_LOG(ERROR) << "input tensor dim string is empty"; } - auto dims = StringSplit(dim_strs, std::string(",")); + auto dims = StrSplit(dim_strs, std::string(",")); if (dims.empty()) { MS_LOG(ERROR) << "input tensor dim is empty"; } @@ -428,7 +429,7 @@ std::string GetStrFromConfigFile(const std::string &file, const std::string &tar } #ifdef _WIN32 - char *real_path = _fullpath(resolved_path.get(), file.c_str(), 1024); + char *real_path = _fullpath(resolved_path.get(), file.c_str(), kPathLengthUpperLimit); #else char *real_path = realpath(file.c_str(), resolved_path.get()); #endif @@ -486,7 +487,6 @@ std::vector SplitStringToVector(const std::string &raw_str, const c } return res; } - } // namespace converter } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/export_model.cc b/mindspore/lite/tools/converter/export_model.cc index 3fe4924c7db..6789e67afba 100644 --- a/mindspore/lite/tools/converter/export_model.cc +++ b/mindspore/lite/tools/converter/export_model.cc @@ -193,7 +193,7 @@ STATUS ExportModel(const FuncGraphPtr &graph) { return RET_ERROR; } (void)Manage(mirror_graph, true); - if (!opt::RunOptimizerPass(mirror_graph, {"InferShapePass", "DecreaseTransposeAlgo"})) { + if (!opt::RunOptimizerPass(mirror_graph, {"InferShapePass", 
"DeleteRedundantTranspose", "DecreaseTransposeAlgo"})) { MS_LOG(ERROR) << "Run transpose opt pass failed."; return RET_ERROR; } diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.cc b/mindspore/lite/tools/converter/import/mindspore_importer.cc index bcc6e40885f..7dbacf58af4 100644 --- a/mindspore/lite/tools/converter/import/mindspore_importer.cc +++ b/mindspore/lite/tools/converter/import/mindspore_importer.cc @@ -16,6 +16,7 @@ #include "tools/converter/import/mindspore_importer.h" #include +#include #include #include #include "tools/converter/parser/parser_utils.h" @@ -49,96 +50,6 @@ STATUS MindsporeImporter::Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const return RET_OK; } -STATUS MindsporeImporter::WeightFormatTransform(const FuncGraphPtr &graph) { - MS_ASSERT(graph != nullptr); - auto node_list = TopoSort(graph->get_return()); - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - auto conv_cnode = node->cast(); - if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - continue; - } - MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); - int status = HardCodeMindir(conv_cnode, graph); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); - return RET_ERROR; - } - } - return RET_OK; -} - -STATUS MindsporeImporter::HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph) { - MS_ASSERT(conv_cnode != nullptr); - auto prim = GetValueNode(conv_node->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? 
GetValue(prim->GetAttr(ops::kFormat)) : 0; - auto weight_node = conv_node->input(kConvWeightIndex); - schema::Format weight_dst_format = schema::Format::Format_KHWC; - STATUS status = RET_OK; - schema::Format weight_src_format = schema::Format::Format_NUM_OF_FORMAT; - switch (quant_type_) { - case QuantType_AwareTraining: - case QuantType_PostTraining: - case QuantType_WeightQuant: - case QuantType_QUANT_NONE: { - if (format == schema::Format::Format_KHWC) { - weight_src_format = schema::Format::Format_KHWC; - } else { - weight_src_format = schema::Format::Format_KCHW; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) - << ", node: " << conv_node->fullname_with_scope(); - return RET_ERROR; - } - } - if (utils::isa(weight_node)) { - status = HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-const failed."; - return RET_ERROR; - } - } - weight_node = conv_node->input(kConvWeightIndex); - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value != nullptr) { - status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" - << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() - << "quant type:" << quant_type_; - return RET_ERROR; - } - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - auto type_id = static_cast(weight_value->data_type()); - auto shape = weight_value->shape(); - std::vector shape_vector(shape.begin(), shape.end()); - auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - } - if (utils::isa(weight_node)) { - 
status = HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-sharing failed."; - return RET_ERROR; - } - } - return lite::RET_OK; -} - size_t MindsporeImporter::Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len) { std::regex r("[0-9a-fA-F]+"); if (!std::regex_match(hex_str, r)) { @@ -208,16 +119,11 @@ FuncGraphPtr MindsporeImporter::ImportMindIR(const converter::Flags &flag) { ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(lite::converter::FmkType_MS, flag.trainModel); + auto unify_format = std::make_shared(lite::converter::FmkType_MS, flag.trainModel, flag.quantType); if (!unify_format->Run(func_graph)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - if ((status = WeightFormatTransform(func_graph)) != RET_OK) { - MS_LOG(ERROR) << "WeightFormatTransform failed."; - ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); - return nullptr; - } return func_graph; } } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/import/mindspore_importer.h b/mindspore/lite/tools/converter/import/mindspore_importer.h index ca74b4b8fd6..96bcabe1d7f 100644 --- a/mindspore/lite/tools/converter/import/mindspore_importer.h +++ b/mindspore/lite/tools/converter/import/mindspore_importer.h @@ -17,6 +17,7 @@ #ifndef MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_ #define MINDSPORE_LITE_TOOLS_IMPORT_MINDSPORE_IMPORTER_H_ +#include #include #include "tools/converter/converter_flags.h" #include "load_mindir/load_model.h" @@ -30,8 +31,6 @@ class MindsporeImporter { private: STATUS Mindir2AnfAdjust(const FuncGraphPtr &func_graph, const converter::Flags &flag); - STATUS WeightFormatTransform(const FuncGraphPtr &graph); - STATUS HardCodeMindir(const CNodePtr &conv_node, const FuncGraphPtr &graph); QuantType quant_type_ = 
schema::QuantType_QUANT_NONE; size_t Hex2ByteArray(const std::string &hex_str, unsigned char *byte_array, size_t max_len); }; diff --git a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc index 40292965fd7..2f6a27ed99b 100644 --- a/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc +++ b/mindspore/lite/tools/converter/legacy_optimizer/graph/batchnorm_convert_scale_pass.cc @@ -230,15 +230,8 @@ STATUS BatchNormConvertScalePass::GetTransParam(MetaGraphT *graph, const std::un return RET_OK; } -// BatchNorm weight Tensor definition: -// caffe -// estimated_mean --0 -// estimated_variance --1 -// tensorflow -// scale -- 0 -// bias --1 -// estimated_mean --2 -// estimated_variance --3 +// caffe:estimated_mean:0 estimated_variance:1 +// tensorflow scale:0,bias:1,estimated_mean:2,estimated_variance:3 STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeightTensors *bnWeightTensors, const std::unique_ptr &bnNode) { MS_ASSERT(graph != nullptr); @@ -250,19 +243,6 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight if (fmkType == converter::FmkType_CAFFE) { bnWeightTensors->meanTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_MEAN_INDEX]).get(); bnWeightTensors->varianceTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_VARIANCE_INDEX]).get(); - auto scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_SCALE_INDEX]).get(); - - // calibrate mean and variance - float scale_factor_data = (reinterpret_cast(scaleTensor->data.data()))[0]; - float scale_factor = scale_factor_data == 0 ? 
0 : 1 / scale_factor_data; - auto mean_data = reinterpret_cast(bnWeightTensors->meanTensor->data.data()); - auto variance_data = reinterpret_cast(bnWeightTensors->varianceTensor->data.data()); - for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->meanTensor); i++) { - mean_data[i] *= scale_factor; - } - for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->varianceTensor); i++) { - variance_data[i] *= scale_factor; - } } else { bnWeightTensors->scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_SCALE_INDEX]).get(); bnWeightTensors->biasTensor = graph->allTensors.at(bnWeightTensorIdxes[TF_BATCHNORM_BIAS_INDEX]).get(); @@ -274,11 +254,24 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight MS_LOG(ERROR) << "BatchNorm's mean tensor is nullptr"; return RET_ERROR; } - if (bnWeightTensors->varianceTensor == nullptr) { MS_LOG(ERROR) << "BatchNorm's variance tensor is nullptr"; return RET_ERROR; } + if (fmkType == converter::FmkType_CAFFE) { + auto scaleTensor = graph->allTensors.at(bnWeightTensorIdxes[CAFFE_BATCHNORM_SCALE_INDEX]).get(); + // calibrate mean and variance + float scale_factor_data = (reinterpret_cast(scaleTensor->data.data()))[0]; + float scale_factor = scale_factor_data == 0 ? 
0 : 1 / scale_factor_data; + auto mean_data = reinterpret_cast(bnWeightTensors->meanTensor->data.data()); + auto variance_data = reinterpret_cast(bnWeightTensors->varianceTensor->data.data()); + for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->meanTensor); i++) { + mean_data[i] *= scale_factor; + } + for (size_t i = 0; i < GetShapeSize(*bnWeightTensors->varianceTensor); i++) { + variance_data[i] *= scale_factor; + } + } bnChannel = bnWeightTensors->meanTensor->data.size() * sizeof(uint8_t) / sizeof(float); if (bnChannel <= 0) { MS_LOG(ERROR) << "BatchNorm's channel less or equal 0"; @@ -289,14 +282,12 @@ STATUS BatchNormConvertScalePass::GetBnWeightTensors(MetaGraphT *graph, BNWeight MS_LOG(ERROR) << "conv kernel num expected to be equal to variance size"; return RET_ERROR; } - if (bnWeightTensors->scaleTensor != nullptr) { if (bnChannel != bnWeightTensors->scaleTensor->data.size() * sizeof(uint8_t) / sizeof(float)) { MS_LOG(ERROR) << "conv kernel num expected to be equal to scale size"; return RET_ERROR; } } - if (bnWeightTensors->biasTensor != nullptr) { if (bnChannel != bnWeightTensors->biasTensor->data.size() * sizeof(uint8_t) / sizeof(float)) { MS_LOG(ERROR) << "conv kernel num expected to be equal to bias size"; diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc index f73367307d3..18564ad112f 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.cc @@ -39,6 +39,11 @@ namespace { namespace { constexpr size_t kConvWeightIndex = 2; constexpr size_t kConvWeightShapeSize = 4; +constexpr size_t kFcWeightFirstShapeIndex = 0; +constexpr size_t kFcWeightSecondShapeIndex = 1; +constexpr size_t kFcBiasFirstShapeIndex = 0; +constexpr size_t kFcBiasSecondShapeIndex = 1; +constexpr size_t kFcBiasThirdShapeIndex = 2; } // namespace bool IsSkipedLayer(const caffe::LayerParameter 
&layer) { if (layer.type() == "Input" || layer.type() == "Dropout" || layer.type() == "Split") { @@ -50,12 +55,14 @@ bool IsSkipedLayer(const caffe::LayerParameter &layer) { void FcSqueezeWeightBias(const caffe::LayerParameter &layer, int blob_index, std::vector *shape) { if (layer.type() == "InnerProduct") { if (blob_index == 0) { - if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1) { + if (shape->size() == kConvWeightShapeSize && shape->at(kFcWeightFirstShapeIndex) == 1 && + shape->at(kFcWeightSecondShapeIndex) == 1) { shape->erase(shape->begin()); shape->erase(shape->begin()); } } else if (blob_index == 1) { - if (shape->size() == kConvWeightShapeSize && shape->at(0) == 1 && shape->at(1) == 1 && shape->at(2) == 1) { + if (shape->size() == kConvWeightShapeSize && shape->at(kFcBiasFirstShapeIndex) == 1 && + shape->at(kFcBiasSecondShapeIndex) == 1 && shape->at(kFcBiasThirdShapeIndex) == 1) { shape->erase(shape->begin()); shape->erase(shape->begin()); shape->erase(shape->begin()); @@ -105,112 +112,14 @@ FuncGraphPtr CaffeModelParser::Parse(const converter::ConverterParameters &flag) ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(lite::converter::FmkType_CAFFE, false); + auto unify_format = std::make_shared(lite::converter::FmkType_CAFFE, false, quant_type_); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { - MS_LOG(ERROR) << "WeightFormatTransform failed."; - ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); - return nullptr; - } return res_graph_; } -STATUS CaffeModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { - MS_ASSERT(graph != nullptr); - auto node_list = TopoSort(graph->get_return()); - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - auto conv_cnode = node->cast(); - if 
(!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - continue; - } - MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); - auto weight_node = conv_cnode->input(kConvWeightIndex); - MS_ASSERT(weight_node != nullptr); - auto tensor_info = opt::GetTensorInfo(weight_node); - if (tensor_info == nullptr) { - MS_LOG(ERROR) << "weight node must param value"; - return RET_OK; - } - auto status = HardCodeCaffe(conv_cnode, tensor_info, graph); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); - return RET_ERROR; - } - } - return RET_OK; -} - -STATUS CaffeModelParser::HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, - const FuncGraphPtr &graph) { - MS_ASSERT(conv_cnode != nullptr); - MS_ASSERT(tensor_info != nullptr); - auto weight_node = conv_node->input(kConvWeightIndex); - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value == nullptr) { - MS_LOG(DEBUG) << "weight node must param value"; - return RET_OK; - } - schema::Format weight_dst_format = schema::Format::Format_KHWC; - STATUS status = RET_OK; - schema::Format weight_src_format = Format_NUM_OF_FORMAT; - switch (quant_type_) { - case QuantType_PostTraining: - case QuantType_WeightQuant: - case QuantType_QUANT_NONE: { - weight_src_format = schema::Format::Format_KCHW; - } break; - default: { - MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) - << ", node: " << conv_node->fullname_with_scope(); - return lite::RET_ERROR; - } - } - if (utils::isa(weight_node)) { - auto status = - HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-const failed."; - return RET_ERROR; - } - } - weight_value = 
opt::GetTensorInfo(weight_node); - if (weight_value != nullptr) { - status = opt::TransFilterFormat(weight_value, schema::Format::Format_KCHW, weight_dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" - << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() - << "quant type:" << quant_type_; - return RET_ERROR; - } - auto type_id = static_cast(weight_value->data_type()); - auto shape = weight_value->shape(); - std::vector shape_vector(shape.begin(), shape.end()); - auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - } - if (utils::isa(weight_node)) { - auto status = - HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-sharing failed."; - return RET_ERROR; - } - } - return lite::RET_OK; -} - STATUS CaffeModelParser::ConvertLayers() { STATUS status = RET_OK; std::map weight_layers; diff --git a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h index 57b265e8c57..91a6c28a303 100644 --- a/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h +++ b/mindspore/lite/tools/converter/parser/caffe/caffe_model_parser.h @@ -56,10 +56,6 @@ class CaffeModelParser : public ModelParser { std::string GetOriginLayerName(const std::string &layer_name); - STATUS WeightFormatTransform(const FuncGraphPtr &graph); - - STATUS HardCodeCaffe(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); - STATUS ConvertGraphInputsOfLayer(); STATUS ConvertGraphInputsOfDim(); diff --git a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc 
b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc index 92f306af4f3..bc35c5f055e 100644 --- a/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc +++ b/mindspore/lite/tools/converter/parser/conv1d_inout_adjust.cc @@ -123,9 +123,11 @@ bool Conv1DInOutAdjust::Run(const FuncGraphPtr &func_graph) { std::vector axis; switch (conv2d_node->get_format()) { case mindspore::Format::NWC: + conv2d_node->set_format(mindspore::NHWC); axis = {1}; break; case mindspore::Format::NCW: + conv2d_node->set_format(mindspore::NCHW); axis = {2}; break; default: diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc index 4412e0d992a..a9235f6a0a6 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_conv_transpose_parser.cc @@ -77,24 +77,27 @@ ops::PrimitiveC *OnnxDeConvParser::Parse(const onnx::GraphProto &onnx_graph, con std::find_if(onnx_graph.initializer().begin(), onnx_graph.initializer().end(), [onnx_conv_weight](const onnx::TensorProto &proto) { return proto.name() == onnx_conv_weight; }); if (node_iter == onnx_graph.initializer().end()) { - MS_LOG(ERROR) << "not find node: " << onnx_conv_weight.c_str(); - return nullptr; - } - std::vector weight_shape; - auto size = (*node_iter).dims_size(); - weight_shape.reserve(size); - for (int i = 0; i < size; ++i) { - weight_shape.emplace_back((*node_iter).dims(i)); - } - if (weight_shape.size() != 4) { - MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size(); - return nullptr; - } - prim->set_in_channel(weight_shape[0]); - prim->set_out_channel(weight_shape[1] * group); + // in_channel and out_channel are set to 1 by default. 
+ prim->set_in_channel(1); + prim->set_out_channel(1); + MS_LOG(WARNING) << "parsing of channelIn/Out is delayed."; + } else { + std::vector weight_shape; + auto size = (*node_iter).dims_size(); + weight_shape.reserve(size); + for (int i = 0; i < size; ++i) { + weight_shape.emplace_back((*node_iter).dims(i)); + } + if (weight_shape.size() != 4) { + MS_LOG(ERROR) << "weight_shape.size() should be 4, but is " << weight_shape.size(); + return nullptr; + } + prim->set_in_channel(weight_shape[0]); + prim->set_out_channel(weight_shape[1] * group); - if (group != 1 && weight_shape[1] == 1) { - prim->AddAttr(ops::kIsDepthWise, MakeValue(true)); + if (group != 1 && weight_shape[1] == 1) { + prim->AddAttr(ops::kIsDepthWise, MakeValue(true)); + } } return prim.release(); diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc index 188a6a3600e..155f5330dd3 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_inputs_adjust.cc @@ -196,6 +196,7 @@ STATUS OnnxInputAdjust::ReplaceTransposeWithGraphInput(const FuncGraphPtr &func_ auto shape_ptr = param_node->abstract()->GetShapeTrack()->cast(); if (shape_ptr == nullptr) { MS_LOG(ERROR) << "shape is nullptr."; + return lite::RET_ERROR; } auto shape_vector = shape_ptr->shape(); if (shape_vector.size() != opt::kInputSizeFour) { diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc index d343245b488..948cb8fbf48 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.cc @@ -95,154 +95,14 @@ FuncGraphPtr OnnxModelParser::Parse(const converter::ConverterParameters &flag) ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = 
std::make_shared(lite::converter::FmkType_ONNX, false); + auto unify_format = std::make_shared(lite::converter::FmkType_ONNX, false, quant_type_); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - if ((status = WeightFormatTransform(all_func_graphs)) != RET_OK) { - MS_LOG(ERROR) << "WeightFormatTransform failed."; - ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); - return nullptr; - } return res_graph_; } -STATUS OnnxModelParser::WeightFormatTransform(const std::set &all_func_graphs) { - for (const auto &graph : all_func_graphs) { - MS_ASSERT(graph != nullptr); - auto node_list = TopoSort(graph->get_return()); - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - auto conv_cnode = node->cast(); - if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - continue; - } - MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); - auto weight_node = conv_cnode->input(kConvWeightIndex); - MS_ASSERT(weight_node != nullptr); - auto tensor_info = opt::GetTensorInfo(weight_node); - auto status = HardCodeONNX(conv_cnode, tensor_info, graph); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); - return RET_ERROR; - } - } - } - return RET_OK; -} - -lite::STATUS OnnxModelParser::HardCodeONNX(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, - const FuncGraphPtr &graph) { - MS_ASSERT(conv_cnode != nullptr); - MS_ASSERT(tensor_info != nullptr); - auto prim = GetValueNode(conv_node->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - int64_t format = 
prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; - schema::Format weight_dst_format = schema::Format::Format_KHWC; - STATUS status = RET_OK; - schema::Format weight_src_format = Format_NUM_OF_FORMAT; - auto weight_node = conv_node->input(kConvWeightIndex); - switch (quant_type_) { - case QuantType_AwareTraining: { - // sum up from current onnx quant models - if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - weight_src_format = schema::Format::Format_KHWC; - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - } else { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_CHWK; - } - } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_KCHW; - } else { - MS_LOG(ERROR) << "Unsupported op: " << conv_node->fullname_with_scope(); - return lite::RET_ERROR; - } - } break; - case QuantType_PostTraining: - case QuantType_WeightQuant: - case QuantType_QUANT_NONE: { - // conv (K x C/group x kH x kW) group = 1 - // depth (K x C/group x kH x kW) group = channelOut ==> (K, multiplier, H, W) - // deconv (C x K/group x kH x kW) group = 1 - // dedepth (C x K/group x kH x kW) group = channelIn ==> (C, multiplier, H, W) - if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion) || - opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion)) { - if (format == schema::Format::Format_NHWC) { - prim->AddAttr(ops::kFormat, MakeValue(Format_NHWC)); - weight_src_format = schema::Format::Format_KHWC; - } else if (format == schema::Format::Format_KHWC) { - weight_src_format = schema::Format::Format_KHWC; - } else { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_KCHW; - } - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported 
quantType: " << EnumNameQuantType(quant_type_) - << ", node: " << conv_node->fullname_with_scope(); - return lite::RET_ERROR; - } - } - status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); - if (status != RET_OK) { - return RET_ERROR; - } - return lite::RET_OK; -} -int OnnxModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, - const FuncGraphPtr &graph, schema::Format weight_src_format, - schema::Format weight_dst_format) { - if (utils::isa(weight_node)) { - auto status = - HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-const failed."; - return RET_ERROR; - } - } - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value != nullptr) { - auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_src_format]) << "To" - << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() - << "quant type:" << quant_type_; - return RET_ERROR; - } - auto type_id = static_cast(weight_value->data_type()); - auto shape = weight_value->shape(); - std::vector shape_vector(shape.begin(), shape.end()); - auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - } - if (utils::isa(weight_node)) { - auto status = - HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-sharing failed."; - return RET_ERROR; - } - } - return RET_OK; -} - STATUS OnnxModelParser::InitOriginModel(const std::string &model_file) { auto status = 
ValidateFileStr(model_file, ".onnx"); if (status != RET_OK) { diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h index d4a170069ae..10ea0de5781 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_model_parser.h @@ -92,10 +92,7 @@ class OnnxModelParser : public ModelParser { STATUS ConvertIfSubgraph(const onnx::GraphProto &onnx_graph, const FuncGraphPtr &anf_graph, const std::string &subgrah_name, const std::string &if_node_name, const std::string &root_node_name); - STATUS WeightFormatTransform(const std::set &all_func_graphs); - STATUS HardCodeONNX(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); - int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, - schema::Format weight_src_format, schema::Format weight_dst_format); + onnx::ModelProto onnx_model_; onnx::GraphProto onnx_root_graph_; std::vector all_subgraphs_; diff --git a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc index ea01385c10e..d48cce87626 100644 --- a/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc +++ b/mindspore/lite/tools/converter/parser/onnx/onnx_pad_adjust.cc @@ -98,8 +98,8 @@ bool OnnxPadAdjust::Run(const FuncGraphPtr &func_graph) { if (!input_node->isa()) { continue; } - // reshape the padding of pad operator to 2 x 4. - std::vector shape_pre = {2, 4}; + // reshape the padding of pad operator to 2 x i. 
+ std::vector shape_pre = {2, -1}; auto reshape_pre = NewReshapeOpNode(func_graph, input_node, shape_pre); if (reshape_pre == nullptr) { MS_LOG(ERROR) << "create reshape failed."; diff --git a/mindspore/lite/tools/converter/parser/parser_utils.cc b/mindspore/lite/tools/converter/parser/parser_utils.cc index 5e3d9cbb8e1..6d00a18da3b 100644 --- a/mindspore/lite/tools/converter/parser/parser_utils.cc +++ b/mindspore/lite/tools/converter/parser/parser_utils.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "tools/converter/parser/tf_bidirection_gru_cf_fusion.h" #include "tools/converter/parser/unused_node_remove_pass.h" @@ -30,7 +31,15 @@ namespace mindspore::lite { namespace { constexpr size_t kNumWeightIndex = 2; +bool IsWeightNodeSensitive(const AnfNodePtr &node) { + return opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) || + opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) || + opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion) || + opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) || + opt::CheckPrimitiveType(node, prim::kPrimAdam); } +} // namespace + void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set *all_func_graphs) { if (all_func_graphs->find(func_graph) == all_func_graphs->end()) { all_func_graphs->insert(func_graph); @@ -106,6 +115,7 @@ int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std:: } return lite::RET_OK; } + int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector *perm) { MS_ASSERT(perm != nullptr); auto src_format_str = std::string(schema::EnumNameFormat(src_format)); @@ -125,112 +135,74 @@ int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format return lite::RET_OK; } -int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format, - const ParameterPtr &weight_node, std::vector perm) { - 
MS_ASSERT(graph != nullptr); - MS_ASSERT(weight_node != nullptr); - auto node_list = TopoSort(graph->get_return()); - std::vector adjust_nodes; - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - if (opt::CheckPrimitiveType(node, prim::kPrimApplyMomentum) || opt::CheckPrimitiveType(node, prim::kPrimSGD) || - opt::CheckPrimitiveType(node, prim::kPrimAdam)) { - continue; - } - auto cnode = node->cast(); - auto inputs = cnode->inputs(); - if (std::any_of(inputs.begin(), inputs.end(), - [&](const AnfNodePtr &anf_node) { return weight_node == anf_node; })) { - if (opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) || - opt::CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) || - opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - auto prim = GetValueNode(cnode->input(0)); - prim->AddAttr(ops::kFormat, MakeValue(format)); - continue; - } - adjust_nodes.push_back(cnode); +AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode) { + MS_ASSERT(graph != nullptr && cnode != nullptr); + if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { + MS_LOG(ERROR) << "cnode is not a member of convolution's family."; + return nullptr; + } + auto weight_node = cnode->input(opt::kInputIndexTwo); + bool is_real_weight = + !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) && !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad); + while (!is_real_weight) { + if (!utils::isa(weight_node)) { + MS_LOG(ERROR) << "weight node is invalid."; + return nullptr; } + auto weight_cnode = weight_node->cast(); + weight_node = weight_cnode->input(1); + is_real_weight = !opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) && + !opt::CheckPrimitiveType(weight_node, prim::kPrimLoad); } - if (adjust_nodes.empty()) { - MS_LOG(DEBUG) << "do not need to adjust 
nodes."; - return lite::RET_OK; - } - auto perm_node = opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_sharing_perm"); - auto prim = std::make_shared(); - prim->AddAttr("quant_params", std::make_shared(1, 1)); - prim->AddAttr(ops::kFormat, MakeValue(dst_format)); - auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node}); - if (!weight_node->has_default()) { - MS_LOG(DEBUG) << "Weight parameter should has default parameter."; - return lite::RET_ERROR; - } - auto weight_tensor = weight_node->default_param()->cast(); - if (weight_tensor == nullptr) { - MS_LOG(DEBUG) << "Default parameter of weight parameter should be a tensor."; - return lite::RET_ERROR; - } - auto abstract = CreateTensorAbstract(weight_tensor->shape_c(), weight_tensor->data_type()); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - transpose_node->set_abstract(abstract); - transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_sharing_post"); - for (auto &adjust_node : adjust_nodes) { - auto inputs = adjust_node->inputs(); - std::replace_if( - inputs.begin(), inputs.end(), [&weight_node](const AnfNodePtr &anf_node) { return weight_node == anf_node; }, - transpose_node); - adjust_node->set_inputs(inputs); - } - return lite::RET_OK; + auto manager = Manage(graph); + MS_ASSERT(manager != nullptr); + manager->Replace(cnode->input(opt::kInputIndexTwo), weight_node); + return weight_node; } -int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node, - schema::Format src_format, schema::Format dst_format) { - MS_ASSERT(graph != nullptr); - MS_ASSERT(weight_node != nullptr); +int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format, + schema::Format dst_format, std::set *has_visited) { + MS_ASSERT(graph != nullptr && cnode != nullptr && has_visited != nullptr); if (src_format == dst_format) { 
return lite::RET_OK; } - std::vector perm; - auto status = GetTransposePermSharing(src_format, dst_format, &perm); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "get perm failed."; - return status; + if (!opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) && + !opt::CheckPrimitiveType(cnode, opt::kPrimConv2DBackpropInputFusion) && + !opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { + MS_LOG(ERROR) << "cnode is not a member of convolution's family."; + return RET_ERROR; } - status = TransposeInsertForWeightSharing(graph, dst_format, format, weight_node, perm); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "transpose insert failed."; + if (GetRealConvWeightNode(graph, cnode) == nullptr) { + MS_LOG(ERROR) << "current conv node is invalid, node name is " << cnode->fullname_with_scope(); + return RET_ERROR; + } + bool is_const_weight = true; + auto weight_node = cnode->input(opt::kInputIndexTwo); + if (utils::isa(weight_node)) { + is_const_weight = false; + } else if (utils::isa(weight_node)) { + auto weight_param_node = weight_node->cast(); + if (!weight_param_node->has_default()) { + is_const_weight = false; + } + } + int status; + if (is_const_weight) { + status = UnifyConstConvWeight(graph, weight_node, src_format, dst_format, has_visited); + } else { + status = UnifyVariableConvWeight(graph, weight_node, src_format, dst_format, has_visited); + } + if (status != RET_OK) { + MS_LOG(ERROR) << "unfiy coneight failed, cnode name is " << cnode->fullname_with_scope(); } return status; } -int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, - std::vector perm) { - MS_ASSERT(graph != nullptr); - MS_ASSERT(weight_node != nullptr); - auto manager = Manage(graph); - if (opt::CheckPrimitiveType(weight_node, opt::kPrimIdentity) || - opt::CheckPrimitiveType(weight_node, prim::kPrimLoad)) { - manager->Replace(weight_node, weight_node->input(1)); - return RET_OK; - } - auto perm_node = 
opt::BuildIntVecParameterNode(graph, perm, weight_node->fullname_with_scope() + "_const_perm"); - auto prim = std::make_shared(); - prim->AddAttr("quant_params", std::make_shared(1, 1)); - auto transpose_node = graph->NewCNode(prim, {weight_node, perm_node}); - transpose_node->set_fullname_with_scope(weight_node->fullname_with_scope() + "_const_post"); - conv_node->set_input(kNumWeightIndex, transpose_node); - return lite::RET_OK; -} - -int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, - schema::Format src_format, schema::Format dst_format) { - MS_ASSERT(graph != nullptr); - MS_ASSERT(weight_node != nullptr); +int UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited) { + MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); if (src_format == dst_format) { return lite::RET_OK; } @@ -240,10 +212,142 @@ int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, cons MS_LOG(ERROR) << "get perm failed."; return status; } - status = TransposeInsertForWeightConst(graph, conv_node, weight_node, perm); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "transpose insert failed."; + auto manager = Manage(graph); + MS_ASSERT(manager != nullptr); + CNodePtr trans_cnode = nullptr; + auto weight_node_users = manager->node_users()[weight_node]; + for (auto &weight_node_user : weight_node_users) { + auto post_node = weight_node_user.first; + if (!utils::isa(post_node)) { + MS_LOG(ERROR) << "post node is invalid."; + return RET_ERROR; + } + if (!IsWeightNodeSensitive(post_node)) { + continue; + } + has_visited->insert(post_node); + if (trans_cnode == nullptr) { + trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm"); + MS_ASSERT(trans_cnode != nullptr); + auto abstract = weight_node->abstract(); + ShapeVector shape; + if 
(abstract != nullptr) { + ShapeVector weight_shape; + if (opt::FetchShapeFromAbstract(abstract, &weight_shape) != RET_OK) { + MS_LOG(ERROR) << "fetch shape from abstract failed."; + return RET_ERROR; + } + if (!weight_shape.empty()) { + if (weight_shape.size() != opt::kInputSizeFour) { + MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size(); + return RET_ERROR; + } + std::transform(perm.begin(), perm.end(), std::back_inserter(shape), + [&weight_shape](const int index) { return weight_shape[index]; }); + } + abstract = abstract->Clone(); + } else { + abstract = CreateTensorAbstract(shape, TypeId::kNumberTypeFloat32); + MS_ASSERT(abstract != nullptr); + } + abstract->set_shape(std::make_shared(shape)); + trans_cnode->set_abstract(abstract); + } + auto post_cnode = post_node->cast(); + auto tr = manager->Transact(); + tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode); + tr.Commit(); } - return status; + return RET_OK; +} + +int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited) { + MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); + if (src_format == dst_format) { + return lite::RET_OK; + } + auto weight_value = opt::GetTensorInfo(weight_node); + if (weight_value == nullptr) { + MS_LOG(ERROR) << "conv weight is non-const."; + return RET_ERROR; + } + auto status = opt::TransFilterFormat(weight_value, src_format, dst_format); + if (status != RET_OK) { + MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(src_format) << "To" << EnumNameFormat(dst_format) + << " failed, node : " << weight_node->fullname_with_scope(); + return RET_ERROR; + } + auto type_id = static_cast(weight_value->data_type()); + auto shape = weight_value->shape(); + auto abstract = CreateTensorAbstract(shape, type_id); + if (abstract == nullptr) { + MS_LOG(ERROR) << "Create tensor abstarct failed"; + return 
RET_ERROR; + } + weight_node->set_abstract(abstract); + if (HandleConstConvWeightShared(graph, weight_node, src_format, dst_format, has_visited) != RET_OK) { + MS_LOG(ERROR) << "handle const conv weight-shared failed, node name is " << weight_node->fullname_with_scope(); + return RET_ERROR; + } + return RET_OK; +} + +int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited) { + MS_ASSERT(graph != nullptr && weight_node != nullptr && has_visited != nullptr); + if (src_format == dst_format) { + return RET_OK; + } + std::vector perm; + auto status = GetTransposePermSharing(src_format, dst_format, &perm); + if (status != RET_OK) { + MS_LOG(ERROR) << "get perm failed."; + return status; + } + auto manager = Manage(graph); + MS_ASSERT(manager != nullptr); + CNodePtr trans_cnode = nullptr; + auto weight_node_users = manager->node_users()[weight_node]; + for (auto &weight_node_user : weight_node_users) { + auto post_node = weight_node_user.first; + if (!utils::isa(post_node)) { + MS_LOG(ERROR) << "post node is invalid."; + return RET_ERROR; + } + if (IsWeightNodeSensitive(post_node)) { + has_visited->insert(post_node); + continue; + } + if (trans_cnode == nullptr) { + trans_cnode = opt::GenTransposeNode(graph, weight_node, perm, weight_node->fullname_with_scope() + "_post_perm"); + MS_ASSERT(trans_cnode != nullptr); + auto prim = GetValueNode(trans_cnode->input(0)); + MS_ASSERT(prim != nullptr); + prim->AddAttr(ops::kFormat, MakeValue(dst_format)); + auto weight_value = opt::GetTensorInfo(weight_node); + MS_ASSERT(weight_value != nullptr); + auto weight_shape = weight_value->shape(); + ShapeVector shape; + if (!weight_shape.empty()) { + if (weight_shape.size() != opt::kInputSizeFour) { + MS_LOG(ERROR) << "conv weight shape is invalid, which is not 4D, now is " << weight_shape.size(); + return RET_ERROR; + } + std::transform(perm.begin(), perm.end(), 
std::back_inserter(shape), + [&weight_shape](const int index) { return weight_shape[index]; }); + } + auto abstract = weight_node->abstract(); + MS_ASSERT(abstract != nullptr); + abstract = abstract->Clone(); + abstract->set_shape(std::make_shared(shape)); + trans_cnode->set_abstract(abstract); + } + auto post_cnode = post_node->cast(); + auto tr = manager->Transact(); + tr.SetEdge(post_cnode, weight_node_user.second, trans_cnode); + tr.Commit(); + } + return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/parser/parser_utils.h b/mindspore/lite/tools/converter/parser/parser_utils.h index d34379367d2..913ff3d8c10 100644 --- a/mindspore/lite/tools/converter/parser/parser_utils.h +++ b/mindspore/lite/tools/converter/parser/parser_utils.h @@ -30,14 +30,15 @@ void GetAllFuncGraph(const FuncGraphPtr &func_graph, std::set *all int CommonAnfAdjust(const std::set &all_func_graphs); int GetTransposePerm(schema::Format src_format, schema::Format dst_format, std::vector *perm); int GetTransposePermSharing(schema::Format src_format, schema::Format dst_format, std::vector *perm); -int TransposeInsertForWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, - std::vector perm); -int HandleWeightConst(const FuncGraphPtr &graph, const CNodePtr &conv_node, const CNodePtr &weight_node, - schema::Format src_format, schema::Format dst_format); -int TransposeInsertForWeightSharing(const FuncGraphPtr &graph, int64_t dst_format, int64_t format, - const ParameterPtr &weight_node, std::vector perm); -int HandleWeightSharing(const FuncGraphPtr &graph, int64_t format, const ParameterPtr &weight_node, - schema::Format src_format, schema::Format dst_format); +AnfNodePtr GetRealConvWeightNode(const FuncGraphPtr &graph, const CNodePtr &cnode); +int UnifyConvWeightFormat(const FuncGraphPtr &graph, const CNodePtr &cnode, schema::Format src_format, + schema::Format dst_format, std::set *has_visited); +int 
UnifyVariableConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited); +int UnifyConstConvWeight(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited); +int HandleConstConvWeightShared(const FuncGraphPtr &graph, const AnfNodePtr &weight_node, schema::Format src_format, + schema::Format dst_format, std::set *has_visited); } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc index 14d4718c1c1..bea44401e44 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.cc @@ -576,150 +576,16 @@ FuncGraphPtr TFModelParser::Parse(const converter::ConverterParameters &flag) { ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(lite::converter::FmkType_TF, false); + auto unify_format = std::make_shared(lite::converter::FmkType_TF, false, quant_type_); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { - MS_LOG(ERROR) << "WeightFormatTransform failed."; - ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); - return nullptr; - } res_graph_->set_manager(nullptr); static auto root_func_manager = Manage(res_graph_); return res_graph_; } -STATUS TFModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { - MS_ASSERT(graph != nullptr); - auto node_list = TopoSort(graph->get_return()); - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - auto conv_cnode = node->cast(); - if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(node, 
opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - continue; - } - MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); - auto weight_node = conv_cnode->input(kConvWeightIndex); - MS_ASSERT(weight_node != nullptr); - auto tensor_info = opt::GetTensorInfo(weight_node); - auto status = HardCodeTF(conv_cnode, tensor_info, graph); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); - return RET_ERROR; - } - } - return RET_OK; -} - -STATUS TFModelParser::HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, - const FuncGraphPtr &graph) { - MS_ASSERT(conv_cnode != nullptr); - MS_ASSERT(tensor_info != nullptr); - auto prim = GetValueNode(conv_node->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? 
GetValue(prim->GetAttr(ops::kFormat)) : 0; - schema::Format weight_dst_format = schema::Format::Format_KHWC; - STATUS status = RET_OK; - schema::Format weight_src_format = Format_NUM_OF_FORMAT; - auto weight_node = conv_node->input(kConvWeightIndex); - auto weight_value = opt::GetTensorInfo(weight_node); - switch (quant_type_) { - case QuantType_AwareTraining: - case QuantType_PostTraining: - case QuantType_WeightQuant: - case QuantType_QUANT_NONE: { - if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_HWCK; - } else { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_HWKC; - } - } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_HWCK; - } - if (format == Format_NCHW) { - prim->AddAttr(ops::kFormat, MakeValue(Format_NCHW)); - } else if (format == Format_KHWC) { - prim->AddAttr(ops::kFormat, MakeValue(weight_dst_format)); - weight_src_format = schema::Format::Format_KHWC; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported op: " << conv_node->fullname_with_scope(); - return lite::RET_ERROR; - } - } - status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); - if (status != RET_OK) { - return RET_ERROR; - } - if (format == Format_NCHW) { - prim->AddAttr(ops::kFormat, MakeValue(Format_NCHW)); - } - return RET_OK; -} - -int TFModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, - const FuncGraphPtr &graph, schema::Format weight_src_format, - schema::Format weight_dst_format) { - auto prim = GetValueNode(conv_node->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return 
RET_ERROR; - } - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; - - if (utils::isa(weight_node)) { - auto status = - HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-const failed."; - return RET_ERROR; - } - } - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value != nullptr) { - auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" - << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() - << "quant type:" << quant_type_; - return RET_ERROR; - } - auto type_id = static_cast(weight_value->data_type()); - auto shape = weight_value->shape(); - std::vector shape_vector(shape.begin(), shape.end()); - auto abstract = CreateTensorAbstract(shape_vector, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - } - if (utils::isa(weight_node)) { - auto status = - HandleWeightSharing(graph, format, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-sharing failed."; - return RET_ERROR; - } - } - return RET_OK; -} - STATUS TFModelParser::ConvertSubgraphInputs(std::map *tf_sub_node_map, std::unordered_map *anf_sub_node_map, const tensorflow::FunctionDef &tf_sub_fuction, const CNodePtr &cnode, diff --git a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h index 2a63210d61f..f0ecc57a254 100644 --- a/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h +++ b/mindspore/lite/tools/converter/parser/tf/tf_model_parser.h @@ -95,13 +95,6 @@ class 
TFModelParser : public ModelParser { STATUS ConnectNullInput(); - STATUS WeightFormatTransform(const FuncGraphPtr &graph); - - STATUS HardCodeTF(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); - - int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, - schema::Format weight_src_format, schema::Format weight_dst_format); - std::unique_ptr tf_root_graph_; // tf root graph def std::map tf_root_graph_nodes_; // tf root graph node map std::unordered_map anf_root_node_map_; diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc index 79e127b2001..7d29bfcb66b 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.cc @@ -105,128 +105,13 @@ FuncGraphPtr TfliteModelParser::Parse(const converter::ConverterParameters &flag ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); return nullptr; } - auto unify_format = std::make_shared(lite::converter::FmkType_TFLITE, false); + auto unify_format = std::make_shared(lite::converter::FmkType_TFLITE, false, quant_type_); if (!unify_format->Run(res_graph_)) { MS_LOG(ERROR) << "Run insert transpose failed."; return nullptr; } - if ((status = WeightFormatTransform(res_graph_)) != RET_OK) { - MS_LOG(ERROR) << "WeightFormatTransform failed."; - ReturnCode::GetSingleReturnCode()->UpdateReturnCode(status); - return nullptr; - } return res_graph_; } -STATUS TfliteModelParser::WeightFormatTransform(const FuncGraphPtr &graph) { - MS_ASSERT(graph != nullptr); - auto node_list = TopoSort(graph->get_return()); - for (auto &node : node_list) { - if (!utils::isa(node)) { - continue; - } - auto conv_cnode = node->cast(); - if (!opt::CheckPrimitiveType(node, prim::kPrimConv2DFusion) && - !opt::CheckPrimitiveType(node, 
opt::kPrimConv2DBackpropInputFusion) && - !opt::CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { - continue; - } - MS_ASSERT(conv_cnode->inputs().size() > kConvWeightIndex); - auto weight_node = conv_cnode->input(kConvWeightIndex); - MS_ASSERT(weight_node != nullptr); - auto tensor_info = opt::GetTensorInfo(weight_node); - auto status = HardCodeTflite(conv_cnode, tensor_info, graph); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "Format hard code failed: " << status << ", node: " << node->fullname_with_scope(); - return RET_ERROR; - } - } - return RET_OK; -} - -STATUS TfliteModelParser::HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, - const FuncGraphPtr &graph) { - MS_ASSERT(conv_cnode != nullptr); - auto prim = GetValueNode(conv_node->input(0)); - if (prim == nullptr) { - MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; - return lite::RET_ERROR; - } - bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); - schema::Format weight_dst_format = schema::Format::Format_KHWC; - STATUS status = RET_OK; - schema::Format weight_src_format = Format_NUM_OF_FORMAT; - auto weight_node = conv_node->input(kConvWeightIndex); - int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? 
GetValue(prim->GetAttr(ops::kFormat)) : 0; - switch (quant_type_) { - case QuantType_AwareTraining: - case QuantType_PostTraining: - case QuantType_WeightQuant: - case QuantType_QUANT_NONE: { - if (format == KHWC) { - weight_src_format = schema::Format::Format_KHWC; - } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2DFusion)) { - if (!is_depth_wise) { - weight_src_format = schema::Format::Format_KHWC; - } else { - weight_src_format = schema::Format::Format_CHWK; - } - } else if (opt::CheckPrimitiveType(conv_node, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { - weight_src_format = schema::Format::Format_CHWK; - } - } break; - default: { - MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type_) - << ", node: " << conv_node->fullname_with_scope(); - return RET_ERROR; - } - } - status = DoWeightFormatTransform(conv_node, weight_node, graph, weight_src_format, weight_dst_format); - if (status != RET_OK) { - return RET_ERROR; - } - return lite::RET_OK; -} - -int TfliteModelParser::DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, - const FuncGraphPtr &graph, schema::Format weight_src_format, - schema::Format weight_dst_format) { - if (utils::isa(weight_node)) { - auto status = - HandleWeightConst(graph, conv_node, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-const failed."; - return RET_ERROR; - } - } - auto weight_value = opt::GetTensorInfo(weight_node); - if (weight_value != nullptr) { - auto status = opt::TransFilterFormat(weight_value, weight_src_format, weight_dst_format); - if (status != RET_OK) { - MS_LOG(ERROR) << "TransFilter " << EnumNameFormat(schema::EnumValuesFormat()[weight_dst_format]) << "To" - << EnumNameFormat(weight_dst_format) << " failed, node : " << conv_node->fullname_with_scope() - << "quant type:" << quant_type_; - return RET_ERROR; - } - auto type_id = static_cast(weight_value->data_type()); 
- auto shape = weight_value->shape(); - std::vector shape_vector(shape.begin(), shape.end()); - auto abstract = lite::CreateTensorAbstract(shape_vector, type_id); - if (abstract == nullptr) { - MS_LOG(ERROR) << "Create tensor abstarct failed"; - return RET_ERROR; - } - weight_node->set_abstract(abstract); - } - if (utils::isa(weight_node)) { - auto status = - HandleWeightSharing(graph, KHWC, weight_node->cast(), weight_src_format, weight_dst_format); - if (status != lite::RET_OK) { - MS_LOG(ERROR) << "handle weight-sharing failed."; - return RET_ERROR; - } - } - return RET_OK; -} std::string GetTensorName(size_t index, const tflite::BuiltinOperator &op_type, const std::string &op_name) { std::string tensor_name = op_name + "/input-" + std::to_string(index); diff --git a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h index b45c2ee033c..78d8b22d2e8 100644 --- a/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h +++ b/mindspore/lite/tools/converter/parser/tflite/tflite_model_parser.h @@ -52,10 +52,6 @@ class TfliteModelParser : public ModelParser { STATUS ConvertGraphOutputs(); static STATUS SetTensorQuantParam(const tflite::TensorT *tflite_tensor, std::vector *quant_params, int round_type = 1); - int DoWeightFormatTransform(const CNodePtr &conv_node, const AnfNodePtr &weight_node, const FuncGraphPtr &graph, - schema::Format weight_src_format, schema::Format weight_dst_format); - STATUS WeightFormatTransform(const FuncGraphPtr &graph); - STATUS HardCodeTflite(const CNodePtr &conv_node, const tensor::TensorPtr &tensor_info, const FuncGraphPtr &graph); QuantType quant_type_ = schema::QuantType_QUANT_NONE; }; } // namespace lite diff --git a/mindspore/lite/tools/converter/parser/unify_format.cc b/mindspore/lite/tools/converter/parser/unify_format.cc index 29ea6005d41..f3a07842db2 100644 --- a/mindspore/lite/tools/converter/parser/unify_format.cc +++ 
b/mindspore/lite/tools/converter/parser/unify_format.cc @@ -15,14 +15,162 @@ */ #include "tools/converter/parser/unify_format.h" +#include namespace mindspore { namespace lite { namespace { constexpr int kInputChannal = 3; +STATUS DecideMINDIRConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, + schema::Format *src_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr); + auto prim = GetValueNode(cnode->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + if (format == schema::Format_NHWC) { + *src_format = schema::Format_KHWC; + } else if (format == schema::Format_NCHW) { + *src_format = schema::Format_KCHW; + } else { + MS_LOG(ERROR) << "cnode format is invalid."; + return RET_ERROR; + } + return RET_OK; } -void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { - MS_ASSERT(cnode != nullptr); + +STATUS DecideTFConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr); + auto prim = GetValueNode(cnode->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + switch (quant_type) { + case QuantType_AwareTraining: + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + *src_format = schema::Format_HWCK; + } else { + *src_format = schema::Format_HWKC; + } + } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + *src_format = schema::Format::Format_HWCK; + 
} else { + MS_LOG(ERROR) << "depthwise-conv2dTranspose need to check."; + return RET_ERROR; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + } + return RET_OK; +} + +STATUS DecideTFLITEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, + schema::Format *src_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr); + auto prim = GetValueNode(cnode->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << "Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + switch (quant_type) { + case QuantType_AwareTraining: + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + *src_format = schema::Format_KHWC; + } else { + *src_format = schema::Format_CHWK; + } + } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + *src_format = schema::Format_CHWK; + } else { + MS_LOG(ERROR) << "cannot decide weight format, current situation need to check."; + return RET_NOT_SUPPORT; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type) + << ", node: " << cnode->fullname_with_scope(); + return RET_ERROR; + } + } + return RET_OK; +} + +STATUS DecideCAFFEConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr); + *src_format = schema::Format_KCHW; + return RET_OK; +} + +STATUS DecideONNXConvWeightSrcFormat(const CNodePtr &cnode, schema::QuantType quant_type, schema::Format *src_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr); + auto prim = GetValueNode(cnode->input(0)); + if (prim == nullptr) { + MS_LOG(ERROR) << 
"Invalid anfnode, which don't have primitive."; + return lite::RET_ERROR; + } + bool is_depth_wise = prim->GetAttr(ops::kIsDepthWise) != nullptr && GetValue(prim->GetAttr(ops::kIsDepthWise)); + int64_t format = prim->GetAttr(ops::kFormat) != nullptr ? GetValue(prim->GetAttr(ops::kFormat)) : 0; + switch (quant_type) { + case QuantType_AwareTraining: { + if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion)) { + if (!is_depth_wise) { + *src_format = schema::Format_KHWC; + } else { + *src_format = schema::Format_CHWK; + } + } else if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion) && !is_depth_wise) { + *src_format = schema::Format_KCHW; + } else { + MS_LOG(ERROR) << "Unsupported op: " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + } break; + case QuantType_PostTraining: + case QuantType_WeightQuant: + case QuantType_QUANT_NONE: { + if (opt::CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) || + opt::CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { + if (format == schema::Format_NHWC) { + *src_format = schema::Format_KHWC; + } else if (format == schema::Format_NCHW) { + *src_format = schema::Format_KCHW; + } else { + MS_LOG(ERROR) << "format is invalid, format is " << format; + return RET_ERROR; + } + } else { + MS_LOG(ERROR) << "d an unsupported op type, which need to check. 
the type is " << prim->name(); + return RET_NOT_SUPPORT; + } + } break; + default: { + MS_LOG(ERROR) << "Unsupported quantType: " << EnumNameQuantType(quant_type) + << ", node: " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + } + return RET_OK; +} +} // namespace + +STATUS UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { + MS_ASSERT(cnode != nullptr && trans_info != nullptr); auto prim_node = cnode->input(0); auto prim = GetValueNode(prim_node); MS_ASSERT(prim != nullptr); @@ -30,7 +178,7 @@ void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::Trans auto &specify_nchw_op_map = opt::GetNCHWOpMap(); if (fmk_type_ == lite::converter::FmkType_TFLITE) { if (specify_nchw_op_map.find(prim->name()) == specify_nchw_op_map.end()) { - return; + return lite::RET_OK; } trans_info->pre_ = opt::kNHWC2NCHW; trans_info->post_ = opt::kNCHW2NHWC; @@ -47,12 +195,13 @@ void UnifyFormatToNHWC::GetTransNodeFormatType(const CNodePtr &cnode, opt::Trans if (specify_nhwc_op_map.find(prim->name()) != specify_nhwc_op_map.end()) { if (fmk_type_ == lite::converter::FmkType_ONNX && prim->GetAttr(ops::kFormat) != nullptr && GetValue(prim->GetAttr(ops::kFormat)) == NHWC) { - return; + return lite::RET_OK; } trans_info->pre_ = opt::kNCHW2NHWC; trans_info->post_ = opt::kNHWC2NCHW; } } + return lite::RET_OK; } void UnifyFormatToNHWC::SetSensitiveOps() { @@ -63,6 +212,7 @@ void UnifyFormatToNHWC::SetSensitiveOps() { } bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) { + MS_ASSERT(func_graph != nullptr); if (fmk_type_ == converter::FmkType_TF || fmk_type_ == converter::FmkType_TFLITE) { return false; } @@ -74,5 +224,29 @@ bool UnifyFormatToNHWC::DecideWhetherHandleGraphInput(const FuncGraphPtr &func_g } bool UnifyFormatToNHWC::DecideWhetherInferShapeForNewNode() { return false; } + +STATUS UnifyFormatToNHWC::DecideConvWeightSrcAndDstFormat(const 
CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr); + *dst_format = schema::Format_KHWC; + std::map> + decide_functions = {{converter::FmkType_MS, DecideMINDIRConvWeightSrcFormat}, + {converter::FmkType_TF, DecideTFConvWeightSrcFormat}, + {converter::FmkType_TFLITE, DecideTFLITEConvWeightSrcFormat}, + {converter::FmkType_CAFFE, DecideCAFFEConvWeightSrcFormat}, + {converter::FmkType_ONNX, DecideONNXConvWeightSrcFormat}}; + auto iter = decide_functions.find(fmk_type_); + if (iter == decide_functions.end()) { + MS_LOG(ERROR) << "current fmk don't support, please check."; + return RET_NOT_SUPPORT; + } + auto decide_func = iter->second; + MS_ASSERT(decide_func != nullptr); + if (decide_func(cnode, quant_type_, src_format) != RET_OK) { + MS_LOG(ERROR) << "run decide function failed, cannot decide conv weight format."; + return RET_ERROR; + } + return RET_OK; +} } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/parser/unify_format.h b/mindspore/lite/tools/converter/parser/unify_format.h index 49f20f44c4f..1ef43187504 100644 --- a/mindspore/lite/tools/converter/parser/unify_format.h +++ b/mindspore/lite/tools/converter/parser/unify_format.h @@ -24,15 +24,19 @@ namespace mindspore { namespace lite { class UnifyFormatToNHWC : public opt::ToFormatBase { public: - explicit UnifyFormatToNHWC(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false) - : ToFormatBase(fmk_type, train_flag) {} + explicit UnifyFormatToNHWC(FmkType fmk_type = lite::converter::FmkType_MS, bool train_flag = false, + schema::QuantType quant_type = schema::QuantType_QUANT_NONE) + : ToFormatBase(fmk_type, train_flag), quant_type_(quant_type) {} ~UnifyFormatToNHWC() override = default; private: - void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + STATUS GetTransNodeFormatType(const CNodePtr &cnode, 
opt::TransTypePair *trans_info) override; void SetSensitiveOps() override; bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) override; bool DecideWhetherInferShapeForNewNode() override; + STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) override; + schema::QuantType quant_type_{schema::QuantType_QUANT_NONE}; }; } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc index 6e5376b90dd..6e4bce06232 100644 --- a/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/post_training_quantizer.cc @@ -1454,7 +1454,10 @@ KernelCallBack PostTrainingQuantizer::GetBeforeCallBack(bool int8_op) { auto tensor = beforeInputs[0]; MS_ASSERT(tensor != nullptr); auto lite_tensor = dynamic_cast(tensor); - MS_ASSERT(lite_tensor != nullptr); + if (lite_tensor == nullptr) { + MS_LOG(ERROR) << "Before inputs is not a lite::Tensor"; + return false; + } if (tensor->data_type() != kNumberTypeInt8) { MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type(); return false; @@ -1513,7 +1516,10 @@ KernelCallBack PostTrainingQuantizer::GetInt8AfterCallBack() { auto tensor = afterOutputs[0]; MS_ASSERT(tensor != nullptr); auto lite_tensor = dynamic_cast(tensor); - MS_ASSERT(lite_tensor != nullptr); + if (lite_tensor == nullptr) { + MS_LOG(ERROR) << "Before inputs is not a lite::Tensor"; + return false; + } if (tensor->data_type() != kNumberTypeInt8) { MS_LOG(ERROR) << "unexpected tensor type: " << tensor->data_type(); return false; diff --git a/mindspore/lite/tools/converter/quantizer/quant_cast.cc b/mindspore/lite/tools/converter/quantizer/quant_cast.cc index 6e05fdcced2..82dca0ec3c5 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_cast.cc +++ 
b/mindspore/lite/tools/converter/quantizer/quant_cast.cc @@ -41,14 +41,13 @@ STATUS QuantCast::Run(const FuncGraphPtr &graph) { auto cnodes = graph->GetOrderedCnodes(); for (auto &cnode : cnodes) { auto primitive_c = GetValueNode>(cnode->input(0)); - auto primitive_quant_param_holder = GetCNodeQuantHolder(primitive_c); - MS_ASSERT(primitive_quant_param_holder != nullptr); - auto curnode_quant_type = schema::QuantType_QUANT_NONE; if (primitive_c == nullptr) { MS_LOG(WARNING) << "primitive_c is nullptr: " << cnode->fullname_with_scope(); - } else { - curnode_quant_type = primitive_quant_param_holder->quant_type(); + continue; } + auto primitive_quant_param_holder = GetCNodeQuantHolder(primitive_c); + MS_ASSERT(primitive_quant_param_holder != nullptr); + auto curnode_quant_type = primitive_quant_param_holder->quant_type(); if (primitive_c->name() == ops::kNameGather) { continue; } diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc index 980e88dcf2f..f9cf72ca306 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/attention_quant_type_determiner.cc @@ -19,14 +19,19 @@ #include "mindspore/core/utils/log_adapter.h" #include "mindspore/core/ir/dtype/type_id.h" namespace mindspore::lite { +const size_t kWeightQueryIndex = 4; +const size_t kWeightKeyIndex = 5; +const size_t kWeightValueIndex = 6; +const size_t kWeightOutputIndex = 10; + bool AttentionQuantTypeDeterminer::DetermineQuantWeight(const mindspore::schema::MetaGraphT &graph, mindspore::schema::CNodeT *node) { MS_ASSERT(node->inputIndex.size() >= 2); auto &input_tensor = graph.allTensors.at(node->inputIndex.at(kInputIndex)); - auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(4)); - auto &weight_key_tensor = 
graph.allTensors.at(node->inputIndex.at(5)); - auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(6)); - auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(10)); + auto &weight_query_tensor = graph.allTensors.at(node->inputIndex.at(kWeightQueryIndex)); + auto &weight_key_tensor = graph.allTensors.at(node->inputIndex.at(kWeightKeyIndex)); + auto &weight_value_tensor = graph.allTensors.at(node->inputIndex.at(kWeightValueIndex)); + auto &weight_output_tensor = graph.allTensors.at(node->inputIndex.at(kWeightOutputIndex)); if (!quant::TensorQuantParamsInited(*input_tensor) && quant::TensorQuantParamsInited(*weight_query_tensor) && quant::TensorQuantParamsInited(*weight_key_tensor) && quant::TensorQuantParamsInited(*weight_value_tensor) && diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc index 006871f0fb5..3e6ae8a22dd 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/conv_quant_param_propogator.cc @@ -58,5 +58,4 @@ STATUS ConvQuantParamPropogator::PropogateQuantParams(mindspore::schema::MetaGra } return RET_OK; } - } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc index 2783bb08929..40a676c3f8f 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.cc @@ -16,7 +16,6 @@ #include "tools/converter/quantizer/quant_helper/default_quant_all_quant_type_determiner.h" namespace mindspore::lite { - bool DefaultQuantAllQuantTypeDeterminer::DetermineQuantAll(const 
schema::MetaGraphT &graph, schema::CNodeT *node) { return true; } diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc index b32b338efed..bae725fa398 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.cc @@ -16,7 +16,6 @@ #include "tools/converter/quantizer/quant_helper/only_need_inputs_quant_type_determiner.h" namespace mindspore::lite { - bool OnlyNeedInputsQuantTypeDeterminer::DetermineQuantAll(const schema::MetaGraphT &graph, schema::CNodeT *node) { UpdateQuantParamsNum(graph, *node); if (input_inited_quant_params_ == node->inputIndex.size()) { diff --git a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc index fea5dab604a..283ef442e03 100644 --- a/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc +++ b/mindspore/lite/tools/converter/quantizer/quant_helper/quant_node_helper.cc @@ -142,5 +142,4 @@ QuantHelperRegister::~QuantHelperRegister() { } this->register_map_.clear(); } - } // namespace mindspore::lite diff --git a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc index 7d3b6f5be16..53ff184e15e 100644 --- a/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc +++ b/mindspore/lite/tools/converter/quantizer/weight_quantizer.cc @@ -143,7 +143,7 @@ STATUS WeightQuantizer::DoMulQuantize(const CNodePtr &cnode) { auto status = RET_ERROR; auto per_channel = true; - if (i == 3) { + if (i == kInputSize2) { per_channel = false; } if (type_id_ == kNumberTypeInt8) { diff --git a/mindspore/lite/tools/converter/registry/model_parser_registry.cc 
b/mindspore/lite/tools/converter/registry/model_parser_registry.cc index 93796131f3c..975df7a09e0 100644 --- a/mindspore/lite/tools/converter/registry/model_parser_registry.cc +++ b/mindspore/lite/tools/converter/registry/model_parser_registry.cc @@ -46,6 +46,5 @@ int ModelParserRegistry::RegParser(const FmkType fmk, ModelParserCreator creator instance->parsers_[fmk] = creator; return RET_OK; } - } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/cropper/build_cropper_config.sh b/mindspore/lite/tools/cropper/build_cropper_config.sh index 832c17ca656..feba431a9ab 100644 --- a/mindspore/lite/tools/cropper/build_cropper_config.sh +++ b/mindspore/lite/tools/cropper/build_cropper_config.sh @@ -3,7 +3,8 @@ CURRENT_PATH=$(pwd) MINDSPORE_HOME="${CURRENT_PATH}/../../../.." echo "MINDSPORE_HOME path is ${MINDSPORE_HOME}" -CROPPER_OUTPUT_DIR=${MINDSPORE_HOME}/mindspore/lite/build/tools/cropper +cd "${MINDSPORE_HOME}" || exit 1 +CROPPER_OUTPUT_DIR=mindspore/lite/build/tools/cropper mkdir -p ${CROPPER_OUTPUT_DIR} MAPPING_OUTPUT_FILE_NAME_TMP=${CROPPER_OUTPUT_DIR}/cropper_mapping_tmp.cfg CPU_MAPPING_OUTPUT_FILE=${CROPPER_OUTPUT_DIR}/cropper_mapping_cpu.cfg @@ -22,7 +23,7 @@ if [ ${MSLIBS_CACHE_PATH} ]; then FLATBUFFERS=${FLATBUFFERS_LIST[0]} echo "FLATBUFFERS path is ${FLATBUFFERS}" else - FLATBUFFERS=$(ls -d ${MINDSPORE_HOME}/mindspore/lite/build/.mslib/flatbuffers_*/include) + FLATBUFFERS=$(ls -d mindspore/lite/build/.mslib/flatbuffers_*/include) echo "FLATBUFFERS path is ${FLATBUFFERS}" fi @@ -103,7 +104,6 @@ getOpsFile() { getCommonFile() { echo "start get common files" - cd "${MINDSPORE_HOME}" || exit 1 include_h=() while IFS='' read -r line; do include_h+=("$line"); done < <(ls mindspore/lite/include/*.h) regist_include_h=() @@ -142,33 +142,33 @@ getCommonFile() { done cxx_api_files=() - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/graph/*.cc) - while IFS='' read -r line; do 
cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/model/*.cc) - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/tensor/*.cc) - while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/cxx_api/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/graph/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/model/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/tensor/*.cc) + while IFS='' read -r line; do cxx_api_files+=("$line"); done < <(ls mindspore/lite/src/cxx_api/*.cc) mindrt_files=() - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/*.cc) - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/async/*.cc) - while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/core/mindrt/src/actor/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/async/*.cc) + while IFS='' read -r line; do mindrt_files+=("$line"); done < <(ls mindspore/core/mindrt/src/actor/*.cc) src_files=() - while IFS='' read -r line; do src_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/*.cc) + while IFS='' read -r line; do src_files+=("$line"); done < <(ls mindspore/lite/src/*.cc) regist_files=() - while IFS='' read -r line; do regist_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/registry/*.cc) + while IFS='' read -r line; do regist_files+=("$line"); done < <(ls mindspore/lite/src/registry/*.cc) common_files=() - while IFS='' read -r line; do 
common_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/common/*.cc) + while IFS='' read -r line; do common_files+=("$line"); done < <(ls mindspore/lite/src/common/*.cc) runtime_files_cc=() - while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/*.cc) + while IFS='' read -r line; do runtime_files_cc+=("$line"); done < <(ls mindspore/lite/src/runtime/*.cc) # sava all assembly files assembly_files=() - while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S) + while IFS='' read -r line; do assembly_files+=("$line"); done < <(ls mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/assembly/*/*.S) others_files_c=( - "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c - "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc - "${MINDSPORE_HOME}"/mindspore/lite/src/runtime/infer_manager.cc - "${MINDSPORE_HOME}"/mindspore/lite/src/ops/populate/populate_register.cc - "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c - "${MINDSPORE_HOME}"/mindspore/core/utils/status.cc - "${MINDSPORE_HOME}"/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c + mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_utils.c + mindspore/lite/src/runtime/kernel/arm/fp16/common_fp16.cc + mindspore/lite/src/runtime/infer_manager.cc + mindspore/lite/src/ops/populate/populate_register.cc + mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/infer_register.c + mindspore/core/utils/status.cc + mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer/common_infer.c ) all_files=("${src_files[@]}" "${regist_files[@]}" "${common_files[@]}" "${runtime_files_cc[@]}" "${others_files_c[@]}" "${assembly_files[@]}" "${mindrt_files[@]}" @@ -238,11 +238,11 @@ getOpsFileWithNoDeepSearch() { local depend_file=("${ret}" 
"${ret_h}") for array_file in ${depend_file[@]}; do # only add existing files - if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}cc ]]; then + if [[ -e mindspore/lite/${array_file%h*}cc ]]; then array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}') echo "${type},${3},${array_file_split%h*}cc.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} fi - if [[ -e ${MINDSPORE_HOME}/mindspore/lite/${array_file%h*}c ]]; then + if [[ -e mindspore/lite/${array_file%h*}c ]]; then array_file_split=$(echo ${array_file} | awk -F '/' '{print $NF}') echo "${type},${3},${array_file_split%h*}c.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} fi @@ -255,7 +255,7 @@ getOpsFileWithNoDeepSearch() { generateOpsList() { echo "start generate operator list" ops_list=() - while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "${MINDSPORE_HOME}/mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}') + while IFS='' read -r line; do ops_list+=("$line"); done < <(grep -Rn "^table" "mindspore/lite/schema/ops.fbs" | awk -F ' ' '{print $2}') ops_num=$((${#ops_list[@]})) echo "ops nums:${ops_num}" } @@ -263,15 +263,16 @@ echo "Start getting all file associations." 
generateOpsList getCommonFile wait +sleep 1 # get src/ops -getOpsFile "REG_POPULATE\(PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/ops/populate" "prototype" & -getOpsFile "REG_INFER\(.*?, PrimType_" "${MINDSPORE_HOME}/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" & +getOpsFile "REG_POPULATE\(PrimitiveType_" "mindspore/lite/src/ops/populate" "prototype" & +getOpsFile "REG_INFER\(.*?, PrimType_" "mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/infer" "prototype" & # support for cpu -getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & -getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeFloat16" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt8" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & +getOpsFile "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/arm" "kNumberTypeInt32" & wait sleep 1 # remove duplicate files @@ -280,12 +281,12 @@ chmod 444 ${CPU_MAPPING_OUTPUT_FILE} # support for gpu opencl_files=() 
-while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/*.cc) -while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/*.cc) -while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/runtime/gpu/opencl/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/kernel/opencl/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/*.cc) +while IFS='' read -r line; do opencl_files+=("$line"); done < <(ls mindspore/lite/src/runtime/gpu/opencl/*.cc) opencl_others_files=( - "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc" - "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc" + "mindspore/lite/src/runtime/kernel/opencl/kernel/fusion_eltwise.cc" + "mindspore/lite/src/runtime/kernel/opencl/kernel/to_format.cc" ) opencl_files=("${opencl_files[@]}" "${opencl_others_files[@]}") # shellcheck disable=SC2068 @@ -294,11 +295,11 @@ for file in ${opencl_files[@]}; do echo "CommonFile,common,${file}.o" >>${MAPPING_OUTPUT_FILE_NAME_TMP} done -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & -getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, 
kNumberTypeBool, PrimitiveType_" "${MINDSPORE_HOME}/mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeFloat16, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeFloat16" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt8, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt8" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeInt32, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & +getOpsFileWithNoDeepSearch "REG_KERNEL\(.*?, kNumberTypeBool, PrimitiveType_" "mindspore/lite/src/runtime/kernel/opencl/kernel" "kNumberTypeInt32" & sleep 1 wait sort ${MAPPING_OUTPUT_FILE_NAME_TMP} | uniq >${GPU_MAPPING_OUTPUT_FILE} @@ -306,10 +307,10 @@ chmod 444 ${GPU_MAPPING_OUTPUT_FILE} # support for npu npu_files=() -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/delegate.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/*.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/op/*.cc) -while IFS='' read -r line; do npu_files+=("$line"); done < <(ls ${MINDSPORE_HOME}/mindspore/lite/src/delegate/npu/pass/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/delegate.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/op/*.cc) +while IFS='' read -r line; do npu_files+=("$line"); done < <(ls mindspore/lite/src/delegate/npu/pass/*.cc) # shellcheck 
disable=SC2068 for file in ${npu_files[@]}; do diff --git a/mindspore/lite/tools/cropper/cropper.cc b/mindspore/lite/tools/cropper/cropper.cc index 07d6d0a4b1a..e121cd40d6c 100644 --- a/mindspore/lite/tools/cropper/cropper.cc +++ b/mindspore/lite/tools/cropper/cropper.cc @@ -126,7 +126,7 @@ int Cropper::GetModelOps() { int Cropper::GetModelFiles() { if (!this->flags_->model_file_.empty()) { - auto files = StringSplit(this->flags_->model_file_, std::string(kDelimComma)); + auto files = StrSplit(this->flags_->model_file_, std::string(kDelimComma)); for (const auto &file : files) { if (ValidFileSuffix(file, "ms") != RET_OK) { return RET_INPUT_PARAM_INVALID; @@ -177,7 +177,7 @@ int Cropper::GetOpMatchFiles() { while (!in_file.eof()) { in_file.getline(buf, kBufSize); std::string buf_str = buf; - auto mapping = StringSplit(buf_str, kDelimComma); + auto mapping = StrSplit(buf_str, kDelimComma); if (!mapping.empty()) { std::string primitive = mapping.at(0); std::string type = mapping.at(1); diff --git a/mindspore/lite/tools/dataset/cropper/build_lib.py b/mindspore/lite/tools/dataset/cropper/build_lib.py index 8d34137bfcc..ba295b2245b 100644 --- a/mindspore/lite/tools/dataset/cropper/build_lib.py +++ b/mindspore/lite/tools/dataset/cropper/build_lib.py @@ -124,7 +124,8 @@ def main(): if not user_ops: warnings.warn('No MindData Ops detected in your code...') remove_unused_objects([], [], all_object_files) - with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as _: + with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), + "w+") as _: pass exit(0) @@ -141,7 +142,8 @@ def main(): remove_unused_objects(final_deps, ESSENTIAL_OBJECTS, all_object_files) # write all dependencies to the file (for extracting external ones) - with open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), 'w') as fout: + with os.fdopen(os.open(os.path.join(OBJECTS_DIR, ALL_DEPS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), + "w+") as fout: 
fout.write("\n".join(unique_deps) + '\n') diff --git a/mindspore/lite/tools/dataset/cropper/cropper_configure.py b/mindspore/lite/tools/dataset/cropper/cropper_configure.py index 864928dc9d6..440b2e9dc1c 100644 --- a/mindspore/lite/tools/dataset/cropper/cropper_configure.py +++ b/mindspore/lite/tools/dataset/cropper/cropper_configure.py @@ -362,13 +362,15 @@ def main(): dependencies.update(other_dependencies) errors += err - with open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), "w") as f: + with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, DEPENDENCIES_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), + "w+") as f: json.dump(dependencies, f) - with open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), "w") as f: + with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ASSOCIATIONS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), + "w+") as f: json.dump(all_associations, f) - with open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), "w") as f: + with os.fdopen(os.open(os.path.join(OUTPUT_LOCATION, ERRORS_FILENAME), os.O_WRONLY | os.O_CREAT, 0o660), "w+") as f: f.write(errors) diff --git a/mindspore/lite/tools/optimizer/common/gllo_utils.cc b/mindspore/lite/tools/optimizer/common/gllo_utils.cc index ebb43ea0f87..c92ccc899ee 100644 --- a/mindspore/lite/tools/optimizer/common/gllo_utils.cc +++ b/mindspore/lite/tools/optimizer/common/gllo_utils.cc @@ -530,6 +530,9 @@ tensor::TensorPtr GetTensorInfo(const AnfNodePtr &node) { } auto param = node->cast(); MS_ASSERT(param != nullptr); + if (!param->has_default()) { + return nullptr; + } auto tensor_info = std::dynamic_pointer_cast(param->default_param()); return tensor_info; } @@ -1493,10 +1496,14 @@ CNodePtr GenTransposeNode(const FuncGraphPtr &func_graph, const AnfNodePtr &inpu MS_ASSERT(trans_prim != nullptr); auto cnode = func_graph->NewCNode(trans_prim, {input_node, perm_node}); MS_ASSERT(cnode != nullptr); + auto manager = Manage(func_graph); + MS_ASSERT(manager != nullptr); + auto tr = manager->Transact(); + 
tr.SetEdge(cnode, 1, input_node); + tr.SetEdge(cnode, kInputIndexTwo, perm_node); + tr.Commit(); cnode->set_fullname_with_scope(cnode_name); - size_t input_size = 2; - size_t output_size = 1; - auto quant_params_holder = std::make_shared(input_size, output_size); + auto quant_params_holder = std::make_shared(kInputSizeTwo, 1); auto trans_insert_prim = GetValueNode(cnode->input(0)); trans_insert_prim->AddAttr("quant_params", quant_params_holder); return cnode; diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc index dbf191e73fe..d9390489d95 100644 --- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc +++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.cc @@ -71,6 +71,10 @@ STATUS DeleteRedundantTranspose::DeleteNot4DTranspose(const FuncGraphPtr &func_g } if (!shape.empty() && shape.size() != perm.size()) { MS_LOG(DEBUG) << "transpose node need to be deleted."; + if (UpdateNodeFormat(func_graph, cnode) != lite::RET_OK) { + MS_LOG(ERROR) << "update cnode format failed."; + return lite::RET_ERROR; + } manager->Replace(node, cnode->input(1)); } } @@ -129,6 +133,33 @@ STATUS DeleteRedundantTranspose::TransTransFusion(const FuncGraphPtr &func_graph return lite::RET_OK; } +STATUS DeleteRedundantTranspose::UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { + MS_ASSERT(func_graph != nullptr && cnode != nullptr); + auto manager = func_graph->manager(); + MS_ASSERT(manager != nullptr); + auto prim = GetValueNode(cnode->input(0)); + MS_ASSERT(prim != nullptr); + if (prim->GetAttr(ops::kFormat) == nullptr) { + return lite::RET_OK; + } + auto format = GetValue(prim->GetAttr(ops::kFormat)); + auto node_users = manager->node_users()[cnode]; + for (auto &node_user : node_users) { + if (node_user.second != 1) { + continue; + } + if (!utils::isa(node_user.first)) { + MS_LOG(ERROR) << "post node is not cnode, which is 
invalid."; + return lite::RET_ERROR; + } + auto post_cnode = node_user.first->cast(); + auto post_prim = GetValueNode(post_cnode->input(0)); + MS_ASSERT(post_prim != nullptr); + post_prim->AddAttr(ops::kFormat, MakeValue(format)); + } + return lite::RET_OK; +} + bool DeleteRedundantTranspose::Run(const FuncGraphPtr &func_graph) { MS_ASSERT(func_graph != nullptr); auto manager = Manage(func_graph, true); diff --git a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h index 41894313d44..71d89e14555 100644 --- a/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h +++ b/mindspore/lite/tools/optimizer/format/delete_redundant_transpose.h @@ -31,6 +31,7 @@ class DeleteRedundantTranspose : public Pass { private: STATUS DeleteNot4DTranspose(const FuncGraphPtr &func_graph); STATUS TransTransFusion(const FuncGraphPtr &func_graph); + STATUS UpdateNodeFormat(const FuncGraphPtr &func_graph, const CNodePtr &node); }; } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.cc b/mindspore/lite/tools/optimizer/format/to_format_base.cc index ec39ce5739b..5e46a31c170 100644 --- a/mindspore/lite/tools/optimizer/format/to_format_base.cc +++ b/mindspore/lite/tools/optimizer/format/to_format_base.cc @@ -15,10 +15,12 @@ */ #include "tools/optimizer/format/to_format_base.h" +#include #include "ops/op_utils.h" #include "src/common/common.h" #include "src/common/utils.h" #include "tools/common/tensor_util.h" +#include "tools/converter/parser/parser_utils.h" using mindspore::lite::NHWC_SHAPE; namespace mindspore { @@ -67,8 +69,17 @@ STATUS ToFormatBase::GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr return lite::RET_OK; } -STATUS ToFormatBase::ModifyCNodeAbstract(const CNodePtr &cnode) { +STATUS ToFormatBase::ModifyCNode(const CNodePtr &cnode) { MS_ASSERT(cnode != nullptr); + auto prim = GetValueNode(cnode->input(0)); + if (prim 
== nullptr) { + MS_LOG(ERROR) << "current node's prim is nullptr, " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + auto insert_pos = sensitive_ops_[prim->name()]; + if (insert_pos.empty() || std::find(insert_pos.begin(), insert_pos.end(), 1) != insert_pos.end()) { + prim->AddAttr(ops::kFormat, MakeValue(format_)); + } auto abstract_base = cnode->abstract(); std::vector abstracts; if (utils::isa(abstract_base)) { @@ -216,7 +227,10 @@ STATUS ToFormatBase::HandleGraphInput(const FuncGraphPtr &func_graph) { STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode) { MS_ASSERT(func_graph != nullptr && cnode != nullptr); opt::TransTypePair trans_info; - GetTransNodeFormatType(cnode, &trans_info); + if (GetTransNodeFormatType(cnode, &trans_info) != lite::RET_OK) { + MS_LOG(ERROR) << "obtain node's transferring format type failed, " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } if (trans_info.pre_ == opt::kNONE || trans_info.post_ == opt::kNONE) { return lite::RET_NO_CHANGE; } @@ -229,7 +243,7 @@ STATUS ToFormatBase::HandleGraphNode(const FuncGraphPtr &func_graph, const CNode if (opt::CheckPrimitiveType(cnode, prim::kPrimAdam) || opt::CheckPrimitiveType(cnode, prim::kPrimSGD)) { return lite::RET_OK; } - if (ModifyCNodeAbstract(cnode) != lite::RET_OK) { + if (ModifyCNode(cnode) != lite::RET_OK) { MS_LOG(ERROR) << "adjust cnode's output shape failed, " << cnode->fullname_with_scope(); return lite::RET_ERROR; } @@ -281,6 +295,59 @@ bool ToFormatBase::BasicProcess(const FuncGraphPtr &func_graph, bool main_graph) return true; } +STATUS ToFormatBase::ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set *has_visited) { + MS_ASSERT(graph != nullptr && has_visited != nullptr); + auto node_list = TopoSort(graph->get_return()); + schema::Format src_format = schema::Format_NUM_OF_FORMAT; + schema::Format dst_format = schema::Format_NUM_OF_FORMAT; + for (auto &node : node_list) { + if (!utils::isa(node)) { + 
continue; + } + auto cnode = node->cast(); + if (CheckPrimitiveType(node, prim::kPrimIf) || CheckPrimitiveType(node, prim::kPrimWhile)) { + auto sub_func_graph = GetValueNode(cnode->input(1)); + if (sub_func_graph == nullptr) { + lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); + return false; + } + if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) { + MS_LOG(ERROR) << "transform conv weight format failed."; + return lite::RET_ERROR; + } + sub_func_graph = GetValueNode(cnode->input(kInputIndexTwo)); + if (sub_func_graph == nullptr) { + lite::ReturnCode::GetSingleReturnCode()->UpdateReturnCode(lite::RET_NULL_PTR); + return false; + } + if (ConvWeightFormatTrans(sub_func_graph, has_visited) != lite::RET_OK) { + MS_LOG(ERROR) << "transform conv weight format failed."; + return lite::RET_ERROR; + } + continue; + } + if (!CheckPrimitiveType(node, prim::kPrimConv2DFusion) && + !CheckPrimitiveType(node, opt::kPrimConv2DBackpropInputFusion) && + !CheckPrimitiveType(node, prim::kPrimConv2dTransposeFusion)) { + continue; + } + if (has_visited->find(node) != has_visited->end()) { + continue; + } + has_visited->insert(node); + if (DecideConvWeightSrcAndDstFormat(cnode, &src_format, &dst_format) != lite::RET_OK) { + MS_LOG(ERROR) << "weight's src format and dst format get failed."; + return lite::RET_ERROR; + } + auto status = lite::UnifyConvWeightFormat(graph, cnode, src_format, dst_format, has_visited); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "unify conv weight failed, current node name is " << cnode->fullname_with_scope(); + return status; + } + } + return lite::RET_OK; +} + bool ToFormatBase::Run(const FuncGraphPtr &func_graph) { MS_ASSERT(func_graph != nullptr); if (format_ != mindspore::NHWC && format_ != mindspore::NCHW) { @@ -297,6 +364,12 @@ bool ToFormatBase::Run(const FuncGraphPtr &func_graph) { MS_LOG(ERROR) << "create NodeInferShape object failed."; return false; } + std::set has_visited; + auto status 
= ConvWeightFormatTrans(func_graph, &has_visited); + if (status != lite::RET_OK) { + MS_LOG(ERROR) << "Conv2D weight FormatTrans failed: " << status; + return false; + } SetSensitiveOps(); auto node_list = TopoSort(func_graph->get_return()); for (auto &node : node_list) { diff --git a/mindspore/lite/tools/optimizer/format/to_format_base.h b/mindspore/lite/tools/optimizer/format/to_format_base.h index fc1aeea487e..03a214697f5 100644 --- a/mindspore/lite/tools/optimizer/format/to_format_base.h +++ b/mindspore/lite/tools/optimizer/format/to_format_base.h @@ -18,6 +18,7 @@ #define MINDSPORE_LITE_TOOLS_OPTIMIZER_FORMAT_TO_FORMAT_BASE_H_ #include +#include #include #include #include @@ -45,13 +46,16 @@ class ToFormatBase : public Pass { STATUS InsertPreTransNode(const FuncGraphPtr &func_graph, const CNodePtr &cnode, const std::vector &perm); STATUS GenNewInput(const FuncGraphPtr &func_graph, const CNodePtr &cnode, std::vector perm, bool before, size_t index = 0); - STATUS ModifyCNodeAbstract(const CNodePtr &cnode); + STATUS ModifyCNode(const CNodePtr &cnode); + STATUS ConvWeightFormatTrans(const FuncGraphPtr &graph, std::set *has_visited); protected: - virtual void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0; + virtual STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) = 0; virtual void SetSensitiveOps() { sensitive_ops_ = opt::GetNHWCOpMap(); } virtual bool DecideWhetherHandleGraphInput(const FuncGraphPtr &func_graph, const ShapeVector &shape) { return true; } virtual bool DecideWhetherInferShapeForNewNode() { return true; } + virtual STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) = 0; FmkType fmk_type_{lite::converter::FmkType_MS}; bool train_flag_{false}; mindspore::Format format_{mindspore::NHWC}; diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc 
index dc5b23f37a5..b7d853e5e13 100644 --- a/mindspore/lite/tools/optimizer/format/to_nchw_format.cc +++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.cc @@ -18,16 +18,36 @@ namespace mindspore { namespace opt { - -void ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { +STATUS ToNCHWFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { MS_ASSERT(cnode != nullptr); auto prim_node = cnode->input(0); auto prim = GetValueNode(prim_node); MS_ASSERT(prim != nullptr); + if (prim->GetAttr(ops::kFormat) != nullptr) { + auto node_format = GetValue(prim->GetAttr(ops::kFormat)); + if (node_format == mindspore::NCHW) { + MS_LOG(DEBUG) << "node's format has been nchw, no need to transfer, " << cnode->fullname_with_scope(); + return lite::RET_OK; + } + if (node_format != mindspore::NHWC) { + MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format + << ", node name is " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + } if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) { trans_info->pre_ = opt::kNHWC2NCHW; trans_info->post_ = opt::kNCHW2NHWC; } + return lite::RET_OK; +} + +STATUS ToNCHWFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr); + *src_format = schema::Format_KHWC; + *dst_format = schema::Format_KCHW; + return lite::RET_OK; } } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/format/to_nchw_format.h b/mindspore/lite/tools/optimizer/format/to_nchw_format.h index 43de093698a..d2e2d000ea6 100644 --- a/mindspore/lite/tools/optimizer/format/to_nchw_format.h +++ b/mindspore/lite/tools/optimizer/format/to_nchw_format.h @@ -30,7 +30,9 @@ class ToNCHWFormat : public ToFormatBase { ~ToNCHWFormat() = default; private: - void 
GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) override; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc index 7bf2c613792..33f786772db 100644 --- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc +++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.cc @@ -18,15 +18,36 @@ namespace mindspore { namespace opt { -void ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { +STATUS ToNHWCFormat::GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) { MS_ASSERT(cnode != nullptr); auto prim_node = cnode->input(0); auto prim = GetValueNode(prim_node); MS_ASSERT(prim != nullptr); + if (prim->GetAttr(ops::kFormat) != nullptr) { + auto node_format = GetValue(prim->GetAttr(ops::kFormat)); + if (node_format == mindspore::NHWC) { + MS_LOG(DEBUG) << "node's format has been nhwc, no need to transfer, " << cnode->fullname_with_scope(); + return lite::RET_OK; + } + if (node_format != mindspore::NCHW) { + MS_LOG(ERROR) << "node's format is invalid, which must be nhwc or nchw, now is " << node_format + << ", node name is " << cnode->fullname_with_scope(); + return lite::RET_ERROR; + } + } if (sensitive_ops_.find(prim->name()) != sensitive_ops_.end()) { trans_info->pre_ = opt::kNCHW2NHWC; trans_info->post_ = opt::kNHWC2NCHW; } + return lite::RET_OK; +} + +STATUS ToNHWCFormat::DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) { + MS_ASSERT(cnode != nullptr && src_format != nullptr && dst_format != nullptr); + *src_format = schema::Format_KCHW; + *dst_format = 
schema::Format_KHWC; + return lite::RET_OK; } } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h index c9c36fff4d4..d16b861b6fc 100644 --- a/mindspore/lite/tools/optimizer/format/to_nhwc_format.h +++ b/mindspore/lite/tools/optimizer/format/to_nhwc_format.h @@ -28,7 +28,9 @@ class ToNHWCFormat : public ToFormatBase { ~ToNHWCFormat() = default; private: - void GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + STATUS GetTransNodeFormatType(const CNodePtr &cnode, opt::TransTypePair *trans_info) override; + STATUS DecideConvWeightSrcAndDstFormat(const CNodePtr &cnode, schema::Format *src_format, + schema::Format *dst_format) override; }; } // namespace opt } // namespace mindspore diff --git a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc index 795e2845f98..05ce9dd9846 100644 --- a/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/batchmatmul_fusion.cc @@ -148,7 +148,6 @@ std::shared_ptr BuildMatMulPrim(const CNodePtr &stack_cnode) { matmul_cvalue->AddAttr("quant_params", quant_params_holder); return matmul_cvalue; } - } // namespace const BaseRef BatchMatMulFusion::DefinePattern() const { auto pack_var = std::make_shared(IsStackNode); diff --git a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc index d2cf34f00de..1163e76ad20 100644 --- a/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/conv_conv_fusion.cc @@ -55,6 +55,10 @@ bool IsCommonConvNode(const BaseRef &n) { } STATUS GenNewConvBias(const ParameterPtr &down_bias_node, const ParameterPtr &down_weight_node, const ParameterPtr &up_bias_node, const ParameterPtr &new_bias_node) { + if (down_weight_node == nullptr 
|| up_bias_node == nullptr || new_bias_node == nullptr) { + MS_LOG(ERROR) << "Input down_weight_node or up_bias_node or new_bias_node is nullptr"; + return RET_FAILED; + } float *down_bias_data = nullptr; if (down_bias_node != nullptr) { auto down_bias_param = std::dynamic_pointer_cast(down_bias_node->default_param()); diff --git a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc index eb48e8c14c8..bf37a8395d0 100644 --- a/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/multi_head_attention_fusion.cc @@ -21,6 +21,7 @@ namespace mindspore::opt { namespace { const auto &p1 = std::placeholders::_1; +const size_t kWeightShapeSize = 2; } // namespace MultiHeadAttentionFusion::MultiHeadAttentionFusion(const string &name, bool multigraph) @@ -244,7 +245,8 @@ std::shared_ptr MultiHeadAttentionFusion::BuildAttentionPrim(con MS_LOG(ERROR) << "Get reshape k data failed"; return nullptr; } - if (shape_k.size() < 2 || shape_v.size() < 2 || shape_k.at(shape_k.size() - 2) != shape_v.at(shape_v.size() - 2)) { + if (shape_k.size() < kWeightShapeSize || shape_v.size() < kWeightShapeSize || + shape_k.at(shape_k.size() - kWeightShapeSize) != shape_v.at(shape_v.size() - kWeightShapeSize)) { MS_LOG(ERROR) << "Shape k or shape v is invalid."; return nullptr; } diff --git a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc index 0e01129ec9c..619f7a5d3f5 100644 --- a/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc +++ b/mindspore/lite/tools/optimizer/fusion/tflite_rel_pos_multi_head_attention_fusion.cc @@ -23,6 +23,14 @@ namespace mindspore::opt { namespace { const auto &p1 = std::placeholders::_1; +const size_t kWeightQueryIndex = 4; +const size_t kWeightKeyIndex = 5; +const size_t 
kWeightValueIndex = 6; +const size_t kWeightPosIndex = 7; +const size_t kWeightOutputIndex = 10; +const size_t kStackParamSize = 2; +const size_t kInputSize = 16; +const size_t kOutputSize = 2; } // namespace TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const string &name, bool multigraph) @@ -37,7 +45,7 @@ TfliteRelPosMultiHeadAttentionFusion::TfliteRelPosMultiHeadAttentionFusion(const output_prim_ = std::make_shared(std::bind(IsOpType, p1, prim::kPrimFullConnection)); pos_prim_ = std::make_shared(std::bind(IsOpType, p1, prim::kPrimFullConnection)); - for (size_t i = 0; i < 2; i++) { + for (size_t i = 0; i < kStackParamSize; i++) { query_stack_params_.emplace_back(std::make_shared()); key_stack_params_.emplace_back(std::make_shared()); value_stack_params_.emplace_back(std::make_shared()); @@ -157,38 +165,38 @@ CNodePtr TfliteRelPosMultiHeadAttentionFusion::CreateRelPosMultiHeadAttentionNod MS_LOG(ERROR) << "Build attention primitive failed."; return nullptr; } - auto quant_params_holder = std::make_shared(16, 1); + auto quant_params_holder = std::make_shared(kInputSize, kOutputSize); auto query_prim = GetValueNode(utils::cast((*equiv)[query_prim_])); auto query_quant_param_holder = query_prim->GetAttr("quant_params"); if (query_quant_param_holder != nullptr) { quant_params_holder->set_input_quant_param( - 4, query_quant_param_holder->cast()->get_input_quant_params().at(1)); + kWeightQueryIndex, query_quant_param_holder->cast()->get_input_quant_params().at(1)); } auto key_prim = GetValueNode(utils::cast((*equiv)[key_prim_])); auto key_quant_param_holder = key_prim->GetAttr("quant_params"); if (key_quant_param_holder != nullptr) { quant_params_holder->set_input_quant_param( - 5, key_quant_param_holder->cast()->get_input_quant_params().at(1)); + kWeightKeyIndex, key_quant_param_holder->cast()->get_input_quant_params().at(1)); } auto value_prim = GetValueNode(utils::cast((*equiv)[value_prim_])); auto value_quant_param_holder = 
value_prim->GetAttr("quant_params"); if (value_quant_param_holder != nullptr) { quant_params_holder->set_input_quant_param( - 6, value_quant_param_holder->cast()->get_input_quant_params().at(1)); + kWeightValueIndex, value_quant_param_holder->cast()->get_input_quant_params().at(1)); } auto pos_prim = GetValueNode(utils::cast((*equiv)[pos_prim_])); auto pos_quant_param_holder = pos_prim->GetAttr("quant_params"); if (pos_quant_param_holder != nullptr) { quant_params_holder->set_input_quant_param( - 7, pos_quant_param_holder->cast()->get_input_quant_params().at(1)); + kWeightPosIndex, pos_quant_param_holder->cast()->get_input_quant_params().at(1)); } auto output_prim = GetValueNode(utils::cast((*equiv)[output_prim_])); auto output_quant_param_holder = output_prim->GetAttr("quant_params"); if (output_quant_param_holder != nullptr) { quant_params_holder->set_input_quant_param( - 10, output_quant_param_holder->cast()->get_input_quant_params().at(1)); + kWeightOutputIndex, output_quant_param_holder->cast()->get_input_quant_params().at(1)); } attention_prim->AddAttr("quant_params", quant_params_holder); @@ -273,7 +281,7 @@ const VectorRef TfliteRelPosMultiHeadAttentionFusion::DefineProcessInputPattern( result = VectorRef({std::make_shared(std::bind(IsOpType, p1, prim::kPrimAddFusion)), result, bias}); } - MS_ASSERT(stack_params.size() == 2); + MS_ASSERT(stack_params.size() == kStackParamSize); auto stack = VectorRef({std::make_shared(std::bind(IsOpType, p1, prim::kPrimStack)), std::make_shared(), std::make_shared(), stack_params.at(0), stack_params.at(1)}); result = VectorRef({std::make_shared(std::bind(IsOpType, p1, prim::kPrimReshape)), result, stack}); diff --git a/mindspore/lite/tools/optimizer/graph/node_infershape.cc b/mindspore/lite/tools/optimizer/graph/node_infershape.cc index 6e11780ff7b..c34d8bc8c56 100644 --- a/mindspore/lite/tools/optimizer/graph/node_infershape.cc +++ b/mindspore/lite/tools/optimizer/graph/node_infershape.cc @@ -43,20 +43,6 @@ void 
FreeTensors(std::vector *tensors) { tensors->resize(0); } -void SetConvWeightFormat(const CNodePtr &cnode, const std::vector &inputs) { - MS_ASSERT(cnode != nullptr); - if (!CheckPrimitiveType(cnode, prim::kPrimConv2DFusion) && - !CheckPrimitiveType(cnode, kPrimConv2DBackpropInputFusion) && - !CheckPrimitiveType(cnode, prim::kPrimConv2dTransposeFusion)) { - return; - } - auto prim = GetValueNode(cnode->input(0)); - MS_ASSERT(prim != nullptr); - if (prim->GetAttr(ops::kFormat) != nullptr && inputs.size() > 1) { - inputs[1]->set_format(static_cast(GetValue(prim->GetAttr(ops::kFormat)))); - } -} - void RectifyFormat(const CNodePtr &cnode, const std::vector &inputs, FmkType fmk_type) { MS_ASSERT(cnode != nullptr); if (fmk_type != lite::converter::FmkType_ONNX) { @@ -114,7 +100,6 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) { MS_LOG(ERROR) << "get inputs failed."; return lite::RET_ERROR; } - SetConvWeightFormat(cnode, inputs); if (GetCNodeOutputTensors(cnode, &outputs) != lite::RET_OK) { FreeTensors(&inputs); FreeTensors(&outputs); diff --git a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc index 0881ec0af45..ad862310a21 100644 --- a/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc +++ b/mindspore/lite/tools/optimizer/graph/slice_prepose_pass.cc @@ -401,6 +401,10 @@ bool SlicePreposePass::SiblingsAreSameSlice(const FuncGraphPtr &graph, const Nod auto first_slice_cnode = slices.front(); auto first_slice_node = GetSlice(first_slice_cnode); + if (first_slice_node == nullptr) { + MS_LOG(ERROR) << "GetSlice return nullptr"; + return false; + } auto first_axes = first_slice_node->get_axes(); auto first_begin = GetSliceBeginAndSize(first_slice_cnode, SliceBeginIndex); auto first_size = GetSliceBeginAndSize(first_slice_cnode, SliceSizeIndex); diff --git a/mindspore/nn/acc/base.py b/mindspore/nn/acc/base.py index a0be25582d6..b8c5587d7c3 100644 --- a/mindspore/nn/acc/base.py +++ 
b/mindspore/nn/acc/base.py @@ -133,56 +133,62 @@ class ParameterProcess: if isinstance(origin_params_copy[0], Parameter): group_params = [{"params": parameters}] - else: - group_params = [] - params_name = [param.name for param in parameters] - new_params_count = copy.deepcopy(params_name) - new_params_clone = {} - max_key_number = 0 - for group_param in origin_params_copy: - if 'order_params' in group_param.keys(): - new_group_param = copy.deepcopy(group_param) - new_group_param['order_params'] = parameters - group_params.append(new_group_param) - continue - params_value = [] - for param in group_param['params']: - if param.name in params_name: - index = params_name.index(param.name) - params_value.append(parameters[index]) - new_params_count.remove(param.name) + return group_params + + group_params = [] + params_name = [param.name for param in parameters] + new_params_count = copy.deepcopy(params_name) + new_params_clone = {} + max_key_number = 0 + for group_param in origin_params_copy: + if 'order_params' in group_param.keys(): new_group_param = copy.deepcopy(group_param) - new_group_param['params'] = params_value + new_group_param['order_params'] = parameters group_params.append(new_group_param) - if len(group_param.keys()) > max_key_number: - max_key_number = len(group_param.keys()) - new_params_clone = copy.deepcopy(group_param) - if new_params_count: - params_value = [] - for param in new_params_count: - index = params_name.index(param) + continue + params_value = [] + for param in group_param['params']: + if param.name in params_name: + index = params_name.index(param.name) params_value.append(parameters[index]) - if new_params_clone: - new_params_clone['params'] = params_value - group_params.append(new_params_clone) - else: - group_params.append({"params": params_value}) + new_params_count.remove(param.name) + new_group_param = copy.deepcopy(group_param) + new_group_param['params'] = params_value + group_params.append(new_group_param) + if 
len(group_param.keys()) > max_key_number: + max_key_number = len(group_param.keys()) + new_params_clone = copy.deepcopy(group_param) + if new_params_count: + params_value = [] + for param in new_params_count: + index = params_name.index(param) + params_value.append(parameters[index]) + if new_params_clone: + new_params_clone['params'] = params_value + group_params.append(new_params_clone) + else: + group_params.append({"params": params_value}) return group_params + _gradient_accumulation_op = C.MultitypeFuncGraph("gradient_accumulation_op") + @_gradient_accumulation_op.register("Int64", "Tensor", "Tensor") def _cumulative_grad(accumulation_step, cumulative_grad, grad): """Apply gradient accumulation to cumulative grad.""" return P.AssignAdd()(cumulative_grad, grad / accumulation_step) + _gradient_clear_op = C.MultitypeFuncGraph("gradient_clear_op") + @_gradient_clear_op.register("Tensor") def _clear_grad(cumulative_grad): zero_grad = P.ZerosLike()(cumulative_grad) return F.assign(cumulative_grad, zero_grad) + class GradientAccumulation(Cell): """ After accumulating the gradients of multiple steps, call to optimize its update. 
diff --git a/mindspore/nn/acc/grad_freeze.py b/mindspore/nn/acc/grad_freeze.py index dd8835953ec..8e84d4f12ab 100644 --- a/mindspore/nn/acc/grad_freeze.py +++ b/mindspore/nn/acc/grad_freeze.py @@ -243,6 +243,7 @@ class GradientFreeze: return network, optimizer + def freeze_cell(reducer_flag, network, optimizer, sens, grad, use_grad_accumulation, mean=None, degree=None, max_accumulation_step=1): """Provide freeze network cell.""" diff --git a/mindspore/nn/acc/less_batch_normalization.py b/mindspore/nn/acc/less_batch_normalization.py index c2c6683afef..d1d35b4a94d 100644 --- a/mindspore/nn/acc/less_batch_normalization.py +++ b/mindspore/nn/acc/less_batch_normalization.py @@ -81,6 +81,7 @@ class CommonHeadLastFN(Cell): x = self.multiplier * x return x + class LessBN(Cell): """ Reduce the number of BN automatically to improve the network performance diff --git a/mindspore/nn/cell.py b/mindspore/nn/cell.py index 46ed2ce34d5..8ab61f3a042 100755 --- a/mindspore/nn/cell.py +++ b/mindspore/nn/cell.py @@ -1247,17 +1247,18 @@ class Cell(Cell_): for param in params: param.set_param_ps(init_in_server) - def set_param_fl(self, push_to_server=False, pull_from_server=False): + def set_param_fl(self, push_to_server=False, pull_from_server=False, requires_aggr=True): """ Set the way of parameter and server interaction. Args: push_to_server (bool): Whether the parameter should be pushed to server. Default: False. pull_from_server (bool): Whether the parameter should be pulled from server. Default: False. + requires_aggr (bool): Whether the parameter should be aggregated in the server. Default: True. 
""" params = self.parameters_and_names() for param in params: - param[1].set_param_fl(push_to_server, pull_from_server) + param[1].set_param_fl(push_to_server, pull_from_server, requires_aggr) def set_comm_fusion(self, fusion_type, recurse=True): """ @@ -1403,8 +1404,7 @@ class GraphCell(Cell): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor - >>> from mindspore.train import export, load + >>> from mindspore import Tensor, export, load >>> >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones") >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32)) diff --git a/mindspore/nn/layer/activation.py b/mindspore/nn/layer/activation.py index b947e5eb873..634d3d0ee07 100644 --- a/mindspore/nn/layer/activation.py +++ b/mindspore/nn/layer/activation.py @@ -40,6 +40,7 @@ __all__ = ['Softmax', 'ELU', 'LogSigmoid', 'SoftShrink', + 'HShrink', ] @@ -803,6 +804,51 @@ class SoftShrink(Cell): output = self.softshrink(input_x) return output +class HShrink(Cell): + r""" + Applies the hard shrinkage function element-wise, each element complies the follow function: + + .. math:: + \text{HardShrink}(x) = + \begin{cases} + x, & \text{ if } x > \lambda \\ + x, & \text{ if } x < -\lambda \\ + 0, & \text{ otherwise } + \end{cases} + + Args: + lambd (float): The value for the HardShrink formulation. Default: 0.5 + + Inputs: + - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32. + + Outputs: + Tensor, the same shape and data type as the input. + + Supported Platforms: + ``Ascend`` + + Raises: + TypeError: If `lambd` is not a float. + TypeError: If dtype of `input_x` is neither float16 nor float32. + + Examples: + >>> input_x = Tensor(np.array([[ 0.5, 1, 2.0],[0.0533,0.0776,-2.1233]]),mstype.float32) + >>> hshrink = nn.HShrink() + >>> output = hshrink(input_x) + >>> print(output) + [[ 0. 1. 2. ] + [ 0. 0. 
-2.1233]] + """ + + def __init__(self, lambd=0.5): + super(HShrink, self).__init__() + self.hshrink = P.HShrink(lambd) + + def construct(self, input_x): + return self.hshrink(input_x) + + _activation = { 'softmax': Softmax, 'logsoftmax': LogSoftmax, @@ -819,6 +865,7 @@ _activation = { 'hsigmoid': HSigmoid, 'logsigmoid': LogSigmoid, 'softshrink': SoftShrink, + 'hshrink': HShrink, } diff --git a/mindspore/nn/loss/__init__.py b/mindspore/nn/loss/__init__.py index d0c87236362..1bd4bc7714d 100644 --- a/mindspore/nn/loss/__init__.py +++ b/mindspore/nn/loss/__init__.py @@ -19,13 +19,13 @@ Cells of loss function. Loss function in machine learning is the target of the m It shows how well the model works on a dataset and the optimization target which the optimizer is searching. """ -from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, FocalLoss,\ +from .loss import LossBase, L1Loss, MSELoss, SmoothL1Loss, SoftMarginLoss, FocalLoss,\ SoftmaxCrossEntropyWithLogits, BCELoss, CosineEmbeddingLoss, \ SampledSoftmaxLoss, DiceLoss, BCEWithLogitsLoss, MultiClassDiceLoss,\ RMSELoss, MAELoss -__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'FocalLoss', +__all__ = ['LossBase', 'L1Loss', 'MSELoss', 'SmoothL1Loss', 'SoftMarginLoss', 'FocalLoss', 'SoftmaxCrossEntropyWithLogits', 'BCELoss', 'BCEWithLogitsLoss', 'CosineEmbeddingLoss', 'SampledSoftmaxLoss', 'DiceLoss', 'MultiClassDiceLoss', 'RMSELoss', 'MAELoss'] diff --git a/mindspore/nn/loss/loss.py b/mindspore/nn/loss/loss.py index bbf0adfe61f..29acf71030f 100644 --- a/mindspore/nn/loss/loss.py +++ b/mindspore/nn/loss/loss.py @@ -436,6 +436,53 @@ class SmoothL1Loss(LossBase): return self.smooth_l1_loss(base, target) +class SoftMarginLoss(LossBase): + r""" + A loss class for two-class classification problems. + + SoftMarginLoss creates a criterion that optimizes a two-class classification + logistic loss between input tensor :math:`x` and target tensor :math:`y` + (containing 1 or -1). + + .. 
math:: + \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} + + Args: + reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean". + + Inputs: + - **logits** (Tensor) - Predict data. Data type must be float16 or float32. + - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. + + Outputs: + Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. + Otherwise, a scalar value will be returned. + + Raises: + TypeError: If `logits` or `labels` is not a Tensor. + TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. + ValueError: If shape of `logits` is not the same as `labels`. + ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. + + Supported Platforms: + ``Ascend`` + + Examples: + >>> loss = nn.SoftMarginLoss() + >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) + >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) + >>> output = loss(logits, labels) + >>> print(output) + 0.6764238 + """ + def __init__(self, reduction='mean'): + super(SoftMarginLoss, self).__init__() + self.soft_margin_loss = P.SoftMarginLoss(reduction) + + def construct(self, base, target): + return self.soft_margin_loss(base, target) + + class SoftmaxCrossEntropyWithLogits(LossBase): r""" Computes softmax cross entropy between logits and labels. 
@@ -1282,10 +1329,10 @@ class FocalLoss(LossBase): convert_weight = self.squeeze(convert_weight) log_probability = log_probability * convert_weight - weight = F.pows(-probability + 1.0, self.gamma) + weight = F.pows(-1 * probability + 1.0, self.gamma) if target.shape[1] == 1: - loss = (-weight * log_probability).mean(axis=1) + loss = (-1 * weight * log_probability).mean(axis=1) else: - loss = (-weight * targets * log_probability).mean(axis=-1) + loss = (-1 * weight * targets * log_probability).mean(axis=-1) return self.get_loss(loss) diff --git a/mindspore/nn/wrap/grad_reducer.py b/mindspore/nn/wrap/grad_reducer.py index f10e9e640f1..07f363da3d0 100644 --- a/mindspore/nn/wrap/grad_reducer.py +++ b/mindspore/nn/wrap/grad_reducer.py @@ -101,7 +101,6 @@ def _tensors_allreduce(degree, mean, allgather, allreduce, allreduce_filter, gra @reduce_opt.register("Tensor", "Bool", "Bool", "Tensor") - def _tensors_allreduce_post(degree, mean, allreduce_filter, grad): """ Apply allreduce on gradient in PyNative mode. @@ -125,7 +124,6 @@ def _tensors_allreduce_post(degree, mean, allreduce_filter, grad): @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "Tensor", "Bool") - def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter): """ Apply allreduce on gradient. @@ -154,7 +152,6 @@ def _tensors_allreduce_ps(degree, mean, allgather, allreduce, allreduce_filter, @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor") - def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce_filter, grad): """ Apply allgather on gradient instead of allreduce for sparse feature. 
@@ -181,7 +178,6 @@ def _tensors_allreduce_with_sparse(degree, mean, allgather, allreduce, allreduce @reduce_opt.register("Tensor", "Bool", "Function", "Function", "Bool", "RowTensor", "Bool") - def _tensors_allreduce_with_sparse_ps(degree, mean, allgather, allreduce, allreduce_filter, grad, ps_parameter): """ Apply allgather on gradient instead of allreduce for sparse feature. @@ -215,7 +211,6 @@ _get_datatype = C.MultitypeFuncGraph("_get_datatype") @_get_datatype.register("Tensor") - def _tensors_get_datatype(grad): """ Acquire gradient datatype. @@ -230,7 +225,6 @@ def _tensors_get_datatype(grad): @_get_datatype.register("RowTensor") - def _tensors_get_datatype_with_sparse(grad): """ Acquire gradient datatype. @@ -248,7 +242,6 @@ _cast_datatype = C.MultitypeFuncGraph("_cast_datatype") @_cast_datatype.register("TypeType", "Tensor") - def _tensors_cast_datatype(datatype, grad): """ Cast gradient to datatype. @@ -264,7 +257,6 @@ def _tensors_cast_datatype(datatype, grad): @_cast_datatype.register("TypeType", "RowTensor") - def _tensors_cast_datatype_with_sparse(datatype, grad): """ Cast gradient to datatype. 
diff --git a/mindspore/nn/wrap/loss_scale.py b/mindspore/nn/wrap/loss_scale.py index 735ef2edcec..eeecc30d60c 100644 --- a/mindspore/nn/wrap/loss_scale.py +++ b/mindspore/nn/wrap/loss_scale.py @@ -30,12 +30,11 @@ reciprocal = P.Reciprocal() @_grad_scale.register("Tensor", "Tensor") - def tensor_grad_scale(scale, grad): return grad * F.cast(reciprocal(scale), F.dtype(grad)) -@_grad_scale.register("Tensor", "RowTensor") +@_grad_scale.register("Tensor", "RowTensor") def tensor_grad_scale_row_tensor(scale, grad): return RowTensor(grad.indices, grad.values * F.cast(reciprocal(scale), F.dtype(grad.values)), @@ -46,12 +45,11 @@ grad_overflow = P.FloatStatus() @_grad_overflow.register("Tensor") - def _tensor_grad_overflow(grad): return grad_overflow(grad) -@_grad_overflow.register("RowTensor") +@_grad_overflow.register("RowTensor") def _tensor_grad_overflow_row_tensor(grad): return grad_overflow(grad.values) @@ -88,15 +86,14 @@ class DynamicLossScaleUpdateCell(Cell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> from mindspore.ops import operations as P - >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell + >>> import mindspore.ops as ops >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = P.MatMul() + ... self.matmul = ops.MatMul() ... ... def construct(self, x): ... 
output = self.matmul(x, self.weight) @@ -106,7 +103,7 @@ class DynamicLossScaleUpdateCell(Cell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = WithLossCell(net, loss) + >>> net_with_loss = nn.WithLossCell(net, loss) >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -179,15 +176,14 @@ class FixedLossScaleUpdateCell(Cell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> from mindspore.ops import operations as P - >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell + >>> import mindspore.ops as ops >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = P.MatMul() + ... self.matmul = ops.MatMul() ... ... def construct(self, x): ... 
output = self.matmul(x, self.weight) @@ -197,7 +193,7 @@ class FixedLossScaleUpdateCell(Cell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = WithLossCell(net, loss) + >>> net_with_loss = nn.WithLossCell(net, loss) >>> manager = nn.FixedLossScaleUpdateCell(loss_scale_value=2**12) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -253,16 +249,15 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): Examples: >>> import numpy as np >>> from mindspore import Tensor, Parameter, nn - >>> from mindspore.ops import operations as P - >>> from mindspore.nn.wrap.cell_wrapper import WithLossCell - >>> from mindspore.common import dtype as mstype + >>> import mindspore.ops as ops + >>> from mindspore import dtype as mstype >>> >>> class Net(nn.Cell): ... def __init__(self, in_features, out_features): ... super(Net, self).__init__() ... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)), ... name='weight') - ... self.matmul = P.MatMul() + ... self.matmul = ops.MatMul() ... ... def construct(self, x): ... 
output = self.matmul(x, self.weight) @@ -273,7 +268,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = WithLossCell(net, loss) + >>> net_with_loss = nn.WithLossCell(net, loss) >>> manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=2**12, scale_factor=2, scale_window=1000) >>> train_network = nn.TrainOneStepWithLossScaleCell(net_with_loss, optimizer, scale_sense=manager) >>> input = Tensor(np.ones([out_features, in_features]), mindspore.float32) @@ -284,7 +279,7 @@ class TrainOneStepWithLossScaleCell(TrainOneStepCell): >>> net = Net(in_features, out_features) >>> loss = nn.MSELoss() >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9) - >>> net_with_loss = WithLossCell(net, loss) + >>> net_with_loss = nn.WithLossCell(net, loss) >>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32)) >>> label = Tensor(np.zeros([size, out_features]).astype(np.float32)) >>> scaling_sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32) diff --git a/mindspore/numpy/array_creations.py b/mindspore/numpy/array_creations.py index 3861a35b06c..b6c0bc4f537 100644 --- a/mindspore/numpy/array_creations.py +++ b/mindspore/numpy/array_creations.py @@ -49,6 +49,7 @@ _reduce_min_keepdims = P.ReduceMin(True) _reduce_max_keepdims = P.ReduceMax(True) _reduce_mean_keepdims = P.ReduceMean(True) + def array(obj, dtype=None, copy=True, ndmin=0): """ Creates a tensor. 
@@ -89,7 +90,7 @@ def array(obj, dtype=None, copy=True, ndmin=0): _raise_value_error("Empty tensor cannot be expanded beyond the current dimension.") res = _expand(res, ndmin) - if copy: + if copy and isinstance(obj, Tensor): res = copy_(res) elif dtype is not None and dtype != res.dtype: res = res.astype(dtype) @@ -2208,17 +2209,14 @@ def _pad_linear(arr, pad_width, end_values): dtype = arr.dtype end_values = _convert_pad_to_nd(end_values, ndim) for i in range(ndim): - # shape [..., 1, ...] left_value = _slice_along_axis(arr, i, 0, 1) right_value = _slice_along_axis(arr, i, shape[i]-1, shape[i]) pad_before = () pad_after = () if pad_width[i][0] > 0: - # shape [..., pad_width[i][0], ...] pad_before = (linspace(end_values[i][0], left_value, num=pad_width[i][0], endpoint=False, dtype=dtype, axis=i).squeeze(i+1),) if pad_width[i][1] > 0: - # shape [..., pad_width[i][1], ...] pad_after = linspace(right_value, end_values[i][1], num=pad_width[i][1]+1, endpoint=True, dtype=dtype, axis=i).squeeze(i+1) pad_after = (_slice_along_axis(pad_after, i, 1, pad_width[i][1]+1),) @@ -2227,6 +2225,58 @@ def _pad_linear(arr, pad_width, end_values): return arr +def _add_pads_before(arr, pad_args, mode): + """handle pads before the array""" + idx, array_length, times_to_pad_before, additional_pad_before, reflect_type = pad_args + curr_pad = None + endpoint_adder = None + edge_before = _slice_along_axis(arr, idx, 0, 1) + if mode == "reflect": + endpoint_adder = 1 + else: + endpoint_adder = 0 + # Deal with paddings before the original array + for times in range(times_to_pad_before): + if times < times_to_pad_before - 1: + endpoint = array_length + else: + endpoint = additional_pad_before + endpoint_adder + if endpoint != endpoint_adder: + curr_pad = _slice_along_axis(arr, idx, endpoint_adder, endpoint) + curr_pad = flip(curr_pad, axis=idx) + if reflect_type == "odd": + curr_pad = 2 * edge_before - curr_pad + arr = P.Concat(idx)((curr_pad, arr)) + edge_before = _slice_along_axis(arr, idx, 
0, 1) + return arr + + +def _add_pads_after(arr, pad_args, mode): + """handle pads after the array""" + idx, array_length, times_to_pad_after, additional_pad_after, reflect_type = pad_args + curr_pad = None + endpoint_adder = None + edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx]) + if mode == "reflect": + endpoint_adder = 1 + else: + endpoint_adder = 0 + # Deal with paddings after the original array + for times in range(times_to_pad_after): + if times < times_to_pad_after - 1: + startpoint = arr.shape[idx] - array_length + else: + startpoint = arr.shape[idx] - additional_pad_after - endpoint_adder + if startpoint != arr.shape[idx] - endpoint_adder: + curr_pad = _slice_along_axis(arr, idx, startpoint, arr.shape[idx] - endpoint_adder) + curr_pad = flip(curr_pad, axis=idx) + if reflect_type == "odd": + curr_pad = 2 * edge_end - curr_pad + arr = P.Concat(idx)((arr, curr_pad)) + edge_end = _slice_along_axis(arr, idx, arr.shape[idx]-1, arr.shape[idx]) + return arr + + def _pad_symmetric(arr, pad_width, reflect_type): """pad the array with symmetric paddings""" for i in range(arr.ndim): @@ -2235,41 +2285,18 @@ def _pad_symmetric(arr, pad_width, reflect_type): has_pad_before = (pad_width[i][0] > 0) has_pad_after = (pad_width[i][1] > 0) - edge_before = _slice_along_axis(arr, i, 0, 1) - edge_end = _slice_along_axis(arr, i, array_length-1, array_length) times_to_pad_before = pad_width[i][0] // array_length + 1 additional_pad_before = pad_width[i][0] % array_length times_to_pad_after = pad_width[i][1] // array_length + 1 additional_pad_after = pad_width[i][1] % array_length - curr_pad = None if has_pad_before: # Deal with paddings before the original array - for times in range(times_to_pad_before): - if times < times_to_pad_before - 1: - endpoint = array_length - else: - endpoint = additional_pad_before - if endpoint != 0: - curr_pad = _slice_along_axis(arr, i, 0, endpoint) - curr_pad = flip(curr_pad, axis=i) - if reflect_type == "odd": - curr_pad = 2 
* edge_before - curr_pad - arr = P.Concat(i)((curr_pad, arr)) - edge_before = _slice_along_axis(arr, i, 0, 1) + pad_args = (i, array_length, times_to_pad_before, additional_pad_before, reflect_type) + arr = _add_pads_before(arr, pad_args, "symmetric") if has_pad_after: # Deal with paddings after the original array - for times in range(times_to_pad_after): - if times < times_to_pad_after - 1: - startpoint = arr.shape[i] - array_length - else: - startpoint = arr.shape[i] - additional_pad_after - if startpoint != arr.shape[i]: - curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i]) - curr_pad = flip(curr_pad, axis=i) - if reflect_type == "odd": - curr_pad = 2 * edge_end - curr_pad - arr = P.Concat(i)((arr, curr_pad)) - edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i]) + pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type) + arr = _add_pads_after(arr, pad_args, "symmetric") return arr @@ -2278,7 +2305,6 @@ def _pad_reflect(arr, pad_width, reflect_type): pad the array with reflect paddings, this is very similar to symmetric paddings, but differs at how edges are selected. 
""" - # pylint: disable=too-many-nested-blocks for i in range(arr.ndim): array_length = arr.shape[i] if array_length == 1: @@ -2288,42 +2314,19 @@ def _pad_reflect(arr, pad_width, reflect_type): has_pad_before = (pad_width[i][0] > 0) has_pad_after = (pad_width[i][1] > 0) - edge_before = _slice_along_axis(arr, i, 0, 1) - edge_end = _slice_along_axis(arr, i, array_length-1, array_length) pad_size = array_length - 1 times_to_pad_before = pad_width[i][0] // pad_size + 1 additional_pad_before = pad_width[i][0] % pad_size times_to_pad_after = pad_width[i][1] // pad_size + 1 additional_pad_after = pad_width[i][1] % pad_size - curr_pad = None if has_pad_before: # Deal with paddings before the original array - for times in range(times_to_pad_before): - if times < times_to_pad_before - 1: - endpoint = array_length - else: - endpoint = additional_pad_before + 1 - if endpoint != 1: - curr_pad = _slice_along_axis(arr, i, 1, endpoint) - curr_pad = flip(curr_pad, axis=i) - if reflect_type == "odd": - curr_pad = 2 * edge_before - curr_pad - arr = P.Concat(i)((curr_pad, arr)) - edge_before = _slice_along_axis(arr, i, 0, 1) + pad_args = (i, array_length, times_to_pad_before, additional_pad_before, reflect_type) + arr = _add_pads_before(arr, pad_args, "reflect") if has_pad_after: # Deal with paddings after the original array - for times in range(times_to_pad_after): - if times < times_to_pad_after - 1: - startpoint = arr.shape[i] - array_length - else: - startpoint = arr.shape[i] - additional_pad_after - 1 - if startpoint != arr.shape[i]-1: - curr_pad = _slice_along_axis(arr, i, startpoint, arr.shape[i]-1) - curr_pad = flip(curr_pad, axis=i) - if reflect_type == "odd": - curr_pad = 2 * edge_end - curr_pad - arr = P.Concat(i)((arr, curr_pad)) - edge_end = _slice_along_axis(arr, i, arr.shape[i]-1, arr.shape[i]) + pad_args = (i, array_length, times_to_pad_after, additional_pad_after, reflect_type) + arr = _add_pads_after(arr, pad_args, "reflect") return arr @@ -2476,7 +2479,7 @@ def 
pad(arr, pad_width, mode="constant", stat_length=None, constant_values=0, constant_values = _convert_pad_to_nd(constant_values, arr.ndim) return _pad_constant(arr, pad_width, constant_values) if mode in ("maximum", "minimum", "mean", "median"): - # TODO: support median mode once P.Sort/P.Median is supported on GPU/CPU + # support median mode once P.Sort/P.Median is supported on GPU/CPU if mode == "median": _raise_unimplemented_error("median mode is not supported yet") return _pad_statistic(arr, pad_width, stat_length, stat_func[mode]) diff --git a/mindspore/numpy/array_ops.py b/mindspore/numpy/array_ops.py index e7f01776ab3..92189ae52c0 100644 --- a/mindspore/numpy/array_ops.py +++ b/mindspore/numpy/array_ops.py @@ -773,12 +773,12 @@ def atleast_1d(*arys): >>> output = np.atleast_1d(a, b, c) >>> print(output) [Tensor(shape=[2, 3], dtype=Float32, value= - [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000], - [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]), - Tensor(shape=[1], dtype=Float32, value= [1.00000000e+000]), + [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00], + [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]), + Tensor(shape=[1], dtype=Float32, value= [1.00000000e+00]), Tensor(shape=[5], dtype=Float32, - value= [1.00000000e+000, 1.00000000e+000, 1.00000000e+000, - 1.00000000e+000, 1.00000000e+000])] + value= [1.00000000e+00, 1.00000000e+00, 1.00000000e+00, + 1.00000000e+00, 1.00000000e+00])] """ return _atleast_xd(1, arys) @@ -810,12 +810,12 @@ def atleast_2d(*arys): >>> output = np.atleast_2d(a, b, c) >>> print(output) [Tensor(shape=[2, 3], dtype=Float32, value= - [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000], - [1.00000000e+000, 1.00000000e+000, 1.00000000e+000]]), - Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+000]]), + [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00], + [1.00000000e+00, 1.00000000e+00, 1.00000000e+00]]), + Tensor(shape=[1, 1], dtype=Float32, value= [[1.00000000e+00]]), Tensor(shape=[1, 5], dtype=Float32, - 
value= [[1.00000000e+000, 1.00000000e+000, 1.00000000e+000, - 1.00000000e+000, 1.00000000e+000]])] + value= [[1.00000000e+00, 1.00000000e+00, 1.00000000e+00, + 1.00000000e+00, 1.00000000e+00]])] """ return _atleast_xd(2, arys) @@ -850,12 +850,12 @@ def atleast_3d(*arys): >>> output = np.atleast_3d(a, b, c) >>> print(output) [Tensor(shape=[2, 3, 1], dtype=Float32, value= - [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]], - [[1.00000000e+000], [1.00000000e+000], [1.00000000e+000]]]), - Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+000]]]), + [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]], + [[1.00000000e+00], [1.00000000e+00], [1.00000000e+00]]]), + Tensor(shape=[1, 1, 1], dtype=Float32, value= [[[1.00000000e+00]]]), Tensor(shape=[1, 5, 1], dtype=Float32, - value= [[[1.00000000e+000], [1.00000000e+000], [1.00000000e+000], - [1.00000000e+000], [1.00000000e+000]]])] + value= [[[1.00000000e+00], [1.00000000e+00], [1.00000000e+00], + [1.00000000e+00], [1.00000000e+00]]])] """ res = [] for arr in arys: @@ -1444,6 +1444,7 @@ def _split(x, indices_or_sections, opname, axis=0): should be integer, tuple(int) or list(int), but got", indices_or_sections) return res + @constexpr def convert_neg_indices(indices, ndim): """converts negative values in tuple/list indices""" @@ -1452,6 +1453,7 @@ def convert_neg_indices(indices, ndim): indices = tuple([canonicalizer(axis) for axis in indices]) return indices + def _split_sub_tensors(x, indices, axis): """ Splits the input tensor `x` into multiple sub-tensors diff --git a/mindspore/numpy/math_ops.py b/mindspore/numpy/math_ops.py index ed25813e789..031e4716993 100644 --- a/mindspore/numpy/math_ops.py +++ b/mindspore/numpy/math_ops.py @@ -2234,7 +2234,7 @@ def convolve(a, v, mode='full'): a, v = v, a a_size, v_size = v_size, a_size v = v[::-1] - return _compute_1D_conv(a, v, mode).astype(final_dtype) + return _compute_1d_conv(a, v, mode).astype(final_dtype) def _handle_weights(weights, num_samples): @@ 
-3923,6 +3923,23 @@ def _gradient_along_axis(f, h, axis): return a_grad / h +def check_gradient_arguments(f, axis, edge_order): + """check arguments for gradient""" + if edge_order != 1: + _raise_unimplemented_error("edge_order != 1 not implemented") + if not isinstance(f, Tensor): + f = asarray_const(f) + if f.dtype != mstype.float64: + f = f.astype(mstype.float32) + if axis is None: + axis = F.make_range(f.ndim) + else: + _check_axis_type(axis, True, True, True) + axis = _canonicalize_axis(axis, f.ndim) + axis = (axis,) if isinstance(axis, int) else axis + return f, axis, edge_order + + def gradient(f, *varargs, axis=None, edge_order=1): """ Returns the gradient of a N-dimensional array. @@ -3969,18 +3986,7 @@ def gradient(f, *varargs, axis=None, edge_order=1): [1. 1. 1. ]] """ # This implementation was adapted from Numpy and jax.numpy - if edge_order != 1: - _raise_unimplemented_error("edge_order != 1 not implemented") - if not isinstance(f, Tensor): - f = asarray_const(f) - if f.dtype != mstype.float64: - f = f.astype(mstype.float32) - if axis is None: - axis = F.make_range(f.ndim) - else: - _check_axis_type(axis, True, True, True) - axis = _canonicalize_axis(axis, f.ndim) - axis = (axis,) if isinstance(axis, int) else axis + f, axis, edge_order = check_gradient_arguments(f, axis, edge_order) len_axes = len(axis) n = len(varargs) @@ -4370,7 +4376,7 @@ def interp(x, xp, fp, left=None, right=None): >>> print(np.interp(3.14, xp, fp, right=UNDEF)) -99.0 """ - # TODO implement period once sort is supported + # implement period once sort is supported x, xp, fp = _to_tensor(x, xp, fp) if F.rank(xp) != 1 or F.rank(fp) != 1: _raise_value_error('xp and fp must be 1-d sequences') @@ -4378,7 +4384,6 @@ def interp(x, xp, fp, left=None, right=None): if fp.size != size: _raise_value_error('the y-coordinates must have the same length as `xp`') - shape = F.shape(x) xp = xp.astype(mstype.float32) fp = fp.astype(mstype.float32) @@ -4392,20 +4397,17 @@ def interp(x, xp, fp, 
left=None, right=None): y_1 = F.gather_nd(fp, indices_1) res = (y_0*(x_1 - x) + y_1*(x - x_0))/(x_1 - x_0) res = F.select(F.equal(x_0, x_1), y_0, res) - # where x < xp[0], y = left or xp[0] - # where x > xp[-1], y = right or xp[-1] + idx_0 = _to_tensor([0]) idx_last = _to_tensor([size - 1]) if left is None: left = F.gather_nd(fp, idx_0) - left = full(shape, left, mstype.float32) + left = full(F.shape(x), left, mstype.float32) if right is None: right = F.gather_nd(fp, idx_last) - right = full(shape, right, mstype.float32) - choose_left = F.tensor_lt(x, F.gather_nd(xp, idx_0)) - choose_right = F.tensor_gt(x, F.gather_nd(xp, idx_last)) - res = F.select(choose_left, left, res) - res = F.select(choose_right, right, res) + right = full(F.shape(x), right, mstype.float32) + res = F.select(F.tensor_lt(x, F.gather_nd(xp, idx_0)), left, res) + res = F.select(F.tensor_gt(x, F.gather_nd(xp, idx_last)), right, res) return res @@ -4723,6 +4725,31 @@ def _factor_flattened_hist(nbin): return factor +def _get_histogramdd_count(ndim, bin_edges, sample, weights): + """Returns count for histogramdd.""" + data_indices = [] + nbin = () + flattened_bin_size = 1 + for i in F.make_range(ndim): + data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right') + bin_size = _type_convert(int, bin_edges[i].size) + data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins) + data_indices.append(data_to_bins) + nbin += (bin_size + 1,) + flattened_bin_size *= (bin_size + 1) + + factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1)) + stacked_indices = stack(data_indices) * factor + if _get_device() == 'Ascend': + stacked_indices = F.cast(stacked_indices, mstype.float32) + flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0) + count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size) + count = F.reshape(count, nbin) + slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True) + count = 
count[slices] + return count + + def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pylint: disable=redefined-builtin """ Computes the multidimensional histogram of some data. @@ -4823,26 +4850,7 @@ def histogramdd(sample, bins=10, range=None, weights=None, density=False): # pyl bin_edges.append(edges) dedges.append(diff(edges)) - data_indices = [] - nbin = () - flattened_bin_size = 1 - for i in F.make_range(ndim): - data_to_bins = searchsorted(bin_edges[i], sample[:, i], 'right') - bin_size = _type_convert(int, bin_edges[i].size) - data_to_bins = where_(sample[:, i] == bin_edges[i][-1], _to_tensor(bin_size - 1), data_to_bins) - data_indices.append(data_to_bins) - nbin += (bin_size + 1,) - flattened_bin_size *= (bin_size + 1) - - factor = F.reshape(_to_tensor(_factor_flattened_hist(nbin)), (ndim, 1)) - stacked_indices = stack(data_indices) * factor - if _get_device() == 'Ascend': - stacked_indices = F.cast(stacked_indices, mstype.float32) - flattened_hist = F.reduce_sum(stacked_indices.astype(mstype.float32), 0) - count = bincount(flattened_hist.astype(mstype.int32), weights, length=flattened_bin_size) - count = F.reshape(count, nbin) - slices = _list_comprehensions(ndim, F.make_slice(1, -1, 1), True) - count = count[slices] + count = _get_histogramdd_count(ndim, bin_edges, sample, weights) if density: s = F.reduce_sum(count.astype(mstype.float32)) @@ -5079,7 +5087,7 @@ def polysub(a1, a2): >>> print(np.polysub([2, 10, -2], [3, 10, -4])) [-1 0 2] """ - return polyadd(a1, -_to_tensor(a2)) + return polyadd(a1, F.neg_tensor(_to_tensor(a2))) def polyval(p, x): @@ -5485,51 +5493,48 @@ def ravel_multi_index(multi_index, dims, mode='clip', order='C'): return sum_((multi_index * strides).astype('float32'), axis=0) -def _vector_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin +def _vector_norm(x, _ord, axis, keepdims): """Returns norm of a vector.""" - if _in(ord, ('fro', 'nuc')): + if _in(_ord, ('fro', 'nuc')): 
_raise_value_error('Frobenius norm and nuclear norm are only defined for vectors') - if ord is None: - ord = 2 - if ord == inf: + if _ord is None: + _ord = 2 + if _ord == inf: res = P.ReduceMax(keepdims)(absolute(x), axis) - elif ord == -inf: + elif _ord == -inf: res = P.ReduceMin(keepdims)(absolute(x), axis) - elif ord == 0: + elif _ord == 0: res = P.ReduceSum(keepdims)(F.not_equal(x, 0).astype(mstype.float32), axis) else: - res = power(P.ReduceSum(keepdims)(power(absolute(x), ord), axis), 1./ord) + res = power(P.ReduceSum(keepdims)(power(absolute(x), _ord), axis), 1./_ord) return res -def _matrix_norm(x, ord, axis, keepdims): # pylint: disable=redefined-builtin +def _matrix_norm(x, _ord, axis, keepdims): """Returns norm of a matrix.""" - if ord == 0: + if _ord == 0: _raise_value_error('for 0 axis, norm is defined only for 2-D matrices') - if ord == 'nuc': + if _ord == 'nuc': _raise_unimplemented_error('nuclear norm is not implemented') - if _in(ord, (2, -2)): + if _in(_ord, (2, -2)): _raise_unimplemented_error('2-norm is not implemented for matrices') - if _in(ord, (None, 'fro')): - res = F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis)) - else: - axis0, axis1 = axis - if not keepdims: - if _check_is_inf(_abs(ord)) and axis0 > axis1: - axis0 -= 1 - elif _abs(ord) == 1 and axis1 > axis0: - axis1 -= 1 - if _check_is_inf(ord): - res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) - elif _check_is_inf(ord, True): - res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) - elif ord == 1: - res = P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) - elif ord == -1: - res = P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) - else: - return _raise_value_error('invalid norm order for matrices') - return res + if _in(_ord, (None, 'fro')): + return F.sqrt(P.ReduceSum(keepdims)(F.square(x), axis)) + axis0, axis1 = axis + if not keepdims: + if _check_is_inf(_abs(_ord)) and axis0 > axis1: 
+ axis0 -= 1 + elif _abs(_ord) == 1 and axis1 > axis0: + axis1 -= 1 + if _check_is_inf(_ord): + return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) + if _check_is_inf(_ord, True): + return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis1), axis0) + if _ord == 1: + return P.ReduceMax(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) + if _ord == -1: + return P.ReduceMin(keepdims)(P.ReduceSum(keepdims)(absolute(x), axis0), axis1) + return _raise_value_error('invalid norm order for matrices') def norm(x, ord=None, axis=None, keepdims=False): # pylint: disable=redefined-builtin @@ -5827,11 +5832,11 @@ def correlate(a, v, mode='valid'): v = v.astype(promote_dtype) if a.size < v.size: a, v = v, a - return _compute_1D_conv(a, v, mode)[::-1] - return _compute_1D_conv(a, v, mode) + return _compute_1d_conv(a, v, mode)[::-1] + return _compute_1d_conv(a, v, mode) -def _compute_1D_conv(a, v, mode): +def _compute_1d_conv(a, v, mode): """Returns a 1-D sequence which is the cross-correlate of two 1-D sequences (`a` and `v`).""" v_size = F.shape_mul(v.shape) if mode not in ('same', 'full', 'valid'): diff --git a/mindspore/numpy/utils_const.py b/mindspore/numpy/utils_const.py index da55b9e45df..dbb5edd91b3 100644 --- a/mindspore/numpy/utils_const.py +++ b/mindspore/numpy/utils_const.py @@ -136,6 +136,8 @@ def _can_broadcast(*shapes): _infer_out_shape(*shapes) except ValueError: return False + finally: + pass return True diff --git a/mindspore/ops/_grad/grad_array_ops.py b/mindspore/ops/_grad/grad_array_ops.py index 4fcff7b163b..d025f7a8ec1 100644 --- a/mindspore/ops/_grad/grad_array_ops.py +++ b/mindspore/ops/_grad/grad_array_ops.py @@ -264,9 +264,13 @@ def get_bprop_embedding_lookup(self): def bprop_sparse(x, indices, offset, out, dout): x_shp = shape_op(x) new_indices = sub_op(indices, offset) - # Reshape the 'new_indices' - new_indices_shape_changed = (size_op(new_indices),) - new_indices = reshape_op(new_indices, 
new_indices_shape_changed) + indices_size = size_op(new_indices) + if indices_size > 0: + # Reshape the 'new_indices' + new_indices_shape_changed = (indices_size,) + new_indices = reshape_op(new_indices, new_indices_shape_changed) + else: + new_indices_shape_changed = () x_shp_tail = x_shp[1:] actual_dout_shape_changed = new_indices_shape_changed + x_shp_tail # Reshape the 'actual_dout' on device diff --git a/mindspore/ops/_grad_experimental/grad_nn_ops.py b/mindspore/ops/_grad_experimental/grad_nn_ops.py index acb3f84dc31..56e25b989e8 100644 --- a/mindspore/ops/_grad_experimental/grad_nn_ops.py +++ b/mindspore/ops/_grad_experimental/grad_nn_ops.py @@ -34,6 +34,19 @@ def get_bprop_ctc_loss_v2(self): return bprop +@bprop_getters.register(P.SoftMarginLoss) +def get_bprop_soft_margin_loss(self): + """Grad definition for `SoftMarginLoss` operation.""" + grad = G.SoftMarginLossGrad(reduction=self.reduction) + + def bprop(predict, label, out, dout): + dx = grad(predict, label, dout) + dy = grad(label, predict, dout) + return dx, dy + + return bprop + + @bprop_getters.register(P.SoftShrink) def get_bprop_softshrink(self): """Grad definition for `SoftShrink` operation.""" @@ -44,3 +57,15 @@ def get_bprop_softshrink(self): return (dx,) return bprop + + +@bprop_getters.register(P.HShrink) +def get_bprop_hshrink(self): + """Grad definition for `HShrinkGrad` operation.""" + grad = G.HShrinkGrad(self.lambd) + + def bprop(features, out, gradients): + dx = grad(gradients, features) + return (dx,) + + return bprop diff --git a/mindspore/ops/_op_impl/akg/ascend/__init__.py b/mindspore/ops/_op_impl/akg/ascend/__init__.py index 61e9dea9db4..41127a2806a 100644 --- a/mindspore/ops/_op_impl/akg/ascend/__init__.py +++ b/mindspore/ops/_op_impl/akg/ascend/__init__.py @@ -44,5 +44,6 @@ from .sqrt import _sqrt_akg from .square import _square_akg from .sub import _sub_akg from .prod_force_se_a import _prod_force_se_a_akg +from .load_im2col import _load_im2col_akg # Please insert op register 
in lexicographical order of the filename. diff --git a/mindspore/ops/_op_impl/cpu/__init__.py b/mindspore/ops/_op_impl/cpu/__init__.py index 6ac57186a4b..0b1f418ecf8 100644 --- a/mindspore/ops/_op_impl/cpu/__init__.py +++ b/mindspore/ops/_op_impl/cpu/__init__.py @@ -64,3 +64,4 @@ from .one_hot import _one_hot_cpu from .pad import _pad_cpu from .range import _range_cpu from .tensor_copy_slices import _tensor_copy_slices_cpu +from .l2loss import _l2loss_cpu diff --git a/mindspore/ops/_op_impl/tbe/__init__.py b/mindspore/ops/_op_impl/tbe/__init__.py index a017bc4d416..7108c57a533 100644 --- a/mindspore/ops/_op_impl/tbe/__init__.py +++ b/mindspore/ops/_op_impl/tbe/__init__.py @@ -150,6 +150,7 @@ from .logical_or import _logical_or_tbe from .reduce_max import _reduce_max_tbe from .reduce_min import _reduce_min_tbe from .reduce_sum import _reduce_sum_tbe +from .reduce_sum_ds import _reduce_sum_ds_tbe from .round import _round_tbe from .tanh import _tanh_tbe from .tanh_grad import _tanh_grad_tbe @@ -219,6 +220,8 @@ from .arg_max_with_value import _arg_max_with_value_tbe from .arg_min_with_value import _arg_min_with_value_tbe from .smooth_l1_loss import _smooth_l1_loss_tbe from .smooth_l1_loss_grad import _smooth_l1_loss_grad_tbe +from .soft_margin_loss import _soft_margin_loss_tbe +from .soft_margin_loss_grad import _soft_margin_loss_grad_tbe from .fused_mul_add import _fused_mul_add_tbe from .fused_mul_add_n import _fused_mul_add_n_tbe from .fused_mul_apply_momentum import _fused_mul_apply_momentum_tbe @@ -394,3 +397,5 @@ from .soft_shrink import _soft_shrink_tbe from .soft_shrink_grad import _soft_shrink_grad_tbe from .hsigmoid_grad import _hsigmoid_grad_tbe from .hsigmoid import _hsigmoid_tbe +from .hshrink import _hshrink_tbe +from .hshrink_grad import _hshrink_grad_tbe diff --git a/mindspore/ops/bprop_mindir/Identity_bprop.mindir b/mindspore/ops/bprop_mindir/Identity_bprop.mindir index ad7f1ccef67..39bfa0862c2 100644 --- 
a/mindspore/ops/bprop_mindir/Identity_bprop.mindir +++ b/mindspore/ops/bprop_mindir/Identity_bprop.mindir @@ -1,9 +1,9 @@ -0.1.0 MindSpore*1.1.0:î +0.1.0 MindSpore*1.4.0:î — - bprop.10:doutbprop.10:[CNode]12:2bprop.10:[CNode]11:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op15bprop.10* + bprop.15:doutbprop.15:[CNode]17:2bprop.15:[CNode]16:1"S-Prim-MakeTuple:HGradients/Default/network-NetIdentity/gradIdentity/S-Prim-MakeTuple-op15bprop.15* -bprop.10:x* - bprop.10:out* - bprop.10:dout2 -bprop.10:[CNode]12:2:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file +bprop.15:x* + bprop.15:out* + bprop.15:dout2 
+bprop.15:[CNode]17:2:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4ec99802e8da0efad2a3f80e99bfdcc99c4d54f2769de69733086a4722cb141371ba6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file diff --git a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir index 56ae56bfac1..728be19742d 100644 --- a/mindspore/ops/bprop_mindir/ReLU_bprop.mindir +++ b/mindspore/ops/bprop_mindir/ReLU_bprop.mindir @@ -1,11 +1,11 @@ -0.1.0 MindSpore*1.1.0:å +0.1.0 MindSpore*1.4.0:å ˆ - bprop.2:dout - bprop.2:out bprop.2:dx:1 bprop.2:dx:1"S-Prim-ReluGrad:>Gradients/Default/network-NetRelu/gradReLU/S-Prim-ReluGrad-op5 + bprop.4:dout + bprop.4:out bprop.4:dx:1 bprop.4:dx:1"S-Prim-ReluGrad:>Gradients/Default/network-NetRelu/gradReLU/S-Prim-ReluGrad-op5 ‰ - bprop.2:dx:1bprop.2:[CNode]4:3bprop.2:[CNode]3:2"S-Prim-MakeTuple:?Gradients/Default/network-NetRelu/gradReLU/S-Prim-MakeTuple-op6bprop.2* - bprop.2:x* - bprop.2:out* - bprop.2:dout2 
-bprop.2:[CNode]4:3:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4eca0593a639478ea8dfad17fdbe39f66855cc459eb58bcaf5eac44185e03b16374a6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file + bprop.4:dx:1bprop.4:[CNode]6:3bprop.4:[CNode]5:2"S-Prim-MakeTuple:?Gradients/Default/network-NetRelu/gradReLU/S-Prim-MakeTuple-op6bprop.4* + bprop.4:x* + bprop.4:out* + bprop.4:dout2 +bprop.4:[CNode]6:3:€027af68f320ba40d9fbd0893da424c07f9c3a4ec82e98f9543bff9b5a15547a2102a58399653345b09bd6f5b337c4b81c4f8900664c0abc09fb80f38f8e95be82366f7bd59ea5ec135e982de03b4f7cab6b61d833d046a6e13f78bdaf2fb2b224c332efad4a51b4773cb78093dd53a4ca850b2dc6cdd5f2ae47106b3fda77bb3565f906930f68ca2413e9ad958d105e129e717cd183b95d11d65a8b0b030fc0d65c0e00bc893ef15ec6199798d6c8c46997153587d375b3240c1195ff2c7278c7e635a08323207b4cb3f73fd8437b4d7ee28a7676a68f005a7749bd19e5ed4ec99802e8da0efad2a3f80e99bfdcc99c4d54f2769de69733086a4722cb141371ba6c407ad6a3b57190d3702d6a45031d13b97bb6952735edf94fb36f73dbff6cdab258748286fc6d783abacce203dfc79d2fc31e23a427ce1f86e08777a687f71c414b8c313aac4f85c6217fbbb7009dd079b2d5548f8b695a470a11cb8cc83e6f5e78f5b3c67f2e7bf339b250c3638aee952e1a073002e2834011401f3827260 \ No newline at end of file diff --git a/mindspore/ops/composite/random_ops.py b/mindspore/ops/composite/random_ops.py index 2d29a362c36..f3edf17e973 100644 --- a/mindspore/ops/composite/random_ops.py +++ 
b/mindspore/ops/composite/random_ops.py @@ -251,7 +251,7 @@ def gamma(shape, alpha, beta, seed=None): >>> output = ops.gamma(shape, alpha, beta, seed=5) >>> result = output.shape >>> print(output) - [[[ 2.2132034 5.8855834]] + [[[ 2.2132034 5.8855834]] [ 3.3981476 7.5805717] [[ 3.3981476 7.5805717]] [ 3.7190282 19.941492] @@ -264,7 +264,7 @@ def gamma(shape, alpha, beta, seed=None): >>> output = ops.gamma(shape, alpha, beta, seed=5) >>> result = output.shape >>> print(output) - [[[ 5.6085486 7.8280783]] + [[[ 5.6085486 7.8280783]] [ 15.97684 16.116285] [[ 1.8347423 1.713663]] [ 3.2434065 15.667398] diff --git a/mindspore/ops/functional.py b/mindspore/ops/functional.py index 66bb25e84b2..22173821784 100644 --- a/mindspore/ops/functional.py +++ b/mindspore/ops/functional.py @@ -116,6 +116,10 @@ bitwise_and = P.BitwiseAnd() bitwise_or = P.BitwiseOr() bitwise_xor = P.BitwiseXor() invert = P.Invert() +erf = P.Erf() +erfc = P.Erfc() +sort = P.Sort() +tensor_range = P.Range() scalar_to_array = P.ScalarToArray() scalar_to_tensor = P.ScalarToTensor() diff --git a/mindspore/ops/operations/__init__.py b/mindspore/ops/operations/__init__.py index bf79430ed1c..2237f7f0f5f 100644 --- a/mindspore/ops/operations/__init__.py +++ b/mindspore/ops/operations/__init__.py @@ -76,9 +76,9 @@ from .nn_ops import (LSTM, SGD, Adam, FusedSparseAdam, FusedSparseLazyAdam, Adam MaxPool, DataFormatDimMap, AvgPool, Conv2DBackpropInput, ComputeAccidentalHits, MaxPoolWithArgmax, OneHot, Pad, MirrorPad, Mish, PReLU, ReLU, ReLU6, ReLUV2, HSwish, HSigmoid, - ResizeBilinear, Sigmoid, SeLU, + ResizeBilinear, Sigmoid, SeLU, HShrink, SigmoidCrossEntropyWithLogits, NLLLoss, BCEWithLogitsLoss, - SmoothL1Loss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2, + SmoothL1Loss, SoftMarginLoss, Softmax, Softsign, Softplus, LRN, RNNTLoss, DynamicRNN, DynamicGRUV2, SoftmaxCrossEntropyWithLogits, ROIAlign, SparseSoftmaxCrossEntropyWithLogits, Tanh, TopK, BinaryCrossEntropy, KLDivLoss, 
SparseApplyAdagrad, LARSUpdate, ApplyFtrl, SparseApplyFtrl, @@ -96,7 +96,8 @@ from .other_ops import (Assign, InplaceAssign, IOU, BoundingBoxDecode, BoundingB from ._thor_ops import (CusBatchMatMul, CusCholeskyTrsm, CusFusedAbsMax1, CusImg2Col, CusMatMulCubeDenseLeft, CusMatMulCubeFraczRightMul, CusMatMulCube, CusMatrixCombine, CusTranspose02314, CusMatMulCubeDenseRight, - CusMatMulCubeFraczLeftCast, Im2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, DetTriangle, + CusMatMulCubeFraczLeftCast, Im2Col, LoadIm2Col, UpdateThorGradient, Cholesky, CholeskyTrsm, + DetTriangle, ProdForceSeA) from .sparse_ops import (SparseToDense, SparseTensorDenseMatmul) from ._embedding_cache_ops import (CacheSwapTable, UpdateCache, MapCacheIdx, SubAndFilter, @@ -107,9 +108,18 @@ from .sponge_ops import (BondForce, BondEnergy, BondAtomEnergy, BondForceWithAto AngleEnergy, AngleAtomEnergy, AngleForceWithAtomEnergy, PMEReciprocalForce, LJForce, LJEnergy, LJForceWithPMEDirectForce, PMEExcludedForce, PMEEnergy, Dihedral14LJForce, Dihedral14LJForceWithDirectCF, Dihedral14LJEnergy, Dihedral14LJCFForceWithAtomEnergy, - Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, MDIterationLeapFrog, - GetCenterOfGeometry, MDTemperature, NeighborListUpdate, MDIterationLeapFrogLiujian, + Dihedral14LJAtomEnergy, Dihedral14CFEnergy, Dihedral14CFAtomEnergy, + GetCenterOfGeometry, MDTemperature, MDIterationLeapFrogLiujian, CrdToUintCrd, MDIterationSetupRandState, TransferCrd, FFT3D, IFFT3D) +from .sponge_update_ops import (v0coordinaterefresh, v1coordinaterefresh, v2coordinaterefresh, v3coordinaterefresh, + v0forceredistribute, v1forceredistribute, v2forceredistribute, v3forceredistribute, + restrainenergy, restrainforcewithatomenergyandvirial, constrainforcecyclewithvirial, + refreshuintcrd, lastcrdtodr, refreshcrdvel, calculatenowrapcrd, refreshboxmaptimes, + totalc6get, copyfrctosystemgrad, CrdToUintCrdQuarter, + MDIterationLeapFrogLiujianWithMaxVel, GetCenterOfMass, MapCenterOfMass, + 
NeighborListUpdate, MDIterationLeapFrog, + MDIterationLeapFrogWithMaxVel, MDIterationGradientDescent, + BondForceWithAtomEnergyAndVirial, ConstrainForceCycle) __all__ = [ 'Unique', @@ -276,6 +286,7 @@ __all__ = [ 'FloatStatus', 'Reciprocal', 'SmoothL1Loss', + 'SoftMarginLoss', 'L2Loss', 'CTCLoss', 'CTCGreedyDecoder', @@ -485,7 +496,34 @@ __all__ = [ "TensorScatterSub", "SoftShrink", "FFT3D", - "IFFT3D" + "IFFT3D", + "HShrink", + "v0coordinaterefresh", + "v1coordinaterefresh", + "v2coordinaterefresh", + "v3coordinaterefresh", + "v0forceredistribute", + "v1forceredistribute", + "v2forceredistribute", + "v3forceredistribute", + "restrainenergy", + "restrainforcewithatomenergyandvirial", + "constrainforcecyclewithvirial", + "refreshuintcrd", + "lastcrdtodr", + "refreshcrdvel", + "calculatenowrapcrd", + "refreshboxmaptimes", + "totalc6get", + "copyfrctosystemgrad", + "CrdToUintCrdQuarter", + "MDIterationLeapFrogLiujianWithMaxVel", + "GetCenterOfMass", + "MapCenterOfMass", + "MDIterationLeapFrogWithMaxVel", + "MDIterationGradientDescent", + "BondForceWithAtomEnergyAndVirial", + "ConstrainForceCycle", ] __all__.sort() diff --git a/mindspore/ops/operations/_grad_ops.py b/mindspore/ops/operations/_grad_ops.py index 22f361f7060..208604b368d 100644 --- a/mindspore/ops/operations/_grad_ops.py +++ b/mindspore/ops/operations/_grad_ops.py @@ -1831,6 +1831,15 @@ class SmoothL1LossGrad(PrimitiveWithInfer): return dloss +class SoftMarginLossGrad(Primitive): + """Computes gradient for prediction on SoftMarginLoss.""" + + @prim_attr_register + def __init__(self, reduction="mean"): + self.init_prim_io_names(inputs=['predict', 'label', "dout"], outputs=['gradient']) + self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) + + class StridedSliceGrad(PrimitiveWithInfer): """ Performs grad of StridedSlice operation. 
@@ -2212,3 +2221,37 @@ class SoftShrinkGrad(Primitive): self.init_prim_io_names(inputs=['input_grad', 'input_x'], outputs=['output']) validator.check_value_type("lambd", lambd, [float], self.name) validator.check_number("lambd", lambd, 0, Rel.GE, self.name) + + +class HShrinkGrad(Primitive): + """ + Computes gradients for HShrink operation. + + Args: + lambd (float): the λ value for the Hardshrink formulation. Default: 0.5 + + Inputs: + - **Gradients** (Tensor) - the gradients of loss to output of HShrink function. + Currently gradients data type only supports float16 and float32. + - **Features** (Tensor) - Must be the input `input_x` of the forward operator HShrink. + Currently features data type only supports float16 and float32. + + Outputs: + backprops - Tensor, with the same shape and data type as `features`. + + Raises: + ValueError: If `lambd` is not a float. + ValueError: If shape of `gradients` is not the same as `features`. + TypeError: If dtype of `gradients` is not the same as `features`. + TypeError: If dtype of `gradients` or `features` is neither float16 nor float32.
+ + Supported Platforms: + ``Ascend`` + """ + + @prim_attr_register + def __init__(self, lambd=0.5): + validator.check_value_type("lambd", lambd, [float], self.name) + if lambd < 0.0: + lambd = 0.0 + self.add_prim_attr('lambd', lambd) diff --git a/mindspore/ops/operations/_thor_ops.py b/mindspore/ops/operations/_thor_ops.py index 537560d0ca2..8627f4c40bc 100644 --- a/mindspore/ops/operations/_thor_ops.py +++ b/mindspore/ops/operations/_thor_ops.py @@ -31,6 +31,7 @@ __all__ = ["CusBatchMatMul", "CusTranspose02314", "CusMatMulCubeDenseRight", "CusMatMulCubeFraczLeftCast", + "LoadIm2Col" ] @@ -362,6 +363,7 @@ class CusTranspose02314(PrimitiveWithInfer): def get_bprop(self): """Get backprop for CusTranspose02314.""" + def bprop(x, out, dout): return (C.zeros_like(x),) @@ -529,6 +531,55 @@ class Im2Col(PrimitiveWithInfer): return x_dtype +class LoadIm2Col(PrimitiveWithInfer): + """ + extracts image patches from image. + + The rank of input_x1 must be `4`, data_format is "NCHW". + Only supports when C is divisible by 16. + + Inputs: + - **input_x1** (Tensor) - The feature map. + The shape of the tensor is :math:`(N, C, H, W)`. + Outputs: + Tensor. 
+ Examples: + >>> input_x = Tensor(np.random.rand(32, 16, 224, 224).astype(np.float16)) + >>> img2col = ops.LoadIm2Col(kernel_size=(7,7), stride=(2,2)) + >>> output = img2col(input_x) + """ + + @prim_attr_register + def __init__(self, + ksizes, + strides, + pad_mode="same", + dilates=(1, 1, 1, 1)): + """Initialize LoadIm2Col""" + + self.init_prim_io_names(inputs=['x1'], outputs=['y']) + self.ksizes = ksizes + self.strides = strides + self.pad_mode = validator.check_string(pad_mode, ['same'], 'pad_mode', self.name) + self.dilation = dilates + + def infer_shape(self, data1_shape): + bs, c, h, w = data1_shape + stride_h, stride_w = self.strides + k_w, k_h = self.ksizes + h_out = math.ceil(h / stride_h) + w_out = math.ceil(w / stride_w) + m = h_out * w_out + if m % 16 != 0: + shape = [(bs * m) // 16, (c * k_h * k_w) // 16, 16, 16] + else: + shape = [bs, m // 16, (c * k_h * k_w) // 16, 16, 16] + return shape + + def infer_dtype(self, data1_dtype): + return data1_dtype + + class UpdateThorGradient(PrimitiveWithInfer): """ Updates Thor Gradient with Approximate Fisher info matrix(for GPU backend). diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index e80ab1c250c..50afe154728 100755 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -739,6 +739,7 @@ class Unique(Primitive): Inputs: - **input_x** (Tensor) - The input tensor. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. 
Outputs: Tuple, containing Tensor objects `(y, idx), `y` is a tensor with the @@ -1202,7 +1203,7 @@ class Size(PrimitiveWithInfer): else: size = functools.reduce(lambda x, y: x * y, x['shape']) out = {'shape': None, - 'dtype': mstype.int32, + 'dtype': mstype.int64, 'value': size} return out @@ -1285,7 +1286,6 @@ class Ones(PrimitiveWithInfer): ``Ascend`` ``GPU`` ``CPU`` Examples: - >>> from mindspore.ops import operations as ops >>> ones = ops.Ones() >>> output = ones((2, 2), mindspore.float32) >>> print(output) @@ -1347,7 +1347,6 @@ class Zeros(Primitive): ``Ascend`` ``GPU`` ``CPU`` Examples: - >>> from mindspore.ops import operations as ops >>> zeros = ops.Zeros() >>> output = zeros((2, 2), mindspore.float32) >>> print(output) @@ -1369,6 +1368,7 @@ class OnesLike(Primitive): Inputs: - **input_x** (Tensor) - Input tensor. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and type as `input_x` but filled with ones. @@ -1401,6 +1401,7 @@ class ZerosLike(Primitive): Inputs: - **input_x** (Tensor) - Input tensor. The data type is int32, int64, float16 or float32. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and data type as `input_x` but filled with zeros. @@ -1655,7 +1656,7 @@ class Argmax(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - Input tensor. :math:`(N,*)` where :math:`*` means, any number of additional dimensions. - Support data type list as follows: + Support data type list as follows: - Ascend: Float16, Float32. - GPU: Float16, Float32. @@ -1716,6 +1717,7 @@ class Argmin(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - Input tensor. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, indices of the min value of input tensor across the axis. 
@@ -1860,7 +1862,7 @@ class ArgMinWithValue(PrimitiveWithInfer): >>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32) >>> output = ops.ArgMinWithValue()(input_x) >>> print(output) - (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0.0)) + (Tensor(shape=[], dtype=Int32, value= 0), Tensor(shape=[], dtype=Float32, value= 0)) >>> output = ops.ArgMinWithValue(keep_dims=True)(input_x) >>> print(output) (Tensor(shape=[1], dtype=Int32, value= [0]), Tensor(shape=[1], dtype=Float32, value= [ 0.00000000e+00])) @@ -2299,13 +2301,14 @@ class Concat(PrimitiveWithInfer): Inputs: - **input_x** (tuple, list) - A tuple or a list of input tensors. - `input_x`, `input_y` should has same data type. - - **input_y** (tuple, list) - A tuple or a list of input tensors. - `input_x`, `input_y` should has same data type. + Suppose there are two tensors in this tuple or list, namely x1 and x2. + To perform `Concat` in the axis 0 direction, except for the 0th axis, all other axes should be equal, + that is, :math:`x1.shape[1] == x2.shape[1], x1.shape[2] == x2.shape[2], ..., x1.shape[R] == x2.shape[R]`, + where the :math:`R` indicates the last axis. Outputs: Tensor, the shape is :math:`(x_1, x_2, ..., \sum_{i=1}^Nx_{mi}, ..., x_R)`. - The data type is the same with `input_X` and `input_y`. + The data type is the same with `input_x`. Raises: TypeError: If `axis` is not an int. @@ -2314,17 +2317,17 @@ class Concat(PrimitiveWithInfer): ``Ascend`` ``GPU`` ``CPU`` Examples: - >>> input_x = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32)) - >>> input_y = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32)) + >>> input_x1 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32)) + >>> input_x2 = Tensor(np.array([[0, 1], [2, 1]]).astype(np.float32)) >>> op = ops.Concat() - >>> output = op((input_x, input_y)) + >>> output = op((input_x1, input_x2)) >>> print(output) [[0. 1.] [2. 1.] [0. 1.] [2. 
1.]] >>> op = ops.Concat(1) - >>> output = op((input_x, input_y)) + >>> output = op((input_x1, input_x2)) >>> print(output) [[0. 1. 0. 1.] [2. 1. 2. 1.]] @@ -2658,6 +2661,7 @@ class Slice(PrimitiveWithInfer): Inputs: - **input_x** (Tensor): The target tensor. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. - **begin** (Union[tuple, list]): The beginning of the slice. Only constant value(>=0) is allowed. - **size** (Union[tuple, list]): The size of the slice. Only constant value is allowed. @@ -2733,6 +2737,7 @@ class ReverseV2(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - The target tensor. The data type is Number except float64. + The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and type as `input_x`. @@ -2795,7 +2800,7 @@ class Rint(PrimitiveWithInfer): Inputs: - **input_x** (Tensor) - The target tensor, which must be one of the following types: - float16, float32. + float16, float32. The shape is :math:`(N,*)` where :math:`*` means, any number of additional dimensions. Outputs: Tensor, has the same shape and type as `input_x`. diff --git a/mindspore/ops/operations/inner_ops.py b/mindspore/ops/operations/inner_ops.py index d21cb5d4be4..16fbe1993ae 100755 --- a/mindspore/ops/operations/inner_ops.py +++ b/mindspore/ops/operations/inner_ops.py @@ -502,8 +502,7 @@ class AdamWeightDecay(PrimitiveWithInfer): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor, Parameter - >>> from mindspore.ops import operations as ops + >>> from mindspore import Tensor, Parameter, ops >>> class Net(nn.Cell): ... def __init__(self): ... 
super(Net, self).__init__() diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 7d4dd49cee4..dd036edc5c4 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -371,10 +371,43 @@ class _Reduce(PrimitiveWithInfer): input_shp = input_x['shape'] args = {'input_x': input_x['dtype']} validator.check_tensors_dtypes_same_and_valid(args, valid_dtype, self.name) - - if axis_v is None: + if not isinstance(axis, mstype.tensor_type) and axis_v is None: raise ValueError(f"For {self.name}, axis must be const.") - input_shp = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) + out_shape = _infer_shape_reduce(input_shp, axis_v, self.keep_dims, self.name) + if -1 in input_shp: + if axis_v is None: + max_v = max(input_shp) + if 'max_shape' and 'min_shape' in input_x: + input_max_shp = input_x['max_shape'] + max_v = max(input_max_shp) + axis_shape_list = axis['shape'] + if len(axis_shape_list) != 1: + raise ValueError("axis_shape must be 1-D, but got ", len(axis_shape_list)) + axis_shape = axis_shape_list[0] + if len(axis_shape) == 1 and axis_shape[0] == -1 and not self.keep_dims: + out_shape = np.array([-2]).tolist() + output_min_shape = np.ones_like(input_shp).tolist() + output_max_shape = max_v * np.ones_like(input_shp) + output_max_shape = output_max_shape.tolist() + elif not self.keep_dims: + out_shape = -1 * np.ones_like(input_shp[:-axis_shape]) + out_shape = out_shape.tolist() + output_min_shape = np.ones_like(out_shape).tolist() + output_max_shape = max_v * np.ones_like(out_shape) + output_max_shape = output_max_shape.tolist() + else: + out_shape = -1 * np.ones_like(input_shp) + out_shape = out_shape.tolist() + output_min_shape = np.ones_like(input_shp).tolist() + output_max_shape = max_v * np.ones_like(input_shp) + output_max_shape = output_max_shape.tolist() + else: + output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name) + 
output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name) + else: + output_max_shape = out_shape + output_min_shape = out_shape + value = None if input_x['value'] is not None: prim_map = { @@ -386,20 +419,13 @@ class _Reduce(PrimitiveWithInfer): if np_reduce_func is not None: value = input_x['value'].asnumpy() - if not axis_v and axis_v != 0: + if not axis_v: axis_v = [i for i in range(len(input_x['shape']))] axis_v = tuple(axis_v) value = np_reduce_func(value, axis_v, keepdims=self.keep_dims) value = np.array(value) value = Tensor(value) - if 'max_shape' and 'min_shape' in input_x: - output_max_shape = _infer_shape_reduce(input_x['max_shape'], axis_v, self.keep_dims, self.name) - output_min_shape = _infer_shape_reduce(input_x['min_shape'], axis_v, self.keep_dims, self.name) - else: - output_max_shape = input_shp - output_min_shape = input_shp - - return {'shape': input_shp, + return {'shape': out_shape, 'min_shape': output_min_shape, 'max_shape': output_max_shape, 'dtype': input_x['dtype'], @@ -1013,7 +1039,7 @@ class MatMul(PrimitiveWithCheck): .. math:: - (Output)_{i j}=\\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\\cdots+a_{i p} b_{p j}, p\\in N + (Output)_{i j}=\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\cdots+a_{i p} b_{p j}, p\in N where the :math:`i,j` indicates the output of the i-th row and j-th column element. @@ -3248,10 +3274,10 @@ class ApproximateEqual(_LogicBinaryOp): .. math:: - out_i = \begin{cases} - & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True\ \\ - & \text{ if } \left | x_{i} - y_{i} \right | \ge \text{tolerance},\ \ False\ - \end{cases} + out_i = \begin{cases} + & \text{ if } \left | x_{i} - y_{i} \right | < \text{tolerance},\ \ True \\ + & \text{ if } \left | x_{i} - y_{i} \right | \ge \text{tolerance},\ \ False + \end{cases} where :math:`\text{tolerance}` indicates Acceptable maximum tolerance. 
@@ -3759,10 +3785,10 @@ class IsNan(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Nan},\ \ True\ \\ - & \text{ if } x_{i} \ne \text{Nan},\ \ False\ - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Nan},\ \ True \\ + & \text{ if } x_{i} \ne \text{Nan},\ \ False + \end{cases} where :math:`Nan` means not a number. @@ -3805,10 +3831,10 @@ class IsInf(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Inf},\ \ True\ \\ - & \text{ if } x_{i} \ne \text{Inf},\ \ False\ - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Inf},\ \ True \\ + & \text{ if } x_{i} \ne \text{Inf},\ \ False + \end{cases} where :math:`Inf` means not a number. @@ -3851,10 +3877,10 @@ class IsFinite(PrimitiveWithInfer): .. math:: - out_i = \begin{cases} - & \text{ if } x_{i} = \text{Finite},\ \ True\ \\ - & \text{ if } x_{i} \ne \text{Finite},\ \ False\ - \end{cases} + out_i = \begin{cases} + & \text{ if } x_{i} = \text{Finite},\ \ True \\ + & \text{ if } x_{i} \ne \text{Finite},\ \ False + \end{cases} Inputs: - **x** (Tensor) - The input tensor. 
diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 6c60d2a1d0b..46b6ce6ffd7 100755 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -2076,6 +2076,7 @@ class Conv2DBackpropInput(Primitive): self.init_prim_io_names(inputs=['out_backprop', 'filter', 'input_sizes'], outputs=['output']) self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name) self.kernel_size = _check_positive_int_or_tuple('kernel_size', kernel_size, self.name) + self.add_prim_attr('kernel_size', self.kernel_size) self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name) if context.get_context("device_target") != "GPU" and self.format == "NHWC": raise ValueError("NHWC format only support in GPU target.") @@ -2658,6 +2659,53 @@ class SmoothL1Loss(PrimitiveWithInfer): return prediction +class SoftMarginLoss(Primitive): + r""" + SoftMarginLoss operation. + + Creates a criterion that optimizes a two-class classification + logistic loss between input tensor :math:`x` and target tensor :math:`y` + (containing 1 or -1). + + .. math:: + \text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()} + + Args: + reduction (str): Apply specific reduction method to the output: 'none', 'mean', 'sum'. Default: "mean". + + Inputs: + - **logits** (Tensor) - Predict data. Data type must be float16 or float32. + - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`. + + Outputs: + Tensor or Scalar, if `reduction` is "none", its shape is the same as `logits`. + Otherwise, a scalar value will be returned. + + Raises: + TypeError: If `logits` or `labels` is not a Tensor. + TypeError: If dtype of `logits` or `labels` is neither float16 nor float32. + ValueError: If shape of `logits` is not the same as `labels`. + ValueError: If `reduction` is not one of 'none', 'mean', 'sum'. 
+ + Supported Platforms: + ``Ascend`` + + Examples: + >>> loss = ops.SoftMarginLoss() + >>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32) + >>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32) + >>> output = loss(logits, labels) + >>> print(output) + 0.6764238 + """ + + @prim_attr_register + def __init__(self, reduction="mean"): + """Initialize SoftMarginLoss""" + self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss']) + self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name) + + class L2Loss(PrimitiveWithInfer): """ Calculates half of the L2 norm of a tensor without using the `sqrt`. @@ -2678,7 +2726,7 @@ class L2Loss(PrimitiveWithInfer): TypeError: If dtype of `input_x` is neither float16 nor float32. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples >>> input_x = Tensor(np.array([1, 2, 3]), mindspore.float16) @@ -4097,7 +4145,7 @@ class MirrorPad(PrimitiveWithInfer): ``Ascend`` ``GPU`` ``CPU`` Examples: - # case1: mode="REFLECT" + >>> # case1: mode="REFLECT" >>> class Net(nn.Cell): ... def __init__(self, mode): ... super(Net, self).__init__() @@ -8606,7 +8654,6 @@ class SoftShrink(Primitive): x + \lambda, & \text{ if } x < -\lambda \\ 0, & \text{ otherwise } \end{cases} - Args: lambd: the :math:`\lambda` must be no less than zero value for the Softshrink formulation. Default: 0.5. @@ -8640,3 +8687,49 @@ class SoftShrink(Primitive): """Initialize SoftShrink""" validator.check_value_type("lambd", lambd, [float], self.name) validator.check_number("lambd", lambd, 0, Rel.GE, self.name) + + +class HShrink(Primitive): + r""" + Applies the hard shrinkage function element-wise, each element complies the follow function: + + .. 
math:: + \text{HardShrink}(x) = + \begin{cases} + x, & \text{ if } x > \lambda \\ + x, & \text{ if } x < -\lambda \\ + 0, & \text{ otherwise } + \end{cases} + + Args: + lambd (float): The value for the HardShrink formulation. Default: 0.5 + + Inputs: + - **input_x** (Tensor) - The input of HardShrink with data type of float16 or float32. + + Outputs: + Tensor, the same shape and data type as the input. + + Supported Platforms: + ``Ascend`` + + Raises: + TypeError: If `lambd` is not a float. + TypeError: If dtype of `input_x` is neither float16 nor float32. + + Examples: + >>> input_x = Tensor(np.array([[ 0.5, 1, 2.0],[0.0533,0.0776,-2.1233]]),mstype.float32) + >>> hshrink = P.HShrink() + >>> output = hshrink(input_x) + >>> print(output) + [[ 0. 1. 2. ] + [ 0. 0. -2.1233]] + """ + + @prim_attr_register + def __init__(self, lambd=0.5): + """Initialize HShrink""" + validator.check_value_type('lambd', lambd, [float], self.name) + if lambd < 0.0: + lambd = 0.0 + self.add_prim_attr('lambd', lambd) diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index 9c44f386a09..4e746ac04a7 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -291,8 +291,7 @@ class CheckValid(PrimitiveWithInfer): >>> import mindspore >>> import mindspore.nn as nn >>> import numpy as np - >>> from mindspore import Tensor - >>> from mindspore.ops import operations as ops + >>> from mindspore import Tensor, ops >>> class Net(nn.Cell): ... def __init__(self): ... 
super(Net, self).__init__() diff --git a/mindspore/ops/operations/sponge_ops.py b/mindspore/ops/operations/sponge_ops.py index af6fff4e60c..8e9773ba5af 100644 --- a/mindspore/ops/operations/sponge_ops.py +++ b/mindspore/ops/operations/sponge_ops.py @@ -1950,95 +1950,6 @@ class Dihedral14CFAtomEnergy(PrimitiveWithInfer): return charge_dtype -class MDIterationLeapFrog(PrimitiveWithInfer): - """ - One step of classical leap frog algorithm to solve the finite difference - Hamiltonian equations of motion for certain system, using Langevin dynamics - with Liu's thermostat scheme. Assume the number of atoms is n and the target - control temperature is T. - - Detailed iteration formula can be found in this paper: A unified thermostat - scheme for efficient configurational sampling for classical/quantum canonical - ensembles via molecular dynamics. DOI: 10.1063/1.4991621. - - Because there is a large amount of inputs and each of them are related, - there is no way to construct `Examples` using random methods. For details, refer the webpage `SPONGE in MindSpore - `_. - - Args: - float4_numbers(int32): total length to store random numbers. - atom_numbers(int32): the number of atoms n. - dt(float32): time step for finite difference. - half_dt(float32): half of time step for finite difference. - exp_gamma(float32): parameter in Liu's dynamic, equals exp(-gamma_ln * dt), - where gamma_ln is the firction factor in Langvin dynamics. - max_velocity(float32): the upper limit of velocity, when the veclocity overflows, - scale it to the upper limit. - is_max_velocity(int32): whether the max velocity control is open or not. - - Inputs: - - **mass_inverse** (Tensor) - The inverse value of mass of each atom. - The data type is float32 and the shape is :math:`(n,]` - - **sqrt_mass** (Tensor) - The inverse square root value - of effect mass in Liu's dynamics of each atom. The data type is float32 and the shape is :math:`(n,]` - - Outputs: - - **vel** (Tensor) - The velocity of each atom. 
- The data type is float32 and the shape is :math:`(n, 3]` - - **crd** (Tensor) - The coordinate of each atom. - The data type is float32 and the shape is :math:`(n, 3]` - - **frc** (Tensor) - The force felt by each atom. - The data type is float32 and the shape is :math:`(n, 3]` - - **acc** (Tensor) - The acceleration of each atom. - The data type is float32 and the shape is :math:`(n, 3]` - - Supported Platforms: - ``GPU`` - """ - - @prim_attr_register - def __init__(self, float4_numbers, atom_numbers, half_dt, dt, exp_gamma, is_max_velocity, max_velocity): - """Initialize MDIterationLeapFrog.""" - validator.check_value_type('float4_numbers', float4_numbers, int, self.name) - validator.check_value_type('atom_numbers', atom_numbers, int, self.name) - validator.check_value_type('half_dt', half_dt, float, self.name) - validator.check_value_type('dt', dt, float, self.name) - validator.check_value_type('exp_gamma', exp_gamma, float, self.name) - validator.check_value_type('is_max_velocity', is_max_velocity, int, self.name) - validator.check_value_type('max_velocity', max_velocity, float, self.name) - self.float4_numbers = float4_numbers - self.atom_numbers = atom_numbers - self.half_dt = half_dt - self.dt = dt - self.exp_gamma = exp_gamma - self.is_max_velocity = is_max_velocity - self.max_velocity = max_velocity - - self.init_prim_io_names( - inputs=['mass_inverse', 'sqrt_mass'], - outputs=['vel', 'crd', 'frc', 'acc']) - self.add_prim_attr('float4_numbers', self.float4_numbers) - self.add_prim_attr('atom_numbers', self.atom_numbers) - self.add_prim_attr('half_dt', self.half_dt) - self.add_prim_attr('dt', self.dt) - self.add_prim_attr('exp_gamma', self.exp_gamma) - self.add_prim_attr('is_max_velocity', self.is_max_velocity) - self.add_prim_attr('max_velocity', self.max_velocity) - - def infer_shape(self, mass_inverse_shape, sqrt_mass_shape): - cls_name = self.name - n = self.atom_numbers - validator.check_int(mass_inverse_shape[0], n, Rel.EQ, "mass_inverse", cls_name) 
- validator.check_int(sqrt_mass_shape[0], n, Rel.EQ, "sqrt_mass", cls_name) - return [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3], [self.atom_numbers, 3] - - def infer_dtype(self, mass_inverse_dtype, sqrt_mass_dtype): - validator.check_tensor_dtype_valid('mass_inverse', mass_inverse_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('sqrt_mass', sqrt_mass_dtype, [mstype.float32], self.name) - - return mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype, mass_inverse_dtype - - class PMEReciprocalForce(PrimitiveWithInfer): """ Calculate the reciprocal part of long-range Coulumb force using @@ -2710,219 +2621,6 @@ class MDTemperature(PrimitiveWithInfer): validator.check_tensor_dtype_valid('atom_mass', atom_mass_dtype, [mstype.float32], self.name) return atom_mass_dtype - -class NeighborListUpdate(PrimitiveWithInfer): - """ - Update (or construct if first time) the Verlet neighbor list for the - calculation of short-ranged force. Assume the number of atoms is n, - the number of grids divided is G, the maximum number of atoms in one - grid is m, the maximum number of atoms in single atom's neighbor list - is L, and the number of total atom in excluded list is E. - - Because there is a large amount of inputs and each of them are related, - there is no way to construct `Examples` using random methods. For details, refer the webpage `SPONGE in MindSpore - `_. - - Args: - grid_numbers(int32): the total number of grids divided. - not_first_time(int32): whether to construct the neighbor - list first time or not. - nxy(int32): the total number of grids divided in xy plane. - excluded_atom_numbers(int32): the total atom numbers in the excluded list. - cutoff(float32): the cutoff distance for short-range force calculation. Default: 10.0. - skin(float32): the overflow value of cutoff to maintain a neighbor list. Default: 2.0. - cutoff_square(float32): the square value of cutoff. 
- half_skin_square(float32): skin*skin/4, indicates the maximum - square value of the distance atom allowed to move between two updates. - cutoff_with_skin(float32): cutoff + skin, indicates the - radius of the neighbor list for each atom. - half_cutoff_with_skin(float32): cutoff_with_skin/2. - cutoff_with_skin_square(float32): the square value of cutoff_with_skin. - refresh_interval(int32): the number of iteration steps between two updates of neighbor list. Default: 20. - max_atom_in_grid_numbers(int32): the maximum number of atoms in one grid. Default: 64. - max_neighbor_numbers(int32): The maximum number of neighbors. Default: 800. - - Inputs: - - **atom_numbers_in_grid_bucket** (Tensor) - The number of atoms in each grid bucket. - The data type is int32 and the shape is :math:`(G,)`. - - **bucket** (Tensor) - The atom indices in each grid bucket. - The data type is int32 and the shape is :math:`(G, m)`. - - **crd** (Tensor) - The coordinates of each atom. - The data type is float32 and the shape is :math:`(n,)`. - - **box_length** (Tensor) - The length of 3 dimensions of the simulation box. - The data type is float32 and the shape is :math:`(3,)`. - - **grid_n** (Tensor) - The number of grids divided of 3 dimensions of the simulation box. - The data type is float32 and the shape is :math:`(3,)`. - - **grid_length_inverse** (Scalar) - the inverse value of grid length. - The data type is float32 and the shape is :math:`(n,)`. - - **atom_in_grid_serial** (Tensor) - The grid index for each atom. - The data type is int32 and the shape is :math:`(n,)`. - - **old_crd** (Tensor) - The coordinates before update of each atom. - The data type is float32 and the shape is :math:`(n, 3)`. - - **crd_to_uint_crd_cof** (Tensor) - The scale factor - between the unsigned int value and the real space coordinates. - The data type is float32 and the shape is :math:`(3,)`. - - **uint_crd** (Tensor) - The unsigned int coordinates value fo each atom. 
- The data type is uint32 and the shape is :math:`(n, 3)`. - - **gpointer** (Tensor) - The 125 nearest neighbor grids (including self) of each grid. - G is the number of nearest neighbor grids. The data type is int32 and the shape is :math:`(G, 125)`. - - **nl_atom_numbers** (Tensor) - The number of atoms in neighbor list of each atom. - The data type is int32 and the shape is :math:`(n,)`. - - **nl_atom_serial** (Tensor) - The indices of atoms in neighbor list of each atom. - The data type is int32 and the shape is :math:`(n, L)`. - - **uint_dr_to_dr_cof** (Tensor) - The scale factor between - the real space coordinates and the unsigned int value. The data type is float32 and the shape is :math:`(3,)`. - - **excluded_list_start** (Tensor) - The start excluded index in excluded list for each atom. - The data type is int32 and the shape is :math:`(n,)`. - - **excluded_numbers** (Tensor) - The number of atom excluded in excluded list for each atom. - The data type is int32 and the shape is :math:`(n,)`. - - **excluded_list** (Tensor) - The contiguous join of excluded list of each atom. - The data type is int32 and the shape is :math:`(E,)`. - - **need_refresh_flag** (Tensor) - Whether the neighbor list of each atom need update or not. - The data type is int32 and the shape is :math:`(n,)`. - - **refresh_count** (Tensor) - Count how many iteration steps have passed since last update. - The data type is int32 and the shape is :math:`(n,)`. - - Outputs: - - **res** (Scalar) - The data type is float32. 
- - Supported Platforms: - ``GPU`` - """ - - @prim_attr_register - def __init__(self, grid_numbers, atom_numbers, not_first_time, nxy, excluded_atom_numbers, - cutoff_square, half_skin_square, cutoff_with_skin, half_cutoff_with_skin, cutoff_with_skin_square, - refresh_interval=20, cutoff=10.0, skin=2.0, max_atom_in_grid_numbers=64, max_neighbor_numbers=800): - """Initialize NeighborListUpdate.""" - self.grid_numbers = grid_numbers - self.atom_numbers = atom_numbers - self.refresh_interval = refresh_interval - self.not_first_time = not_first_time - self.cutoff = cutoff - self.skin = skin - self.max_atom_in_grid_numbers = max_atom_in_grid_numbers - self.nxy = nxy - self.excluded_atom_numbers = excluded_atom_numbers - self.cutoff_square = cutoff_square - self.half_skin_square = half_skin_square - self.cutoff_with_skin = cutoff_with_skin - self.half_cutoff_with_skin = half_cutoff_with_skin - self.cutoff_with_skin_square = cutoff_with_skin_square - self.max_neighbor_numbers = max_neighbor_numbers - self.init_prim_io_names( - inputs=['atom_numbers_in_grid_bucket', 'bucket', 'crd', 'box_length', 'grid_n', 'grid_length_inverse', - 'atom_in_grid_serial', 'old_crd', 'crd_to_uint_crd_cof', 'uint_crd', 'gpointer', 'nl_atom_numbers', - 'nl_atom_serial', 'uint_dr_to_dr_cof', 'excluded_list_start', 'excluded_list', 'excluded_numbers', - 'need_refresh_flag', 'refresh_count'], outputs=['res']) - - self.add_prim_attr('grid_numbers', self.grid_numbers) - self.add_prim_attr('atom_numbers', self.atom_numbers) - self.add_prim_attr('refresh_interval', self.refresh_interval) - self.add_prim_attr('not_first_time', self.not_first_time) - self.add_prim_attr('cutoff', self.cutoff) - self.add_prim_attr('skin', self.skin) - self.add_prim_attr('max_atom_in_grid_numbers', self.max_atom_in_grid_numbers) - self.add_prim_attr('nxy', self.nxy) - self.add_prim_attr('excluded_atom_numbers', self.excluded_atom_numbers) - self.add_prim_attr('cutoff_square', self.cutoff_square) - 
self.add_prim_attr('half_skin_square', self.half_skin_square) - self.add_prim_attr('cutoff_with_skin', self.cutoff_with_skin) - self.add_prim_attr('half_cutoff_with_skin', self.half_cutoff_with_skin) - self.add_prim_attr('cutoff_with_skin_square', self.cutoff_with_skin_square) - - def infer_shape(self, atom_numbers_in_grid_bucket_shape, bucket_shape, crd_shape, box_length_shape, grid_n_shape, - grid_length_inverse_shape, atom_in_grid_serial_shape, old_crd_shape, crd_to_uint_crd_cof_shape, - uint_crd_shape, gpointer_shape, nl_atom_numbers_shape, nl_atom_serial_shape, - uint_dr_to_dr_cof_shape, excluded_list_start_shape, excluded_list_shape, excluded_numbers_shape, - need_refresh_flag_shape, refresh_count_shape): - validator.check_int(len(atom_numbers_in_grid_bucket_shape), 1, Rel.EQ, - "atom_numbers_in_grid_bucket_dim", self.name) - validator.check_int(len(bucket_shape), 2, Rel.EQ, "bucket_dim", self.name) - validator.check_int(len(crd_shape), 2, Rel.EQ, "crd_dim", self.name) - validator.check_int(len(box_length_shape), 1, Rel.EQ, "box_length_dim", self.name) - validator.check_int(len(grid_n_shape), 1, Rel.EQ, "grid_n_dim", self.name) - validator.check_int(len(grid_length_inverse_shape), 1, Rel.EQ, "grid_length_inverse_dim", self.name) - validator.check_int(len(atom_in_grid_serial_shape), 1, Rel.EQ, "atom_in_grid_serial_dim", self.name) - validator.check_int(len(old_crd_shape), 2, Rel.EQ, "old_crd_dim", self.name) - validator.check_int(len(crd_to_uint_crd_cof_shape), 1, Rel.EQ, "crd_to_uint_crd_cof_dim", self.name) - validator.check_int(len(uint_crd_shape), 2, Rel.EQ, "uint_crd_dim", self.name) - validator.check_int(len(gpointer_shape), 2, Rel.EQ, "gpointer_dim", self.name) - validator.check_int(len(nl_atom_numbers_shape), 1, Rel.EQ, "nl_atom_numbers_dim", self.name) - validator.check_int(len(nl_atom_serial_shape), 2, Rel.EQ, "nl_atom_serial_dim", self.name) - validator.check_int(len(uint_dr_to_dr_cof_shape), 1, Rel.EQ, "uint_dr_to_dr_cof_dim", self.name) - 
validator.check_int(len(excluded_list_start_shape), 1, Rel.EQ, "excluded_list_start_dim", self.name) - validator.check_int(len(excluded_list_shape), 1, Rel.EQ, "excluded_list_dim", self.name) - validator.check_int(len(excluded_numbers_shape), 1, Rel.EQ, "excluded_numbers_dim", self.name) - validator.check_int(len(need_refresh_flag_shape), 1, Rel.EQ, "need_refresh_flag_dim", self.name) - - validator.check_int(atom_numbers_in_grid_bucket_shape[0], self.grid_numbers, Rel.EQ, - "atom_numbers_in_grid_bucket", self.name) - validator.check_int(bucket_shape[0], self.grid_numbers, Rel.EQ, "bucket", self.name) - validator.check_int(bucket_shape[1], self.max_atom_in_grid_numbers, Rel.EQ, "bucket", self.name) - validator.check_int(crd_shape[0], self.atom_numbers, Rel.EQ, "crd", self.name) - validator.check_int(crd_shape[1], 3, Rel.EQ, "crd", self.name) - validator.check_int(box_length_shape[0], 3, Rel.EQ, "box_length", self.name) - validator.check_int(grid_n_shape[0], 3, Rel.EQ, "grid_n", self.name) - validator.check_int(grid_length_inverse_shape[0], 3, Rel.EQ, "grid_length_inverse", self.name) - validator.check_int(atom_in_grid_serial_shape[0], self.atom_numbers, Rel.EQ, "atom_in_grid_serial", - self.name) - validator.check_int(old_crd_shape[0], self.atom_numbers, Rel.EQ, "old_crd", self.name) - validator.check_int(old_crd_shape[1], 3, Rel.EQ, "old_crd", self.name) - validator.check_int(crd_to_uint_crd_cof_shape[0], 3, Rel.EQ, "crd_to_uint_crd_cof", self.name) - validator.check_int(uint_crd_shape[0], self.atom_numbers, Rel.EQ, "uint_crd", self.name) - validator.check_int(uint_crd_shape[1], 3, Rel.EQ, "uint_crd", self.name) - validator.check_int(gpointer_shape[0], self.grid_numbers, Rel.EQ, "gpointer", self.name) - validator.check_int(gpointer_shape[1], 125, Rel.EQ, "gpointer", self.name) - validator.check_int(nl_atom_numbers_shape[0], self.atom_numbers, Rel.EQ, "nl_atom_numbers", self.name) - validator.check_int(nl_atom_serial_shape[0], self.atom_numbers, Rel.EQ, 
"nl_atom_serial", self.name) - validator.check_int(nl_atom_serial_shape[1], self.max_neighbor_numbers, Rel.EQ, "nl_atom_serial", - self.name) - validator.check_int(uint_dr_to_dr_cof_shape[0], 3, Rel.EQ, "uint_dr_to_dr_cof", self.name) - validator.check_int(excluded_list_start_shape[0], self.atom_numbers, Rel.EQ, "excluded_list_start", - self.name) - validator.check_int(excluded_list_shape[0], self.excluded_atom_numbers, Rel.EQ, "excluded_list", - self.name) - validator.check_int(excluded_numbers_shape[0], self.atom_numbers, Rel.EQ, "excluded_numbers", self.name) - validator.check_int(need_refresh_flag_shape[0], 1, Rel.EQ, "need_refresh_flag", self.name) - - return [1,] - - def infer_dtype(self, atom_numbers_in_grid_bucket_dtype, bucket_dtype, crd_dtype, box_length_dtype, grid_n_dtype, - grid_length_inverse_dtype, atom_in_grid_serial_dtype, old_crd_dtype, crd_to_uint_crd_cof_dtype, - uint_crd_dtype, gpointer_dtype, nl_atom_numbers_dtype, nl_atom_serial_dtype, - uint_dr_to_dr_cof_dtype, excluded_list_start_dtype, excluded_list_dtype, excluded_numbers_dtype, - need_refresh_flag_dtype, refresh_count_dtype): - validator.check_tensor_dtype_valid('atom_numbers_in_grid_bucket', atom_numbers_in_grid_bucket_dtype, - [mstype.int32], self.name) - validator.check_tensor_dtype_valid('bucket', bucket_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('crd', crd_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('box_length', box_length_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('grid_n', grid_n_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('grid_length_inverse', grid_length_inverse_dtype, [mstype.float32], - self.name) - validator.check_tensor_dtype_valid('atom_in_grid_serial', atom_in_grid_serial_dtype, [mstype.int32], - self.name) - validator.check_tensor_dtype_valid('old_crd', old_crd_dtype, [mstype.float32], self.name) - validator.check_tensor_dtype_valid('crd_to_uint_crd_cof', 
crd_to_uint_crd_cof_dtype, [mstype.float32], - self.name) - validator.check_tensor_dtype_valid('uint_crd', uint_crd_dtype, [mstype.uint32], self.name) - validator.check_tensor_dtype_valid('gpointer', gpointer_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('nl_atom_numbers', nl_atom_numbers_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('nl_atom_serial', nl_atom_serial_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('uint_dr_to_dr_cof', uint_dr_to_dr_cof_dtype, [mstype.float32], - self.name) - validator.check_tensor_dtype_valid('excluded_list_start', excluded_list_start_dtype, [mstype.int32], - self.name) - validator.check_tensor_dtype_valid('excluded_list', excluded_list_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('excluded_numbers', excluded_numbers_dtype, [mstype.int32], self.name) - validator.check_tensor_dtype_valid('need_refresh_flag', need_refresh_flag_dtype, [mstype.int32], - self.name) - - return mstype.float32 - - class MDIterationLeapFrogWithRF(PrimitiveWithInfer): """ One step of classical leap frog algorithm to solve the finite difference diff --git a/mindspore/ops/operations/sponge_update_ops.py b/mindspore/ops/operations/sponge_update_ops.py index 85f6b33e848..1c1be718ef5 100644 --- a/mindspore/ops/operations/sponge_update_ops.py +++ b/mindspore/ops/operations/sponge_update_ops.py @@ -998,7 +998,7 @@ class MapCenterOfMass(PrimitiveWithInfer): return mstype.float32 -class NeighborListUpdateNew(PrimitiveWithInfer): +class NeighborListUpdate(PrimitiveWithInfer): """ Update (or construct if first time) the Verlet neighbor list for the calculation of short-ranged force. Assume the number of atoms is n, diff --git a/mindspore/ops/primitive.py b/mindspore/ops/primitive.py index b47752b753e..2c94d657245 100644 --- a/mindspore/ops/primitive.py +++ b/mindspore/ops/primitive.py @@ -101,8 +101,8 @@ class Primitive(Primitive_): value (Any): Attribute value. 
Examples: - >>> import mindspore.ops as P - >>> a = P.Add() + >>> import mindspore.ops as ops + >>> a = ops.Add() >>> a = a.add_prim_attr("attr",1) >>> out = a.attrs["attr"] >>> print(out) @@ -120,8 +120,8 @@ class Primitive(Primitive_): Args: name (str): Attribute Name. Examples: - >>> import mindspore.ops as P - >>> a = P.Add() + >>> import mindspore.ops as ops + >>> a = ops.Add() >>> a = a.add_prim_attr("attr",1) >>> a = a.del_prim_attr("attr") >>> print(a.attrs) @@ -143,8 +143,8 @@ class Primitive(Primitive_): Args: stage (int): The stage id for the current operation. Examples: - >>> from mindspore.ops import operations as P - >>> add = P.Add() + >>> import mindspore.ops as ops + >>> add = ops.Add() >>> print(add.set_stage(0)) Prim[Add] """ @@ -162,8 +162,8 @@ class Primitive(Primitive_): Args: strategy (tuple): Strategy describes the distributed parallel mode of the current primitive. Examples: - >>> from mindspore.ops import operations as P - >>> add = P.Add() + >>> import mindspore.ops as ops + >>> add = ops.Add() >>> print(add.shard(((1, 1), (1, 1)))) Prim[Add] """ @@ -190,8 +190,8 @@ class Primitive(Primitive_): Args: instance_name (str): Instance name of primitive operator set by user. Examples: - >>> import mindspore.ops as P - >>> a = P.Add() + >>> import mindspore.ops as ops + >>> a = ops.Add() >>> a.set_prim_instance_name("add") >>> print(a.instance_name) add @@ -270,8 +270,8 @@ class Primitive(Primitive_): inputs (list[str]): list of inputs names. outputs (list[str]): list of outputs names. Examples: - >>> import mindspore.ops as P - >>> a = P.Add() + >>> import mindspore.ops as ops + >>> a = ops.Add() >>> a.init_prim_io_names(["x","y"],["sum"]) >>> print(a.input_names) ['x','y'] @@ -631,14 +631,14 @@ def constexpr(fn=None, get_instance=True, name=None): >>> def tuple_len(x): ... return len(x) ... 
- >>> tuple_len(a) + >>> print(tuple_len(a)) 2 >>> # make an operator class to calculate tuple len >>> @constexpr(get_instance=False, name="TupleLen") >>> def tuple_len_class(x): ... return len(x) ... - >>> tuple_len_class()(a) + >>> print(tuple_len_class()(a)) 2 """ diff --git a/mindspore/profiler/common/exceptions/error_code.py b/mindspore/profiler/common/exceptions/error_code.py index 0514f52dab2..a14d8cbba1b 100644 --- a/mindspore/profiler/common/exceptions/error_code.py +++ b/mindspore/profiler/common/exceptions/error_code.py @@ -15,7 +15,6 @@ """Profiler error code and messages.""" from enum import unique, Enum - _GENERAL_MASK = 0b00001 << 7 _PARSER_MASK = 0b00010 << 7 _ANALYSER_MASK = 0b00011 << 7 @@ -24,6 +23,7 @@ _ANALYSER_MASK = 0b00011 << 7 class ProfilerMgrErrors(Enum): """Enum definition for profiler errors""" + @unique class ProfilerErrors(ProfilerMgrErrors): """Profiler error codes.""" @@ -53,8 +53,6 @@ class ProfilerErrors(ProfilerMgrErrors): PIPELINE_OP_NOT_EXIST_ERROR = 8 | _ANALYSER_MASK - - @unique class ProfilerErrorMsg(Enum): """Profiler error messages.""" diff --git a/mindspore/profiler/common/exceptions/exceptions.py b/mindspore/profiler/common/exceptions/exceptions.py index d5821d59540..f999fbf8730 100644 --- a/mindspore/profiler/common/exceptions/exceptions.py +++ b/mindspore/profiler/common/exceptions/exceptions.py @@ -46,7 +46,6 @@ class ProfilerException(Exception): self.message = message self.http_code = http_code - @property def error_code(self): """ diff --git a/mindspore/profiler/parser/aicpu_data_parser.py b/mindspore/profiler/parser/aicpu_data_parser.py index 3f6796f66c6..aee9e2a3307 100644 --- a/mindspore/profiler/parser/aicpu_data_parser.py +++ b/mindspore/profiler/parser/aicpu_data_parser.py @@ -45,9 +45,10 @@ class DataPreProcessParser: self._source_file_name = self._get_source_file() self._ms_kernel_flag = 3 self._other_kernel_flag = 6 - self._thread_flag = 7 self._ms_kernel_run_end_index = 2 self._other_kernel_run_end_index 
= 5 + self._dispatch_time_index = 5 + self._total_time_index = 6 self._result_list = [] self._min_cycle_counter = float('inf') @@ -66,10 +67,10 @@ class DataPreProcessParser: def _get_kernel_result(self, number, node_list, thread_list): """Get the profiling data form different aicpu kernel""" try: - if len(node_list) == self._ms_kernel_flag and len(thread_list) == self._thread_flag: + if len(node_list) == self._ms_kernel_flag: node_type_name = node_list[0].split(':')[-1] run_end_index = self._ms_kernel_run_end_index - elif len(node_list) == self._other_kernel_flag and len(thread_list) == self._thread_flag: + elif len(node_list) == self._other_kernel_flag: node_type_name = node_list[0].split(':')[-1].split('/')[-1].split('-')[0] run_end_index = self._other_kernel_run_end_index else: @@ -82,8 +83,8 @@ class DataPreProcessParser: run_start = node_list[1].split(':')[-1].split(' ')[0] run_end = node_list[run_end_index].split(':')[-1].split(' ')[0] exe_time = (float(run_end) - float(run_start)) / self._ms_unit - total_time = float(thread_list[-1].split('=')[-1].split()[0]) / self._ms_unit - dispatch_time = float(thread_list[-2].split('=')[-1].split()[0]) / self._ms_unit + total_time = float(thread_list[self._total_time_index].split('=')[-1].split()[0]) / self._ms_unit + dispatch_time = float(thread_list[self._dispatch_time_index].split('=')[-1].split()[0]) / self._ms_unit return [number, node_type_name, total_time, dispatch_time, exe_time, run_start_counter, run_end_counter] diff --git a/mindspore/profiler/parser/container.py b/mindspore/profiler/parser/container.py index 476545dd6d1..a96e1b365bf 100644 --- a/mindspore/profiler/parser/container.py +++ b/mindspore/profiler/parser/container.py @@ -23,6 +23,7 @@ class HWTSContainer: Args: split_list (list): The split list of metadata in HWTS output file. 
""" + def __init__(self, split_list): self._op_name = '' self._duration = None @@ -79,6 +80,7 @@ class TimelineContainer: Args: split_list (list): The split list of metadata in op_compute output file. """ + def __init__(self, split_list): self._op_name = split_list[0] self._stream_id = str(split_list[1]) @@ -121,6 +123,7 @@ class MemoryGraph: Args: graph_proto (proto): Graph proto, defined in profiler module. """ + def __init__(self, graph_proto): self._graph_proto = graph_proto self.graph_id = graph_proto.graph_id @@ -153,6 +156,7 @@ class MemoryNode: Args: node_proto (proto): Node proto. """ + def __init__(self, node_proto): self._node_proto = node_proto self.node_id = node_proto.node_id @@ -192,6 +196,7 @@ class MemoryTensor: Args: tensor_proto (proto): Tensor proto. """ + def __init__(self, tensor_proto): self._tensor_proto = tensor_proto self.tensor_id = tensor_proto.tensor_id diff --git a/mindspore/profiler/parser/flops_parser.py b/mindspore/profiler/parser/flops_parser.py index 3d9f3b2441c..f779ba8678d 100644 --- a/mindspore/profiler/parser/flops_parser.py +++ b/mindspore/profiler/parser/flops_parser.py @@ -83,6 +83,10 @@ class FlopsParser: op_avg_time = op_avg_time_dict[op_name] # Time unit of op_avg_time is ms. # The unit of gflop_per_second is GFLOPS(1e9). + if float(op_avg_time) == 0.0: + raise ValueError("All operators take 0 ms.") + if peak_flops == 0: + raise ValueError("The frequency of an operator is 0.") gflop_per_second = task_fops / float(op_avg_time) flops_utilization = (gflop_per_second * 1e9 / peak_flops) * 100 self._flops_summary['FLOPs'] += task_fops @@ -170,9 +174,9 @@ class FlopsParser: # These formula is provided by HISI profiling. # a cube_fp16 instruction has (16**3)*2 float point operation. # a cube_fp16 instruction has 16*16*32*2 float point operation. 
- cube_fops = cube_fp16_exec*(16**3)*2 + cube_int8_exec*16*16*32*2 - vec_fops = vec_fp32*32 + vec_fp16_128lane_exec*128 + \ - vec_fp16_64lane_exec*64 + vec_int32_exec*64 + vec_misc_exec*32 + cube_fops = cube_fp16_exec * (16 ** 3) * 2 + cube_int8_exec * 16 * 16 * 32 * 2 + vec_fops = vec_fp32 * 32 + vec_fp16_128lane_exec * 128 + \ + vec_fp16_64lane_exec * 64 + vec_int32_exec * 64 + vec_misc_exec * 32 task_fops = cube_fops + vec_fops return task_fops @@ -231,14 +235,14 @@ class FlopsParser: suffix_name = "(recompute_Gradients)" else: suffix_name = f"({top_level_scope})" - scope_list = list(map(lambda x: x+suffix_name, scope_list)) + scope_list = list(map(lambda x: x + suffix_name, scope_list)) scope_list[0] = top_level_scope # Add root node (refers to total flops). scope_list.insert(0, "Total") scope_depth = len(scope_list) for idx in range(scope_depth - 1): - key_name = scope_list[idx] + " " + scope_list[idx+1] + key_name = scope_list[idx] + " " + scope_list[idx + 1] self._flops_each_scope.setdefault(key_name, 0) self._flops_each_scope[key_name] += task_fops diff --git a/mindspore/profiler/parser/hccl_parser.py b/mindspore/profiler/parser/hccl_parser.py index 3077d04681d..d83db58271d 100644 --- a/mindspore/profiler/parser/hccl_parser.py +++ b/mindspore/profiler/parser/hccl_parser.py @@ -157,7 +157,7 @@ class HcclParser: csv_reader = csv.reader(src_file) # index_0:step_num, index_1:start_point, index_2:end_point # The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100. 
- step_timestamps_info = [[info[0], float(info[1])/100, float(info[2])/100] + step_timestamps_info = [[info[0], float(info[1]) / 100, float(info[2]) / 100] for info in csv_reader if info[0].isdigit()] return step_timestamps_info @@ -219,6 +219,7 @@ class HcclParser: def _calculate_communication_operator_iter_cost(self, file_path): """Calculate the time-consuming of communication operator in one execution round.""" + def _inner_calculate_communication_operator_iter_cost(events): total_notify_wait = self._calculate_notify_wait_time(events) # Divide information by src dst rank_id. @@ -362,7 +363,7 @@ class HcclParser: rdma_communication_size = 0 rdma_communication_wait_time = 0 start_index = 0 - end_index = len(trace_event)-1 + end_index = len(trace_event) - 1 while start_index < end_index: first_task_type = trace_event[start_index].get("args").get("task type") if first_task_type == CommunicationInfo.RDMASEND.value and start_index < end_index - 1: @@ -386,10 +387,10 @@ class HcclParser: # The unit of rdma_communication_wait_time is ms. # The unit of rdma_bandwidth is KB/s. # The unit of rdma_communication_size is k_byte and The unit of rdma_communication_time is ms. - rdma_communication_wait_time = rdma_communication_wait_time/1e3 - rdma_communication_size = rdma_communication_size/1e3 - rdma_communication_time = rdma_communication_time/1e3 - rdma_bandwidth = rdma_communication_size/(rdma_communication_time/1e3) \ + rdma_communication_wait_time = rdma_communication_wait_time / 1e3 + rdma_communication_size = rdma_communication_size / 1e3 + rdma_communication_time = rdma_communication_time / 1e3 + rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \ if rdma_communication_size else 0 return [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time] @@ -413,9 +414,9 @@ class HcclParser: # The unit of sdma_bandwidth is KB/s. 
# The unit of sdma_communication_size is k_byte and The unit of sdma_communication_time is ms. - sdma_communication_time = sdma_communication_time/1e3 - sdma_communication_size = sdma_communication_size/1e3 - sdma_bandwidth = sdma_communication_size/(sdma_communication_time/1e3) \ + sdma_communication_time = sdma_communication_time / 1e3 + sdma_communication_size = sdma_communication_size / 1e3 + sdma_bandwidth = sdma_communication_size / (sdma_communication_time / 1e3) \ if sdma_communication_size else 0 return [sdma_communication_time, sdma_communication_size, sdma_bandwidth] @@ -427,7 +428,7 @@ class HcclParser: if task_type == CommunicationInfo.NOTIFY_WAIT.value: total_notify_wait_time += item.get("dur", 0) # The unit of total_notify_wait_time is ms. - total_notify_wait_time = total_notify_wait_time/1e3 + total_notify_wait_time = total_notify_wait_time / 1e3 return total_notify_wait_time def _calculate_communication_average_value(self, communication_info: list): @@ -436,8 +437,8 @@ class HcclParser: if communication_info_size == 0: return [] # index1: communication_cost,index2:wait_cost,index3:link_info - communication_cost_average = sum([i[1] for i in communication_info])/communication_info_size - wait_cost_average = sum([i[2] for i in communication_info])/communication_info_size + communication_cost_average = sum([i[1] for i in communication_info]) / communication_info_size + wait_cost_average = sum([i[2] for i in communication_info]) / communication_info_size link_info = [i[3] for i in communication_info] calculate_type = 'average' link_average_info = self._calculate_link_value(link_info, calculate_type) diff --git a/mindspore/profiler/parser/hwts_log_parser.py b/mindspore/profiler/parser/hwts_log_parser.py index ff140ec8e3a..76a3471e6b7 100644 --- a/mindspore/profiler/parser/hwts_log_parser.py +++ b/mindspore/profiler/parser/hwts_log_parser.py @@ -20,6 +20,7 @@ from mindspore import log as logger from mindspore.profiler.common.validator.validate_path import 
\ validate_and_normalize_path + class HWTSLogParser: """ The Parser for hwts log files. @@ -112,8 +113,8 @@ class HWTSLogParser: if int(task_id) < 25000: task_id = str(stream_id) + "_" + str(task_id) - result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" %(log_type[int(ms_type, 2)], cnt, core_id, - blk_id, task_id, syscnt, stream_id)) + result_data += ("%-14s %-4s %-8s %-9s %-8s %-15s %s\n" % (log_type[int(ms_type, 2)], cnt, core_id, + blk_id, task_id, syscnt, stream_id)) fwrite_format(self._output_filename, data_source=self._dst_file_title, is_start=True) fwrite_format(self._output_filename, data_source=self._dst_file_column_title) diff --git a/mindspore/profiler/parser/integrator.py b/mindspore/profiler/parser/integrator.py index 472441a254a..916e36a4501 100644 --- a/mindspore/profiler/parser/integrator.py +++ b/mindspore/profiler/parser/integrator.py @@ -113,6 +113,8 @@ class Integrator: op_type_time_cache[op_type][0] += op_time op_type_time_cache[op_type][1] += 1 + if self._total_time == 0: + raise ValueError("The total time of operations can not be 0.") op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv' op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name) with open(op_type_file_path, 'w') as type_file: @@ -1059,6 +1061,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator): framework_info (dict): The framework metadata. aicpu_info (dict): The metadata of AI CPU operator. min_cycle_counter (float): The minimum cycle counter of the timeline. + source_path (str): The source of file. 
""" if min_cycle_counter == float('inf'): min_cycle_counter = 0 diff --git a/mindspore/profiler/parser/memory_usage_parser.py b/mindspore/profiler/parser/memory_usage_parser.py index cd68a0de3db..2dccb77ad73 100644 --- a/mindspore/profiler/parser/memory_usage_parser.py +++ b/mindspore/profiler/parser/memory_usage_parser.py @@ -34,6 +34,7 @@ GIGABYTES = 1024 * 1024 * 1024 class MemoryUsageParser: """MemoryUsageParser to parse memory raw data.""" + def __init__(self, profiling_dir, device_id): self._profiling_dir = profiling_dir self._device_id = device_id @@ -163,6 +164,7 @@ class MemoryUsageParser: class GraphMemoryParser: """Parse memory usage data for each graph.""" + def __init__(self, graph_proto, points, framework): self.graph = None self.nodes = OrderedDict() @@ -238,7 +240,7 @@ class GraphMemoryParser: if index == 0: node.mem_change = self._mem_change[index] - self.graph.static_mem else: - node.mem_change = self._mem_change[index] - self._mem_change[index-1] + node.mem_change = self._mem_change[index] - self._mem_change[index - 1] self._update_nodes(node) self._update_tensor_source(node) @@ -308,7 +310,7 @@ class GraphMemoryParser: elif life_long == 'LifeLongGraphStart': # lifetime is from graph start to tensor end if life_end is not None and life_end >= 0: tensor.life_start = 0 - self._update_mem_change(size, 0, life_end+1, tensor_id) + self._update_mem_change(size, 0, life_end + 1, tensor_id) else: logger.info('Cannot locate lifetime end for tensor: %s', tensor_id) elif life_long == 'LifeLongGraphEnd': # lifetime is from tensor start to graph end @@ -319,7 +321,7 @@ class GraphMemoryParser: logger.info('Cannot locate lifetime start for tensor: %s', tensor_id) elif life_long == 'LifeLongNone': # lifetime is from tensor start to tensor end if life_start is not None and life_end is not None and life_start <= life_end: - self._update_mem_change(size, life_start, life_end+1, tensor_id) + self._update_mem_change(size, life_start, life_end + 1, tensor_id) else: 
logger.info('Cannot locate lifetime start or end for tensor: %s', tensor_id) diff --git a/mindspore/profiler/parser/minddata_analyzer.py b/mindspore/profiler/parser/minddata_analyzer.py index 96352c80fe4..34390da9882 100644 --- a/mindspore/profiler/parser/minddata_analyzer.py +++ b/mindspore/profiler/parser/minddata_analyzer.py @@ -304,6 +304,8 @@ class MinddataProfilingAnalyzer: if metrics and metrics['output_queue']: queue_size = metrics['output_queue']['size'] queue_length = metrics['output_queue']['length'] + if queue_length == 0: + raise ValueError("The input queue can not be None.") queue_average_size = round(sum(queue_size) / len(queue_size), 2) if queue_size else -1 queue_utilization_pct = round(100 * queue_average_size / queue_length, 2) # Compute percentage of time queue is empty diff --git a/mindspore/profiler/parser/minddata_parser.py b/mindspore/profiler/parser/minddata_parser.py index a200acc9bcf..805ac5f9906 100644 --- a/mindspore/profiler/parser/minddata_parser.py +++ b/mindspore/profiler/parser/minddata_parser.py @@ -20,8 +20,10 @@ from mindspore import log as logger from mindspore.profiler.common.validator.validate_path import \ validate_and_normalize_path + class MinddataParser: """Minddata Aicpu Parser.""" + @staticmethod def parse_minddata_aicpu_data(minddata_aicpu_source_path): """ diff --git a/mindspore/profiler/parser/minddata_pipeline_parser.py b/mindspore/profiler/parser/minddata_pipeline_parser.py index d73bfd7c115..94999dbec46 100644 --- a/mindspore/profiler/parser/minddata_pipeline_parser.py +++ b/mindspore/profiler/parser/minddata_pipeline_parser.py @@ -262,8 +262,12 @@ class MinddataPipelineParser: output_queue = metrics.get('output_queue') if output_queue: queue_size = output_queue.get('size') + if queue_size is None: + raise ValueError("The queue can not be None.") queue_average_size = sum(queue_size) / len(queue_size) queue_length = output_queue.get('length') + if queue_length == 0: + raise ValueError("The length of queue can not 
be 0.") queue_usage_rate = queue_average_size / queue_length children_id = op_node.get('children') diff --git a/mindspore/profiler/parser/optime_parser.py b/mindspore/profiler/parser/optime_parser.py index 2725d7cc154..bedf25a398a 100644 --- a/mindspore/profiler/parser/optime_parser.py +++ b/mindspore/profiler/parser/optime_parser.py @@ -24,6 +24,7 @@ from mindspore.profiler.parser.container import HWTSContainer TIMELINE_FILE_COLUMN_TITLE = 'op_name, stream_id, start_time(ms), duration(ms)' + class OPComputeTimeParser: """ Join hwts info and framework info, get op time info, and output to the result file. @@ -102,10 +103,12 @@ class OPComputeTimeParser: for op_name, time in op_name_time_dict.items(): if op_name in op_name_stream_dict.keys(): stream_id = op_name_stream_dict[op_name] + if op_name_count_dict[op_name] == 0: + raise ValueError("The number of operations can not be 0.") avg_time = time / op_name_count_dict[op_name] total_time += avg_time - result_data += ("%s %s %s\n" %(op_name, str(avg_time), stream_id)) - result_data += ("total op %s 0" %(str(total_time))) + result_data += ("%s %s %s\n" % (op_name, str(avg_time), stream_id)) + result_data += ("total op %s 0" % (str(total_time))) timeline_data = [] for op_name, time in op_name_time_dict.items(): @@ -146,8 +149,8 @@ class OPComputeTimeParser: Args: timeline_data (list): The metadata to be written into the file. [ - ['op_name_1', 'stream_id_1', 'start_time_1', 'durarion_1'], - ['op_name_2', 'stream_id_2', 'start_time_2', 'durarion_2'], + ['op_name_1', 'stream_id_1', 'start_time_1', 'duration_1'], + ['op_name_2', 'stream_id_2', 'start_time_2', 'duration_2'], [...] 
] """ diff --git a/mindspore/profiler/parser/step_trace_parser.py b/mindspore/profiler/parser/step_trace_parser.py index f1755ba3dd7..185b84779fc 100644 --- a/mindspore/profiler/parser/step_trace_parser.py +++ b/mindspore/profiler/parser/step_trace_parser.py @@ -348,12 +348,12 @@ class BaseStepTraceParser: csv_writer = csv.writer(file_handle) if not self._is_training_mode: self._header[FP_DURATION] = 'fp' - self._header = self._header[:BP_POINT] + self._header[BP_POINT+1:TAIL] + self._header = self._header[:BP_POINT] + self._header[BP_POINT + 1:TAIL] csv_writer.writerow(self._header) for row_data in self._result: if not self._is_training_mode: row_data[FP_DURATION] += row_data[TAIL] - row_data = row_data[:BP_POINT] + row_data[BP_POINT+1:TAIL] + row_data = row_data[:BP_POINT] + row_data[BP_POINT + 1:TAIL] csv_writer.writerow(row_data) os.chmod(self._output_path, stat.S_IREAD | stat.S_IWRITE) except (IOError, OSError) as err: diff --git a/mindspore/profiler/profiling.py b/mindspore/profiler/profiling.py index 7442fb9eac1..7f8474ddb03 100644 --- a/mindspore/profiler/profiling.py +++ b/mindspore/profiler/profiling.py @@ -47,12 +47,14 @@ from mindspore.nn.cell import Cell INIT_OP_NAME = 'Default/InitDataSetQueue' + class ProfileOption(Enum): """ Profile Option Enum which be used in Profiler.profile. """ trainable_parameters = 0 + class Profiler: """ Performance profiling API. @@ -67,9 +69,9 @@ class Profiler: and analysed,will deal with all op if null; Different op types should be separated by comma. ascend_job_id (str): (Ascend only) The directory where the profiling files to be parsed are located; This parameter is used to support offline parsing. - profile_communication(bool): Whether to collect communication performance data, collect when True. - Default is False. - profile_memory(bool): Whether to collect tensor memory data, collect when True.Default is False. 
+ profile_communication (bool): Whether to collect communication performance data in a multi devices training. + collect when True. Default is False. Setting this parameter has no effect during single device training. + profile_memory (bool): Whether to collect tensor memory data, collect when True.Default is False. Examples: >>> import numpy as np @@ -145,29 +147,7 @@ class Profiler: if kwargs: logger.warning("Params not be supported yet on GPU.") elif self._device_target and self._device_target == "Ascend": - optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable") - if not isinstance(optypes_not_deal, str): - raise TypeError("The parameter optypes_not_deal must be str.") - job_dir = kwargs.pop("ascend_job_id", "") - if job_dir: - job_dir = validate_and_normalize_path(job_dir) - if not os.path.exists(job_dir): - msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir" - logger.error(msg) - raise ValueError(msg) - self._output_path, _ = os.path.split(job_dir) - self._profile_communication = kwargs.pop("profile_communication", False) - if not isinstance(self._profile_communication, bool): - raise TypeError("The parameter profile_communication must be bool.") - if self._profile_communication: - hccl_option = {"output": self._output_path, "task_trace": "on"} - os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option) - self._profile_memory = kwargs.pop("profile_memory", False) - if not isinstance(self._profile_memory, bool): - raise TypeError("The parameter profile_memory must be bool") - if kwargs: - logger.warning("There are invalid params which don't work.") - + self._parse_parameter_for_ascend(**kwargs) os.environ['DEVICE_ID'] = self._dev_id profiling_options = json.dumps(self._construct_profiling_options()) @@ -185,7 +165,6 @@ class Profiler: if not os.path.exists(data_path): os.makedirs(data_path, exist_ok=True) - self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] # add job id env through user 
input later self._job_id_env = 0 self._start_time = int(time.time() * 10000000) @@ -211,10 +190,46 @@ class Profiler: "aic_metrics": "PipeUtilization", "aicpu": "on", "profile_memory": profile_memory - } + } return profiling_options + def _parse_parameter_for_ascend(self, **kwargs): + """Parse parameter in Proflier when the device target is Ascend.""" + optypes_not_deal = kwargs.pop("optypes_not_deal", "Variable") + if not isinstance(optypes_not_deal, str): + raise TypeError("The parameter optypes_not_deal must be str.") + self._filt_optype_names = optypes_not_deal.split(",") if optypes_not_deal else [] + job_dir = kwargs.pop("ascend_job_id", "") + if job_dir: + job_dir = validate_and_normalize_path(job_dir) + if not os.path.exists(job_dir): + msg = f"Invalid ascend_job_id: {job_dir}, Please pass the absolute path of the JOB dir" + logger.error(msg) + raise ValueError(msg) + self._output_path, _ = os.path.split(job_dir) + + env_rank_id = os.getenv("RANK_ID") + env_table_file = os.getenv("RANK_TABLE_FILE") + env_hccl_path = os.getenv("MINDSPORE_HCCL_CONFIG_PATH") + # Determine whether it is multi card training. 
+ if env_rank_id and (env_table_file or env_hccl_path): + self._profile_communication = kwargs.pop("profile_communication", False) + if "profile_communication" in kwargs: + kwargs.pop("profile_communication") + logger.warning("The profile_communication parameter is invalid in single device training " + " which doesn't work.") + if not isinstance(self._profile_communication, bool): + raise TypeError("The parameter profile_communication must be bool.") + if self._profile_communication: + hccl_option = {"output": self._output_path, "task_trace": "on"} + os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option) + self._profile_memory = kwargs.pop("profile_memory", False) + if not isinstance(self._profile_memory, bool): + raise TypeError("The parameter profile_memory must be bool") + if kwargs: + logger.warning("There are invalid params which don't work.") + def analyse(self): """ Collect and analyse performance data, called after training or during training. The example shows above. @@ -539,7 +554,7 @@ class Profiler: for line in f.readlines(): if "clock_realtime" in line: # 16 means the first digit of the timestamp, len(line)-3 means the last. - job_start_time = line[16:len(line)-3] + job_start_time = line[16:len(line) - 3] return job_start_time @@ -651,7 +666,7 @@ class Profiler: return select_time - if "output_path" not in kwargs or kwargs.get("output_path") is None: + if kwargs.get("output_path") is None: if "output_path" in kwargs: kwargs.pop("output_path") # Environment variables are mainly set for the convenience of cloud profiler. @@ -684,6 +699,9 @@ class Profiler: if not os.path.exists(hccl_path): os.makedirs(hccl_path, exist_ok=True) os.chmod(hccl_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) + logger.info("Start call the interface HCCLParseOP parsing hccl info...") + logger.info('Warm Prompt: It could take a few minutes if you are training ' + 'with a complex network or more than 10 steps.') # Call the interface HCCLParseOP parsing hccl info. 
try: from hccl_parser.entry import hccl_parse_op @@ -693,11 +711,14 @@ class Profiler: "The hccl_parser-{version}-py3-none-any.whl package is usually located " "in the /usr/local/Ascend/tools Directory", err) raise ImportError(err) + logger.info("Parse hccl info successfully.") + logger.info("Start analyse hccl info.") hccl_parse = HcclParser(hccl_path, self._dev_id, self._output_path) hccl_parse.parse() + logger.info("Analyse hccl info successfully.") @staticmethod - def profile(network=None, profile_option=None): + def profile(network, profile_option): """ Get the number of trainable parameters in the training network. diff --git a/mindspore/run_check/_check_version.py b/mindspore/run_check/_check_version.py index 69d2df67750..fa52264ac12 100644 --- a/mindspore/run_check/_check_version.py +++ b/mindspore/run_check/_check_version.py @@ -207,7 +207,7 @@ class AscendEnvChecker(EnvChecker): """ascend environment check""" def __init__(self): - self.version = ["1.78.23.3.230"] + self.version = ["1.79.T10.0.B100"] atlas_nnae_version = "/usr/local/Ascend/nnae/latest/fwkacllib/version.info" atlas_toolkit_version = "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/version.info" hisi_fwk_version = "/usr/local/Ascend/fwkacllib/version.info" diff --git a/mindspore/train/callback/_lr_scheduler_callback.py b/mindspore/train/callback/_lr_scheduler_callback.py index 2d9c095cfb0..536b5c2202a 100644 --- a/mindspore/train/callback/_lr_scheduler_callback.py +++ b/mindspore/train/callback/_lr_scheduler_callback.py @@ -32,9 +32,9 @@ class LearningRateScheduler(Callback): learning_rate_function (Function): The function about how to change the learning rate during training. Examples: + >>> from mindspore import Model >>> from mindspore.train.callback import LearningRateScheduler >>> import mindspore.nn as nn - >>> from mindspore.train import Model ... >>> def learning_rate_function(lr, cur_step_num): ... 
if cur_step_num%1000 == 0: diff --git a/mindspore/train/callback/_summary_collector.py b/mindspore/train/callback/_summary_collector.py index 8ba5ee457e3..779d30a4f28 100644 --- a/mindspore/train/callback/_summary_collector.py +++ b/mindspore/train/callback/_summary_collector.py @@ -150,8 +150,8 @@ class SummaryCollector(Callback): >>> import mindspore.nn as nn >>> from mindspore import context >>> from mindspore.train.callback import SummaryCollector - >>> from mindspore.train import Model - >>> from mindspore.nn.metrics import Accuracy + >>> from mindspore import Model + >>> from mindspore.nn import Accuracy >>> >>> if __name__ == '__main__': ... # If the device_target is GPU, set the device_target to "GPU" diff --git a/mindspore/train/loss_scale_manager.py b/mindspore/train/loss_scale_manager.py index 02a134fd590..501aebb5c1c 100644 --- a/mindspore/train/loss_scale_manager.py +++ b/mindspore/train/loss_scale_manager.py @@ -115,8 +115,7 @@ class DynamicLossScaleManager(LossScaleManager): scale_window (int): Maximum continuous normal steps when there is no overflow. Default: 2000. 
Examples: - >>> from mindspore import Model, nn - >>> from mindspore.train.loss_scale_manager import DynamicLossScaleManager + >>> from mindspore import Model, nn, DynamicLossScaleManager >>> >>> net = Net() >>> loss_scale_manager = DynamicLossScaleManager() diff --git a/mindspore/train/model.py b/mindspore/train/model.py index d87ec722425..23412cd1f5d 100644 --- a/mindspore/train/model.py +++ b/mindspore/train/model.py @@ -274,6 +274,8 @@ class Model: def _update_metrics(self, outputs): """Update metrics local values.""" + if isinstance(outputs, Tensor): + outputs = (outputs,) if not isinstance(outputs, tuple): raise ValueError("The `outputs` is not tuple.") @@ -365,6 +367,8 @@ class Model: dataset_sink_mode=True, sink_size=sink_size) self._train_network = train_network + if context.get_auto_parallel_context("pipeline_stages") > 1 and valid_dataset: + self._train_network.add_flags_recursive(is_first_iteration=True) for inputs in train_dataset_helper: self._train_network.compile(*inputs) break @@ -378,6 +382,8 @@ class Model: dataset=valid_dataset, dataset_sink_mode=True) self._eval_network = eval_network + if context.get_auto_parallel_context("pipeline_stages") > 1: + self._eval_network.add_flags_recursive(is_first_iteration=False) for inputs in valid_dataset_helper: self._eval_network.compile(*inputs) break @@ -615,8 +621,7 @@ class Model: Default: -1. Examples: - >>> from mindspore import Model, nn - >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager + >>> from mindspore import Model, nn, FixedLossScaleManager >>> >>> # For details about how to build the dataset, please refer to the tutorial >>> # document on the official website. @@ -872,10 +877,9 @@ class Model: >>> # mindspore.cn. 
>>> import numpy as np >>> import mindspore as ms - >>> from mindspore import Model, context, Tensor, nn + >>> from mindspore import Model, context, Tensor, nn, FixedLossScaleManager >>> from mindspore.context import ParallelMode >>> from mindspore.communication import init - >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager >>> >>> context.set_context(mode=context.GRAPH_MODE) >>> init() diff --git a/mindspore/train/serialization.py b/mindspore/train/serialization.py index 1e4c96c7b1e..dea204c29ff 100644 --- a/mindspore/train/serialization.py +++ b/mindspore/train/serialization.py @@ -27,7 +27,7 @@ from collections import defaultdict import numpy as np import mindspore.nn as nn -import mindspore.context as context +from mindspore import context from mindspore import log as logger from mindspore.train.checkpoint_pb2 import Checkpoint from mindspore.train.print_pb2 import Print @@ -275,8 +275,6 @@ def save_checkpoint(save_obj, ckpt_file_name, integrated_save=True, data = param["data"].asnumpy().reshape(-1) data_list[key].append(data) - if not isinstance(ckpt_file_name, str): - raise ValueError("The ckpt_file_name must be string.") ckpt_file_name = os.path.realpath(ckpt_file_name) if async_save: thr = Thread(target=_exec_save, args=(ckpt_file_name, data_list, enc_key, enc_mode), name="asyn_save_ckpt") @@ -331,8 +329,7 @@ def load(file_name, **kwargs): Examples: >>> import numpy as np >>> import mindspore.nn as nn - >>> from mindspore import Tensor - >>> from mindspore.train import export, load + >>> from mindspore import Tensor, export, load >>> >>> net = nn.Conv2d(1, 1, kernel_size=3, weight_init="ones") >>> input = Tensor(np.ones([1, 1, 3, 3]).astype(np.float32)) @@ -602,8 +599,6 @@ def _save_graph(network, file_name): """ logger.info("Execute the process of saving graph.") - if not isinstance(file_name, str): - raise ValueError("The ckpt_file_name must be string.") file_name = os.path.realpath(file_name) graph_pb = 
network.get_func_graph_proto() if graph_pb: @@ -719,7 +714,7 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): Default: 127.5. - std_dev (float): The variance of input data after preprocessing, used for quantizing the first layer of network. Default: 127.5. - - enc_key (str): Byte type key used for encryption. Tha valid length is 16, 24, or 32. + - enc_key (byte): Byte type key used for encryption. Tha valid length is 16, 24, or 32. - enc_mode (str): Specifies the encryption mode, take effect when enc_key is set. Option: 'AES-GCM' | 'AES-CBC'. Default: 'AES-GCM'. @@ -733,11 +728,8 @@ def export(net, *inputs, file_name, file_format='AIR', **kwargs): """ logger.info("exporting model file:%s format:%s.", file_name, file_format) check_input_data(*inputs, data_class=Tensor) - if not isinstance(file_name, str): - raise ValueError("Args file_name {} must be string, please check it".format(file_name)) - file_name = os.path.realpath(file_name) - Validator.check_file_name_by_regular(file_name) + file_name = os.path.realpath(file_name) net = _quant_export(net, *inputs, file_format=file_format, **kwargs) if 'enc_key' in kwargs.keys(): if file_format != 'MINDIR': @@ -1199,9 +1191,8 @@ def merge_sliced_parameter(sliced_parameters, strategy=None): Examples: >>> import numpy as np - >>> from mindspore import Tensor + >>> from mindspore import Tensor, merge_sliced_parameter >>> from mindspore.common.parameter import Parameter - >>> from mindspore.train import merge_sliced_parameter >>> >>> sliced_parameters = [ ... 
Parameter(Tensor(np.array([0.00023915, 0.00013939, -0.00098059])), diff --git a/mindspore/train/train_thor/convert_utils.py b/mindspore/train/train_thor/convert_utils.py index 34d6166e450..7ce34a94b95 100644 --- a/mindspore/train/train_thor/convert_utils.py +++ b/mindspore/train/train_thor/convert_utils.py @@ -195,7 +195,7 @@ class ConvertModelUtils(): Examples: >>> from mindspore.nn.optim import thor >>> from mindspore.train.model import Model - >>> from mindspore.train.loss_scale_manager import FixedLossScaleManager + >>> from mindspore import FixedLossScaleManager >>> >>> net = Net() >>> loss_manager = FixedLossScaleManager(128, drop_overflow_update=False) diff --git a/model_zoo/official/cv/centerface/src/centerface.py b/model_zoo/official/cv/centerface/src/centerface.py index aae19169a39..b023ada4c6c 100644 --- a/model_zoo/official/cv/centerface/src/centerface.py +++ b/model_zoo/official/cv/centerface/src/centerface.py @@ -310,8 +310,8 @@ class TrainingWrapper(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - ret = (loss, cond, sens) - return F.depend(ret, self.optimizer(grads)) + self.optimizer(grads) + return (loss, cond, sens) class CenterFaceWithNms(nn.Cell): diff --git a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py index 3e46d30db0f..60af01aae9f 100644 --- a/model_zoo/official/cv/cnnctc/src/cnn_ctc.py +++ b/model_zoo/official/cv/cnnctc/src/cnn_ctc.py @@ -135,10 +135,8 @@ class CNNCTCTrainOneStepWithLossScaleCell(nn.Cell): #apply grad reducer on grads grads = self.grad_reducer(grads) - success = self.optimizer(grads) - - ret = (loss, scaling_sens) - return F.depend(ret, success) + self.optimizer(grads) + return (loss, scaling_sens) class CNNCTC_Model(nn.Cell): diff --git a/model_zoo/official/cv/crnn/src/crnn_for_train.py b/model_zoo/official/cv/crnn/src/crnn_for_train.py index fad288c36f4..90a3d83e659 100644 --- a/model_zoo/official/cv/crnn/src/crnn_for_train.py +++ 
b/model_zoo/official/cv/crnn/src/crnn_for_train.py @@ -108,4 +108,5 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py index 172867b4b1b..1871eb65f58 100755 --- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py +++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/attention_ocr.py @@ -184,4 +184,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py index 4bd4dc7a951..3c8b2be5e13 100755 --- a/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py +++ b/model_zoo/official/cv/crnn_seq2seq_ocr/src/seq2seq.py @@ -109,7 +109,7 @@ class AttnDecoderRNN(nn.Cell): output = self.relu(output) gru_hidden = self.squeeze1(hidden) - output, hidden, _, _, _, _ = self.gru(output, gru_hidden) + output, hidden = self.gru(output, gru_hidden) output = self.squeeze1(output) output = self.log_softmax(self.out(output)) diff --git a/model_zoo/official/cv/ctpn/default_config.yaml b/model_zoo/official/cv/ctpn/default_config.yaml index 8a0fc80f31a..40958e477d8 100644 --- a/model_zoo/official/cv/ctpn/default_config.yaml +++ b/model_zoo/official/cv/ctpn/default_config.yaml @@ -114,13 +114,13 @@ pretraining_dataset_file: "" finetune_dataset_file: "" # pretrain lr -pre_base_lr: 0.0009 +pre_base_lr: 0.009 pre_warmup_step: 30000 pre_warmup_ratio: 1/3 pre_total_epoch: 100 # finetune lr -fine_base_lr: 0.0005 +fine_base_lr: 0.005 fine_warmup_step: 300 fine_warmup_ratio: 1/3 fine_total_epoch: 50 diff --git 
a/model_zoo/official/cv/ctpn/src/ctpn.py b/model_zoo/official/cv/ctpn/src/ctpn.py index f764a5e4b65..1f1e2826a43 100644 --- a/model_zoo/official/cv/ctpn/src/ctpn.py +++ b/model_zoo/official/cv/ctpn/src/ctpn.py @@ -92,8 +92,8 @@ class CTPN(nn.Cell): self.num_step = config.num_step self.input_size = config.input_size self.hidden_size = config.hidden_size - self.vgg16_feature_extractor = VGG16FeatureExtraction() - self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same') + self.vgg16_feature_extractor = VGG16FeatureExtraction().to_float(mstype.float16) + self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same').to_float(mstype.float16) self.rnn = BiLSTM(self.config, batch_size=self.batch_size).to_float(mstype.float16) self.reshape = P.Reshape() self.transpose = P.Transpose() diff --git a/model_zoo/official/cv/ctpn/src/network_define.py b/model_zoo/official/cv/ctpn/src/network_define.py index e1458bdbac0..c95fbabdaf6 100644 --- a/model_zoo/official/cv/ctpn/src/network_define.py +++ b/model_zoo/official/cv/ctpn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -140,4 +139,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, gt_bbox, gt_label, gt_num, img_shape, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/deeptext/src/network_define.py b/model_zoo/official/cv/deeptext/src/network_define.py index 2fcd9bb6c44..0895741001b 100644 --- a/model_zoo/official/cv/deeptext/src/network_define.py +++ b/model_zoo/official/cv/deeptext/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn 
from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -150,4 +149,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/faster_rcnn/default_config.yaml b/model_zoo/official/cv/faster_rcnn/default_config.yaml index a1fc08caf1a..a6ca4fbe2bc 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config.yaml @@ -124,7 +124,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 20 +keep_checkpoint_max: 5 save_checkpoint_path: "./" # Number of threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml index b6a16195514..c06337dada2 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config_101.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config_101.yaml @@ -125,7 +125,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 20 +keep_checkpoint_max: 5 save_checkpoint_path: "./" # Number of threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml index d2755194040..896c0b02fc5 100644 --- a/model_zoo/official/cv/faster_rcnn/default_config_152.yaml +++ b/model_zoo/official/cv/faster_rcnn/default_config_152.yaml @@ -125,7 +125,7 @@ weight_decay: 0.00001 epoch_size: 20 save_checkpoint: True save_checkpoint_epochs: 1 -keep_checkpoint_max: 20 
+keep_checkpoint_max: 5 save_checkpoint_path: "./" # Number of threads used to process the dataset in parallel diff --git a/model_zoo/official/cv/faster_rcnn/src/network_define.py b/model_zoo/official/cv/faster_rcnn/src/network_define.py index 531cd32c6e5..4219667f84e 100644 --- a/model_zoo/official/cv/faster_rcnn/src/network_define.py +++ b/model_zoo/official/cv/faster_rcnn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -147,4 +146,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/maskrcnn/src/network_define.py b/model_zoo/official/cv/maskrcnn/src/network_define.py index 662cd99cefb..2269c23db49 100644 --- a/model_zoo/official/cv/maskrcnn/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn/src/network_define.py @@ -18,7 +18,6 @@ import time import numpy as np import mindspore.nn as nn from mindspore.common.tensor import Tensor -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.train.callback import Callback @@ -146,5 +145,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, gt_mask, self.sens) if self.reduce_flag: grads = self.grad_reducer(grads) - - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py index 
7825a19ebcc..4c5b4a89b45 100644 --- a/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py +++ b/model_zoo/official/cv/maskrcnn_mobilenetv1/src/network_define.py @@ -177,7 +177,8 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class MaskRcnn_Mobilenetv1_Infer(nn.Cell): def __init__(self, config): diff --git a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py index f54dc4edeed..39787b928a0 100755 --- a/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py +++ b/model_zoo/official/cv/nasnet/src/nasnet_a_mobile.py @@ -934,4 +934,5 @@ class NASNetAMobileTrainOneStepWithClipGradient(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/openpose/src/loss.py b/model_zoo/official/cv/openpose/src/loss.py index 943b033279f..312dba9a633 100644 --- a/model_zoo/official/cv/openpose/src/loss.py +++ b/model_zoo/official/cv/openpose/src/loss.py @@ -199,4 +199,5 @@ class TrainOneStepWithClipGradientCell(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/psenet/README.md b/model_zoo/official/cv/psenet/README.md index 9e22490b416..a8654fc34b1 100644 --- a/model_zoo/official/cv/psenet/README.md +++ b/model_zoo/official/cv/psenet/README.md @@ -2,6 +2,7 @@ - [PSENet Description](#PSENet-description) - [Dataset](#dataset) +- [Pretrained Model](#Pretrained-model) - [Features](#features) - [Mixed Precision](#mixed-precision) - [Environment Requirements](#environment-requirements) @@ -15,6 
+16,7 @@ - [Distributed GPU Training](#distributed-gpu-training) - [Evaluation Process](#evaluation-process) - [Evaluation](#evaluation) + - [Result](#result) - [Inference Process](#inference-process) - [Export MindIR](#export-mindir) - [Infer on Ascend310](#infer-on-ascend310) @@ -48,6 +50,19 @@ Dataset used: [ICDAR2015](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalizatio A training set of 1000 images containing about 4500 readable words A testing set containing about 2000 readable words +unzip dataset files and needn't transform to mindrecord. + +# [Pretrained Model](#contents) + +download pytorch pretrained model: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth) +transform pytorch model to mindspore model + +```shell +cd src + +python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../ +``` + # [Environment Requirements](#contents) - Hardware(Ascend or GPU) @@ -61,34 +76,100 @@ A testing set containing about 2000 readable words - install [pyblind11](https://github.com/pybind/pybind11) - install [Opencv3.4](https://docs.opencv.org/3.4.9/) +```shell +# install pybind11 +pip install pybind11 + +# install opencv3.4.9 +wget https://github.com/opencv/opencv/archive/3.4.9.zip +unzip 3.4.9.zip +cd opencv-3.4.9 +mkdir build +cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF .. 
+make -j4 # -j指定线程数,用户根æ®æœºå™¨é…置修改å‚æ•° +make install + +# export environment variables +export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64 +``` + # [Quick Start](#contents) After installing MindSpore via the official website, you can start training and evaluation as follows: -```python +```shell # run distributed training example bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] -#download opencv library -download pyblind11, opencv3.4 - -#install pyblind11 opencv3.4 -setup pyblind11(install the library by the pip command) -setup opencv3.4(compile source code install the library) - -#enter the path ,run Makefile to product file +#enter the path ,run Makefile cd ./src/ETSNET/pse/;make #run test.py python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path] -#download eval method from [here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization). -#click "My Methods" button,then download Evaluation Scripts +#go to Evaluation Process for details download script.py # run evaluation example bash scripts/run_eval_ascend.sh ``` +- running on ModelArts +- If you want to train the model on modelarts, you can refer to the [official guidance document] of modelarts (https://support.huaweicloud.com/modelarts/) + +```python +# Example of using distributed training on modelarts : +# Data set storage method + +# ├── ICDAR2015 # dir +# ├── train # train dir +# ├── ic15 # train_dataset dir +# ├── ch4_training_images +# ├── ch4_training_localization_transcription_gt +# ├── train_predtrained # predtrained dir +# ├── eval # eval dir +# ├── ic15 # eval dataset dir +# ├── ch4_test_images +# ├── challenge4_Test_Task1_GT +# ├── checkpoint # ckpt files dir + +# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。 +# a. 
set "enable_modelarts=True" 。 +# set "run_distribute=True" +# set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/" +# set "TRAIN_ROOT_DIR=/cache/data/ic15/" +# set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights train_pretrained="" + +# b. add "enable_modelarts=True" Parameters are on the interface of modearts。 +# Set the parameters required by method a on the modelarts interface +# Note: The path parameter does not need to be quoted + +# (2) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" +# (3) Set the code path on the modelarts interface "/path/psenet"。 +# (4) Set the model's startup file on the modelarts interface "train.py" 。 +# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) , +# The output path of the model "Output file path" and the log path of the model "Job log path" 。 +# (6) start trainning the model。 + +# Example of using model inference on modelarts +# (1) Place the trained model to the corresponding position of the bucket。 +# (2) chocie a or b。 +# a. set "enable_modelarts=True" 。 +# set "TEST_ROOT_DIR=/cache/data/ic15/" +# set "ckpt=/cache/data/checkpoint/ckpt file" + +# b. 
Add "enable_modelarts=True" parameter on the interface of modearts。 +# Set the parameters required by method a on the modelarts interface +# Note: The path parameter does not need to be quoted + +# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" +# (4) Set the code path on the modelarts interface "/path/psenet"。 +# (5) Set the model's startup file on the modelarts interface "eval.py" 。 +# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) , +# The output path of the model "Output file path" and the log path of the model "Job log path" 。 +# (7) Start model inference。 +``` + # [Script Description](#contents) ## [Script and Sample Code](#contents) @@ -156,7 +237,7 @@ Major parameters in default_config.yaml are: Please follow the instructions in the link below: . ```shell -bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] +bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] ``` rank_table_file which is specified by RANK_TABLE_FILE is needed when you are running a distribute task. You can generate it by using the [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). @@ -195,66 +276,27 @@ time: 2021-07-24 04:01:07, epoch: 90, step: 31, loss is 0.58495 ### run test code -```test +```shell +python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] + +# click [Here](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization) to download evaluation scripts +# choose My Methods -> Offline evaluation -> Evaluation Scripts +# download data and put it in /path_to_data +mkdir eval_ic15 +ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip + +cd eval_ic15 +unzip script_test_ch4_t1_e1-1577983151.zip +cd .. 
+ +sh ./script/run_eval_ascend.sh python test.py --ckpt [CKPK PATH] --TEST_ROOT_DIR [TEST DATA DIR] ``` -- running on ModelArts -- If you want to train the model on modelarts, you can refer to the [official guidance document] of modelarts (https://support.huaweicloud.com/modelarts/) +### [Result](#contents) -```python -# Example of using distributed training on modelarts : -# Data set storage method - -# ├── ICDAR2015 # dir -# ├── train # train dir -# ├── ic15 # train_dataset dir -# ├── ch4_training_images -# ├── ch4_training_localization_transcription_gt -# ├── train_predtrained # predtrained dir -# ├── eval # eval dir -# ├── ic15 # eval dataset dir -# ├── ch4_test_images -# ├── challenge4_Test_Task1_GT -# ├── checkpoint # ckpt files dir - -# (1) Choose either a (modify yaml file parameters) or b (modelArts create training job to modify parameters) 。 -# a. set "enable_modelarts=True" 。 -# set "run_distribute=True" -# set "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs_imagenet/" -# set "TRAIN_ROOT_DIR=/cache/data/ic15/" -# set "pre_trained=/cache/data/train_predtrained/pred file name" Without pre-training weights train_pretrained="" - -# b. 
add "enable_modelarts=True" Parameters are on the interface of modearts。 -# Set the parameters required by method a on the modelarts interface -# Note: The path parameter does not need to be quoted - -# (2) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" -# (3) Set the code path on the modelarts interface "/path/psenet"。 -# (4) Set the model's startup file on the modelarts interface "train.py" 。 -# (5) Set the data path of the model on the modelarts interface ".../ICDAR2015/train"(choices ICDAR2015/train Folder path) , -# The output path of the model "Output file path" and the log path of the model "Job log path" 。 -# (6) start trainning the model。 - -# Example of using model inference on modelarts -# (1) Place the trained model to the corresponding position of the bucket。 -# (2) chocie a or b。 -# a. set "enable_modelarts=True" 。 -# set "TEST_ROOT_DIR=/cache/data/ic15/" -# set "ckpt=/cache/data/checkpoint/ckpt file" - -# b. Add "enable_modelarts=True" parameter on the interface of modearts。 -# Set the parameters required by method a on the modelarts interface -# Note: The path parameter does not need to be quoted - -# (3) Set the path of the network configuration file "_config_path=/The path of config in default_config.yaml/" -# (4) Set the code path on the modelarts interface "/path/psenet"。 -# (5) Set the model's startup file on the modelarts interface "eval.py" 。 -# (6) Set the data path of the model on the modelarts interface ".../ICDAR2015/eval"(choices ICDAR2015/eval Folder path) , -# The output path of the model "Output file path" and the log path of the model "Job log path" 。 -# (7) Start model inference。 -``` +Calculated!{"precision": 0.8147966668299853,"recall":0.8006740491092923,"hmean":0.8076736279747451,"AP":0} ### Eval Script for ICDAR2015 @@ -342,8 +384,9 @@ The `res` folder is generated in the upper-level directory. 
For details about th | Loss Function | LossCallBack | | outputs | probability | | Loss | 0.35 | -| Speed | 1pc: 444 ms/step; 8pcs: 446 ms/step | -| Total time | 1pc: 75.48 h; 8pcs: 7.11 h | +| Parameters | batch_size = 4 | +| Speed | 1pc: 444 ms/step(fps: 9.0); 8pcs: 446 ms/step(fps: 71) | +| Total time | 1pc: 75.48 h; 8pcs: 7.11 h | | Parameters (M) | 27.36 | | Checkpoint for Fine tuning | 109.44M (.ckpt file) | | Scripts | | diff --git a/model_zoo/official/cv/psenet/README_CN.md b/model_zoo/official/cv/psenet/README_CN.md index 7355e1e44e8..770e9872bac 100644 --- a/model_zoo/official/cv/psenet/README_CN.md +++ b/model_zoo/official/cv/psenet/README_CN.md @@ -5,6 +5,7 @@ - [PSENet示例](#psenet示例) - [概述](#概述) - [æ•°æ®é›†](#æ•°æ®é›†) +- [预训练模型](#预训练模型) - [环境è¦æ±‚](#环境è¦æ±‚) - [快速入门](#快速入门) - [脚本说明](#脚本说明) @@ -14,9 +15,7 @@ - [分布å¼è®­ç»ƒ](#分布å¼è®­ç»ƒ) - [评估过程](#评估过程) - [è¿è¡Œæµ‹è¯•ä»£ç ](#è¿è¡Œæµ‹è¯•ä»£ç ) - - [ICDAR2015评估脚本](#icdar2015评估脚本) - - [用法](#用法) - - [结果](#结果) + - [结果](#结果) - [推ç†è¿‡ç¨‹](#推ç†è¿‡ç¨‹) - [导出MindIR](#导出mindir) - [在Ascend310执行推ç†](#在ascend310执行推ç†) @@ -48,6 +47,21 @@ 训练集:包括约4500个å¯è¯»å•è¯çš„1000张图åƒã€‚ 测试集:约2000个å¯è¯»å•è¯ã€‚ +下载得到的训练和推ç†æ•°æ®è§£åŽ‹åŽå¤‡ç”¨ï¼Œä¸éœ€è¦è½¬ä¸ºmindrecordæ•°æ® + +# 预训练模型 + +下载pytorch的预训练模型: [resnet50-19c8e357.pth](https://download.pytorch.org/models/resnet50-19c8e357.pth) +å°†pytorch模型转为mindspore模型 + +```shell +cd src + +python psenet_model_torch2mindspore.py --torch_file=/path_to_model/resnet50-19c8e357.pth --output_path=../ +``` + +执行完æˆï¼Œsrc的上层目录得到文件pretrained_model.ckpt文件,用于接下æ¥çš„训练 + # 环境è¦æ±‚ - 硬件:昇腾处ç†å™¨ï¼ˆAscend) @@ -62,36 +76,100 @@ - 安装[pyblind11](https://github.com/pybind/pybind11) - 安装[Opencv3.4](https://docs.opencv.org/3.4.9/) +```shell +# 使用pip安装pybind11 +pip install pybind11 + +# 使用æºç å®‰è£…opencv3.4.9 +wget https://github.com/opencv/opencv/archive/3.4.9.zip +unzip 3.4.9.zip +cd opencv-3.4.9 +mkdir build +cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local -D WITH_WEBP=OFF .. 
+make -j4 # -j指定线程数,用户根æ®æœºå™¨é…置修改å‚æ•° +make install + +# opencv安装在/usr/local目录下,将该目录添加到环境å˜é‡ä¸­ +export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/local/include +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib64 +``` + # 快速入门 通过官方网站安装MindSporeåŽï¼Œæ‚¨å¯ä»¥æŒ‰ç…§å¦‚下步骤进行训练和评估: -```python +```shell # 分布å¼è®­ç»ƒè¿è¡Œç¤ºä¾‹ -bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] +# 第一个å‚数为rank_table文件,第二个å‚数为生æˆçš„预训练模型,第三个å‚数为下载的训练数æ®é›† +bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] -# 下载opencv库 -download pyblind11, opencv3.4 - -# 安装pyblind11 opencv3.4 -setup pyblind11(install the library by the pip command) -setup opencv3.4(compile source code install the library) - -# å•å‡»[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法 -# 点击"我的方法"按钮,下载评估脚本 - -# 输入路径,è¿è¡ŒMakefile,找到产å“文件 +# 进入路径,è¿è¡ŒMakefile cd ./src/ETSNET/pse/;make clean&&make # è¿è¡Œtest.py -python test.py --ckpt pretrained_model.ckpt --TEST_ROOT_DIR [test root path] - +python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] +# 具体è§è¯„估过程 download script.py # è¿è¡Œè¯„估示例 bash scripts/run_eval_ascend.sh ``` +- 如果è¦åœ¨modelarts上进行模型的训练,å¯ä»¥å‚考modelartsçš„[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推ç†ï¼Œå…·ä½“æ“作如下: + +```ModelArts +# 在ModelArts上使用分布å¼è®­ç»ƒç¤ºä¾‹: +# æ•°æ®é›†å­˜æ”¾æ–¹å¼ + +# ├── ICDAR2015 # dir +# ├── train # train dir +# ├── ic15 # train_dataset dir +# ├── ch4_training_images +# ├── ch4_training_localization_transcription_gt +# ├── train_predtrained # predtrained dir +# ├── eval # eval dir +# ├── ic15 # eval dataset dir +# ├── ch4_test_images +# ├── challenge4_Test_Task1_GT +# ├── checkpoint # ckpt files dir + +# (1) 选择a(修改yaml文件å‚æ•°)或者b(ModelArts创建训练作业修改å‚æ•°)其中一ç§æ–¹å¼ã€‚ +# a. 
设置 "enable_modelarts=True" +# 设置 "run_distribute=True" +# 设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/" +# 设置 "TRAIN_ROOT_DIR=/cache/data/ic15/" +# 设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练æƒé‡ pre_trained="" + +# b. 增加 "enable_modelarts=True" å‚数在modeartsçš„ç•Œé¢ä¸Šã€‚ +# 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ–¹æ³•a所需è¦çš„å‚æ•° +# 注æ„:路径å‚æ•°ä¸éœ€è¦åŠ å¼•å· + +# (2)设置网络é…置文件的路径 "_config_path=/The path of config in default_config.yaml/" +# (3) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®ä»£ç çš„路径 "/path/psenet"。 +# (4) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„å¯åŠ¨æ–‡ä»¶ "train.py" 。 +# (5) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„æ•°æ®è·¯å¾„ ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) , +# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 +# (6) 开始模型的训练。 + +# 在modelarts上使用模型推ç†çš„示例 +# (1) 把训练好的模型地方到桶的对应ä½ç½®ã€‚ +# (2) 选择a或者b其中一ç§æ–¹å¼ã€‚ +# a.设置 "enable_modelarts=True" +# 设置 "TEST_ROOT_DIR=/cache/data/ic15" +# 设置 "ckpt=/cache/data/checkpoint/ckpt file" + +# b. 增加 "enable_modelarts=True" å‚数在modeartsçš„ç•Œé¢ä¸Šã€‚ +# 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ–¹æ³•a所需è¦çš„å‚æ•° +# 注æ„:路径å‚æ•°ä¸éœ€è¦åŠ å¼•å· + +# (3) 设置网络é…置文件的路径 "_config_path=/The path of config in default_config.yaml/" +# (4) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®ä»£ç çš„路径 "/path/psenet"。 +# (5) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„å¯åŠ¨æ–‡ä»¶ "eval.py" 。 +# (6) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„æ•°æ®è·¯å¾„ "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) , +# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 +# (7) 开始模型的推ç†ã€‚ +``` + ## 脚本说明 ## è„šæœ¬å’Œæ ·ä¾‹ä»£ç  @@ -153,7 +231,8 @@ bash scripts/run_eval_ascend.sh 请éµå¾ªé“¾æŽ¥ä¸­çš„说明:[链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) ```shell -bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PRED_TRAINED PATH] [TRAIN_ROOT_DIR] +# 第一个å‚数为rank_table文件,第二个å‚数为生æˆçš„预训练模型,第三个å‚数为下载的训练数æ®é›† +bash scripts/run_distribute_train.sh [RANK_FILE] [PRETRAINED_PATH] [TRAIN_ROOT_DIR] ``` 
上述shell脚本将在åŽå°è¿è¡Œåˆ†å¸ƒè®­ç»ƒã€‚å¯ä»¥é€šè¿‡`device[X]/test_*.log`文件查看结果。 @@ -173,81 +252,24 @@ device_1/log:epcoh: 2, step: 40,loss is 0.76629 ### è¿è¡Œæµ‹è¯•ä»£ç  -```test -python test.py --ckpt [CKPK PATH] --TEST_ROOT_DIR [TEST DATA DIR] - -``` - -- 如果è¦åœ¨modelarts上进行模型的训练,å¯ä»¥å‚考modelartsçš„[官方指导文档](https://support.huaweicloud.com/modelarts/) 开始进行模型的训练和推ç†ï¼Œå…·ä½“æ“作如下: - -```ModelArts -# 在ModelArts上使用分布å¼è®­ç»ƒç¤ºä¾‹: -# æ•°æ®é›†å­˜æ”¾æ–¹å¼ - -# ├── ICDAR2015 # dir -# ├── train # train dir -# ├── ic15 # train_dataset dir -# ├── ch4_training_images -# ├── ch4_training_localization_transcription_gt -# ├── train_predtrained # predtrained dir -# ├── eval # eval dir -# ├── ic15 # eval dataset dir -# ├── ch4_test_images -# ├── challenge4_Test_Task1_GT -# ├── checkpoint # ckpt files dir - -# (1) 选择a(修改yaml文件å‚æ•°)或者b(ModelArts创建训练作业修改å‚æ•°)其中一ç§æ–¹å¼ã€‚ -# a. 设置 "enable_modelarts=True" -# 设置 "run_distribute=True" -# 设置 "TRAIN_MODEL_SAVE_PATH=/cache/train/outputs/" -# 设置 "TRAIN_ROOT_DIR=/cache/data/ic15/" -# 设置 "pre_trained=/cache/data/train_predtrained/pred file name" 如果没有预训练æƒé‡ pre_trained="" - -# b. 增加 "enable_modelarts=True" å‚数在modeartsçš„ç•Œé¢ä¸Šã€‚ -# 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ–¹æ³•a所需è¦çš„å‚æ•° -# 注æ„:路径å‚æ•°ä¸éœ€è¦åŠ å¼•å· - -# (2)设置网络é…置文件的路径 "_config_path=/The path of config in default_config.yaml/" -# (3) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®ä»£ç çš„路径 "/path/psenet"。 -# (4) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„å¯åŠ¨æ–‡ä»¶ "train.py" 。 -# (5) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„æ•°æ®è·¯å¾„ ".../ICDAR2015/train"(选择ICDAR2015/train文件夹路径) , -# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 -# (6) 开始模型的训练。 - -# 在modelarts上使用模型推ç†çš„示例 -# (1) 把训练好的模型地方到桶的对应ä½ç½®ã€‚ -# (2) 选择a或者b其中一ç§æ–¹å¼ã€‚ -# a.设置 "enable_modelarts=True" -# 设置 "TEST_ROOT_DIR=/cache/data/ic15" -# 设置 "ckpt=/cache/data/checkpoint/ckpt file" - -# b. 
增加 "enable_modelarts=True" å‚数在modeartsçš„ç•Œé¢ä¸Šã€‚ -# 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ–¹æ³•a所需è¦çš„å‚æ•° -# 注æ„:路径å‚æ•°ä¸éœ€è¦åŠ å¼•å· - -# (3) 设置网络é…置文件的路径 "_config_path=/The path of config in default_config.yaml/" -# (4) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®ä»£ç çš„路径 "/path/psenet"。 -# (5) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„å¯åŠ¨æ–‡ä»¶ "eval.py" 。 -# (6) 在modelartsçš„ç•Œé¢ä¸Šè®¾ç½®æ¨¡åž‹çš„æ•°æ®è·¯å¾„ "../ICDAR2015/eval"(选择ICDAR2015/eval文件夹路径) , -# 模型的输出路径"Output file path" 和模型的日志路径 "Job log path" 。 -# (7) 开始模型的推ç†ã€‚ -``` - -### ICDAR2015评估脚本 - -#### 用法 - -第一步:å•å‡»[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法。 - -第二步:å•å‡»"我的方法"按钮,下载评估脚本。 - -第三步:建议将评估方法根符å·é“¾æŽ¥åˆ°$MINDSPORE/model_zoo/psenet/eval_ic15/。如果您的文件夹结构ä¸åŒï¼Œæ‚¨å¯èƒ½éœ€è¦æ›´æ”¹è¯„估脚本文件中的相应路径。 - ```shell -bash ./script/run_eval_ascend.sh.sh +# 第一个å‚数为训练得到的模型文件,第二个å‚数为下载得到的推ç†æ•°æ®é›† +python test.py --ckpt [CKPK_PATH] --TEST_ROOT_DIR [TEST_DATA_DIR] + +# å•å‡»[此处](https://rrc.cvc.uab.es/?ch=4&com=tasks#TextLocalization)下载评估方法 +# 点击"My Methods"按钮,选择Offline evaluation -> Evaluation Scripts +# 下载完æˆåŽï¼Œå°†æ•°æ®æ”¾åœ¨/path_to_data路径 +mkdir eval_ic15 +ln -s /path_to_data/script_test_ch4_t1_e1-1577983151.zip eval_ic15/script_test_ch4_t1_e1-1577983151.zip + +cd eval_ic15 +unzip script_test_ch4_t1_e1-1577983151.zip +cd .. 
+ +bash ./script/run_eval_ascend.sh ``` -#### 结果 +### 结果 Calculated!{"precision": 0.8147966668299853,"recall":0.8006740491092923,"hmean":0.8076736279747451,"AP":0} @@ -317,7 +339,8 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID] | æŸå¤±å‡½æ•° | LossCallBack | | 输出 | 概率 | | æŸå¤± | 0.35 | -| 速度 | 1å¡ï¼š444毫秒/步;8å¡ï¼š446毫秒/æ­¥ +| 训练å‚æ•° | batch_size = 4 | +| 速度 | 1å¡ï¼š444毫秒/æ­¥(fps: 9.0)ï¼›8å¡ï¼š446毫秒/æ­¥(fps: 71) | | 总时间 | 1å¡ï¼š75.48å°æ—¶ï¼›8å¡ï¼š7.11å°æ—¶| | å‚æ•°(M) | 27.36 | | 微调检查点 | 109.44M (.ckpt file) | diff --git a/model_zoo/official/cv/psenet/requirements.txt b/model_zoo/official/cv/psenet/requirements.txt index 9d316731512..bee48e58af9 100644 --- a/model_zoo/official/cv/psenet/requirements.txt +++ b/model_zoo/official/cv/psenet/requirements.txt @@ -2,3 +2,5 @@ numpy opencv-python pillow pyyaml +Polygon3 +pyclipper diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile index 541e9ba3b37..eac5bc1e8e6 100644 --- a/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile +++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/Makefile @@ -13,8 +13,7 @@ # limitations under the License. # ============================================================================ -mindspore_home = ${MINDSPORE_HOME} -CXXFLAGS = -I include -I ${mindspore_home}/model_zoo/official/cv/psenet -std=c++11 -O3 +CXXFLAGS = -std=c++11 -O3 CXX_SOURCES = adaptor.cpp opencv_home = ${OPENCV_HOME} OPENCV = -I$(opencv_home)/include -L$(opencv_home)/lib64 -lopencv_superres -lopencv_ml -lopencv_objdetect \ diff --git a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp index 8885e848fec..f4e343e9fc3 100644 --- a/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp +++ b/model_zoo/official/cv/psenet/src/ETSNET/pse/adaptor.cpp @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "src/ETSNET/pse/adaptor.h" #include #include #include @@ -26,6 +25,7 @@ #include #include #include +#include "./adaptor.h" using std::vector; using std::queue; diff --git a/model_zoo/official/cv/psenet/src/network_define.py b/model_zoo/official/cv/psenet/src/network_define.py index 09ffe610209..3f55a996903 100644 --- a/model_zoo/official/cv/psenet/src/network_define.py +++ b/model_zoo/official/cv/psenet/src/network_define.py @@ -23,7 +23,6 @@ from mindspore import ParameterTuple from mindspore.common.tensor import Tensor from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.train.callback import Callback __all__ = ['LossCallBack', 'WithLossCell', 'TrainOneStepCell'] @@ -144,4 +143,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(img, gt_text, gt_kernels, training_mask, self.sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/psenet/train.py b/model_zoo/official/cv/psenet/train.py index b11e45ecce5..d8519e2fd51 100644 --- a/model_zoo/official/cv/psenet/train.py +++ b/model_zoo/official/cv/psenet/train.py @@ -100,7 +100,7 @@ def train(): if config.pre_trained: param_dict = load_checkpoint(config.pre_trained) - load_param_into_net(net, param_dict) + load_param_into_net(net, param_dict, strict_load=True) print('Load Pretrained parameters done!') criterion = DiceLoss(batch_size=config.TRAIN_BATCH_SIZE) diff --git a/model_zoo/official/cv/resnet/README.md b/model_zoo/official/cv/resnet/README.md index 2a2271a6bfb..0878db4c40e 100644 --- a/model_zoo/official/cv/resnet/README.md +++ b/model_zoo/official/cv/resnet/README.md @@ -202,6 +202,19 @@ If you want to run in modelarts, please check the official documentation of [mod . 
└──resnet ├── README.md + ├── config # parameter configuration + ├── resnet18_cifar10_config.yaml + ├── resnet18_cifar10_config_gpu.yaml + ├── resnet18_imagenet2012_config.yaml + ├── resnet18_imagenet2012_config_gpu.yaml + ├── resnet34_imagenet2012_config.yaml + ├── resnet50_cifar10_config.yaml + ├── resnet50_imagenet2012_Acc_config.yaml # High performance version: The performance is improved by more than 10% and the precision decrease less than 1% + ├── resnet50_imagenet2012_Ascend_Thor_config.yaml + ├── resnet50_imagenet2012_config.yaml + ├── resnet50_imagenet2012_GPU_Thor_config.yaml + ├── resnet101_imagenet2012_config.yaml + └── se-resnet50_imagenet2012_config.yaml ├── scripts ├── run_distribute_train.sh # launch ascend distributed training(8 pcs) ├── run_parameter_server_train.sh # launch ascend parameter server training(8 pcs) @@ -226,16 +239,6 @@ If you want to run in modelarts, please check the official documentation of [mod ├──device_adapter.py # device adapter ├──local_adapter.py # local adapter ├──moxing_adapter.py # moxing adapter - ├── resnet18_cifar10_config.yaml # parameter configuration - ├── resnet18_imagenet2012_config.yaml # parameter configuration - ├── resnet34_imagenet2012_config.yaml # parameter configuration - ├── resnet50_cifar10_config.yaml # parameter configuration - ├── resnet50_imagenet2012_Acc_config.yaml # parameter configuration - ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # parameter configuration - ├── resnet50_imagenet2012_config.yaml # parameter configuration - ├── resnet50_imagenet2012_GPU_Thor_config.yaml # parameter configuration - ├── resnet101_imagenet2012_config.yaml # parameter configuration - ├── se-resnet50_imagenet2012_config.yaml # parameter configuration ├── export.py # export model for inference ├── mindspore_hub_conf.py # mindspore hub interface ├── eval.py # eval net @@ -713,42 +716,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522. 
#### ResNet18 on CIFAR-10 -| Parameters | Ascend 910 | -| -------------------------- | -------------------------------------- | -| Model Version | ResNet18 | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | -| uploaded Date | 02/25/2021 (month/day/year) | -| MindSpore Version | 1.1.1 | -| Dataset | CIFAR-10 | -| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | -| Optimizer | Momentum | -| Loss Function | Softmax Cross Entropy | -| outputs | probability | -| Loss | 0.0002519517 | -| Speed | 13 ms/step(8pcs) | -| Total time | 4 mins | -| Parameters (M) | 11.2 | -| Checkpoint for Fine tuning | 86M (.ckpt file) | +| Parameters | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| Model Version | ResNet18 | ResNet18 | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G | +| uploaded Date | 02/25/2021 (month/day/year) | 07/23/2021 (month/day/year) | +| MindSpore Version | 1.1.1 | 1.3.0 | +| Dataset | CIFAR-10 | CIFAR-10 | +| Training Parameters | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 | +| Optimizer | Momentum | Momentum | +| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | +| outputs | probability | probability | +| Loss | 0.0002519517 | 0.0015517382 | +| Speed | 13 ms/step(8pcs) | 29 ms/step(8pcs) | +| Total time | 4 mins | 11 mins | +| Parameters (M) | 11.2 | 11.2 | +| Checkpoint for Fine tuning | 86M (.ckpt file) | 85.4M (.ckpt file) | | Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ResNet18 on ImageNet2012 -| Parameters | Ascend 910 | -| -------------------------- | -------------------------------------- | -| Model Version | ResNet18 | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | -| uploaded Date | 02/25/2021 (month/day/year) ； | -|
MindSpore Version | 1.1.1 | -| Dataset | ImageNet2012 | -| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | -| Optimizer | Momentum | -| Loss Function | Softmax Cross Entropy | -| outputs | probability | -| Loss | 2.15702 | -| Speed | 110ms/step(8pcs) (may need to set_numa_enbale in dataset.py) | -| Total time | 110 mins | -| Parameters (M) | 11.7 | -| Checkpoint for Fine tuning | 90M (.ckpt file) | +| Parameters | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| Model Version | ResNet18 | ResNet18 | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | PCIE V100-32G | +| uploaded Date | 02/25/2021 (month/day/year) ； | 07/23/2021 (month/day/year) | +| MindSpore Version | 1.1.1 | 1.3.0 | +| Dataset | ImageNet2012 | ImageNet2012 | +| Training Parameters | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 | +| Optimizer | Momentum | Momentum | +| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | +| outputs | probability | probability | +| Loss | 2.15702 | 2.168664 | +| Speed | 110ms/step(8pcs) (may need to set_numa_enable in dataset.py) | 107 ms/step(8pcs) | +| Total time | 110 mins | 130 mins | +| Parameters (M) | 11.7 | 11.7 | +| Checkpoint for Fine tuning | 90M (.ckpt file) | 90M (.ckpt file) | | Scripts | [Link](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ResNet50 on CIFAR-10 diff --git a/model_zoo/official/cv/resnet/README_CN.md b/model_zoo/official/cv/resnet/README_CN.md index 18c39d777e6..64a97707f16 100755 --- a/model_zoo/official/cv/resnet/README_CN.md +++ b/model_zoo/official/cv/resnet/README_CN.md @@ -188,6 +188,19 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] .
└──resnet ├── README.md + ├── config # å‚æ•°é…ç½® + ├── resnet18_cifar10_config.yaml + ├── resnet18_cifar10_config_gpu.yaml + ├── resnet18_imagenet2012_config.yaml + ├── resnet18_imagenet2012_config_gpu.yaml + ├── resnet34_imagenet2012_config.yaml + ├── resnet50_cifar10_config.yaml + ├── resnet50_imagenet2012_Acc_config.yaml # 高性能版本:性能æ高超过10%而精度下é™å°‘于1% + ├── resnet50_imagenet2012_Ascend_Thor_config.yaml + ├── resnet50_imagenet2012_config.yaml + ├── resnet50_imagenet2012_GPU_Thor_config.yaml + ├── resnet101_imagenet2012_config.yaml + ├── se-resnet50_imagenet2012_config.yaml ├── scripts ├── run_distribute_train.sh # å¯åŠ¨Ascend分布å¼è®­ç»ƒï¼ˆ8å¡ï¼‰ ├── run_parameter_server_train.sh # å¯åŠ¨Ascendå‚æ•°æœåŠ¡å™¨è®­ç»ƒ(8å¡) @@ -209,17 +222,6 @@ bash run_eval_gpu.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] ├── device_adapter.py # 设备é…ç½® ├── local_adapter.py # 本地设备é…ç½® └── moxing_adapter.py # modelarts设备é…ç½® - ├── resnet18_cifar10_config.yaml # å‚æ•°é…ç½® - ├── resnet18_imagenet2012_config.yaml # å‚æ•°é…ç½® - ├── resnet34_imagenet2012_config.yaml # å‚æ•°é…ç½® - ├── resnet50_cifar10_config.yaml # å‚æ•°é…ç½® - ├── resnet50_imagenet2012_Acc_config.yaml # å‚æ•°é…ç½® - ├── resnet50_imagenet2012_Ascend_Thor_config.yaml # å‚æ•°é…ç½® - ├── resnet50_imagenet2012_config.yaml # å‚æ•°é…ç½® - ├── resnet50_imagenet2012_GPU_Thor_config.yaml # å‚æ•°é…ç½® - ├── resnet101_imagenet2012_config.yaml # å‚æ•°é…ç½® - ├── se-resnet50_imagenet2012_config.yaml # å‚æ•°é…ç½® - ├── eval.py # 评估网络 ├── eval.py # 评估网络 └── train.py # 训练网络 ``` @@ -674,42 +676,42 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522. 
#### CIFAR-10上的ResNet18 -| å‚æ•° | Ascend 910 | -| -------------------------- | -------------------------------------- | -| 模型版本 | ResNet18 | -| èµ„æº | Ascend 910ï¼›CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | -| 上传日期 | 2021-02-25 | -| MindSpore版本 | 1.1.1 | -| æ•°æ®é›† | CIFAR-10 | -| 训练å‚æ•° | epoch=90, steps per epoch=195, batch_size = 32 | -| 优化器 | Momentum | -| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | -| 输出 | 概率 | -| æŸå¤± | 0.0002519517 | -| 速度 | 13毫秒/步(8å¡ï¼‰ | -| 总时长 | 4分钟 | -| å‚æ•°(M) | 11.2 | +| å‚æ•° | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | -------------------------------------- | +| 模型版本 | ResNet18 | ResNet18 | +| èµ„æº | Ascend 910ï¼›CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G | +| 上传日期 | 2021-02-25 | 2021-07-23 | +| MindSpore版本 | 1.1.1 | 1.3.0 | +| æ•°æ®é›† | CIFAR-10 | CIFAR-10 | +| 训练å‚æ•° | epoch=90, steps per epoch=195, batch_size = 32 | epoch=90, steps per epoch=195, batch_size = 32 | +| 优化器 | Momentum | Momentum| +| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | Softmax交å‰ç†µ | +| 输出 | 概率 | 概率 | +| æŸå¤± | 0.0002519517 | 0.0015517382 | +| 速度 | 13毫秒/步(8å¡ï¼‰ | 29毫秒/步(8å¡ï¼‰ | +| 总时长 | 4分钟 | 11分钟 | +| å‚æ•°(M) | 11.2 | 11.2 | | 微调检查点 | 86(.ckpt文件) | | 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### ImageNet2012上的ResNet18 -| å‚æ•° | Ascend 910 | -| -------------------------- | -------------------------------------- | -| 模型版本 | ResNet18 | -| èµ„æº | Ascend 910ï¼›CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | -| 上传日期 | 2020-04-01 ; | -| MindSpore版本 | 1.1.1 | -| æ•°æ®é›† | ImageNet2012 | -| 训练å‚æ•° | epoch=90, steps per epoch=626, batch_size = 256 | -| 优化器 | Momentum | -| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | -| 输出 | 概率 | -| æŸå¤± | 2.15702 | -| 速度 | 110毫秒/步(8å¡ï¼‰ (å¯èƒ½éœ€è¦åœ¨datasetpy中增加set_numa_enbale绑核æ“作) | -| 总时长 | 110分钟 | -| å‚æ•°(M) | 11.7 | -| 微调检查点| 90M(.ckpt文件) | +| å‚æ•° | Ascend 910 | GPU | +| -------------------------- | -------------------------------------- | 
-------------------------------------- | +| 模型版本 | ResNet18 | RESNET18 | +| èµ„æº | Ascend 910ï¼›CPU 2.60GHz,192核;内存 755G;系统 Euler2.8 | PCIE V100-32G | +| 上传日期 | 2020-04-01 ; | 2021-07-23 | +| MindSpore版本 | 1.1.1 | 1.3.0 | +| æ•°æ®é›† | ImageNet2012 | ImageNet2012 | +| 训练å‚æ•° | epoch=90, steps per epoch=626, batch_size = 256 | epoch=90, steps per epoch=625, batch_size = 256 | +| 优化器 | Momentum | Momentum| +| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | Softmax交å‰ç†µ | +| 输出 | 概率 | 概率 | +| æŸå¤± | 2.15702 | 2.168664 | +| 速度 | 110毫秒/步(8å¡ï¼‰ (å¯èƒ½éœ€è¦åœ¨datasetpy中增加set_numa_enbale绑核æ“作) | 107毫秒/步(8å¡ï¼‰ | +| 总时长 | 110分钟 | 130分钟 | +| å‚æ•°(M) | 11.7 | 11.7 | +| 微调检查点| 90M(.ckpt文件) | 90M(.ckpt文件) | | 脚本 | [链接](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/cv/resnet) | #### CIFAR-10上的ResNet50 diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh index 6967dae9a80..c5f3903be96 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train.sh @@ -35,7 +35,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ $# == 4 ] then @@ -101,7 +101,7 @@ do mkdir ./train_parallel$i cp ../*.py ./train_parallel$i cp *.sh ./train_parallel$i - cp -r ../*.yaml ./train_parallel$i + cp -r ../config/*.yaml ./train_parallel$i cp -r ../src ./train_parallel$i cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" diff --git a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh index b44116f9923..39dacf98653 100755 --- a/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_distribute_train_gpu.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 
+CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then @@ -80,7 +80,7 @@ rm -rf ./train_parallel mkdir ./train_parallel cp ../*.py ./train_parallel cp *.sh ./train_parallel -cp -r ../*.yaml ./train_parallel +cp -r ../config/*.yaml ./train_parallel cp -r ../src ./train_parallel cd ./train_parallel || exit diff --git a/model_zoo/official/cv/resnet/scripts/run_eval.sh b/model_zoo/official/cv/resnet/scripts/run_eval.sh index 85c75682c3b..97a7ba85c71 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! -d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../*.yaml ./eval +cp -r ../config/*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh index ed93cb09c08..97114b7a456 100755 --- a/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_eval_gpu.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! -d $PATH1 ] @@ -58,7 +58,7 @@ fi mkdir ./eval cp ../*.py ./eval cp *.sh ./eval -cp -r ../*.yaml ./eval +cp -r ../config/*.yaml ./eval cp -r ../src ./eval cd ./eval || exit env > env.log diff --git a/model_zoo/official/cv/resnet/scripts/run_infer.sh b/model_zoo/official/cv/resnet/scripts/run_infer.sh index 34ae0fadadc..b73e956c18a 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ ! 
-d $PATH1 ] @@ -56,7 +56,7 @@ then rm -rf ./infer fi mkdir ./infer -cp ../*.yaml ./infer +cp ../config/*.yaml ./infer cp ../*.py ./infer cp *.sh ./infer cp -r ../src ./infer diff --git a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh index d49002a575b..79ff34bb8d3 100644 --- a/model_zoo/official/cv/resnet/scripts/run_infer_310.sh +++ b/model_zoo/official/cv/resnet/scripts/run_infer_310.sh @@ -87,7 +87,7 @@ function preprocess_data() fi mkdir preprocess_Result BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")") - CONFIG_FILE="${BASE_PATH}/$1" + CONFIG_FILE="${BASE_PATH}/config/$1" python3.7 ../preprocess.py --data_path=$data_path --output_path=./preprocess_Result --config_path=$CONFIG_FILE &> preprocess.log } diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh index e3dd2d6372a..0cd85f336cd 100644 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train.sh @@ -30,7 +30,7 @@ get_real_path(){ PATH1=$(get_real_path $1) PATH2=$(get_real_path $2) -CONFIG_FILE=$3 +CONFIG_FILE=$(get_real_path $3) if [ $# == 4 ] then @@ -71,7 +71,7 @@ export DEVICE_ID=0 export RANK_ID=0 rm -rf ./sched mkdir ./sched -cp ../*.yaml ./sched +cp ../config/*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -97,7 +97,7 @@ do export RANK_ID=$i rm -rf ./server_$i mkdir ./server_$i - cp ../*.yaml ./server_$i + cp ../config/*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -125,7 +125,7 @@ do export RANK_ID=$i rm -rf ./worker_$i mkdir ./worker_$i - cp ../*.yaml ./worker_$i + cp ../config/*.yaml ./worker_$i cp ../*.py ./worker_$i cp *.sh ./worker_$i cp -r ../src ./worker_$i diff --git a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh 
b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh index ba83f209644..38eac825e35 100755 --- a/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_parameter_server_train_gpu.sh @@ -29,7 +29,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -60,7 +60,7 @@ export MS_SCHED_PORT=8081 export MS_ROLE=MS_SCHED rm -rf ./sched mkdir ./sched -cp ../*.yaml ./sched +cp ../config/*.yaml ./sched cp ../*.py ./sched cp *.sh ./sched cp -r ../src ./sched @@ -85,7 +85,7 @@ for((i=0;i<$MS_SERVER_NUM;i++)); do rm -rf ./server_$i mkdir ./server_$i - cp ../*.yaml ./server_$i + cp ../config/*.yaml ./server_$i cp ../*.py ./server_$i cp *.sh ./server_$i cp -r ../src ./server_$i @@ -110,7 +110,7 @@ done export MS_ROLE=MS_WORKER rm -rf ./worker mkdir ./worker -cp ../*.yaml ./worker +cp ../config/*.yaml ./worker cp ../*.py ./worker cp *.sh ./worker cp -r ../src ./worker diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh index 402e01a6869..a0381dbeafe 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train.sh @@ -34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then PATH2=$(get_real_path $3) @@ -80,7 +80,7 @@ then rm -rf ./train fi mkdir ./train -cp ../*.yaml ./train +cp ../config/*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh index edb85580acb..581d5521911 100755 --- a/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/cv/resnet/scripts/run_standalone_train_gpu.sh @@ 
-34,7 +34,7 @@ get_real_path(){ } PATH1=$(get_real_path $1) -CONFIG_FILE=$2 +CONFIG_FILE=$(get_real_path $2) if [ $# == 3 ] then @@ -83,7 +83,7 @@ then rm -rf ./train fi mkdir ./train -cp ../*.yaml ./train +cp ../config/*.yaml ./train cp ../*.py ./train cp *.sh ./train cp -r ../src ./train diff --git a/model_zoo/official/cv/resnet/src/model_utils/config.py b/model_zoo/official/cv/resnet/src/model_utils/config.py index d8f6518f1ad..19678722f34 100644 --- a/model_zoo/official/cv/resnet/src/model_utils/config.py +++ b/model_zoo/official/cv/resnet/src/model_utils/config.py @@ -21,7 +21,7 @@ import argparse from pprint import pprint, pformat import yaml -_config_path = "./resnet50_cifar10_config.yaml" +_config_path = "./config/resnet50_cifar10_config.yaml" class Config: """ @@ -118,7 +118,7 @@ def get_config(): parser = argparse.ArgumentParser(description="default name", add_help=False) current_dir = os.path.dirname(os.path.abspath(__file__)) parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, \ - "../resnet50_cifar10_config.yaml"), help="Config file path") + "../config/resnet50_cifar10_config.yaml"), help="Config file path") path_args, _ = parser.parse_known_args() default, helper, choices = parse_yaml(path_args.config_path) pprint(default) diff --git a/model_zoo/official/cv/resnet/src/resnet.py b/model_zoo/official/cv/resnet/src/resnet.py index 0405e38cafa..54174d4ad7d 100755 --- a/model_zoo/official/cv/resnet/src/resnet.py +++ b/model_zoo/official/cv/resnet/src/resnet.py @@ -23,7 +23,7 @@ from mindspore.ops import functional as F from mindspore.common.tensor import Tensor -def _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): +def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): fan_in = in_channel * kernel_size * kernel_size scale = 1.0 scale /= max(1., fan_in) @@ -108,7 +108,7 @@ def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu' def _conv3x3(in_channel, 
out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) else: weight_shape = (out_channel, in_channel, 3, 3) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -121,7 +121,7 @@ def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False): def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) else: weight_shape = (out_channel, in_channel, 1, 1) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -134,7 +134,7 @@ def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False): if use_se: - weight = _conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) else: weight_shape = (out_channel, in_channel, 7, 7) weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) @@ -207,7 +207,7 @@ class ResidualBlock(nn.Cell): self.bn2 = _bn(channel) self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se) - self.bn3 = _bn_last(out_channel) + self.bn3 = _bn(out_channel) if self.se_block: self.se_global_pool = P.ReduceMean(keep_dims=False) self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se) diff --git a/model_zoo/official/cv/resnet/train.py b/model_zoo/official/cv/resnet/train.py index e1440dd65e0..7048543b7ac 100755 --- a/model_zoo/official/cv/resnet/train.py +++ b/model_zoo/official/cv/resnet/train.py @@ -14,9 +14,10 @@ # 
============================================================================ """train resnet.""" import os +import numpy as np from mindspore import context from mindspore import Tensor -from mindspore.nn.optim import Momentum, thor +from mindspore.nn.optim import Momentum, thor, LARS from mindspore.train.model import Model from mindspore.context import ParallelMode from mindspore.train.train_thor import ConvertModelUtils @@ -37,6 +38,7 @@ from src.metric import DistAccuracy, ClassifyCorrectCell from src.model_utils.config import config from src.model_utils.moxing_adapter import moxing_wrapper from src.model_utils.device_adapter import get_rank_id, get_device_num +from src.resnet import conv_variance_scaling_initializer set_seed(1) @@ -130,13 +132,26 @@ def init_weight(net): else: for _, cell in net.cells_and_names(): if isinstance(cell, nn.Conv2d): - cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(), - cell.weight.shape, - cell.weight.dtype)) + if config.conv_init == "XavierUniform": + cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(), + cell.weight.shape, + cell.weight.dtype)) + elif config.conv_init == "TruncatedNormal": + weight = conv_variance_scaling_initializer(cell.in_channels, + cell.out_channels, + cell.kernel_size[0]) + cell.weight.set_data(weight) if isinstance(cell, nn.Dense): - cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(), - cell.weight.shape, - cell.weight.dtype)) + if config.dense_init == "TruncatedNormal": + cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(), + cell.weight.shape, + cell.weight.dtype)) + elif config.dense_init == "RandomNormal": + in_channel = cell.in_channels + out_channel = cell.out_channels + weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel) + weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=cell.weight.dtype) + cell.weight.set_data(weight) def init_lr(step_size): """init lr""" @@ 
-163,6 +178,21 @@ def init_loss_scale(): loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean') return loss + +def init_group_params(net): + decayed_params = [] + no_decayed_params = [] + for param in net.trainable_params(): + if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name: + decayed_params.append(param) + else: + no_decayed_params.append(param) + + group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay}, + {'params': no_decayed_params}, + {'order_params': net.trainable_params()}] + return group_params + def run_eval(target, model, ckpt_save_dir, cb): """run_eval""" if config.run_eval: @@ -205,18 +235,11 @@ def train_net(): init_weight(net=net) lr = Tensor(init_lr(step_size=step_size)) # define opt - decayed_params = [] - no_decayed_params = [] - for param in net.trainable_params(): - if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name: - decayed_params.append(param) - else: - no_decayed_params.append(param) - - group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay}, - {'params': no_decayed_params}, - {'order_params': net.trainable_params()}] + group_params = init_group_params(net) opt = Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale) + if config.optimizer == "LARS": + opt = LARS(opt, epsilon=config.lars_epsilon, coefficient=config.lars_coefficient, + lars_filter=lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name) loss = init_loss_scale() loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False) dist_eval_network = ClassifyCorrectCell(net) if config.run_distribute else None diff --git a/model_zoo/official/cv/retinaface_resnet50/src/network.py b/model_zoo/official/cv/retinaface_resnet50/src/network.py index 337a4e9acac..3be88a8da28 100644 --- a/model_zoo/official/cv/retinaface_resnet50/src/network.py +++ 
b/model_zoo/official/cv/retinaface_resnet50/src/network.py @@ -19,7 +19,6 @@ import numpy as np import mindspore import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore import context, Tensor @@ -524,4 +523,5 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/retinanet/src/retinanet.py b/model_zoo/official/cv/retinanet/src/retinanet.py index 6e9c4f312b6..58557d8dbd8 100644 --- a/model_zoo/official/cv/retinanet/src/retinanet.py +++ b/model_zoo/official/cv/retinanet/src/retinanet.py @@ -316,7 +316,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class resnet(nn.Cell): """ diff --git a/model_zoo/official/cv/shufflenetv1/eval.py b/model_zoo/official/cv/shufflenetv1/eval.py index cc267910fb9..9941a8443c6 100644 --- a/model_zoo/official/cv/shufflenetv1/eval.py +++ b/model_zoo/official/cv/shufflenetv1/eval.py @@ -39,7 +39,7 @@ def test(): # step_size = dataset.get_dataset_size() # define net - net = shufflenetv1(model_size=config.model_size) + net = shufflenetv1(model_size=config.model_size, n_class=config.num_classes) # load checkpoint param_dict = load_checkpoint(config.ckpt_path) diff --git a/model_zoo/official/cv/shufflenetv1/export.py b/model_zoo/official/cv/shufflenetv1/export.py index dec005028b6..5f5709d8c8b 100644 --- a/model_zoo/official/cv/shufflenetv1/export.py +++ b/model_zoo/official/cv/shufflenetv1/export.py @@ -38,7 +38,7 @@ if config.device_target == "Ascend": @moxing_wrapper(pre_process=modelarts_pre_process) def model_export(): - net = ShuffleNetV1(model_size=config.model_size) + net = 
ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes) param_dict = load_checkpoint(config.ckpt_path) load_param_into_net(net, param_dict) diff --git a/model_zoo/official/cv/shufflenetv1/train.py b/model_zoo/official/cv/shufflenetv1/train.py index 048f9bf030c..0e591e10b59 100644 --- a/model_zoo/official/cv/shufflenetv1/train.py +++ b/model_zoo/official/cv/shufflenetv1/train.py @@ -58,7 +58,7 @@ def train(): context.set_context(device_id=config.device_id) # define network - net = ShuffleNetV1(model_size=config.model_size) + net = ShuffleNetV1(model_size=config.model_size, n_class=config.num_classes) # define loss loss = CrossEntropySmooth(sparse=True, reduction="mean", smooth_factor=config.label_smooth_factor, diff --git a/model_zoo/official/cv/ssd/src/ssd.py b/model_zoo/official/cv/ssd/src/ssd.py index 7108240ffc5..171c9178054 100644 --- a/model_zoo/official/cv/ssd/src/ssd.py +++ b/model_zoo/official/cv/ssd/src/ssd.py @@ -525,7 +525,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/official/cv/unet/README.md b/model_zoo/official/cv/unet/README.md index f369e865a4d..00736eb65cb 100644 --- a/model_zoo/official/cv/unet/README.md +++ b/model_zoo/official/cv/unet/README.md @@ -98,12 +98,12 @@ If set `split`=1.0, you should split train dataset and val dataset by directorie We support script to convert COCO and a Cell_Nuclei dataset used in used in [Unet++ original paper](https://arxiv.org/abs/1912.05074) to mulyi-class dataset format. -1. Select `*yaml` in `unet`. +1. Select `*.yaml` file under `unet` and modify the parameters as needed. 2. 
run script to convert to mulyi-class dataset format: ```shell -python preprocess_dataset.py -d /data/save_data_path +python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/save_data_path ``` ## [Environment Requirements](#contents) @@ -481,7 +481,7 @@ Export MindIR on local Before exporting, you need to modify the parameter in the configuration — checkpoint_file_path and batch_ Size . checkpoint_ file_ Path is the CKPT file path, batch_ Size is set to 1. ```shell -python export.py --config_path=[CONFIG_PATH] +python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=AIR ``` The checkpoint_file_path parameter is required, diff --git a/model_zoo/official/cv/unet/README_CN.md b/model_zoo/official/cv/unet/README_CN.md index 7f599f4c491..cd2641c329a 100644 --- a/model_zoo/official/cv/unet/README_CN.md +++ b/model_zoo/official/cv/unet/README_CN.md @@ -102,12 +102,12 @@ UNet++是U-Net的增强版本,使用了新的跨层链接方å¼å’Œæ·±å±‚ç›‘ç£ æˆ‘ä»¬æ供了一个脚本æ¥å°† COCO å’Œ Cell_Nuclei æ•°æ®é›†ï¼ˆ[Unet++ 原论文](https://arxiv.org/abs/1912.05074) 中使用)转æ¢ä¸ºmulti-classæ ¼å¼ã€‚ -1. 在`src/model_utils/`下选择对应的yaml文件。 +1. 在unet下选择*.yaml文件,根æ®éœ€è¦ä¿®æ”¹å‚数。 2. 
è¿è¡Œè½¬æ¢è„šæœ¬: ```shell -python preprocess_dataset.py -d /data/save_data_path +python preprocess_dataset.py --config_path path/unet/*.yaml --data_path /data/save_data_path ``` ## 环境è¦æ±‚ @@ -480,7 +480,7 @@ python eval.py --data_path=/path/to/data/ --checkpoint_file_path=/path/to/checkp 本地导出mindir ```shell -python export.py --config_path=[CONFIG_PATH] +python export.py --config_path=[CONFIG_PATH] --checkpoint_file_path=[model_ckpt_path] --file_name=[air_model_name] --file_format=AIR ``` ModelArts导出mindir diff --git a/model_zoo/official/cv/unet/preprocess_dataset.py b/model_zoo/official/cv/unet/preprocess_dataset.py index 494b348a83f..a630fa652e8 100644 --- a/model_zoo/official/cv/unet/preprocess_dataset.py +++ b/model_zoo/official/cv/unet/preprocess_dataset.py @@ -19,7 +19,7 @@ Images within one folder is an image, the image file named `"image.png"`, the ma import os import cv2 import numpy as np -from model_zoo.official.cv.unet.src.model_utils.config import config +from src.model_utils.config import config def annToMask(ann, height, width): """Convert annotation to RLE and then to binary mask.""" diff --git a/model_zoo/official/cv/vgg16/README.md b/model_zoo/official/cv/vgg16/README.md index f59c86dab3a..902fc41e3c9 100644 --- a/model_zoo/official/cv/vgg16/README.md +++ b/model_zoo/official/cv/vgg16/README.md @@ -27,6 +27,7 @@ - [Export MindIR](#export-mindir) - [Infer on Ascend310](#infer-on-ascend310) - [result](#result) + - [Post Training Quantization](#post-training-quantization) - [Model Description](#model-description) - [Performance](#performance) - [Training Performance](#training-performance) @@ -530,6 +531,40 @@ Inference result is saved in current path, you can find result like this in acc. 'acc': 0.92 ``` +### [Post Training Quantization](#contents) + +Relative executing script files reside in the directory "ascend310_quant_infer". Please implement following steps sequentially to complete post quantization. 
+Current quantization project bases on CIFAR-10 dataset. + +1. Generate data of .bin format required for AIR model inference at Ascend310 platform. + +```shell +python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH] +``` + +2. Export quantized AIR model. + +Post quantization of model requires special toolkits for exporting quantized AIR model. Please refer to [official website](https://www.hiascend.com/software/cann/community). + +```shell +python post_quant.py --config_path [YMAL CONFIG PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] +``` + +The quantized AIR file will be stored as "./results/vgg_quant.air". + +3. Implement inference at Ascend310 platform. + +```shell +# Ascend310 quant inference +bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] +``` + +Inference result is saved in current path, you can find result like this in acc.log file. + +```bash +'acc': 0.91 +``` + ## [Model Description](#contents) ### [Performance](#contents) diff --git a/model_zoo/official/cv/vgg16/README_CN.md b/model_zoo/official/cv/vgg16/README_CN.md index 4efbb025f44..740a1d3b57a 100644 --- a/model_zoo/official/cv/vgg16/README_CN.md +++ b/model_zoo/official/cv/vgg16/README_CN.md @@ -29,6 +29,7 @@ - [导出MindIR](#导出mindir) - [在Ascend310执行推ç†](#在ascend310执行推ç†) - [结果](#结果) + - [训练åŽé‡åŒ–推ç†](#训练åŽé‡åŒ–推ç†) - [模型æè¿°](#模型æè¿°) - [性能](#性能) - [训练性能](#训练性能) @@ -533,6 +534,39 @@ bash run_infer_310.sh [MINDIR_PATH] [DATASET_NAME] [DATASET_PATH] [NEED_PREPROCE 'acc': 0.92 ``` +### [训练åŽé‡åŒ–推ç†](#contents) + +训练åŽé‡åŒ–推ç†çš„相关执行脚本文件在"ascend310_quant_infer"目录下,ä¾æ¬¡æ‰§è¡Œä»¥ä¸‹æ­¥éª¤å®žçŽ°è®­ç»ƒåŽé‡åŒ–推ç†ã€‚本训练åŽé‡åŒ–工程基于CIFAR-10æ•°æ®é›†ã€‚ + +1ã€ç”ŸæˆAscend310å¹³å°AIR模型推ç†éœ€è¦çš„.binæ ¼å¼æ•°æ®ã€‚ + +```shell +python export_bin.py --config_path [YMAL CONFIG PATH] --data_dir [DATA DIR] --result_path [RESULT PATH] +``` + +2ã€å¯¼å‡ºè®­ç»ƒåŽé‡åŒ–çš„AIRæ ¼å¼æ¨¡åž‹ã€‚ + 
+导出训练åŽé‡åŒ–模型需è¦é…套的é‡åŒ–工具包,å‚考[官方地å€](https://www.hiascend.com/software/cann/community) + +```shell +python post_quant.py --config_path [YMAL_CONFIG_PATH] --ckpt_file [CKPT_PATH] --data_dir [DATASET PATH] +``` + +导出的模型会存储在./result/vgg_quant.air。 + +3ã€åœ¨Ascend310执行推ç†é‡åŒ–模型。 + +```shell +# Ascend310 inference +bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH] +``` + +推ç†ç»“æžœä¿å­˜åœ¨è„šæœ¬æ‰§è¡Œçš„当å‰è·¯å¾„,å¯ä»¥åœ¨acc.log中看到精度计算结果。 + +```bash +'acc': 0.91 +``` + ## 模型æè¿° ### 性能 diff --git a/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh b/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh index 31bba45de8e..56f958ea641 100644 --- a/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh +++ b/model_zoo/official/cv/vgg16/ascend310_quant_infer/run_quant_infer.sh @@ -16,7 +16,6 @@ if [ $# -lt 3 ]; then echo "Usage: bash run_quant_infer.sh [AIR_PATH] [DATA_PATH] [LABEL_PATH]" - echo "Example: bash run_quant_infer.sh ./vgg_quant.air ./00_data ./cifar10_label_ids.npy" exit 1 fi diff --git a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py index bc261c01a7e..82671e15e92 100755 --- a/model_zoo/official/cv/warpctc/src/warpctc_for_train.py +++ b/model_zoo/official/cv/warpctc/src/warpctc_for_train.py @@ -105,4 +105,5 @@ class TrainOneStepCellWithGradClip(Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py index b5cee676427..bd49548c69c 100644 --- a/model_zoo/official/cv/yolov3_darknet53/src/yolo.py +++ b/model_zoo/official/cv/yolov3_darknet53/src/yolo.py @@ -444,4 +444,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = 
self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py index 81a77d855f2..4e9747be0b8 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py +++ b/model_zoo/official/cv/yolov3_darknet53_quant/src/yolo.py @@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh index e0ccd093497..804d5dc39f2 100644 --- a/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh +++ b/model_zoo/official/cv/yolov3_resnet18/scripts/run_eval.sh @@ -27,4 +27,4 @@ export RANK_SIZE=1 export DEVICE_ID=$1 export RANK_ID=$1 -python eval.py --ckpt_path=$2 --mindrecord_dir=$3 --image_dir=$4 --anno_path=$5 +python eval.py --ckpt_path=$2 --eval_mindrecord_dir=$3 --image_dir=$4 --anno_path=$5 diff --git a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py index f1bfbe14550..91ac4081e4b 100644 --- a/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py +++ b/model_zoo/official/cv/yolov3_resnet18/src/yolov3.py @@ -672,7 +672,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class YoloBoxScores(nn.Cell): diff --git a/model_zoo/official/cv/yolov4/src/yolo.py b/model_zoo/official/cv/yolov4/src/yolo.py index f375f750b11..074016abeba 100644 --- a/model_zoo/official/cv/yolov4/src/yolo.py +++ b/model_zoo/official/cv/yolov4/src/yolo.py @@ -184,12 +184,12 @@ class YOLOv4(nn.Cell): con6 = 
self.conv6(con5) con7 = self.conv7(con6) - ups1 = P.ResizeNearestNeighbor((img_hight / 16, img_width / 16))(con7) + ups1 = P.ResizeNearestNeighbor((img_hight // 16, img_width // 16))(con7) con8 = self.conv8(feature_map2) con9 = self.concat((ups1, con8)) con10, _ = self.backblock0(con9) con11 = self.conv9(con10) - ups2 = P.ResizeNearestNeighbor((img_hight / 8, img_width / 8))(con11) + ups2 = P.ResizeNearestNeighbor((img_hight // 8, img_width // 8))(con11) con12 = self.conv10(feature_map1) con13 = self.concat((ups2, con12)) con14, small_object_output = self.backblock1(con13) @@ -515,7 +515,8 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class Giou(nn.Cell): diff --git a/model_zoo/official/cv/yolov5/README.md b/model_zoo/official/cv/yolov5/README.md index d666c644179..4203e2debf8 100644 --- a/model_zoo/official/cv/yolov5/README.md +++ b/model_zoo/official/cv/yolov5/README.md @@ -378,7 +378,7 @@ YOLOv5 on 118K images(The annotation and data format must be the same as coco201 | outputs | heatmaps | | Loss | 53 | | Speed | 1p 55 img/s 8p 440 img/s(shape=640) | -| Total time | 80h | +| Total time | 24h(8pcs) | | Checkpoint for Fine tuning | 58M (.ckpt file) | | Scripts | | diff --git a/model_zoo/official/cv/yolov5/README_CN.md b/model_zoo/official/cv/yolov5/README_CN.md index d1ac34047eb..c9d7a5861ec 100644 --- a/model_zoo/official/cv/yolov5/README_CN.md +++ b/model_zoo/official/cv/yolov5/README_CN.md @@ -45,22 +45,22 @@ YOLOv5作为先进的检测器,它比所有å¯ç”¨çš„替代检测器更快(FP - 目录结构如下,由用户定义目录和文件的å称: ```shell - ©À©¤©¤ dataset - ©À©¤©¤ YOLOv5 - ©À©¤©¤ annotations - ©¦ ©À©¤ train.json - ©¦ ©¸©¤ val.json - ©À©¤ images - ©À©¤ train - ©¦ ©¸©¤images - ©¦ ©À©¤picture1.jpg - ©¦ ©À©¤ ... - ©¦ ©¸©¤picturen.jpg - ©¸©¤ val - ©¸©¤images - ©À©¤picture1.jpg - ©À©¤ ... 
- ©¸©¤picturen.jpg + ├── dataset + ├── YOLOv5 + ├── annotations + │ ├─ train.json + │ └─ val.json + ├─ images + ├─ train + │ └─images + │ ├─picture1.jpg + │ ├─ ... + │ └─picturen.jpg + └─ val + └─images + ├─picture1.jpg + ├─ ... + └─picturen.jpg ``` 建议用户使用MS COCOæ•°æ®é›†æ¥ä½“验模型, @@ -125,34 +125,34 @@ bash run_eval.sh dataset/xxx checkpoint/xxx.ckpt ## [脚本和示例代ç ](#目录) ```python -©¸©¤yolov5 - ©À©¤README.md - ©À©¤mindspore_hub_conf.md # Mindspore Hubé…ç½® - ©À©¤ascend310_infer # 用于310æŽ¨ç† - ©À©¤scripts - ©À©¤run_standalone_train.sh # 在Ascend中å¯åŠ¨å•æœºè®­ç»ƒï¼ˆ1å¡ï¼‰ - ©À©¤run_distribute_train.sh # 在Ascend中å¯åŠ¨åˆ†å¸ƒå¼è®­ç»ƒï¼ˆ8å¡ï¼‰ - ©À©¤run_infer_310.sh # 在Ascend中å¯åŠ¨310æŽ¨ç† - ©¸©¤run_eval.sh # 在Ascend中å¯åŠ¨è¯„ä¼° - ©À©¤src - ©À©¤__init__.py # Pythonåˆå§‹åŒ–文件 - ©À©¤config.py # å‚æ•°é…ç½® - ©À©¤yolov5_backbone.py # 网络骨干 - ©À©¤distributed_sampler.py # æ•°æ®é›†è¿­ä»£å™¨ - ©À©¤initializer.py # å‚æ•°åˆå§‹åŒ–器 - ©À©¤logger.py # 日志函数 - ©À©¤loss.py # æŸå¤±å‡½æ•° - ©À©¤lr_scheduler.py # 生æˆå­¦ä¹ çŽ‡ - ©À©¤transforms.py # 预处ç†æ•°æ® - ©À©¤util.py # 工具函数 - ©À©¤yolo.py # YOLOv5网络 - ©À©¤yolo_dataset.py # 为YOLOv5创建数æ®é›† +└─yolov5 + ├─README.md + ├─mindspore_hub_conf.md # Mindspore Hubé…ç½® + ├─ascend310_infer # 用于310æŽ¨ç† + ├─scripts + ├─run_standalone_train.sh # 在Ascend中å¯åŠ¨å•æœºè®­ç»ƒï¼ˆ1å¡ï¼‰ + ├─run_distribute_train.sh # 在Ascend中å¯åŠ¨åˆ†å¸ƒå¼è®­ç»ƒï¼ˆ8å¡ï¼‰ + ├─run_infer_310.sh # 在Ascend中å¯åŠ¨310æŽ¨ç† + ├─run_eval.sh # 在Ascend中å¯åŠ¨è¯„ä¼° + ├─src + ├─__init__.py # Pythonåˆå§‹åŒ–文件 + ├─config.py # å‚æ•°é…ç½® + ├─yolov5_backbone.py # 网络骨干 + ├─distributed_sampler.py # æ•°æ®é›†è¿­ä»£å™¨ + ├─initializer.py # å‚æ•°åˆå§‹åŒ–器 + ├─logger.py # 日志函数 + ├─loss.py # æŸå¤±å‡½æ•° + ├─lr_scheduler.py # 生æˆå­¦ä¹ çŽ‡ + ├─transforms.py # 预处ç†æ•°æ® + ├─util.py # 工具函数 + ├─yolo.py # YOLOv5网络 + ├─yolo_dataset.py # 为YOLOv5创建数æ®é›† - ©À©¤eval.py # 评估验è¯ç»“æžœ - ©À©¤export.py # å°†MindSpore模型转æ¢ä¸ºAIR模型 - ©À©¤preprocess.py # 310推ç†å‰å¤„ç†è„šæœ¬ - ©À©¤postprocess.py # 310推ç†åŽå¤„ç†è„šæœ¬ - 
©¸©¤train.py # 训练网络 + ├─eval.py # 评估验è¯ç»“æžœ + ├─export.py # å°†MindSpore模型转æ¢ä¸ºAIR模型 + ├─preprocess.py # 310推ç†å‰å¤„ç†è„šæœ¬ + ├─postprocess.py # 310推ç†åŽå¤„ç†è„šæœ¬ + ├─train.py # 训练网络 ``` ## [脚本å‚æ•°](#目录) @@ -378,7 +378,7 @@ YOLOv5应用于118000张图åƒä¸Šï¼ˆæ ‡æ³¨å’Œæ•°æ®æ ¼å¼å¿…须与COCO 2017相 |输出|heatmaps | | æŸå¤± | 53 | |速度| 1å¡ï¼š55 img/sï¼›8å¡ï¼š440 img/s(shape=640)| -| 总时长 | 80å°æ—¶ | +| 总时长 | 24å°æ—¶(8å¡) | | 微调检查点 | 58M (.ckpt文件) | |脚本| | diff --git a/model_zoo/official/cv/yolov5/src/yolo.py b/model_zoo/official/cv/yolov5/src/yolo.py index c881fd6ce00..c514fb81c28 100644 --- a/model_zoo/official/cv/yolov5/src/yolo.py +++ b/model_zoo/official/cv/yolov5/src/yolo.py @@ -427,7 +427,8 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class Giou(nn.Cell): diff --git a/model_zoo/official/gnn/gat/src/utils.py b/model_zoo/official/gnn/gat/src/utils.py index c7bae8c8b86..441ef7c48ee 100644 --- a/model_zoo/official/gnn/gat/src/utils.py +++ b/model_zoo/official/gnn/gat/src/utils.py @@ -18,7 +18,6 @@ from mindspore.common.parameter import ParameterTuple from mindspore import Tensor from mindspore.common import dtype as mstype from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P @@ -150,7 +149,8 @@ class TrainOneStepCell(nn.Cell): loss = self.network(feature, biases) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(feature, biases, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class TrainGAT(nn.Cell): diff --git a/model_zoo/official/nlp/bert/README.md b/model_zoo/official/nlp/bert/README.md index ff091011be9..c2753423fad 100644 --- a/model_zoo/official/nlp/bert/README.md +++ b/model_zoo/official/nlp/bert/README.md @@ -786,3 +786,15 
@@ In run_pretrain.py, we set a random seed to make sure that each node has the sam # [ModelZoo Homepage](#contents) Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). + +# FAQ + +- **Q: How to resolve the continually overflow?** + + **A**: Continually overflow is usually caused by using too high learning rate. + You could try lower `learning_rate` to use lower base learning rate or higher `power` to make learning rate decrease faster in config yaml. + +- **Q: Why the training process failed with error for the shape can not match?** + **A**: This is usually caused by the config `seq_length` of model can't match the dataset. You could check and modified the `seq_length` in yaml config according to the dataset you used. + The parameter of model won't change with `seq_length`, the shapes of parameter only depends on model config `max_position_embeddings`. + diff --git a/model_zoo/official/nlp/bert/README_CN.md b/model_zoo/official/nlp/bert/README_CN.md index 76718d870fb..26cb64eb178 100644 --- a/model_zoo/official/nlp/bert/README_CN.md +++ b/model_zoo/official/nlp/bert/README_CN.md @@ -744,3 +744,11 @@ run_pretrain.py中设置了éšæœºç§å­ï¼Œç¡®ä¿åˆ†å¸ƒå¼è®­ç»ƒä¸­æ¯ä¸ªèŠ‚点 # ModelZoo主页 请æµè§ˆå®˜ç½‘[主页](https://gitee.com/mindspore/mindspore/tree/master/model_zoo)。 + +# FAQ + +- **Q: è¿è¡Œè¿‡ç¨‹ä¸­å‘生æŒç»­æº¢å‡ºæ€Žä¹ˆåŠžï¼Ÿ** + **A**: æŒç»­æº¢å‡ºé€šå¸¸æ˜¯å› ä¸ºä½¿ç”¨äº†è¾ƒé«˜çš„学习率导致训练ä¸æ”¶æ•›ã€‚å¯ä»¥è€ƒè™‘修改yamlé…置文件中的å‚数,调低`learning_rate`æ¥é™ä½Žåˆå§‹å­¦ä¹ çŽ‡æˆ–æ高`power`加速学习率衰å‡ã€‚ + +- **Q: è¿è¡ŒæŠ¥é”™shapeä¸åŒ¹é…是什么问题?** + **A**: Bert模型中的shapeä¸åŒ¹é…通常是因为模型å‚æ•°é…置和使用的数æ®é›†è§„æ ¼ä¸åŒ¹é…,主è¦æ˜¯å¥é•¿é—®é¢˜ï¼Œå¯ä»¥è€ƒè™‘修改`seq_length`å‚æ•°æ¥åŒ¹é…所使用的具体数æ®é›†ã€‚改å˜è¯¥å‚æ•°ä¸å½±å“æƒé‡çš„规格,æƒé‡çš„规格仅与`max_position_embeddings`å‚数有关。 \ No newline at end of file diff --git a/model_zoo/official/nlp/bert/src/bert_for_finetune.py b/model_zoo/official/nlp/bert/src/bert_for_finetune.py index 210339ccd01..b59f310cbd7 100644 --- 
a/model_zoo/official/nlp/bert/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/bert/src/bert_for_finetune.py @@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertSquadCell(nn.Cell): """ @@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py index 36fca77faef..433ef03c99b 100644 --- a/model_zoo/official/nlp/bert/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -475,9 +472,8 @@ 
class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py index 58770011b75..6b845d28da5 100644 --- a/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/bert_thor/src/bert_for_pre_training.py @@ -311,8 +311,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): if self.enable_clip_grad: grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss 
grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -400,12 +400,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -475,9 +472,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -634,9 +630,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -653,13 +647,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/cpm/src/cpm_train.py 
b/model_zoo/official/nlp/cpm/src/cpm_train.py index 3087c3979a0..8c50abe4024 100644 --- a/model_zoo/official/nlp/cpm/src/cpm_train.py +++ b/model_zoo/official/nlp/cpm/src/cpm_train.py @@ -254,11 +254,9 @@ class CPMTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): cond = self.get_overflow_status(status, grads) overflow = self.process_loss_scale(cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - return F.depend(loss, succ), cond, scaling_sens + if not overflow: + self.optimizer(grads) + return loss, cond, scaling_sens cast = P.Cast() @@ -352,7 +350,6 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): accu_overflow = self.select(overflow, self.one, self.zero) if self.accumulation: - succ = False self.accu_overflow = accu_overflow else: my_zero = F.depend(self.zero, accu_overflow) @@ -378,9 +375,7 @@ class CPMTrainAccuStepsWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = self.reshape(overflow, (())) overflow = self.process_loss_scale(overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - return F.depend(loss, succ), overflow, scaling_sens + return loss, overflow, scaling_sens diff --git a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py index 16a8da5043b..265a6bb7584 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_finetune.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_finetune.py @@ -152,12 +152,9 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertSquadCell(nn.Cell): """ @@ -245,12 +242,9 @@ class BertSquadCell(nn.Cell): overflow = cond if sens is None: overflow = 
self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py index c99c9318f4e..e75e928c97c 100644 --- a/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py +++ b/model_zoo/official/nlp/dgu/src/bert_for_pre_training.py @@ -308,8 +308,8 @@ class BertTrainOneStepCell(nn.TrainOneStepCell): mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -397,12 +397,9 @@ class BertTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell): @@ -472,9 +469,8 @@ class BertTrainOneStepWithLossScaleCellForAdam(nn.TrainOneStepWithLossScaleCell) overflow = cond if self.loss_scaling_manager is not None: overflow = self.loss_scaling_manager(scaling_sens, cond) - succ = self.optimizer(grads, overflow) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + self.optimizer(grads, overflow) + return (loss, cond, scaling_sens) cast = P.Cast() add_grads = C.MultitypeFuncGraph("add_grads") @@ -631,9 +627,7 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) 
self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -650,13 +644,10 @@ class BertTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) class BertTrainAccumulationAllReduceEachWithLossScaleCell(nn.Cell): diff --git a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py index a951bc65eb7..93b6010517f 100755 --- a/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py +++ b/model_zoo/official/nlp/emotect/src/ernie_for_finetune.py @@ -172,12 +172,9 @@ class ErnieFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class ErnieCLS(nn.Cell): """ diff --git a/model_zoo/official/nlp/fasttext/src/fasttext_train.py b/model_zoo/official/nlp/fasttext/src/fasttext_train.py index 86c0d6fbf04..cddd78227f0 100644 --- a/model_zoo/official/nlp/fasttext/src/fasttext_train.py +++ b/model_zoo/official/nlp/fasttext/src/fasttext_train.py @@ -138,5 +138,5 @@ class FastTextTrainOneStepCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git 
a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py index 76d5aa0502f..2ec0b80a033 100644 --- a/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py +++ b/model_zoo/official/nlp/gnmt_v2/src/gnmt_model/gnmt_for_train.py @@ -284,9 +284,6 @@ class GNMTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py index b995daf283f..615c728f061 100644 --- a/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py +++ b/model_zoo/official/nlp/gpt/src/gpt_wrapcell.py @@ -151,9 +151,6 @@ class GPTTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/gru/README.md b/model_zoo/official/nlp/gru/README.md index 652b72de0f2..421ace5a515 100644 --- a/model_zoo/official/nlp/gru/README.md +++ b/model_zoo/official/nlp/gru/README.md @@ -46,7 +46,7 @@ In this model, we use the Multi30K dataset as our train and test dataset.As trai # [Environment Requirements](#content) -- Hardware(Ascend) +- Hardware(Ascend or GPU) - Prepare hardware environment with Ascend processor. 
- Framework - [MindSpore](https://gitee.com/mindspore/mindspore) @@ -81,15 +81,27 @@ nltk.download() After dataset preparation, you can start training and evaluation as follows: ```bash - # run training example cd ./scripts - bash run_standalone_train.sh [TRAIN_DATASET_PATH] + # download dataset + bash download_dataset.sh + + # preprocess dataset + bash preprocess.sh [DATASET_PATH] + + # create mindrecord + bash create_dataset.sh [DATASET_PATH] [DATASET_PATH] + + # run training example + bash run_standalone_train_{platform}.sh [TRAIN_DATASET_PATH] # run distributed training example - bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH] + bash run_distribute_train_{platform}.sh [RANK_TABLE_FILE] [TRAIN_DATASET_PATH] + # platform: ascend or gpu + # do not need [RANK_TABLE_FILE] if you use GPU # run evaluation example - bash run_eval.sh [CKPT_FILE] [DATASET_PATH] + bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH] + # platform: ascend or gpu ``` - Running on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training as follows) @@ -158,7 +170,6 @@ The GRU network script and code result are as follows: │ ├──local_adapter.py // Local adapter │ ├──moxing_adapter.py // Moxing adapter for ModelArts ├── src - | ├──gru.py // gru cell architecture. │ ├──create_data.py // Dataset preparation. │ ├──dataset.py // Dataset loader to feed into model. │ ├──gru_for_infer.py // GRU eval model architecture. @@ -167,16 +178,24 @@ The GRU network script and code result are as follows: │ ├──lr_schedule.py // Learning rate scheduler. │ ├──parse_output.py // Parse output file. │ ├──preprocess.py // Dataset preprocess. + | ├──rnn_cells.py // rnn cell architecture. + | ├──rnns.py // rnn layer architecture. │ ├──seq2seq.py // Seq2seq architecture. + | ├──utils.py // utils for rnn. │ ├──tokenization.py // tokenization for the dataset. 
│ ├──weight_init.py // Initialize weights in the net. ├── scripts │ ├──create_dataset.sh // shell script for create dataset. + │ ├──download_dataset.sh // shell script for download dataset. │ ├──parse_output.sh // shell script for parse eval output file to calculate BLEU. │ ├──preprocess.sh // shell script for preprocess dataset. - │ ├──run_distributed_train.sh // shell script for distributed train on ascend. - │ ├──run_eval.sh // shell script for standalone eval on ascend. - │ ├──run_standalone_train.sh // shell script for standalone eval on ascend. + │ ├──run_distributed_train_ascend.sh // shell script for distributed train on ascend. + │ ├──run_distributed_train_gpu.sh // shell script for distributed train on gpu. + │ ├──run_eval_ascend.sh // shell script for standalone eval on ascend. + │ ├──run_eval_gpu.sh // shell script for standalone eval on gpu. + │ ├──run_infer_310.sh // shell script for 310 inference. + │ ├──run_standalone_train_ascend.sh // shell script for standalone eval on ascend. + │ ├──run_standalone_train_gpu.sh // shell script for standalone eval on gpu. ├── default_config.yaml // Configurations ├── postprocess.py // GRU postprocess script. ├── preprocess.py // GRU preprocess script. @@ -188,7 +207,14 @@ The GRU network script and code result are as follows: ## [Dataset Preparation](#content) -Firstly, we should download the dataset from the WMT16 official net.After downloading the Multi30k dataset file, we get six dataset file, which is show as below.And we should in put the in same directory. +Firstly, we should download the dataset from the WMT16 official net. + +```bash +cd scripts +bash download_dataset.sh +``` + +After downloading the Multi30k dataset file, we get six dataset file, which is show as below.And we should in put the in same directory. ```text train.de @@ -250,14 +276,17 @@ Parameters for both training and evaluation can be set in config.py. 
All the dat ```bash cd ./scripts - bash run_standalone_train.sh [DATASET_PATH] + bash run_standalone_train_{platform}.sh [DATASET_PATH] + # platform: ascend or gpu ``` - Running scripts for distributed training of GRU. Task training on multiple device and run the following command in bash to be executed in `scripts/`: ``` bash cd ./scripts - bash run_distributed_train.sh [RANK_TABLE_PATH] [DATASET_PATH] + bash run_distributed_train_{platform}.sh [RANK_TABLE_PATH] [DATASET_PATH] + # platform: ascend or gpu + # do not need [RANK_TABLE_FILE] if you use GPU ``` ## [Inference Process](#content) @@ -266,7 +295,8 @@ Parameters for both training and evaluation can be set in config.py. All the dat ``` bash cd ./scripts - bash run_eval.sh [CKPT_FILE] [DATASET_PATH] + bash run_eval_{platform}.sh [CKPT_FILE] [DATASET_PATH] + # platform: ascend or gpu ``` - After evalulation, we will get eval/target.txt and eval/output.txt.Then we can use scripts/parse_output.sh to get the translation. @@ -354,35 +384,35 @@ perl multi-bleu.perl target.txt.forbleu < output.txt.forbleu ### Training Performance -| Parameters | Ascend | -| -------------------------- | -------------------------------------------------------------- | -| Resource | Ascend 910; OS Euler2.8 | -| uploaded Date | 01/18/2021 (month/day/year) | -| MindSpore Version | 1.1.0 | -| Dataset | Multi30k Dataset | -| Training Parameters | epoch=30, batch_size=16 | -| Optimizer | Adam | -| Loss Function | NLLLoss | -| outputs | probability | -| Speed | 50ms/step (1pcs) | -| Epoch Time | 13.4s (1pcs) | -| Loss | 2.5984 | -| Params (M) | 21 | -| Checkpoint for inference | 272M (.ckpt file) | -| Scripts | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) | +| Parameters | Ascend | GPU | +| -------------------------- | ----------------------------- |---------------------------| +| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 | +| uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 
(month/day/year) | +| MindSpore Version | 1.2.0 |1.2.0 | +| Dataset | Multi30k Dataset | Multi30k Dataset | +| Training Parameters | epoch=30, batch_size=16 | epoch=30, batch_size=16 | +| Optimizer | Adam | Adam | +| Loss Function | NLLLoss | NLLLoss | +| outputs | probability | probability | +| Speed | 35ms/step (1pcs) | 200ms/step (1pcs) | +| Epoch Time | 64.4s (1pcs) | 361.5s (1pcs) | +| Loss | 3.86888 |2.533958 | +| Params (M) | 21 | 21 | +| Checkpoint for inference | 272M (.ckpt file) | 272M (.ckpt file) | +| Scripts | [gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) |[gru](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/nlp/gru) | ### Inference Performance -| Parameters | Ascend | -| ------------------- | --------------------------- | -| Resource | Ascend 910; OS Euler2.8 | -| Uploaded Date | 01/18/2020 (month/day/year) | -| MindSpore Version | 1.1.0 | -| Dataset | Multi30K | -| batch_size | 1 | -| outputs | label index | -| Accuracy | BLEU: 30.30 | -| Model for inference | 272M (.ckpt file) | +| Parameters | Ascend | GPU | +| ------------------- | --------------------------- |---------------------------| +| Resource | Ascend 910; OS Euler2.8 | GTX1080Ti, Ubuntu 18.04 | +| Uploaded Date | 06/05/2021 (month/day/year) | 06/05/2021 (month/day/year)| +| MindSpore Version | 1.2.0 | 1.2.0 | +| Dataset | Multi30K | Multi30K | +| batch_size | 1 | 1 | +| outputs | label index | label index | +| Accuracy | BLEU: 31.26 | BLEU: 29.30 | +| Model for inference | 272M (.ckpt file) | 272M (.ckpt file) | # [Random Situation Description](#content) diff --git a/model_zoo/official/nlp/gru/default_config.yaml b/model_zoo/official/nlp/gru/default_config.yaml index c8599cce906..4c1ffebb54b 100644 --- a/model_zoo/official/nlp/gru/default_config.yaml +++ b/model_zoo/official/nlp/gru/default_config.yaml @@ -36,6 +36,8 @@ scale_factor: 2 scale_window: 2000 warmup_ratio: 0.333333 teacher_force_ratio: 0.5 +compute_type: 
mstype.float16 +dtype: mstype.float32 run_distribute: False dataset_path: "" diff --git a/model_zoo/official/nlp/gru/model_utils/config.py b/model_zoo/official/nlp/gru/model_utils/config.py index ad0d7497a8e..42cde250dff 100644 --- a/model_zoo/official/nlp/gru/model_utils/config.py +++ b/model_zoo/official/nlp/gru/model_utils/config.py @@ -20,6 +20,8 @@ import ast import argparse from pprint import pformat import yaml +import mindspore.common.dtype as mstype + class Config: """ @@ -108,6 +110,24 @@ def merge(args, cfg): cfg[item] = args_var[item] return cfg +def parse_dtype(dtype): + if dtype not in ["mstype.float32", "mstype.float16"]: + raise ValueError("Not supported dtype") + + if dtype == "mstype.float32": + return mstype.float32 + if dtype == "mstype.float16": + return mstype.float16 + return None + +def extra_operations(cfg): + """ + Do extra work on config + Args: + config: Object after instantiation of class 'Config'. + """ + cfg.dtype = parse_dtype(cfg.dtype) + cfg.compute_type = parse_dtype(cfg.compute_type) def get_config(): """ @@ -121,6 +141,8 @@ def get_config(): default, helper, choices = parse_yaml(path_args.config_path) args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path) final_config = merge(args, default) - return Config(final_config) + final_config = Config(final_config) + extra_operations(final_config) + return final_config config = get_config() diff --git a/model_zoo/official/nlp/gru/scripts/create_dataset.sh b/model_zoo/official/nlp/gru/scripts/create_dataset.sh index 6d6521b9ab3..9626cd7d1d4 100644 --- a/model_zoo/official/nlp/gru/scripts/create_dataset.sh +++ b/model_zoo/official/nlp/gru/scripts/create_dataset.sh @@ -17,7 +17,6 @@ echo "========================================================================== echo "Please run the script as: " echo "sh create_dataset.sh DATASET_PATH OUTPUT_PATH" echo "for example: sh create_dataset.sh /path/multi30k/ 
/path/multi30k/mindrecord/" -echo "DATASET_NAME including ag, dbpedia, and yelp_p" echo "It is better to use absolute path." echo "==============================================================================================================" ulimit -u unlimited diff --git a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh index bc99c693497..c5e7f87b48f 100644 --- a/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh +++ b/model_zoo/official/nlp/gru/scripts/run_distribute_train_ascend.sh @@ -47,6 +47,7 @@ exit 1 fi ulimit -u unlimited +export DEVICE_TARGET="Ascend" export DEVICE_NUM=8 export RANK_SIZE=8 export RANK_TABLE_FILE=$PATH1 @@ -65,6 +66,6 @@ do cd ./train_parallel$i || exit echo "start training for rank $RANK_ID, device $DEVICE_ID" env > env.log - python train.py --run_distribute=True --dataset_path=$DATASET_PATH &> log & + python train.py --device_target=$DEVICE_TARGET --run_distribute=True --dataset_path=$DATASET_PATH &> log & cd .. -done \ No newline at end of file +done diff --git a/model_zoo/official/nlp/gru/src/gru_for_train.py b/model_zoo/official/nlp/gru/src/gru_for_train.py index b60cb2d7e9f..647eed4d101 100644 --- a/model_zoo/official/nlp/gru/src/gru_for_train.py +++ b/model_zoo/official/nlp/gru/src/gru_for_train.py @@ -234,9 +234,51 @@ class GRUTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) + +class GRUTrainOneStepCell(nn.TrainOneStepCell): + """ + Encapsulation class of GRU network training. + Append an optimizer to the training network after that the construct + function can be called to create the backward graph. 
+ Args: + network (Cell): The training network. Note that loss function should have been added. + optimizer (Optimizer): Optimizer for updating the weights. + sens (Number): The adjust parameter. Default: 1.0. + enable_clip_grad (boolean): If True, clip gradients in GRUTrainOneStepCell. Default: True. + """ + + def __init__(self, network, optimizer, sens=1.0, enable_clip_grad=True): + super(GRUTrainOneStepCell, self).__init__(network, optimizer, sens) + self.cast = P.Cast() + self.hyper_map = C.HyperMap() + self.clip_gradients = ClipGradients() + self.enable_clip_grad = enable_clip_grad + + def set_sens(self, value): + self.sens = value + + def construct(self, + encoder_inputs, + decoder_inputs, + teacher_force, + sens=None): + """Defines the computation performed.""" + + weights = self.weights + loss = self.network(encoder_inputs, + decoder_inputs, + teacher_force) + + grads = self.grad(self.network, weights)(encoder_inputs, + decoder_inputs, + teacher_force, + self.cast(F.tuple_to_array((self.sens,)), + mstype.float32)) + if self.enable_clip_grad: + grads = self.clip_gradients(grads, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE) + grads = self.grad_reducer(grads) + succ = self.optimizer(grads) + return F.depend(loss, succ) diff --git a/model_zoo/official/nlp/gru/src/seq2seq.py b/model_zoo/official/nlp/gru/src/seq2seq.py index 06ef8daa15f..97c117d0101 100644 --- a/model_zoo/official/nlp/gru/src/seq2seq.py +++ b/model_zoo/official/nlp/gru/src/seq2seq.py @@ -18,8 +18,8 @@ from mindspore import Tensor import mindspore.nn as nn import mindspore.ops.operations as P import mindspore.common.dtype as mstype -from src.gru import BidirectionGRU, GRU from src.weight_init import dense_default_state +from src.rnns import GRU class Attention(nn.Cell): ''' @@ -29,8 +29,8 @@ class Attention(nn.Cell): super(Attention, self).__init__() self.text_len = config.max_length self.attn = nn.Dense(in_channels=config.hidden_size * 3, - out_channels=config.hidden_size).to_float(mstype.float16) - 
self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(mstype.float16) + out_channels=config.hidden_size).to_float(config.compute_type) + self.fc = nn.Dense(config.hidden_size, 1, has_bias=False).to_float(config.compute_type) self.expandims = P.ExpandDims() self.tanh = P.Tanh() self.softmax = P.Softmax() @@ -39,6 +39,9 @@ class Attention(nn.Cell): self.concat = P.Concat(axis=2) self.squeeze = P.Squeeze(axis=2) self.cast = P.Cast() + self.dtype = config.dtype + self.compute_type = config.compute_type + def construct(self, hidden, encoder_outputs): ''' Attention construction @@ -58,9 +61,9 @@ class Attention(nn.Cell): energy = self.tanh(out) attention = self.fc(energy) attention = self.squeeze(attention) - attention = self.cast(attention, mstype.float32) + attention = self.cast(attention, self.dtype) attention = self.softmax(attention) - attention = self.cast(attention, mstype.float16) + attention = self.cast(attention, self.compute_type) return attention class Encoder(nn.Cell): @@ -76,8 +79,9 @@ class Encoder(nn.Cell): self.vocab_size = config.src_vocab_size self.embedding_size = config.encoder_embedding_size self.embedding = nn.Embedding(self.vocab_size, self.embedding_size) - self.rnn = BidirectionGRU(config, is_training=is_training).to_float(mstype.float16) - self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(mstype.float16) + self.rnn = GRU(input_size=self.embedding_size, \ + hidden_size=self.hidden_size, bidirectional=True).to_float(config.compute_type) + self.fc = nn.Dense(2*self.hidden_size, self.hidden_size).to_float(config.compute_type) self.shape = P.Shape() self.transpose = P.Transpose() self.p = P.Print() @@ -85,6 +89,8 @@ class Encoder(nn.Cell): self.text_len = config.max_length self.squeeze = P.Squeeze(axis=0) self.tanh = P.Tanh() + self.concat = P.Concat(2) + self.dtype = config.dtype def construct(self, src): ''' @@ -99,8 +105,10 @@ class Encoder(nn.Cell): ''' embedded = self.embedding(src) embedded = 
self.transpose(embedded, (1, 0, 2)) - embedded = self.cast(embedded, mstype.float16) + embedded = self.cast(embedded, self.dtype) output, hidden = self.rnn(embedded) + hidden = self.transpose(hidden, (1, 0, 2)) + hidden = hidden.view(hidden.shape[0], -1) hidden = self.fc(hidden) hidden = self.tanh(hidden) return output, hidden @@ -118,7 +126,8 @@ class Decoder(nn.Cell): self.vocab_size = config.trg_vocab_size self.embedding_size = config.decoder_embedding_size self.embedding = nn.Embedding(self.vocab_size, self.embedding_size) - self.rnn = GRU(config, is_training=is_training).to_float(mstype.float16) + self.rnn = GRU(input_size=self.embedding_size + self.hidden_size*2, \ + hidden_size=self.hidden_size).to_float(config.compute_type) self.text_len = config.max_length self.shape = P.Shape() self.transpose = P.Transpose() @@ -130,11 +139,13 @@ class Decoder(nn.Cell): self.log_softmax = P.LogSoftmax(axis=1) weight, bias = dense_default_state(self.embedding_size+self.hidden_size*3, self.vocab_size) self.fc = nn.Dense(self.embedding_size+self.hidden_size*3, self.vocab_size, - weight_init=weight, bias_init=bias).to_float(mstype.float16) + weight_init=weight, bias_init=bias).to_float(config.compute_type) self.attention = Attention(config) self.bmm = P.BatchMatMul() self.dropout = nn.Dropout(0.7) self.expandims = P.ExpandDims() + self.dtype = config.dtype + def construct(self, inputs, hidden, encoder_outputs): ''' Decoder construction @@ -150,21 +161,22 @@ class Decoder(nn.Cell): ''' embedded = self.embedding(inputs) embedded = self.transpose(embedded, (1, 0, 2)) - embedded = self.cast(embedded, mstype.float16) + embedded = self.cast(embedded, self.dtype) attn = self.attention(hidden, encoder_outputs) attn = self.expandims(attn, 1) encoder_outputs = self.transpose(encoder_outputs, (1, 0, 2)) weight = self.bmm(attn, encoder_outputs) weight = self.transpose(weight, (1, 0, 2)) + weight = self.cast(weight, self.dtype) emd_con = self.concat((embedded, weight)) output, hidden = 
self.rnn(emd_con) + output = self.cast(output, self.dtype) out = self.concat((embedded, output, weight)) out = self.squeeze(out) hidden = self.squeeze(hidden) prediction = self.fc(out) prediction = self.dropout(prediction) - prediction = self.cast(prediction, mstype.float32) - prediction = self.cast(prediction, mstype.float32) + prediction = self.cast(prediction, self.dtype) pred_prob = self.log_softmax(prediction) pred_prob = self.expandims(pred_prob, 0) return pred_prob, hidden diff --git a/model_zoo/official/nlp/gru/src/weight_init.py b/model_zoo/official/nlp/gru/src/weight_init.py index 48a1ad2460e..1f92efc14f0 100644 --- a/model_zoo/official/nlp/gru/src/weight_init.py +++ b/model_zoo/official/nlp/gru/src/weight_init.py @@ -15,21 +15,7 @@ """weight init""" import math import numpy as np -from mindspore import Tensor, Parameter - -def gru_default_state(batch_size, input_size, hidden_size, num_layers=1, bidirectional=False): - '''Weight init for gru cell''' - stdv = 1 / math.sqrt(hidden_size) - weight_i = Parameter(Tensor( - np.random.uniform(-stdv, stdv, (input_size, 3*hidden_size)).astype(np.float32)), name='weight_i') - weight_h = Parameter(Tensor( - np.random.uniform(-stdv, stdv, (hidden_size, 3*hidden_size)).astype(np.float32)), name='weight_h') - bias_i = Parameter(Tensor( - np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_i') - bias_h = Parameter(Tensor( - np.random.uniform(-stdv, stdv, (3*hidden_size)).astype(np.float32)), name='bias_h') - init_h = Tensor(np.zeros((batch_size, hidden_size)).astype(np.float16)) - return weight_i, weight_h, bias_i, bias_h, init_h +from mindspore import Tensor def dense_default_state(in_channel, out_channel): '''Weight init for dense cell''' diff --git a/model_zoo/official/nlp/gru/train.py b/model_zoo/official/nlp/gru/train.py index de219f93a4b..2d795e9ff77 100644 --- a/model_zoo/official/nlp/gru/train.py +++ b/model_zoo/official/nlp/gru/train.py @@ -15,17 +15,19 @@ """train script""" import os 
import time +import mindspore.common.dtype as mstype from mindspore.context import ParallelMode from mindspore import context -from mindspore.communication.management import init +from mindspore.communication.management import init, get_rank from mindspore.train.callback import Callback, CheckpointConfig, ModelCheckpoint, TimeMonitor from mindspore.train import Model from mindspore.common import set_seed from mindspore.train.loss_scale_manager import DynamicLossScaleManager from mindspore.nn.optim import Adam +from mindspore import log as logger from src.seq2seq import Seq2Seq -from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell +from src.gru_for_train import GRUWithLossCell, GRUTrainOneStepWithLossScaleCell, GRUTrainOneStepCell from src.dataset import create_gru_dataset from src.lr_schedule import dynamic_lr @@ -72,13 +74,20 @@ class LossCallBack(Callback): cb_params.cur_step_num, str(cb_params.net_outputs))) with open("./loss_{}.log".format(self.rank_id), "a+") as f: - f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format( - time_stamp_current - time_stamp_first, - cb_params.cur_epoch_num, - cb_params.cur_step_num, - str(cb_params.net_outputs[0].asnumpy()), - str(cb_params.net_outputs[1].asnumpy()), - str(cb_params.net_outputs[2].asnumpy()))) + if context.get_context("device_target") == "Ascend": + f.write("time: {}, epoch: {}, step: {}, loss: {}, overflow: {}, loss_scale: {}".format( + time_stamp_current - time_stamp_first, + cb_params.cur_epoch_num, + cb_params.cur_step_num, + str(cb_params.net_outputs[0].asnumpy()), + str(cb_params.net_outputs[1].asnumpy()), + str(cb_params.net_outputs[2].asnumpy()))) + else: + f.write("time: {}, epoch: {}, step: {}, loss: {}".format( + time_stamp_current - time_stamp_first, + cb_params.cur_epoch_num, + cb_params.cur_step_num, + str(cb_params.net_outputs.asnumpy()))) f.write('\n') @@ -139,13 +148,32 @@ def modelarts_pre_process(): 
@moxing_wrapper(pre_process=modelarts_pre_process) def run_train(): """run train.""" - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id(), save_graphs=False) - rank = get_rank_id() + context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, + device_id=get_device_id(), save_graphs=False) + if config.device_target == "GPU": + if config.compute_type != mstype.float32: + logger.warning('GPU only support fp32 temporarily, run with fp32.') + config.compute_type = mstype.float32 + device_num = get_device_num() if config.run_distribute: - context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, - gradients_mean=True) - init() + if config.device_target == "Ascend": + rank = get_rank_id() + context.set_auto_parallel_context(device_num=device_num, + parallel_mode=ParallelMode.DATA_PARALLEL, + gradients_mean=True) + init() + elif config.device_target == "GPU": + rank = get_rank() + init("nccl") + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, + gradients_mean=True) + else: + raise ValueError(config.device_target) + else: + rank = 0 + device_num = 1 + mindrecord_file = config.dataset_path if not os.path.exists(mindrecord_file): print("dataset file {} not exists, please check!".format(mindrecord_file)) @@ -162,8 +190,10 @@ def run_train(): scale_factor=config.scale_factor, scale_window=config.scale_window) update_cell = scale_manager.get_update_cell() - netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell) - + if config.device_target == "Ascend": + netwithgrads = GRUTrainOneStepWithLossScaleCell(network, opt, update_cell) + else: + netwithgrads = GRUTrainOneStepCell(network, opt) time_cb = TimeMonitor(data_size=dataset_size) loss_cb = LossCallBack(rank_id=rank) cb = [time_cb, loss_cb] @@ -171,10 +201,10 @@ def run_train(): if config.save_checkpoint: ckpt_config = CheckpointConfig(save_checkpoint_steps=config.ckpt_epoch * 
dataset_size, keep_checkpoint_max=config.keep_checkpoint_max) - save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(get_rank_id()) + '/') + save_ckpt_path = os.path.join(config.outputs_dir, 'ckpt_' + str(rank) + '/') ckpt_cb = ModelCheckpoint(config=ckpt_config, directory=save_ckpt_path, - prefix='{}'.format(get_rank_id())) + prefix='{}'.format(rank)) cb += [ckpt_cb] netwithgrads.set_train(True) model = Model(netwithgrads) diff --git a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py index 23ff47d1a14..2164e17c1dc 100644 --- a/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py +++ b/model_zoo/official/nlp/mass/src/transformer/transformer_for_train.py @@ -368,10 +368,7 @@ class TransformerTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/official/nlp/pangu_alpha/src/dataset.py b/model_zoo/official/nlp/pangu_alpha/src/dataset.py index b8966d870c4..1ebafc072fd 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/dataset.py +++ b/model_zoo/official/nlp/pangu_alpha/src/dataset.py @@ -67,7 +67,7 @@ def get_input_data_batch_slice_map(input_ids, eod_id, rank, dis, eod_reset): def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_batch=False, data_start_index=0, - eod_reset=False, eod_id=9, column_name='input_ids', epoch=1): + eod_reset=False, eod_id=9, column_name='input_ids', epoch=1, num_samples=None): """ Create dataset @@ -99,7 +99,8 @@ def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_ data.sort() # Load data files and preprocess - dataset = ds.MindDataset(data[data_start_index:], 
columns_list=[column_name], shuffle=False) + dataset = ds.MindDataset(data[data_start_index:], columns_list=[column_name], + shuffle=False, num_samples=num_samples) type_cast_op = C.TypeCast(mstype.int32) type_cast_op_float = C.TypeCast(mstype.float16) diff --git a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py index 4ea05370aa2..92d4100ea8a 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py +++ b/model_zoo/official/nlp/pangu_alpha/src/pangu_alpha_wrapcell.py @@ -147,11 +147,9 @@ class PanguAlphaTrainOneStepWithLossScaleCell(TrainOneStepWithLossScaleCell): overflow = self.process_loss_scale(cond) # If overflow, surpass weights update # if not, update weights - if overflow: - succ = False - else: - succ = self.optimizer(grads) - return F.depend(loss, succ), cond, scaling_sens + if not overflow: + self.optimizer(grads) + return loss, cond, scaling_sens class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): """ @@ -255,9 +253,6 @@ class PanguAlphaTrainPipelineWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, overflow, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, overflow, scaling_sens) diff --git a/model_zoo/official/nlp/pangu_alpha/src/utils.py b/model_zoo/official/nlp/pangu_alpha/src/utils.py index 63a6a73cd09..83465a8d3f1 100644 --- a/model_zoo/official/nlp/pangu_alpha/src/utils.py +++ b/model_zoo/official/nlp/pangu_alpha/src/utils.py @@ -405,6 +405,10 @@ def get_args(inference=False): required=False, default=None, help='Location of data.') + parser.add_argument('--eval_data_url', + required=False, + default=None, + help='Location of eval data.') parser.add_argument('--train_url', required=False, default=None, @@ -448,6 +452,14 @@ def 
get_args(inference=False): type=int, default=0, help="Enable incremental training. Default 0.") + parser.add_argument("--train_and_eval_mode", + type=int, + default=0, + help="Enable evaling while training. Default 0.") + parser.add_argument("--eval_steps", + type=int, + default=10, + help="The eval step in train and eval mode. Default 10.") add_training_params(parser) if inference: add_inference_params(parser) diff --git a/model_zoo/official/nlp/pangu_alpha/train.py b/model_zoo/official/nlp/pangu_alpha/train.py index fd2a83a3784..e184260cc7a 100644 --- a/model_zoo/official/nlp/pangu_alpha/train.py +++ b/model_zoo/official/nlp/pangu_alpha/train.py @@ -18,13 +18,12 @@ PanguAlpha train script import os import math -import time from mindspore import context from mindspore.train.model import Model import mindspore.communication.management as D from mindspore.context import ParallelMode import mindspore.nn as nn -from mindspore.train.callback import TimeMonitor, Callback +from mindspore.train.callback import TimeMonitor from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell import mindspore.common.dtype as mstype from mindspore.parallel import set_algo_parameters @@ -37,40 +36,10 @@ from src.pangu_alpha_wrapcell import PanguAlphaTrainOneStepWithLossScaleCell, Pa from src.pangu_alpha_config import PANGUALPHAConfig, set_parse from src.utils import LearningRate, get_args, FP32StateAdamWeightDecay from src.utils import download_data +from src.callbacks import EvalCallBack, LossCallBack +from src.metrics import PPLMetric -class LossCallBack(Callback): - """ - Monitor the loss in training. - If the loss in NAN or INF terminating training. 
- """ - - def __init__(self, dataset_size=-1, local_rank=0, has_trained_epoch=0, has_trained_step=0, micro_size=1): - super(LossCallBack, self).__init__() - self._dataset_size = dataset_size - self.local_rank = local_rank - self.has_trained_epoch = has_trained_epoch - self.has_trained_step = has_trained_step - self.micro_size = micro_size - print("load has trained epoch :{} and step: {}".format(has_trained_epoch, has_trained_step), flush=True) - - def step_end(self, run_context): - """ - Print loss after each step - """ - cb_params = run_context.original_args() - if self._dataset_size > 0 and self.local_rank % 8 == 0: - percent, epoch_num = math.modf(cb_params.cur_step_num / - self._dataset_size) - if percent == 0: - epoch_num -= 1 - date = time.asctime(time.localtime(time.time())) - loss_value = cb_params.net_outputs[0].asnumpy() / self.micro_size - print("time: {} local_rank: {}, epoch: {}, step: {}, output is {}, overflow is {}, scale is {}". - format(date, int(self.local_rank), int(epoch_num) + int(self.has_trained_epoch), - cb_params.cur_step_num + int(self.has_trained_step), loss_value, - cb_params.net_outputs[1].asnumpy(), cb_params.net_outputs[2].asnumpy())) - project_root = os.path.abspath( os.path.dirname(os.path.realpath(__file__)) + os.path.sep + "..") @@ -101,73 +70,59 @@ def run_train(args_opt): The main training process. 
""" # Set execution mode - context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target) - context.set_context(variable_memory_max_size="31GB") + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, variable_memory_max_size="31GB") # Set parallel context if args_opt.distribute == "true": D.init() device_num = D.get_group_size() rank = D.get_rank() print("rank_id is {}, device_num is {}".format(rank, device_num)) - context.reset_auto_parallel_context() context.set_auto_parallel_context( - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, - gradients_mean=False, - full_batch=bool(args_opt.full_batch), - strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path, + parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False, + full_batch=bool(args_opt.full_batch), strategy_ckpt_load_file=args_opt.strategy_load_ckpt_path, enable_parallel_optimizer=bool(args_opt.optimizer_shard)) set_algo_parameters(elementwise_op_strategy_follow=True) _set_multi_subgraphs() - else: rank = 0 device_num = 1 context.set_context(save_graphs=False, save_graphs_path="./graphs_of_device_id_" + str(rank)) # copy data from the cloud to the /cache/Data cache_url = '/cache/Data/' + eval_cache_url = '/cache/EvalData/' if args_opt.offline: cache_url = args_opt.data_url + eval_cache_url = args_opt.eval_data_url else: download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank) + download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank) # Set model property model_parallel_num = args_opt.op_level_model_parallel_num data_parallel_num = int(device_num / model_parallel_num) + if data_parallel_num <= 1 and args_opt.optimizer_shard == 1: + raise ValueError("The dp must large than 1 when applying optimizer shard.") batch_size = args_opt.per_batch_size * data_parallel_num config = PANGUALPHAConfig( - data_parallel_num=data_parallel_num, - model_parallel_num=model_parallel_num, - batch_size=batch_size, 
- seq_length=args_opt.seq_length, - vocab_size=args_opt.vocab_size, - embedding_size=args_opt.embedding_size, - num_layers=args_opt.num_layers, - num_heads=args_opt.num_heads, - expand_ratio=4, - dropout_rate=0.1, - compute_dtype=mstype.float16, - stage_num=args_opt.stage_num, - micro_size=args_opt.micro_size, - eod_reset=bool(args_opt.eod_reset), - load_ckpt_path=args_opt.load_ckpt_path, + data_parallel_num=data_parallel_num, model_parallel_num=model_parallel_num, + batch_size=batch_size, seq_length=args_opt.seq_length, + vocab_size=args_opt.vocab_size, embedding_size=args_opt.embedding_size, + num_layers=args_opt.num_layers, num_heads=args_opt.num_heads, + expand_ratio=4, dropout_rate=0.1, compute_dtype=mstype.float16, + stage_num=args_opt.stage_num, micro_size=args_opt.micro_size, + eod_reset=bool(args_opt.eod_reset), load_ckpt_path=args_opt.load_ckpt_path, param_init_type=mstype.float32 if args_opt.param_init_type == 'fp32' else mstype.float16, word_emb_dp=bool(args_opt.word_emb_dp)) print("===config is: ", config, flush=True) - # Define network pangu_alpha = PanguAlpha(config) loss = CrossEntropyLoss(config) - pangu_alpha_with_loss = PanguAlphaWithLoss(config, pangu_alpha, loss) - pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss) - + pangu_alpha_with_loss_net = PanguAlphaWithLoss(config, pangu_alpha, loss) + pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net) print("=====args_opt is: ", args_opt, flush=True) - # Warm-up and cosine decay learning rate - lr = LearningRate(learning_rate=args_opt.start_lr, - end_learning_rate=args_opt.end_lr, - warmup_steps=args_opt.warmup_step, - decay_steps=200000) - + lr = LearningRate(learning_rate=args_opt.start_lr, end_learning_rate=args_opt.end_lr, + warmup_steps=args_opt.warmup_step, decay_steps=200000) params = pangu_alpha.trainable_params() group_params = set_weight_decay(params) if args_opt.optimizer == "lamb": @@ -180,36 +135,37 @@ def run_train(args_opt): loss_scale_value = 
math.pow(2, 32) epoch_num = args_opt.epoch_size # Dataset loading mindrecord files - ds = create_dataset(config.batch_size, data_path=cache_url, - data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch), - eod_id=args_opt.eod_id, device_num=device_num, rank=rank, - column_name=args_opt.data_column_name, epoch=epoch_num) - step_per_epoch = ds.get_dataset_size() - callback_size = args_opt.sink_size - actual_epoch_num = int(epoch_num * step_per_epoch / callback_size) - callback = [ - TimeMonitor(callback_size), - LossCallBack(callback_size, rank, 0, 0) - ] + ds = create_dataset(config.batch_size, data_path=cache_url, data_start_index=0, eod_reset=config.eod_reset, + full_batch=bool(args_opt.full_batch), eod_id=args_opt.eod_id, device_num=device_num, + rank=rank, column_name=args_opt.data_column_name, epoch=epoch_num) + actual_epoch_num = int(epoch_num * ds.get_dataset_size() / args_opt.sink_size) + callback = [TimeMonitor(args_opt.sink_size), LossCallBack(args_opt.sink_size, rank, 0, 0)] update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000) pangu_alpha_with_grads = PanguAlphaTrainOneStepWithLossScaleCell( pangu_alpha_with_loss, optimizer=optimizer, scale_update_cell=update_cell, enable_global_norm=True, config=config) - model = Model(pangu_alpha_with_grads) + if args_opt.train_and_eval_mode: + ds_eval = create_dataset(config.batch_size, data_path=eval_cache_url, + data_start_index=0, eod_reset=config.eod_reset, full_batch=bool(args_opt.full_batch), + eod_id=args_opt.eod_id, device_num=device_num, rank=rank, + column_name=args_opt.data_column_name, epoch=epoch_num, + num_samples=args_opt.eval_steps * config.batch_size) + ppl_metric = PPLMetric(config.seq_length) + model = Model(pangu_alpha_with_grads, eval_network=pangu_alpha_with_loss, metrics={"ppl": ppl_metric}) + callback.append(EvalCallBack(model, ds_eval, ppl_metric)) + else: + model = Model(pangu_alpha_with_grads) if 
args_opt.incremental_training: from mindspore.train.serialization import load_distributed_checkpoint - strategy = model.infer_train_layout(train_dataset=ds, sink_size=callback_size) + strategy = model.infer_train_layout(train_dataset=ds, sink_size=args_opt.sink_size) print("======start load_distributed checkpoint", flush=True) # For 2.6B and 13B models, the number of ckpt files is 512. - ckpt_name = 'filerted' - ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"{ckpt_name}_{ckpt_rank}.ckpt") for ckpt_rank in + ckpt_file_list = [os.path.join(args_opt.load_ckpt_path, f"filerted_{ckpt_rank}.ckpt") for ckpt_rank in range(0, 512)] print(f"Loading from path {ckpt_file_list[0]}", flush=True) - # Load checkpoint files load_distributed_checkpoint(model.train_network, ckpt_file_list, strategy) print("Dataset size: {}, actual_epoch_num: {}".format(ds.get_dataset_size(), actual_epoch_num), flush=True) - model.train(actual_epoch_num, ds, callbacks=callback, sink_size=callback_size, dataset_sink_mode=True) - + model.train(actual_epoch_num, ds, callbacks=callback, sink_size=args_opt.sink_size, dataset_sink_mode=True) def run_train_pipeline(args_opt): r""" @@ -224,12 +180,9 @@ def run_train_pipeline(args_opt): print("rank_id is {}, device_num is {}".format(rank_id, device_num)) context.reset_auto_parallel_context() context.set_auto_parallel_context( - parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, - gradients_mean=False, - full_batch=bool(args_opt.full_batch), - loss_repeated_mean=True, - device_num=device_num, - enable_parallel_optimizer=bool(args_opt.optimizer_shard), + parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL, gradients_mean=False, + full_batch=bool(args_opt.full_batch), loss_repeated_mean=True, + device_num=device_num, enable_parallel_optimizer=bool(args_opt.optimizer_shard), pipeline_stages=args_opt.stage_num) set_algo_parameters(elementwise_op_strategy_follow=True) _set_multi_subgraphs() @@ -238,13 +191,18 @@ def run_train_pipeline(args_opt): device_num = 1 # 
copy data from the cloud to the /cache/Data cache_url = '/cache/Data/' + eval_cache_url = '/cache/EvalData/' if args_opt.offline: cache_url = args_opt.data_url + eval_cache_url = args_opt.eval_data_url else: download_data(src_data_url=args_opt.data_url, tgt_data_path=cache_url, rank=rank_id) + download_data(src_data_url=args_opt.eval_data_url, tgt_data_path=eval_cache_url, rank=rank_id) model_parallel_num = args_opt.op_level_model_parallel_num stage_device_num = int(device_num / args_opt.stage_num) data_parallel_num = int(stage_device_num / model_parallel_num) + if data_parallel_num <= 1 and args_opt.optimizer_shard == 1: + raise ValueError("The dp must large than 1 when applying optimizer shard.") per_batch_size = args_opt.per_batch_size batch_size = per_batch_size * data_parallel_num * args_opt.micro_size config = PANGUALPHAConfig( @@ -267,8 +225,8 @@ def run_train_pipeline(args_opt): print("===config is: ", config, flush=True) pangu_alpha = PanguAlpha(config) loss = CrossEntropyLoss(config) - pangu_alpha_with_loss = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size) - pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss) + pangu_alpha_with_loss_net = PipelineCell(PanguAlphaWithLoss(config, pangu_alpha, loss), config.micro_size) + pangu_alpha_with_loss = _VirtualDatasetCell(pangu_alpha_with_loss_net) print("=====args_opt is: ", args_opt, flush=True) lr = LearningRate(learning_rate=args_opt.start_lr, end_learning_rate=args_opt.end_lr, warmup_steps=args_opt.warmup_step, decay_steps=args_opt.decay_steps) @@ -294,6 +252,8 @@ def run_train_pipeline(args_opt): update_cell = DynamicLossScaleUpdateCell(loss_scale_value=loss_scale_value, scale_factor=2, scale_window=1000) pangu_alpha_with_grads = PanguAlphaTrainPipelineWithLossScaleCell( pangu_alpha_with_loss, optimizer=optimizer, config=config, scale_update_cell=update_cell) + if args_opt.train_and_eval_mode: + raise ValueError("The pipeline train_and_eval_mode is not supported 
yet") model = Model(pangu_alpha_with_grads) model.train(actual_epoch_num, ds, callbacks=callback, sink_size=callback_size, dataset_sink_mode=True) diff --git a/model_zoo/official/nlp/q8bert/src/q8bert.py b/model_zoo/official/nlp/q8bert/src/q8bert.py index c6549b30f84..e752e5d97ed 100644 --- a/model_zoo/official/nlp/q8bert/src/q8bert.py +++ b/model_zoo/official/nlp/q8bert/src/q8bert.py @@ -212,12 +212,9 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainCell(nn.Cell): @@ -271,8 +268,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss class BertNetworkWithLoss_td(nn.Cell): @@ -451,12 +448,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertEvaluationCell(nn.Cell): @@ -507,5 +501,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py index 3b1468fd41d..c2e8f9f91a3 100644 --- 
a/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py +++ b/model_zoo/official/nlp/tinybert/src/tinybert_for_gd_td.py @@ -285,12 +285,9 @@ class BertTrainWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertTrainCell(nn.Cell): """ @@ -343,8 +340,8 @@ class BertTrainCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss class BertNetworkWithLoss_td(nn.Cell): """ @@ -551,12 +548,9 @@ class BertEvaluationWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class BertEvaluationCell(nn.Cell): @@ -606,5 +600,5 @@ class BertEvaluationCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) grads = self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/model_zoo/official/nlp/transformer/src/transformer_for_train.py b/model_zoo/official/nlp/transformer/src/transformer_for_train.py index 05555bf2df6..8fa2ce1a227 100644 --- a/model_zoo/official/nlp/transformer/src/transformer_for_train.py +++ b/model_zoo/official/nlp/transformer/src/transformer_for_train.py @@ -187,8 +187,8 @@ class TransformerTrainOneStepCell(nn.TrainOneStepCell): grads = 
self.hyper_map(F.partial(clip_grad, GRADIENT_CLIP_TYPE, GRADIENT_CLIP_VALUE), grads) # apply grad reducer on grads grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -277,12 +277,9 @@ class TransformerTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell) overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) cast = P.Cast() @@ -444,9 +441,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): accu_overflow = self.select(overflow, self.one, self.zero) self.accu_overflow = self.select(is_accu_step, accu_overflow, self.zero) - if is_accu_step: - succ = False - else: + if not is_accu_step: # apply grad reducer on grads grads = self.grad_reducer(self.accu_grads) scaling = scaling_sens * self.degree * self.accumulation_steps @@ -463,10 +458,7 @@ class TransformerTrainAccumulationAllReducePostWithLossScaleCell(nn.Cell): overflow = self.reshape(overflow, (())) if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, overflow) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - ret = (mean_loss, overflow, scaling_sens) - return F.depend(ret, succ) + return (mean_loss, overflow, scaling_sens) diff --git a/model_zoo/official/recommend/ncf/src/ncf.py b/model_zoo/official/recommend/ncf/src/ncf.py index 6a9bb21059f..c48af973ca7 100644 --- a/model_zoo/official/recommend/ncf/src/ncf.py +++ b/model_zoo/official/recommend/ncf/src/ncf.py @@ -20,7 +20,6 @@ from mindspore.nn.layer.activation import get_activation import mindspore.common.dtype as mstype from mindspore.ops import operations as P from 
mindspore.common.initializer import initializer -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.context import ParallelMode from mindspore.nn.wrap.grad_reducer import DistributedGradReducer @@ -261,7 +260,8 @@ class TrainStepWrap(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class PredictWithSigmoid(nn.Cell): diff --git a/model_zoo/official/rl/dqn/README.md b/model_zoo/official/rl/dqn/README.md index 5149708cddd..5731d24741b 100644 --- a/model_zoo/official/rl/dqn/README.md +++ b/model_zoo/official/rl/dqn/README.md @@ -34,8 +34,8 @@ The overall network architecture of DQN is show below: - Framework - [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) - third-party libraries @@ -50,6 +50,7 @@ pip install gym ```python ├── dqn ├── README.md # descriptions about DQN + ├── README_CH.md # descriptions about DQN in Chinese ├── scripts │ ├──run_standalone_eval_ascend.sh # shell script for evaluation with Ascend │ ├──run_standalone_eval_gpu.sh # shell script for evaluation with GPU @@ -86,7 +87,7 @@ pip install gym GPU: python train.py --device_target GPU --ckpt_path ckpt > log.txt 2>&1 & shell: - Ascend:bash run_standalone_train_ascend.sh ckpt + Ascend: bash run_standalone_train_ascend.sh ckpt GPU: bash run_standalone_train_gpu.sh ckpt ``` @@ -95,29 +96,29 @@ pip install gym ```shell # evaluat example python - Ascend: python eval.py --device_target Ascend --ckpt_path .ckpt/checkpoint_dqn.ckpt - 
GPU: python eval.py --device_target GPU --ckpt_path .ckpt/checkpoint_dqn.ckpt + Ascend: python eval.py --device_target Ascend --ckpt_path ./ckpt/dqn.ckpt + GPU: python eval.py --device_target GPU --ckpt_path ./ckpt/dqn.ckpt shell: - Ascend: bash run_standalone_eval_ascend.sh .ckpt/checkpoint_dqn.ckpt - GPU: bash run_standalone_eval_gpu.sh .ckpt/checkpoint_dqn.ckpt + Ascend: bash run_standalone_eval_ascend.sh ./ckpt/dqn.ckpt + GPU: bash run_standalone_eval_gpu.sh ./ckpt/dqn.ckpt ``` ## [Performance](#content) ### Inference Performance -| Parameters | DQN | -| -------------------------- | ----------------------------------------------------------- | -| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | -| uploaded Date | 03/10/2021 (month/day/year) | -| MindSpore Version | 1.1.0 | -| Training Parameters | batch_size = 512, lr=0.001 | -| Optimizer | RMSProp | -| Loss Function | MSELoss | -| outputs | probability | -| Params (M) | 7.3k | -| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | +| parameter | Ascend |GPU | +| -------------------------- | ------------------------------------------------------- | ----------------------------------------------------------- | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 |GPU | +| uploaded Date | 03/10/2021 (month/day/year) | 07/28/2021 (month/day/year) | +| MindSpore Version | 1.1.0 | 1.2.0 | +| Training Parameters | batch_size = 512, lr=0.001 | batch_size = 32, lr=0.01 | +| Optimizer | RMSProp |Adam | +| Loss Function | MSELoss |MSELoss | +| outputs | Reward | Reward | +| Params (M) | 7.3k | 7.3k | +| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | ## [Description of Random Situation](#content) @@ -125,4 +126,4 @@ We use random seed in train.py. 
## [ModeZoo Homepage](#contents) -Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). +Please check the official [homepage](https://gitee.com/mindspore/mindspore/tree/master/model_zoo). \ No newline at end of file diff --git a/model_zoo/official/rl/dqn/README_CN.md b/model_zoo/official/rl/dqn/README_CN.md index 8e014d50cd5..6fca820d493 100644 --- a/model_zoo/official/rl/dqn/README_CN.md +++ b/model_zoo/official/rl/dqn/README_CN.md @@ -35,10 +35,10 @@ DQN网络的模型结构è§è®ºæ–‡ï¼š - 硬件 - Ascend或GPU处ç†å™¨ - 框架 - - [MindSpore](https://www.mindspore.cn/install/) + - [MindSpore](https://www.mindspore.cn/install/en) - 通过下é¢ç½‘å€å¯ä»¥èŽ·å¾—更多信æ¯ï¼š - - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) - 第三方库 @@ -115,7 +115,7 @@ pip install gym | æŸå¤±å‡½æ•° | MSELoss | MSELoss | | 输出 | 游æˆå¾—分值 | 游æˆå¾—分值 | | å‚æ•°é‡(M) | 7.3k | 7.3k | -| 脚本 | <<<>>> | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | +| 脚本 | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/official/rl/dqn | # éšæœºæƒ…况æè¿° diff --git a/model_zoo/official/rl/dqn/eval.py b/model_zoo/official/rl/dqn/eval.py index 7f61abf4772..d222f0ea802 100644 --- a/model_zoo/official/rl/dqn/eval.py +++ b/model_zoo/official/rl/dqn/eval.py @@ -19,23 +19,30 @@ import gym from mindspore import context from mindspore.common import set_seed from mindspore.train.serialization import load_checkpoint, load_param_into_net + +from src.config_gpu import config_dqn as cfg_gpu from src.config import config_dqn as cfg from src.agent import Agent parser = 
argparse.ArgumentParser(description='MindSpore dqn Example') -parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], +parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], help='device where the code will be implemented (default: Ascend)') parser.add_argument('--ckpt_path', type=str, default=None, help='if is test, must provide\ path where the trained ckpt file') args = parser.parse_args() set_seed(1) - if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) - env = gym.make('CartPole-v1') + if args.device_target == 'GPU': + cfg = cfg_gpu + + env = gym.make(cfg.game) + env = env.unwrapped cfg.state_space_dim = env.observation_space.shape[0] cfg.action_space_dim = env.action_space.n + cfg.env_a_shape = 0 if isinstance(env.action_space.sample(), + int) else env.action_space.sample().shape # to confirm the shape agent = Agent(**cfg) # load checkpoint @@ -46,22 +53,25 @@ if __name__ == "__main__": raise ValueError("Load param into net fail!") score = 0 - agent.load_dict() - for episode in range(50): - s0 = env.reset() - total_reward = 1 + for episode in range(cfg.EPOCH): + s = env.reset() + ep_r = 0 while True: - a0 = agent.eval_act(s0) - s1, r1, done, _ = env.step(a0) + a, flag = agent.act(s) + s_, r, done, _ = env.step(a) - if done: - r1 = -1 + # modify the reward + x, x_dot, theta, theta_dot = s_ + r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 + r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 + r = r1 + r2 + ep_r += r if done: break + s = s_ - total_reward += r1 - s0 = s1 - score += total_reward - print("episode", episode, "total_reward", total_reward) - print("mean_reward", score/50) + score += ep_r + print("episode", episode, "total_reward", ep_r) + print("mean_reward", score / cfg.EPOCH) + \ No newline at end of file diff --git a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh 
b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh index f4bc6545126..bab8ab781cf 100755 --- a/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh +++ b/model_zoo/official/rl/dqn/scripts/run_standalone_train_gpu.sh @@ -17,5 +17,4 @@ # an simple tutorial as follows, more parameters can be setting script_self=$(readlink -f "$0") self_path=$(dirname "${script_self}") -CKPT_PATH=$1 -python -s ${self_path}/../train.py --device_target="GPU" --ckpt_path=$CKPT_PATH > log.txt 2>&1 & +python -s ${self_path}/../train.py --device_target="GPU" > log.txt 2>&1 & diff --git a/model_zoo/official/rl/dqn/src/agent.py b/model_zoo/official/rl/dqn/src/agent.py index c76841cdb58..16aeb479d75 100644 --- a/model_zoo/official/rl/dqn/src/agent.py +++ b/model_zoo/official/rl/dqn/src/agent.py @@ -14,14 +14,14 @@ # ============================================================================ """Agent of reinforcement learning network""" -import random import math import numpy as np import mindspore.nn as nn -from mindspore import Tensor import mindspore.common.dtype as mstype -from src.dqn import DQN, WithLossCell +from mindspore import Tensor, load_param_into_net +from mindspore.ops import operations as P +from src.dqn import DQN, WithLossCell class Agent: """ @@ -30,65 +30,93 @@ class Agent: def __init__(self, **kwargs): for key, value in kwargs.items(): setattr(self, key, value) - self.policy_net = DQN(self.state_space_dim, 256, self.action_space_dim) - self.target_net = DQN(self.state_space_dim, 256, self.action_space_dim) - self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr) - loss_fn = nn.MSELoss() - loss_q_net = WithLossCell(self.policy_net, loss_fn) - self.policy_net_train = nn.TrainOneStepCell(loss_q_net, self.optimizer) - self.policy_net_train.set_train(mode=True) - self.buffer = [] + self.policy_net = DQN(self.state_space_dim, self.hidden_size, self.action_space_dim) + self.target_net = DQN(self.state_space_dim, 
self.hidden_size, self.action_space_dim) + self.policy_net.training = True + self.policy_net.requires_grad = True + self.learn_step_counter = 0 # for target updating + self.memory_counter = 0 # for storing memory + self.memory = np.zeros((self.memory_capacity, self.state_space_dim * 2 + 2)) # initialize memory + if self.dev == 'Ascend': + self.optimizer = nn.RMSProp(self.policy_net.trainable_params(), learning_rate=self.lr) + else: + self.optimizer = nn.Adam(self.policy_net.trainable_params(), learning_rate=self.lr) + self.loss_func = nn.MSELoss() + self.loss_net = WithLossCell(self.policy_net, self.loss_func) + self.train_net = nn.TrainOneStepCell(self.loss_net, self.optimizer) + self.train_net.set_train() + self.steps = 0 - def act(self, s0): + self.cast = P.Cast() + self.expand = P.ExpandDims() + self.reshape = P.Reshape() + self.argmax = P.ArgMaxWithValue(axis=1, keep_dims=True) + self.gather = P.GatherD() + + def act(self, x): """ - Agent choose action. + get action """ self.steps += 1 - epsi = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay)) - if random.random() < epsi: - a0 = random.randrange(self.action_space_dim) + if self.dev == 'GPU': + epsilon = self.epsi_high else: - s0 = np.expand_dims(s0, axis=0) - s0 = Tensor(s0, mstype.float32) - a0 = self.policy_net(s0).asnumpy() - a0 = np.argmax(a0) - return a0 + epsilon = self.epsi_low + (self.epsi_high - self.epsi_low) * (math.exp(-1.0 * self.steps / self.decay)) + flag_com = False + if np.random.uniform() < epsilon: + x = Tensor(x, mstype.float32) + x = self.expand(x, 0) + actions_value = self.policy_net.construct(x) + action = actions_value.asnumpy() + action = np.argmax(action) + flag_com = True + else: # random + action = np.random.randint(0, self.action_space_dim) + action = action if self.env_a_shape == 0 else self.reshape(action, self.env_a_shape) + return action, flag_com - def eval_act(self, s0): - self.steps += 1 - s0 = np.expand_dims(s0, axis=0) - s0 = 
Tensor(s0, mstype.float32) - a0 = self.policy_net(s0).asnumpy() - a0 = np.argmax(a0) - return a0 + def eval_act(self, x): + """ + choose action in eval + """ + x = Tensor(x, mstype.float32) + x = self.expand(x, 0) + actions_value = self.policy_net.construct(x) + action = actions_value.asnumpy() + action = np.argmax(action) + return action - def put(self, *transition): - if len(self.buffer) == self.capacity: - self.buffer.pop(0) - self.buffer.append(transition) - - def load_dict(self): - for target_item, source_item in zip(self.target_net.parameters_dict(), self.policy_net.parameters_dict()): - target_param = self.target_net.parameters_dict()[target_item] - source_param = self.policy_net.parameters_dict()[source_item] - target_param.set_data(source_param.data) + def store_transition(self, s, a, r, s_): + """ + store transition + """ + transition = np.hstack((s, [a, r], s_)) + index = self.memory_counter % self.memory_capacity + self.memory[index, :] = transition + self.memory_counter += 1 def learn(self): """ Agent learn from experience data. 
""" - if (len(self.buffer)) < self.batch_size: - return - samples = random.sample(self.buffer, self.batch_size) - s0, a0, r1, s1 = zip(*samples) - s1 = Tensor(s1, mstype.float32) - s0 = Tensor(s0, mstype.float32) - a0 = Tensor(np.expand_dims(a0, axis=1)) - next_state_values = self.target_net(s1).asnumpy() - next_state_values = np.max(next_state_values, axis=1) + if self.learn_step_counter % self.target_replace_iter == 0: + load_param_into_net(self.target_net, self.policy_net.parameters_dict()) - y_true = r1 + self.gamma * next_state_values - y_true = Tensor(np.expand_dims(y_true, axis=1), mstype.float32) - self.policy_net_train(s0, a0, y_true) + self.learn_step_counter += 1 + + sample_index = np.random.choice(self.memory_capacity, self.batch_size) + + b_memory = self.memory[sample_index, :] + b_s = Tensor(b_memory[:, :self.state_space_dim], mstype.float32) + b_a = Tensor(b_memory[:, self.state_space_dim:self.state_space_dim + 1].astype(int), mstype.int32) + b_r = Tensor(b_memory[:, self.state_space_dim + 1:self.state_space_dim + 2], mstype.float32) + b_s_ = Tensor(b_memory[:, -self.state_space_dim:], mstype.float32) + + q_next = self.target_net(b_s_) + q_next_numpy = q_next.asnumpy() + tem_ = Tensor(np.max(q_next_numpy, axis=1).reshape(-1, 1)) + q_target = b_r + self.gamma * tem_ + self.train_net(b_s, q_target, b_a) + \ No newline at end of file diff --git a/model_zoo/official/rl/dqn/src/config.py b/model_zoo/official/rl/dqn/src/config.py index 6d7a7ef53f4..6f4efaed994 100644 --- a/model_zoo/official/rl/dqn/src/config.py +++ b/model_zoo/official/rl/dqn/src/config.py @@ -19,13 +19,20 @@ network config setting, will be used in train.py and eval.py from easydict import EasyDict as edict config_dqn = edict({ + 'dev': 'Ascend', 'gamma': 0.8, 'epsi_high': 0.9, 'epsi_low': 0.05, - 'decay': 200, 'lr': 0.001, 'capacity': 100000, 'batch_size': 512, + 'target_replace_iter': 100, + 'memory_capacity': 2000, + 'game': 'CartPole-v1', 'state_space_dim': 4, - 'action_space_dim': 2 
+ 'action_space_dim': 2, + 'env_a_shape': 0, + 'hidden_size': 256, + 'decay': 200, + 'EPOCH': 50 }) diff --git a/model_zoo/official/rl/dqn/src/dqn.py b/model_zoo/official/rl/dqn/src/dqn.py index 1a3e0b2dd89..5d5dfd60843 100644 --- a/model_zoo/official/rl/dqn/src/dqn.py +++ b/model_zoo/official/rl/dqn/src/dqn.py @@ -17,8 +17,10 @@ import mindspore.nn as nn import mindspore.ops as ops - -class DQN(nn. Cell): +class DQN(nn.Cell): + """ + DQN net + """ def __init__(self, input_size, hidden_size, output_size): super(DQN, self).__init__() self.linear1 = nn.Dense(input_size, hidden_size) @@ -26,6 +28,9 @@ class DQN(nn. Cell): self.relu = nn.ReLU() def construct(self, x): + """ + model construct + """ x = self.relu(self.linear1(x)) return self.linear2(x) @@ -40,8 +45,12 @@ class WithLossCell(nn.Cell): self._loss_fn = loss_fn self.gather = ops.GatherD() - def construct(self, x, act, label): + def construct(self, x, label, index): + """ + compute loss + """ out = self._backbone(x) - out = self.gather(out, 1, act) + out = self.gather(out, 1, index) loss = self._loss_fn(out, label) return loss + \ No newline at end of file diff --git a/model_zoo/official/rl/dqn/train.py b/model_zoo/official/rl/dqn/train.py index 435c960a171..40a1234028a 100644 --- a/model_zoo/official/rl/dqn/train.py +++ b/model_zoo/official/rl/dqn/train.py @@ -16,57 +16,93 @@ import os import argparse +import timeit import gym +import numpy as np from mindspore import context from mindspore.common import set_seed from mindspore.train.serialization import save_checkpoint from src.config import config_dqn as cfg +from src.config_gpu import config_dqn as cfg_gpu from src.agent import Agent parser = argparse.ArgumentParser(description='MindSpore dqn Example') -parser.add_argument('--device_target', type=str, default="Ascend", choices=['Ascend', 'GPU'], +parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'], help='device where the code will be implemented (default: Ascend)') 
parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') args = parser.parse_args() set_seed(1) +def save_ckpt(path, model, ckpt_name): + """ + save ckpt file + """ + if not os.path.exists(path): + os.makedirs(path) + + ckpt_name = path + ckpt_name + save_checkpoint(model, ckpt_name) + if __name__ == "__main__": context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) if args.device_target == 'GPU': - # Enable graph kernel - context.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion") - env = gym.make('CartPole-v1') + cfg = cfg_gpu + context.set_context(device_id=1) + + env = gym.make(cfg.game) + env = env.unwrapped cfg.state_space_dim = env.observation_space.shape[0] cfg.action_space_dim = env.action_space.n + cfg.env_a_shape = 0 if isinstance(env.action_space.sample(), + int) else env.action_space.sample().shape agent = Agent(**cfg) - agent.load_dict() - for episode in range(300): - s0 = env.reset() + rewards = [] + count = 0 + times = [] + + print('\nCollecting experience...') + for episode in range(400): + s = env.reset() total_reward = 1 + ep_r = 0 while True: - a0 = agent.act(s0) - s1, r1, done, _ = env.step(a0) + start = timeit.default_timer() + a, flag = agent.act(s) + s_, r, done_, _ = env.step(a) - if done: - r1 = -1 + # modify the reward + x, x_dot, theta, theta_dot = s_ + r1 = (env.x_threshold - abs(x)) / env.x_threshold - 0.8 + r2 = (env.theta_threshold_radians - abs(theta)) / env.theta_threshold_radians - 0.5 + r = r1 + r2 - agent.put(s0, a0, r1, s1) + if flag: + end = timeit.default_timer() + differences = end - start + times.append(differences) + count += 1 + # pass - if done: + agent.store_transition(s, a, r, s_) + ep_r += r + if agent.memory_counter > cfg.memory_capacity: + agent.learn() + if done_: + print("episode", episode, "total_reward", round(ep_r, 2)) + rewards.append(round(ep_r, 2)) + if done_: break + s = s_ + 
env.close() + save_ckpt(os.path.realpath(args.ckpt_path), agent.policy_net, "/dqn.ckpt") + rewards_numpy = np.array(rewards) - total_reward += r1 - s0 = s1 - agent.learn() - agent.load_dict() - print("episode", episode, "total_reward", total_reward) + times.remove(min(times)) + times.remove(max(times)) + times_numpy = np.array(times) - path = os.path.realpath(args.ckpt_path) - if not os.path.exists(path): - os.makedirs(path) - - ckpt_name = path + "/dqn.ckpt" - save_checkpoint(agent.policy_net, ckpt_name) + print(rewards_numpy.mean(), times_numpy.mean()) + \ No newline at end of file diff --git a/model_zoo/research/cv/AVA_cifar/src/network_define.py b/model_zoo/research/cv/AVA_cifar/src/network_define.py index 8e102cd486a..132e7033b34 100644 --- a/model_zoo/research/cv/AVA_cifar/src/network_define.py +++ b/model_zoo/research/cv/AVA_cifar/src/network_define.py @@ -15,7 +15,6 @@ """define network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore import ParameterTuple from mindspore.nn.wrap.grad_reducer import DistributedGradReducer @@ -83,4 +82,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data3, data2, data1, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py index 1084f084168..4ab7d928e6f 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_pretrain.py @@ -14,7 +14,6 @@ # ============================================================================ """define pretrain network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import 
ParameterTuple @@ -85,4 +84,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data1, data2, data3, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py index d5e4ad32fba..01167b1c6d6 100644 --- a/model_zoo/research/cv/AVA_hpa/src/network_define_train.py +++ b/model_zoo/research/cv/AVA_hpa/src/network_define_train.py @@ -14,7 +14,6 @@ # ============================================================================ """define training network""" import mindspore.nn as nn -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore import ParameterTuple @@ -84,4 +83,5 @@ class TrainOneStepCell(nn.Cell): grads = self.grad(self.net_with_loss, weights)(data, label) if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/AttGAN/src/cell.py b/model_zoo/research/cv/AttGAN/src/cell.py index 5271048c6ea..ec8d9a2928d 100644 --- a/model_zoo/research/cv/AttGAN/src/cell.py +++ b/model_zoo/research/cv/AttGAN/src/cell.py @@ -116,7 +116,8 @@ class TrainOneStepCellGen(nn.Cell): grads = self.grad(self.network, weights)(img_a, att_a, att_a_, att_b, att_b_, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)), gf_loss, gc_loss, gr_loss + self.optimizer(grads) + return loss, gf_loss, gc_loss, gr_loss class TrainOneStepCellDis(nn.Cell): @@ -152,4 +153,5 @@ class TrainOneStepCellDis(nn.Cell): if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)), d_real_loss, d_fake_loss, dc_loss, df_gp + self.optimizer(grads) + return loss, d_real_loss, 
d_fake_loss, dc_loss, df_gp diff --git a/model_zoo/research/cv/FaceDetection/src/network_define.py b/model_zoo/research/cv/FaceDetection/src/network_define.py index 6a342119c43..0284586929a 100644 --- a/model_zoo/research/cv/FaceDetection/src/network_define.py +++ b/model_zoo/research/cv/FaceDetection/src/network_define.py @@ -138,10 +138,8 @@ class TrainOneStepWithLossScaleCell(nn.Cell): else: cond = self.less_equal(self.base, flag_sum) - opt = self.optimizer(grads) - - ret = (loss, cond, scaling_sens) - return F.depend(ret, opt) + self.optimizer(grads) + return (loss, cond, scaling_sens) class BuildTrainNetworkV2(nn.Cell): diff --git a/model_zoo/research/cv/ICNet/README.md b/model_zoo/research/cv/ICNet/README.md index c2496b09bd7..8b330c1d874 100644 --- a/model_zoo/research/cv/ICNet/README.md +++ b/model_zoo/research/cv/ICNet/README.md @@ -23,7 +23,7 @@ ICNet(Image Cascade Network) propose a full convolution network which incorporates multi-resolution branches under proper label guidance to address the challenge of real-time semantic segmentation. -[paper](https://arxiv.org/abs/1704.08545)ECCV2018 +[paper](https://arxiv.org/abs/1704.08545) from ECCV2018 # [Model Architecture](#Contents) @@ -31,7 +31,7 @@ ICNet takes cascade image inputs (i.e., low-, medium- and high resolution images # [Dataset](#Content) -used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/) +used Dataset :[Cityscape Dataset Website](https://www.cityscapes-dataset.com/) (please download 1st and 3rd zip) It contains 5,000 finely annotated images split into training, validation and testing sets with 2,975, 500, and 1,525 images respectively. 
@@ -64,6 +64,16 @@ It contains 5,000 finely annotated images split into training, validation and te ├── export.py # export mindir ├── postprocess.py # 310 infer calculate accuracy ├── README.md # descriptions about ICNet + ├── Res50V1_PRE # scripts for pretrain + │   ├── scripts + │   │   └── run_distribute_train.sh + │   ├── src + │   │   ├── config.py + │   │   ├── CrossEntropySmooth.py + │   │   ├── dataset.py + │   │   ├── lr_generator.py + │   │   └── resnet50_v1.py + │   └── train.py ├── scripts │   ├── run_distribute_train8p.sh # multi cards distributed training in ascend │   ├── run_eval.sh # validation script @@ -95,7 +105,7 @@ Set script parameters in src/model_utils/icnet.yaml . ```bash name: "icnet" -backbone: "resnet50" +backbone: "resnet50v1" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training ``` @@ -116,9 +126,8 @@ valid_batch_size: 1 cityscapes_root: "/data/cityscapes/" # set dataset path epochs: 160 val_epoch: 1 -ckpt_dir: "./ckpt/" # ckpt and training log will be saved here mindrecord_dir: '' # set mindrecord path -pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # set the pretrained model path correctly +pretrained_model_path: '/root/ResNet50V1B-150_625.ckpt' # use the latest checkpoint file after pre-training save_checkpoint_epochs: 5 keep_checkpoint_max: 10 ``` @@ -137,18 +146,28 @@ keep_checkpoint_max: 10 [MINDRCORD_PATH] in script should be consistent with 'mindrecord_dir' in config file. -### Distributed Training +### Pre-training -- Run distributed train in ascend processor environment +The folder Res50V1_PRE contains the scripts for pre-training and its dataset is [image net](https://image-net.org/). 
More details in [GENet_Res50](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/GENet_Res50) + +- Usage: ```shell - bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [PROJECT_PATH] + bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] ``` - Notes: The hccl.json file specified by [RANK_TABLE_FILE] is used when running distributed tasks. You can use [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools) to generate this file. +### Distributed Training + +- Run distributed train in ascend processor environment + +```shell + bash scripts/run_distribute_train8p.sh [RANK_TABLE_FILE] [PROJECT_PATH] +``` + ### Training Result The training results will be saved in the example path, The folder name starts with "ICNet-".You can find the checkpoint file and similar results below in LOG(0-7)/log.txt. @@ -174,7 +193,7 @@ epoch time: 97117.785 ms, per step time: 1044.277 ms Check the checkpoint path used for evaluation before running the following command. ```shell - bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] + bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID] ``` ### Evaluation Result @@ -196,7 +215,7 @@ avgtime 0.19648232793807982 bash run_infer_310.sh [The path of the MINDIR for 310 infer] [The path of the dataset for 310 infer] 0 ``` -Note:: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]". +- Note: Before executing 310 infer, create the MINDIR/AIR model using "python export.py --ckpt-file [The path of the CKPT for exporting]". 
# [Model Description](#Content) @@ -204,7 +223,7 @@ Note:: Before executing 310 infer, create the MINDIR/AIR model using "python exp ### Training Performance -|Parameter | MaskRCNN | +|Parameter | ICNet | | ------------------- | --------------------------------------------------------- | |resources | Ascend 910ï¼›CPU 2.60GHz, 192coreï¼›memory:755G | |Upload date |2021.6.1 | diff --git a/model_zoo/research/cv/ICNet/eval.py b/model_zoo/research/cv/ICNet/eval.py index bccbb3ed434..e2ab20fac6e 100644 --- a/model_zoo/research/cv/ICNet/eval.py +++ b/model_zoo/research/cv/ICNet/eval.py @@ -74,7 +74,6 @@ class Evaluator: mask = self._mask_transform(mask) # mask shape: (H,w) image = Tensor(image) - print(image) expand_dims = ops.ExpandDims() image = expand_dims(image, 0) @@ -84,8 +83,8 @@ class Evaluator: end_time = time.time() step_time = end_time - start_time - expand_dims = ops.ExpandDims() - mask = expand_dims(mask, 0) + output = np.array(output) + mask = np.expand_dims(mask, axis=0) self.metric.update(output, mask) list_time.append(step_time) diff --git a/model_zoo/research/cv/ICNet/scripts/run_eval.sh b/model_zoo/research/cv/ICNet/scripts/run_eval.sh index 74495640f9a..396d49719d2 100644 --- a/model_zoo/research/cv/ICNet/scripts/run_eval.sh +++ b/model_zoo/research/cv/ICNet/scripts/run_eval.sh @@ -14,9 +14,9 @@ # limitations under the License. 
# ============================================================================ -if [ $# != 3 ] +if [ $# != 4 ] then - echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH]" + echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [PROJECT_PATH] [DEVICE_ID]" exit 1 fi @@ -53,7 +53,7 @@ fi ulimit -u unlimited export DEVICE_NUM=1 -export DEVICE_ID=0 +export DEVICE_ID=$4 export RANK_SIZE=1 export RANK_ID=0 @@ -68,6 +68,6 @@ cp -r ../src ./eval cd ./eval || exit env > env.log echo "start evaluation for device $DEVICE_ID" -python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 &> log & +python eval.py --dataset_path=$PATH1 --checkpoint_path=$PATH2 --project_path=$PATH3 --device=$4 &> log & cd .. diff --git a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml index 9fc8d38a8a5..649ff114b8d 100644 --- a/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml +++ b/model_zoo/research/cv/ICNet/src/model_utils/icnet.yaml @@ -1,7 +1,7 @@ ### 1.Model model: name: "icnet" - backbone: "resnet50" + backbone: "resnet50v1" base_size: 1024 # during augmentation, shorter size will be resized between [base_size*0.5, base_size*2.0] crop_size: 960 # end of augmentation, crop to training diff --git a/model_zoo/research/cv/IPT/src/loss.py b/model_zoo/research/cv/IPT/src/loss.py index 30ae4ea9f85..11a3a986ae9 100644 --- a/model_zoo/research/cv/IPT/src/loss.py +++ b/model_zoo/research/cv/IPT/src/loss.py @@ -144,12 +144,9 @@ class IPTTrainOneStepWithLossScaleCell(nn.TrainOneStepWithLossScaleCell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class SupConLoss(nn.Cell): diff --git a/model_zoo/research/cv/IPT/src/utils.py 
b/model_zoo/research/cv/IPT/src/utils.py index 9928281a0c7..e2d77b0d887 100644 --- a/model_zoo/research/cv/IPT/src/utils.py +++ b/model_zoo/research/cv/IPT/src/utils.py @@ -23,7 +23,6 @@ from mindspore.common import dtype as mstype from mindspore.context import ParallelMode from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.nn.wrap.grad_reducer import DistributedGradReducer from mindspore.parallel._utils import _get_parallel_mode from mindspore.train.serialization import save_checkpoint @@ -82,7 +81,8 @@ class MyTrainOneStepCell(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def sub_mean(x): diff --git a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py index 12f118deb17..428e7ae5819 100644 --- a/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py +++ b/model_zoo/research/cv/LearningToSeeInTheDark/src/myutils.py @@ -225,11 +225,7 @@ class GNMTTrainOneStepWithLossScaleCell(nn.Cell): if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) self.loss_scalar("loss", loss) - ret = (loss, cond, scaling_sens) - - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py index df7ec1f42fe..15d38021b9e 100644 --- a/model_zoo/research/cv/MaskedFaceRecognition/model/model.py +++ b/model_zoo/research/cv/MaskedFaceRecognition/model/model.py @@ -22,7 +22,6 @@ from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits, L1Loss from mindspore.nn import Momentum from mindspore.ops import 
operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.common.initializer import HeNormal from mindspore.common.initializer import Normal from mindspore import Tensor @@ -382,7 +381,8 @@ class TrainStepWrap(nn.Cell): if not self.is_train: return loss grads = self.grad(self.network, weights)(x, labels1, labels2) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class TestStepWrap(nn.Cell): diff --git a/model_zoo/research/cv/ProtoNet/README.md b/model_zoo/research/cv/ProtoNet/README.md index 741c6e3889b..e424789b6a6 100644 --- a/model_zoo/research/cv/ProtoNet/README.md +++ b/model_zoo/research/cv/ProtoNet/README.md @@ -29,7 +29,12 @@ Proto-Net contains 2 parts named Encoder and Relation. The former one has 4 conv Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below. -Dataset used: [omniglot](https://github.com/brendenlake/omniglot) +The dataset omniglot can be obtained from (https://github.com/orobix/Prototypical-Networks-for-Few-shot-Learning-PyTorch/blob/master/). You can obtain the dataset after running the scripts. 
+ +```bash +cd src +python train.py +``` - Dataset size 4.02M,32462 28*28 in 1622 classes - Train 1,200 classes @@ -39,7 +44,7 @@ Dataset used: [omniglot](https://github.com/brendenlake/omniglot) - The directory structure is as follows: -```text +```shell └─Data ├─raw ├─spilts @@ -67,13 +72,13 @@ Dataset used: [omniglot](https://github.com/brendenlake/omniglot) After installing MindSpore via the official website, you can start training and evaluation as follows: -```shell -# enter script dir, train ProtoNet in standalone -sh run_standalone_train_ascend.sh dataset 1 20 20 -# enter script dir, train ProtoNet in distribution -sh run_distribution_ascend.sh dataset rank_table dataset 20 +```python +# enter script dir, train ProtoNet +sh run_standalone_train_ascend.sh "../dataset" 1 60 500 # enter script dir, evaluate ProtoNet -sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20 +sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5 +# enter script dir, train ProtoNet distributed +sh run_distribution_ascend.sh "./rank_table.json" "../dataset" 60 500 ``` ## [Script and Sample Code](#contents) @@ -120,8 +125,7 @@ Major parameters in train.py and config.py as follows: ### Training ```bash -# enter script dir, train ProtoNet in standalone -sh run_standalone_train_ascend.sh dataset 1 20 20 +sh run_standalone_train_ascend.sh "../dataset" 1 60 500 ``` The model checkpoint will be saved in the current directory. @@ -133,11 +137,11 @@ The model checkpoint will be saved in the current directory. Before running the command below, please check the checkpoint path used for evaluation. 
```bash -# enter script dir, evaluate ProtoNet -sh run_standalone_eval_ascend.sh dataset best.ckpt 1 20 +sh run_standalone_eval_ascend.sh "../dataset" "./output/best_ck.ckpt" 1 5 ``` -```text +```shell + Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 ``` @@ -149,9 +153,9 @@ Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 | Parameters | ProtoNet | | -------------------------- | ---------------------------------------------------------- | -| Resource | CentOs 8.2; Ascend 910; CPU 2.60GHz; 192cores; Memory 755G | +| Resource | CentOs 8.2; Ascend 910 ; CPU 2.60GHz,192coresï¼›Memory 755G | | uploaded Date | 03/26/2021 (month/day/year) | -| MindSpore Version | 1.2.0 | +| MindSpore Version | 1.1.1 | | Dataset | OMNIGLOT | | Training Parameters | episode=500, class_num = 5, lr=0.001, classes_per_it_tr=60, num_support_tr=5, num_query_tr=5, classes_per_it_val=20, num_support_val=5, num_query_val=15 | | Optimizer | Adam | @@ -161,7 +165,7 @@ Test Acc: 0.9954400658607483 Loss: 0.02102319709956646 | Speed | 215 ms/step | | Total time | 3 h 23m (8p) | | Checkpoint for Fine tuning | 440 KB (.ckpt file) | -| Scripts | https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/ProtoNet | +| Scripts | https://gitee.com/mindspore/mindspore/tree/r1.1/model_zoo/research/cv/protonet | # [ModelZoo Homepage](#contents) diff --git a/model_zoo/research/cv/ProtoNet/eval.py b/model_zoo/research/cv/ProtoNet/eval.py index 27d7cf3daa7..612fa3ae5b0 100644 --- a/model_zoo/research/cv/ProtoNet/eval.py +++ b/model_zoo/research/cv/ProtoNet/eval.py @@ -15,14 +15,13 @@ """ ProtoNet evaluation script. 
""" -import os +import numpy as np from mindspore import dataset as ds from mindspore import load_checkpoint import mindspore.context as context from src.protonet import ProtoNet from src.parser_util import get_parser from src.PrototypicalLoss import PrototypicalLoss -import numpy as np from model_init import init_dataloader from train import WithLossCell @@ -67,5 +66,5 @@ if __name__ == '__main__': options.classes_per_it_val, is_train=False) Net = WithLossCell(Net, loss_fn) val_dataloader = init_dataloader(options, 'val', datapath) - load_checkpoint(os.path.join(ckptpath, 'best_ck.ckpt'), net=Net) + load_checkpoint(ckptpath, net=Net) test(val_dataloader, Net) diff --git a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh index ce0977ca511..e44f598945a 100644 --- a/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh +++ b/model_zoo/research/cv/ProtoNet/scripts/run_distribution_ascend.sh @@ -16,7 +16,7 @@ # an simple tutorial as follows, more parameters can be setting if [ $# != 4 ] then - echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS]" + echo "Usage: sh run_distribution_ascend.sh [RANK_TABLE_FILE] [DATA_PATH] [TRAIN_CLASS] [EPOCHS]" exit 1 fi @@ -33,6 +33,7 @@ RANK_TABLE_FILE=$(realpath $1) export RANK_TABLE_FILE export DATA_PATH=$2 export TRAIN_CLASS=$3 +export EPOCHS=$4 echo "RANK_TABLE_FILE=${RANK_TABLE_FILE}" export SERVER_ID=0 @@ -43,13 +44,16 @@ do export RANK_ID=$((rank_start + i)) rm -rf ./train_parallel$i mkdir ./train_parallel$i - cp -r ./src ./train_parallel$i - cp ./train.py ./train_parallel$i + cp -r ../src ./train_parallel$i + cp ../train.py ./train_parallel$i + cp ../model_init.py ./train_parallel$i echo "start training for rank $RANK_ID, device $DEVICE_ID" cd ./train_parallel$i ||exit env > env.log - python train.py --data_path=$DATA_PATH \ + python train.py --dataset_root=$DATA_PATH \ --device_id=$DEVICE_ID 
--device_target="Ascend" \ - --classes_per_it_tr=$TRAIN_CLASS > log 2>&1 & + --classes_per_it_tr=$TRAIN_CLASS\ + --experiment_root=./output\ + --epochs=$EPOCHS > log 2>&1 & cd .. done diff --git a/model_zoo/research/cv/ProtoNet/src/parser_util.py b/model_zoo/research/cv/ProtoNet/src/parser_util.py index 906d5385bd7..6aa7d6ffb16 100644 --- a/model_zoo/research/cv/ProtoNet/src/parser_util.py +++ b/model_zoo/research/cv/ProtoNet/src/parser_util.py @@ -49,7 +49,7 @@ def get_parser(): parser.add_argument('-exp', '--experiment_root', type=str, help='root where to store models, losses and accuracies', - default='..' + os.sep + 'output') + default='.' + os.sep + 'output') parser.add_argument('-nep', '--epochs', type=int, diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py index 6c7b0792742..59cf30efd0c 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_gan.py @@ -59,7 +59,8 @@ class TrainOneStepD(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_d = self.grad_reducer(grads_d) - return ops.depend(ld, self.optimizer(grads_d)) + self.optimizer(grads_d) + return ld class TrainOnestepG(nn.Cell): """ @@ -103,4 +104,5 @@ class TrainOnestepG(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads_g = self.grad_reducer(grads_g) - return ops.depend(lg, self.optimizer(grads_g)) + self.optimizer(grads_g) + return lg diff --git a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py index e9182b755e8..620ef823124 100644 --- a/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py +++ b/model_zoo/research/cv/SRGAN/src/trainonestep/train_psnr.py @@ -59,5 +59,6 @@ class TrainOnestepPSNR(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return ops.depend(psnr_loss, self.optimizer(grads)) + 
self.optimizer(grads) + return psnr_loss \ No newline at end of file diff --git a/model_zoo/research/cv/STGAN/src/models/networks.py b/model_zoo/research/cv/STGAN/src/models/networks.py index da83c30c7c8..1cbd4cfd5a3 100644 --- a/model_zoo/research/cv/STGAN/src/models/networks.py +++ b/model_zoo/research/cv/STGAN/src/models/networks.py @@ -413,7 +413,8 @@ class TrainOneStepGenerator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, sens) grads = self.grad_reducer(grads) - return (ops.depend(loss_G, self.optimizer(grads)), fake_x, loss_G, + self.optimizer(grads) + return (loss_G, fake_x, loss_G, loss_fake_G, loss_cls_G, loss_rec_G, loss_adv_G) @@ -451,5 +452,6 @@ class TrainOneStepDiscriminator(nn.Cell): grads = self.grad(self.network, self.weights)(real_x, c_org, c_trg, attr_diff, alpha, sens) grads = self.grad_reducer(grads) - return (ops.depend(loss_D, self.optimizer(grads)), loss_D, loss_real_D, + self.optimizer(grads) + return (loss_D, loss_D, loss_real_D, loss_fake_D, loss_cls_D, loss_gp_D, loss_adv_D, attr_diff) diff --git a/model_zoo/research/cv/SiamFC/readme.md b/model_zoo/research/cv/SiamFC/readme.md new file mode 100644 index 00000000000..21026f95241 --- /dev/null +++ b/model_zoo/research/cv/SiamFC/readme.md @@ -0,0 +1,195 @@ +# Contents + +- [SiamFC Description](#SiamFC-Description) +- [Model Architecture](#SiamFC-Architecture) +- [Dataset](#SiamFC-dataset) +- [Environmental requirements](#Environmental) +- [Quick Start](#quick-start) +- [Script Description](#script-description) + - [Script and Sample Code](#script-and-sample-code) + - [Script Parameters](#script-parameters) + - [Training Process](#training-process) + - [Training](#training) + - [Evaluation Process](#evaluation-process) + - [Evaluation](#evaluation) +- [Model Description](#model-description) + - [Performance](#performance) + - [Evaluation Performance](#evaluation-performance) + +# [SiamFC Description](#Contents) + +Siamfc proposes a new full 
convolution twin network as the basic tracking algorithm, which is trained end-to-end on the ILSVRC15 target tracking video data set. Our tracker exceeds the real-time requirement in frame rate. Although it is very simple, it achieves the best performance on multiple benchmarks. + +[paper](https://arxiv.org/pdf/1606.09549.pdf) Luca Bertinetto Jack Valmadre João F. Henriques Andrea Vedaldi Philip H. S. Torr +Department of Engineering Science, University of Oxford + +# [Model Architecture](#Contents) + +Siamfc first uses a fully convolutional AlexNet for feature extraction online and offline, and uses a twin network to train the template and background respectively. Online, after getting the box of the first frame, it carries out center cropping, and then loads the checkpoint to track the subsequent frames. In order to find the box, it needs to carry out a series of penalties on the score graph. Finally, the final prediction point is obtained by twice trilinear interpolation. + +# [Dataset](#Contents) + +Dataset used: [ILSVRC2015-VID](http://bvisionweb1.cs.unc.edu/ilsvrc2015/ILSVRC2015_VID.tar.gz) + +- Dataset size: 85GB, 30 categories in total + - Training set: a total of 3862 videos and their corresponding frame pictures and box positions + - Verification set: 555 videos and corresponding pictures and box locations + - Test set: a total of 973 videos and corresponding pictures and box locations +- Data format: the image is in h*w*C format, the box position includes the coordinates of the lower left corner and the upper right corner, the format is XML, and the XML needs to be parsed + +# [Environmental requirements](#Contents) + +- Hardware: (Ascend) + - Prepare an Ascend processor to build the hardware environment +- Framework: + - [MindSpore](https://www.mindspore.cn/install) +- For details, please refer to the following resources: + - [MindSpore course](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) + - [MindSpore Python
API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html) +- more API + - got10k toolkit + - opencv + - lmdb + +# [quick start](#Contents) + +After installing MindSpore through the official website, you can follow the steps below to train and evaluate: + +- Run the python script to preprocess the data set + + python src/create_dataset_ILSVRC.py -d data_dir -o output_dir + +- Run Python script to create LMDB + + python src/create_lmdb.py -d data_dir -o output_dir + + for example: + data_dir = '/data/VID/ILSVRC_VID_CURATION_train' + output_dir = '/data/VID/ILSVRC_VID_CURATION_train.lmdb' + + __Remarks: The encrypted pathname is used as the index. Therefore, you cannot change the location of the dataset + after creating it, because you need to find the corresponding image according to the index.__ + +- Run the script for training + + bash run_standalone_train_ascend.sh [Device_ID] [Dataset_path] + Remarks: [Dataset_path] is the location of the training set after preprocessing + +- more + + This example is single card training. + +- Run the script for evaluation + + python eval.py (requires the got10k toolkit; the dataset is OTB2013(50) or OTB2015(100)) + +# [Script description](#Contents) + +## Script and sample code + +```python + ├── SiamFC + ├── README.md // Notes on siamfc + ├── scripts + │ ├──ma-pre-start.sh // Create environment before modelarts training + │ ├──run_standalone_train_ascend.sh // Single card training in ascend + │ ├──run_distribution_ascend.sh // Multi card distributed training in ascend + ├── src + │ ├──alexnet.py // Alexnet architecture + │ ├──config.py // Parameter configuration + │ ├──custom_transforms.py //Data set processing + │ ├──dataset.py //GeneratorDataset + │ ├──Groupconv.py //MindSpore does not support group convolution at present.
This is an alternative + │ ├──lr_generator.py //Dynamic learning rate + │ ├──tracker.py //Trace script + │ ├──utils.py // utils + │ ├──create_dataset_ILSVRC.py // Create dataset + │ ├──create_lmdb.py //Create LMDB + ├── train.py // Training script + ├── eval.py // Evaluation script +``` + +## Script parameters + +python train.py and config.py The main parameters are as follows: + +- data_path:An absolutely complete path to training and evaluation data sets. +- epoch_size:Total training rounds +- batch_size:Training batch size. +- image_height:The image height is used as the model input. +- image_width:The image width is used as the model input. +- exemplar_size:Template size +- instance_size:Sample size. +- lr:Learning rate. +- frame_range:Select the frame interval of the template and sample. +- response_scale:Scaling factor of score chart. + +## Training process + +### Training + +- Running in ascend processor environment + +```python + python train.py --device_id=${DEVICE_ID} --data_path=${DATASET_PATH} +``` + +- After training, the loss value is as follows: + +```bash + grep "loss is " log + epoch: 1 step: 1, loss is 1.14123213 + ... + epoch: 1 step: 1536, loss is 0.5234123 + epoch: 1 step: 1537, loss is 0.4523326 + epoch: 1 step: 1538, loss is 0.6235748 + ... +``` + +- Model checkpoints are saved in the current directory. + +- After training, the loss value is as follows: + +```bash + grep "loss is " log: + epoch: 30 step: 1, loss is 0.12534634 + ... + epoch: 30 step: 1560, loss is 0.2364573 + epoch: 30 step: 1561, loss is 0.156347 + epoch: 30 step: 1561, loss is 0.173423 +``` + +## Evaluation process + +Check the checkpoint path used for evaluation before running the following command. 
+ +- Running in ascend processor environment + +```bash + python eval.py --device_id=${DEVICE_ID} --model_path=${MODEL_PATH} +``` + + The results were as follows: + +```bash + SiamFC_159_50_6650.ckpt -prec_score:0.777 -succ_score:0.589 _succ_rate:0.754 +``` + +# [Model description](#Contents) + +## performance + +### Evaluate performance + +|parameter | Ascend | +| -------------------------- | ---------------------------------------------- | +|resources | Ascend 910ï¼›CPU 2.60GHz, 192coreï¼›memory:755G | +|Upload date |2021.5.20 | +|mindspore version |mindspore1.2.0 | +|training parameter | epoch=50,step=6650,batch_size=8,lr_init=1e-2,lr_endl=1e-5 | +|optimizer |SGD optimizer,momentum=0.0,weight_decay=0.0 | +|loss function |BCEWithLogits | +|training speed | epoch time:285693.557 ms per step time :42.961 ms | +|total time |about 5 hours | +|Script URL |https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/SiamFC | +|Random number seed |set_seed = 1234 | diff --git a/model_zoo/research/cv/advanced_east/src/model.py b/model_zoo/research/cv/advanced_east/src/model.py index 532ec8d8cba..29f78eb3cce 100644 --- a/model_zoo/research/cv/advanced_east/src/model.py +++ b/model_zoo/research/cv/advanced_east/src/model.py @@ -19,7 +19,6 @@ import mindspore import mindspore.nn as nn from mindspore.ops import operations as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import ResizeNearestNeighbor from mindspore import Tensor, ParameterTuple, Parameter from mindspore.common.initializer import initializer, TruncatedNormal @@ -410,7 +409,8 @@ class TrainStepWrap(nn.Cell): loss = self.network(image, label) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(image, label, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def get_AdvancedEast_net(args): diff --git a/model_zoo/research/cv/arcface/README_CN.md 
b/model_zoo/research/cv/arcface/README_CN.md index 25d07b67638..f08a44a5d21 100644 --- a/model_zoo/research/cv/arcface/README_CN.md +++ b/model_zoo/research/cv/arcface/README_CN.md @@ -55,13 +55,13 @@ ```python # 分布å¼è®­ç»ƒè¿è¡Œç¤ºä¾‹ -sh scripts/run_distribute_train.sh /path/dataset /path/rank_table +bash scripts/run_distribute_train.sh /path/dataset /path/rank_table # å•æœºè®­ç»ƒè¿è¡Œç¤ºä¾‹ -sh scripts/run_standalone_train.sh /path/dataset +bash scripts/run_standalone_train.sh /path/dataset # è¿è¡Œè¯„估示例 -sh scripts/run_eval.sh /path/evalset /path/ckpt +bash scripts/run_eval.sh /path/evalset /path/ckpt ``` ## 脚本说明 @@ -108,7 +108,7 @@ train.pyå’Œval.py中主è¦å‚数如下: ### 分布å¼è®­ç»ƒ ```shell -sh scripts/run_distribute_train.sh /path/dataset /path/rank_table +bash scripts/run_distribute_train.sh /path/dataset /path/rank_table ``` 上述shell脚本将在åŽå°è¿è¡Œåˆ†å¸ƒè®­ç»ƒã€‚å¯ä»¥é€šè¿‡`device[X]/train.log`文件查看结果。 @@ -134,7 +134,7 @@ epoch time: 1104929.793 ms, per step time: 97.162 ms 在è¿è¡Œä»¥ä¸‹å‘½ä»¤ä¹‹å‰ï¼Œè¯·æ£€æŸ¥ç”¨äºŽè¯„估的检查点路径。请将检查点路径设置为ç»å¯¹å…¨è·¯å¾„,例如“username/arcface/arcface-11372-1.ckptâ€ã€‚ ```bash - sh scripts/run_eval.sh /path/evalset /path/ckpt + bash scripts/run_eval.sh /path/evalset /path/ckpt ``` 上述python命令将在åŽå°è¿è¡Œï¼Œæ‚¨å¯ä»¥é€šè¿‡eval.log文件查看结果。测试数æ®é›†çš„准确性如下: diff --git a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh index 6c953ab1097..35989366537 100644 --- a/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/arcface/scripts/run_distribute_train.sh @@ -27,13 +27,13 @@ get_real_path(){ echo "$(realpath -m $PWD/$1)" fi } -RANK_SIZE=8 DATA_PATH=$(get_real_path $1) RANK_TABLE=$(get_real_path $2) EXEC_PATH=$(pwd) echo "$EXEC_PATH" export RANK_TABLE_FILE=$RANK_TABLE +export RANK_SIZE=8 for((i=0;i env0.log -python3 train.py --data_url $1 --isModelArts False --run_distribute True > train0.log 2>&1 +python3 train.py --data_url $1 --isModelArts 
False --run_distribute True > train0.log 2>&1 & if [ $? -eq 0 ];then echo "training success" diff --git a/model_zoo/research/cv/glore_res200/src/config.py b/model_zoo/research/cv/glore_res200/src/config.py index ce2fe8bc249..88def1bdaa5 100644 --- a/model_zoo/research/cv/glore_res200/src/config.py +++ b/model_zoo/research/cv/glore_res200/src/config.py @@ -18,7 +18,7 @@ network config setting, will be used in train.py from easydict import EasyDict config1 = EasyDict({ "class_num": 1000, - "batch_size": 128, + "batch_size": 80, "loss_scale": 1024, "momentum": 0.08, "weight_decay": 0.0002, diff --git a/model_zoo/research/cv/glore_res200/train.py b/model_zoo/research/cv/glore_res200/train.py index 728b61231f5..513c63274e4 100644 --- a/model_zoo/research/cv/glore_res200/train.py +++ b/model_zoo/research/cv/glore_res200/train.py @@ -30,6 +30,7 @@ from mindspore.train.loss_scale_manager import FixedLossScaleManager from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.communication.management import init, get_group_size, get_rank import mindspore.nn as nn +from mindspore.common import set_seed import mindspore.common.initializer as weight_init from src.lr_generator import get_lr from src.config import config1, config2 @@ -64,6 +65,7 @@ elif args_opt.device_target == "GPU": random.seed(1) np.random.seed(1) de.config.set_seed(1) +set_seed(1) if __name__ == '__main__': diff --git a/model_zoo/research/cv/hardnet/README_CN.md b/model_zoo/research/cv/hardnet/README_CN.md index b9eb10bdd63..fe2409488c2 100644 --- a/model_zoo/research/cv/hardnet/README_CN.md +++ b/model_zoo/research/cv/hardnet/README_CN.md @@ -89,7 +89,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network,其çªå‡ºçš„ # è¿è¡Œåˆ†å¸ƒå¼è®­ç»ƒç¤ºä¾‹ python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False OR - bash run_distribute_train.sh /path/dataset /path/pretrain_path 8 + bash run_distribute_train.sh 
/path/dataset /path/pretrain_path /path/rank_table # è¿è¡Œè¯„估示例 python3 eval.py > eval.log 2>&1 & --dataset_path /path/dataset --ckpt_path /path/ckpt @@ -242,7 +242,7 @@ HarDNet指的是Harmonic DenseNet: A low memory traffic network,其çªå‡ºçš„ ```bash python3 train.py > train.log 2>&1 & --dataset_path /path/dataset --pre_ckpt_path /path/pretrained_path --isModelArts False OR - bash run_distribute_train.sh /path/dataset /path/pretrain_path 8 + bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table ``` 上述shell脚本将在åŽå°è¿è¡Œåˆ†å¸ƒè®­ç»ƒã€‚您å¯ä»¥é€šè¿‡train_parallel[X]/log文件查看结果。采用以下方å¼è¾¾åˆ°æŸå¤±å€¼ï¼š diff --git a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh index a5476ca1787..994d50a457b 100644 --- a/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/hardnet/scripts/run_distribute_train.sh @@ -16,40 +16,28 @@ echo "==============================================================================================================" echo "Please run the script as: " -echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_SIZE" -echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path 8" +echo "bash run_distribute_train.sh DATA_PATH pretrain_path RANK_TABLE" +echo "For example: bash run_distribute_train.sh /path/dataset /path/pretrain_path /path/rank_table" echo "It is better to use the absolute path." 
echo "==============================================================================================================" set -e -DATA_PATH=$1 -PRETRAINED_PATH=$2 +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +DATA_PATH=$(get_real_path $1) +PRETRAINED_PATH=$(get_real_path $2) +RANK_TABLE=$(get_real_path $3) export DATA_PATH=${DATA_PATH} -RANK_SIZE=$3 - +export RANK_SIZE=8 +export RANK_TABLE_FILE=$RANK_TABLE EXEC_PATH=$(pwd) echo "$EXEC_PATH" -test_dist_8pcs() -{ - export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json - export RANK_SIZE=8 -} - -test_dist_4pcs() -{ - export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_4pcs.json - export RANK_SIZE=4 -} - -test_dist_2pcs() -{ - export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json - export RANK_SIZE=2 -} - -test_dist_${RANK_SIZE}pcs - export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python for((i=1;i<${RANK_SIZE};i++)) @@ -82,7 +70,7 @@ export DEVICE_ID=0 export RANK_ID=0 echo "start training for device 0" env > env0.log -nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1 +nohup python3 -u train.py --dataset_path ${DATA_PATH} --isModelArts False --distribute True --pre_ckpt_path ${PRETRAINED_PATH} > train0.log 2>&1 & if [ $? 
-eq 0 ];then echo "training success" diff --git a/model_zoo/research/cv/midas/src/midas_net.py b/model_zoo/research/cv/midas/src/midas_net.py index fe2afed0a08..8df3c229e50 100644 --- a/model_zoo/research/cv/midas/src/midas_net.py +++ b/model_zoo/research/cv/midas/src/midas_net.py @@ -22,7 +22,6 @@ from mindspore.ops import operations as P from mindspore.ops import composite as C from mindspore.ops.operations import Add, Split, Concat from mindspore.nn.wrap.grad_reducer import DistributedGradReducer -from mindspore.ops import functional as F from src.custom_op import SEBlock, GroupConv from src.blocks_ms import Interpolate, FeatureFusionBlock from src.loss import ScaleAndShiftInvariantLoss @@ -390,4 +389,5 @@ class TrainOneStepCell(nn.Cell): if self.reduce_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/resnext152_64x4d/README.md b/model_zoo/research/cv/resnext152_64x4d/README.md index f06051c8ba4..1e099732d3f 100644 --- a/model_zoo/research/cv/resnext152_64x4d/README.md +++ b/model_zoo/research/cv/resnext152_64x4d/README.md @@ -37,8 +37,8 @@ The overall network architecture of ResNeXt is show below: Dataset used: [imagenet](http://www.image-net.org/) - Dataset size: ~125G, 1.2W colorful images in 1000 classes - - Train: 120G, 1.2W images - - Test: 5G, 50000 images +- Train: 120G, 1.2W images +- Test: 5G, 50000 images - Data format: RGB images - Note: Data will be processed in src/dataset.py @@ -46,19 +46,19 @@ Dataset used: [imagenet](http://www.image-net.org/) ## [Mixed Precision](#contents) -The [mixed precision](https://www.mindspore.cn/docs/programming_guide/en/master/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. 
Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware. +The [mixed precision](https://www.mindspore.cn/tutorial/training/en/master/advanced_use/enable_mixed_precision.html) training method accelerates the deep learning neural network training process by using both the single-precision and half-precision data formats, and maintains the network precision achieved by the single-precision training at the same time. Mixed precision training can accelerate the computation process, reduce memory usage, and enable a larger model or batch size to be trained on specific hardware. For FP16 operators, if the input data type is FP32, the backend of MindSpore will automatically handle it with reduced precision. Users could check the reduced-precision operators by enabling INFO log and then searching ‘reduce precision’. # [Environment Requirements](#contents) - Hardware(Ascend) - - Prepare hardware environment with Ascend processor. +- Prepare hardware environment with Ascend processor. If you want to try Ascend, please send the [application form](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx) to ascend@huawei.com. Once approved, you can get the resources. 
- Framework - - [MindSpore](https://www.mindspore.cn/install/en) +- [MindSpore](https://www.mindspore.cn/install/en) - For more information, please check the resources below: - - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) +- [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/en/master/index.html) +- [MindSpore Python API](https://www.mindspore.cn/doc/api_python/en/master/index.html) # [Script description](#contents) @@ -145,18 +145,18 @@ or shell script: ```script Ascend: # distribute training example(8p) - bash run_distribute_train.sh RANK_TABLE_FILE DATA_PATH + sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH # standalone training - bash run_standalone_train.sh DEVICE_ID DATA_PATH + sh run_standalone_train.sh DEVICE_ID DATA_PATH ``` #### Launch ```bash # distributed training example(8p) for Ascend -bash scripts/run_distribute_train.sh RANK_TABLE_FILE DATA_PATH +sh scripts/run_distribute_train.sh RANK_TABLE_FILE /dataset/train # standalone training example for Ascend -bash scripts/run_standalone_train.sh DEVICE_ID DATA_PATH +sh scripts/run_standalone_train.sh 0 /dataset/train ``` You can find checkpoint file together with result in log. @@ -175,7 +175,7 @@ or shell script: ```script # Evaluation -bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM +sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM ``` PLATFORM is Ascend, default is Ascend. @@ -184,10 +184,10 @@ PLATFORM is Ascend, default is Ascend. 
```bash # Evaluation with checkpoint -bash scripts/run_eval.sh DEVICE_ID PRETRAINED_CKPT_PATH PLATFORM +sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext152_100.ckpt Ascend -# Directly use the script to run -python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained ~/best_acc_4.ckpt +#Directly use the script to run +python eval.py --data_dir /opt/npu/pvc/dataset/storage/imagenet/val/ --platform Ascend --pretrained /root/test/resnext152_64x4d/outputs_demo/best_acc_4.ckpt ``` #### Result @@ -213,31 +213,31 @@ python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_forma ### Training Performance -| Parameters | ResNeXt152 | -| -------------------------- | --------------------------------------------- | -| Resource | Ascend 910, cpu:2.60GHz 192cores, memory:755G | -| uploaded Date | 06/30/2021 | -| MindSpore Version | 1.2 | -| Dataset | ImageNet | -| Training Parameters | src/config.py | -| Optimizer | Momentum | -| Loss Function | SoftmaxCrossEntropy | -| Loss | 1.28923 | -| Accuracy | 80.08%(TOP1) | -| Total time | 7.8 h 8ps | -| Checkpoint for Fine tuning | 192 M(.ckpt file) | +| Parameters | ResNeXt152 | | +| -------------------------- | --------------------------------------------- | ---- | +| Resource | Ascend 910, cpu:2.60GHz 192cores, memory:755G | | +| uploaded Date | 06/30/2021 | | +| MindSpore Version | 1.2 | | +| Dataset | ImageNet | | +| Training Parameters | src/config.py | | +| Optimizer | Momentum | | +| Loss Function | SoftmaxCrossEntropy | | +| Loss | 1.28923 | | +| Accuracy | 80.08%(TOP1) | | +| Total time | 7.8 h 8ps | | +| Checkpoint for Fine tuning | 192 M(.ckpt file) | | #### Inference Performance -| Parameters | | -| ----------------- | ---------------- | -| Resource | Ascend 910 | -| uploaded Date | 06/20/2021 | -| MindSpore Version | 1.2 | -| Dataset | ImageNet, 1.2W | -| batch_size | 1 | -| outputs | probability | -| Accuracy | acc=80.08%(TOP1) | +| Parameters | | | | +| ----------------- 
| ---- | ---- | ---------------- | +| Resource | | | Ascend 910 | +| uploaded Date | | | 06/20/2021 | +| MindSpore Version | | | 1.2 | +| Dataset | | | ImageNet, 1.2W | +| batch_size | | | 1 | +| outputs | | | probability | +| Accuracy | | | acc=80.08%(TOP1) | # [Description of Random Situation](#contents) diff --git a/model_zoo/research/cv/resnext152_64x4d/README_CN.md b/model_zoo/research/cv/resnext152_64x4d/README_CN.md index b0ee1604e6e..28fe5d76433 100644 --- a/model_zoo/research/cv/resnext152_64x4d/README_CN.md +++ b/model_zoo/research/cv/resnext152_64x4d/README_CN.md @@ -51,19 +51,19 @@ ResNeXt整体网络架构如下: ## æ··åˆç²¾åº¦ -采用[æ··åˆç²¾åº¦](https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_mixed_precision.html)的训练方法使用支æŒå•ç²¾åº¦å’ŒåŠç²¾åº¦æ•°æ®æ¥æ高深度学习神ç»ç½‘络的训练速度,åŒæ—¶ä¿æŒå•ç²¾åº¦è®­ç»ƒæ‰€èƒ½è¾¾åˆ°çš„网络精度。混åˆç²¾åº¦è®­ç»ƒæ高计算速度ã€å‡å°‘内存使用的åŒæ—¶ï¼Œæ”¯æŒåœ¨ç‰¹å®šç¡¬ä»¶ä¸Šè®­ç»ƒæ›´å¤§çš„模型或实现更大批次的训练。 +采用[æ··åˆç²¾åº¦](https://www.mindspore.cn/tutorial/training/zh-CN/master/advanced_use/enable_mixed_precision.html)的训练方法使用支æŒå•ç²¾åº¦å’ŒåŠç²¾åº¦æ•°æ®æ¥æ高深度学习神ç»ç½‘络的训练速度,åŒæ—¶ä¿æŒå•ç²¾åº¦è®­ç»ƒæ‰€èƒ½è¾¾åˆ°çš„网络精度。混åˆç²¾åº¦è®­ç»ƒæ高计算速度ã€å‡å°‘内存使用的åŒæ—¶ï¼Œæ”¯æŒåœ¨ç‰¹å®šç¡¬ä»¶ä¸Šè®­ç»ƒæ›´å¤§çš„模型或实现更大批次的训练。 以FP16ç®—å­ä¸ºä¾‹ï¼Œå¦‚果输入数æ®ç±»åž‹ä¸ºFP32,MindSporeåŽå°ä¼šè‡ªåŠ¨é™ä½Žç²¾åº¦æ¥å¤„ç†æ•°æ®ã€‚用户å¯æ‰“å¼€INFO日志,æœç´¢â€œreduce precisionâ€æŸ¥çœ‹ç²¾åº¦é™ä½Žçš„ç®—å­ã€‚ # 环境è¦æ±‚ - 硬件(Ascend) - - 使用Ascend处ç†å™¨æ¥æ­å»ºç¡¬ä»¶çŽ¯å¢ƒã€‚ + - 准备Ascend处ç†å™¨æ­å»ºç¡¬ä»¶çŽ¯å¢ƒã€‚如需试用昇腾处ç†å™¨ï¼Œè¯·å‘é€[申请表](https://obs-9be7.obs.cn-east-2.myhuaweicloud.com/file/other/Ascend%20Model%20Zoo%E4%BD%93%E9%AA%8C%E8%B5%84%E6%BA%90%E7%94%B3%E8%AF%B7%E8%A1%A8.docx)至ascend@huawei.com,审核通过å³å¯èŽ·å¾—资æºã€‚ - 框架 - [MindSpore](https://www.mindspore.cn/install) - 如需查看详情,请å‚è§å¦‚下资æºï¼š - - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) - - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + - 
[MindSpore教程](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html) # 脚本说明 @@ -149,18 +149,18 @@ python train.py --data_dir ~/imagenet/train/ --platform Ascend --is_distributed ```shell Ascend: # 分布å¼è®­ç»ƒç¤ºä¾‹ï¼ˆ8å¡ï¼‰ - bash run_distribute_train.sh RANK_TABLE_FILE DATA_PATH + sh run_distribute_train.sh RANK_TABLE_FILE DATA_PATH # å•æœºè®­ç»ƒ - bash run_standalone_train.sh DEVICE_ID DATA_PATH + sh run_standalone_train.sh DEVICE_ID DATA_PATH ``` ### 样例 ```shell # Ascend分布å¼è®­ç»ƒç¤ºä¾‹ï¼ˆ8å¡ï¼‰ -bash scripts/run_distribute_train.sh RANK_TABLE_FILE DATA_PATH +sh scripts/run_distribute_train.sh RANK_TABLE_FILE /dataset/train # Ascendå•æœºè®­ç»ƒç¤ºä¾‹ -bash scripts/run_standalone_train.sh DEVICE_ID DATA_PATH +sh scripts/run_standalone_train.sh 0 /dataset/train ``` 您å¯ä»¥åœ¨æ—¥å¿—中找到检查点文件和结果。 @@ -179,7 +179,7 @@ python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained resnext ```shell # 评估 -bash run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM +sh run_eval.sh DEVICE_ID DATA_PATH PRETRAINED_CKPT_PATH PLATFORM ``` PLATFORM is Ascend, default is Ascend. @@ -188,10 +188,10 @@ PLATFORM is Ascend, default is Ascend. 
```shell # 检查点评估 -bash scripts/run_eval.sh DEVICE_ID PRETRAINED_CKPT_PATH PLATFORM +sh scripts/run_eval.sh 0 /opt/npu/datasets/classification/val /resnext152_100.ckpt Ascend #或者直接使用脚本è¿è¡Œ -python eval.py --data_dir ~/imagenet/val/ --platform Ascend --pretrained ~/best_acc_0.ckpt +python eval.py --data_dir /opt/npu/pvc/dataset/storage/imagenet/val/ --platform Ascend --pretrained /root/test/resnext152_64x4d/outputs_demo/best_acc_0.ckpt ``` #### 结果 @@ -217,31 +217,31 @@ python export.py --device_target [PLATFORM] --ckpt_file [CKPT_PATH] --file_forma ### 训练性能 -| å‚æ•° | ResNeXt152 | -| -------------------------- | ---------------------------------------------------------- | -| èµ„æº | Ascend 910ï¼›CPU:2.60GHz,192核;内存:755GB | -| 上传日期 | 2021-6-30 | -| MindSpore版本 | 1.2 | -| æ•°æ®é›† | ImageNet | -| 训练å‚æ•° | src/config.py | -| 优化器 | Momentum | -| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | -| æŸå¤± | 1.2892 | -| 准确率 | 80.08%(TOP1) | -| 总时长 | 7.8å°æ—¶ (8å¡ï¼‰ | -| 调优检查点 | 192 M(.ckpt文件) | +| å‚æ•° | ResNeXt152 | | +| -------------------------- | ---------------------------------------------------------- | ------------------------- | +| èµ„æº | Ascend 910ï¼›CPU:2.60GHz,192核;内存:755GB | | +| 上传日期 | 2021-6-30 | | +| MindSpore版本 | 1.2 | | +| æ•°æ®é›† | ImageNet | | +| 训练å‚æ•° | src/config.py | | +| 优化器 | Momentum | | +| æŸå¤±å‡½æ•° | Softmax交å‰ç†µ | | +| æŸå¤± | 1.2892 | | +| 准确率 | 80.08%(TOP1) | | +| 总时长 | 7.8å°æ—¶ (8å¡ï¼‰ | | +| 调优检查点 | 192 M(.ckpt文件) | | #### 推ç†æ€§èƒ½ -| å‚æ•° | | -| -------------------------- | -------------------- | -| èµ„æº | Ascend 910 | -| 上传日期 | 2021-6-20 | -| MindSpore版本 | 1.2 | -| æ•°æ®é›† | ImageNet, 1.2万 | -| batch_size | 1 | -| 输出 | 概率 | -| 准确率 | acc=80.08%(TOP1) | +| å‚æ•° | | | | +| -------------------------- | ----------------------------- | ------------------------- | -------------------- | +| èµ„æº | | | Ascend 910 | +| 上传日期 | | | 2021-6-20 | +| MindSpore版本 | | | 1.2 | +| æ•°æ®é›† | | | ImageNet, 1.2万 | +| batch_size | | | 1 | +| 输出 | | | 概率 | +| 准确率 | 
| | acc=80.08%(TOP1) | # éšæœºæƒ…况说明 diff --git a/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh b/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh index e0b10e8f0b1..2cfc0045d1e 100644 --- a/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/resnext152_64x4d/scripts/run_distribute_train.sh @@ -52,7 +52,6 @@ do --is_distribute=1 \ --device_id=$DEVICE_ID \ --pretrained=$PATH_CHECKPOINT \ - --data_dir=$DATA_DIR \ - --run_eval=False > log_less.txt 2>&1 & + --data_dir=$DATA_DIR > log_less.txt 2>&1 & cd ../ done diff --git a/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh b/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh index 07cb60cfe6d..6f96801064f 100644 --- a/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh +++ b/model_zoo/research/cv/resnext152_64x4d/scripts/run_standalone_train.sh @@ -26,6 +26,5 @@ python train.py \ --is_distribute=0 \ --device_id=$DEVICE_ID \ --pretrained=$PATH_CHECKPOINT \ - --data_dir=$DATA_DIR \ - --run_eval=False > log.txt 2>&1 & + --data_dir=$DATA_DIR > log.txt 2>&1 & diff --git a/model_zoo/research/cv/resnext152_64x4d/train.py b/model_zoo/research/cv/resnext152_64x4d/train.py index 90586184fd6..6e8436e7aef 100644 --- a/model_zoo/research/cv/resnext152_64x4d/train.py +++ b/model_zoo/research/cv/resnext152_64x4d/train.py @@ -146,7 +146,7 @@ def parse_args(cloud_args=None): #dataset of eval dataset parser.add_argument('--eval_data_dir', type=str, - default='', + default='/opt/npu/pvc/dataset/storage/imagenet/val', help='eval data dir') parser.add_argument('--eval_per_batch_size', default=32, @@ -289,6 +289,9 @@ def train(cloud_args=None): # checkpoint save progress_cb = ProgressMonitor(args) callbacks = [progress_cb,] + #eval dataset + if args.eval_data_dir is None or (not os.path.isdir(args.eval_data_dir)): + raise ValueError("{} is not a existing path.".format(args.eval_data_dir)) 
#code like eval.py #if run eval if args.run_eval: diff --git a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py index b62bc8a6ac1..6b4dff20463 100644 --- a/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet101/src/retinahead.py @@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py index b62bc8a6ac1..6b4dff20463 100644 --- a/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py +++ b/model_zoo/research/cv/retinanet_resnet152/src/retinahead.py @@ -246,7 +246,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class retinanetInferWithDecoder(nn.Cell): diff --git a/model_zoo/research/cv/simple_baselines/README.md b/model_zoo/research/cv/simple_baselines/README.md index 23f562e2b9d..fe453b8027c 100644 --- a/model_zoo/research/cv/simple_baselines/README.md +++ b/model_zoo/research/cv/simple_baselines/README.md @@ -82,13 +82,13 @@ simple_baselines的总体网络架构如下: ```text # 分布å¼è®­ç»ƒ -用法:sh run_distribute_train.sh --is_model_arts False --run_distribute True +用法:bash run_distribute_train.sh RANK_TABLE # å•æœºè®­ç»ƒ -用法:sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False +用法:bash run_standalone_train.sh DEVICE_ID # è¿è¡Œè¯„估示例 -用法:sh run_eval.sh +用法:bash run_eval.sh ``` # 脚本说明 @@ -183,13 +183,13 @@ config.TEST.NMS_THRE = 1.0 # nms阈值 ```text # 分布å¼è®­ç»ƒ -用法:sh run_distribute_train.sh --is_model_arts False --run_distribute 
True +用法:bash run_distribute_train.sh RANK_TABLE # å•æœºè®­ç»ƒ -用法:sh run_standalone_train.sh --device_id 0 --is_model_arts False --run_distribute False +用法:bash run_standalone_train.sh DEVICE_ID # è¿è¡Œè¯„估示例 -用法:sh run_eval.sh +用法:bash run_eval.sh ``` ### 结果 @@ -219,7 +219,7 @@ epoch:140 step:2340, loss is 0.0003393 ```bash # 评估 -sh eval.sh +bash eval.sh ``` ### 结果 diff --git a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh index b568b3d400b..a91edd71221 100644 --- a/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh +++ b/model_zoo/research/cv/simple_baselines/scripts/run_distribute_train.sh @@ -16,31 +16,24 @@ echo "========================================================================" echo "Please run the script as: " -echo "bash run.sh RANK_SIZE" -echo "For example: bash run_distribute.sh 8" +echo "bash run.sh RANK_TABLE" +echo "For example: bash run_distribute.sh RANK_TABLE" echo "It is better to use the absolute path." 
echo "========================================================================" set -e - -RANK_SIZE=$1 -export RANK_SIZE +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +RANK_TABLE=$(get_real_path $1) EXEC_PATH=$(pwd) echo "$EXEC_PATH" - -test_dist_8pcs() -{ - export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_8pcs.json - export RANK_SIZE=8 -} - -test_dist_2pcs() -{ - export RANK_TABLE_FILE=${EXEC_PATH}/rank_table_2pcs.json - export RANK_SIZE=2 -} - -test_dist_${RANK_SIZE}pcs +export RANK_TABLE_FILE=$RANK_TABLE +export RANK_SIZE=8 export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python diff --git a/model_zoo/research/cv/squeezenet1_1/README.md b/model_zoo/research/cv/squeezenet1_1/README.md index 74e614f7847..beba897264d 100644 --- a/model_zoo/research/cv/squeezenet1_1/README.md +++ b/model_zoo/research/cv/squeezenet1_1/README.md @@ -149,6 +149,13 @@ For more configuration details, please refer the script `config.py`. Usage: sh scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [PRETRAINED_CKPT_PATH](optional) ``` +```shell +# standalone training example +sh scripts/run_standalone_train.sh 0 /data/imagenet/train +``` + +checkpoint can be produced in training process and be saved in the folder ./train/ckpt_squeezenet. + For distributed training, a hccl configuration file with JSON format needs to be created in advance. Please follow the instructions in the link [hccl_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools). @@ -182,11 +189,9 @@ Usage: sh scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH] ```shell # evaluation example -sh scripts/run_eval.sh 0 ~/data/imagenet/train ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt +sh scripts/run_eval.sh 0 /data/imagenet/val ./train/ckpt_squeezenet/squeezenet_imagenet-200_40036.ckpt ``` -checkpoint can be produced in training process. 
- ### Result Evaluation result will be stored in the example path, whose folder name is "eval". Under this, you can find result like the followings in log. diff --git a/model_zoo/research/cv/squeezenet1_1/eval.py b/model_zoo/research/cv/squeezenet1_1/eval.py index 755f0dbe284..2ff0adcdb0f 100644 --- a/model_zoo/research/cv/squeezenet1_1/eval.py +++ b/model_zoo/research/cv/squeezenet1_1/eval.py @@ -25,7 +25,6 @@ from src.CrossEntropySmooth import CrossEntropySmooth from src.squeezenet import SqueezeNet as squeezenet from src.dataset import create_dataset_imagenet as create_dataset from src.config import config -import moxing as mox local_data_url = '/cache/data' local_ckpt_url = '/cache/ckpt.ckpt' @@ -33,7 +32,7 @@ local_ckpt_url = '/cache/ckpt.ckpt' parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.') parser.add_argument('--net', type=str, default='squeezenet', help='Model.') -parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True, +parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=False, help='Whether it is running on CloudBrain platform.') parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') @@ -60,6 +59,7 @@ if __name__ == '__main__': # create dataset if args_opt.run_cloudbrain: + import moxing as mox mox.file.copy_parallel(args_opt.checkpoint_path, local_ckpt_url) mox.file.copy_parallel(args_opt.data_url, local_data_url) dataset = create_dataset(dataset_path=local_data_url, @@ -81,7 +81,10 @@ if __name__ == '__main__': net = squeezenet(num_classes=config.class_num) # load checkpoint - param_dict = load_checkpoint(local_ckpt_url) + if args_opt.run_cloudbrain: + param_dict = load_checkpoint(local_ckpt_url) + else: + param_dict = load_checkpoint(args_opt.checkpoint_path) load_param_into_net(net, param_dict) 
net.set_train(False) diff --git a/model_zoo/research/cv/squeezenet1_1/train.py b/model_zoo/research/cv/squeezenet1_1/train.py index fd01d4441d0..bf4a3f29b3f 100644 --- a/model_zoo/research/cv/squeezenet1_1/train.py +++ b/model_zoo/research/cv/squeezenet1_1/train.py @@ -37,9 +37,9 @@ from src.dataset import create_dataset_imagenet as create_dataset parser = argparse.ArgumentParser(description='SqueezeNet1_1') parser.add_argument('--net', type=str, default='squeezenet', help='Model.') parser.add_argument('--dataset', type=str, default='imagenet', help='Dataset.') -parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=True, +parser.add_argument('--run_cloudbrain', type=ast.literal_eval, default=False, help='Whether it is running on CloudBrain platform.') -parser.add_argument('--run_distribute', type=bool, default=True, help='Run distribute') +parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') parser.add_argument('--device_num', type=int, default=1, help='Device num.') parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') diff --git a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py index c4c04105dd1..a57fcafb2d6 100644 --- a/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py +++ b/model_zoo/research/cv/ssd_ghostnet/src/ssd_ghostnet.py @@ -591,7 +591,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithGhostNet(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py index 7671660cbf3..ff5dfdfd9ef 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py +++ 
b/model_zoo/research/cv/ssd_mobilenetV2/src/ssd.py @@ -388,7 +388,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SSDWithMobileNetV2(nn.Cell): diff --git a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py index 15191e29c11..c9df5eb3c54 100644 --- a/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py +++ b/model_zoo/research/cv/ssd_mobilenetV2_FPNlite/src/ssd.py @@ -296,7 +296,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/model_zoo/research/cv/ssd_resnet50/src/ssd.py b/model_zoo/research/cv/ssd_resnet50/src/ssd.py index 7edccbaf659..7ec90034385 100644 --- a/model_zoo/research/cv/ssd_resnet50/src/ssd.py +++ b/model_zoo/research/cv/ssd_resnet50/src/ssd.py @@ -457,7 +457,8 @@ class TrainingWrapper(nn.Cell): if self.use_global_norm: grads = self.hyper_map(F.partial(grad_scale, F.scalar_to_array(self.sens)), grads) grads = C.clip_by_global_norm(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class SsdInferWithDecoder(nn.Cell): """ diff --git a/model_zoo/research/cv/wideresnet/README_CN.md b/model_zoo/research/cv/wideresnet/README_CN.md index 22d00098ec9..5bdbdcb888f 100644 --- a/model_zoo/research/cv/wideresnet/README_CN.md +++ b/model_zoo/research/cv/wideresnet/README_CN.md @@ -55,13 +55,15 @@ WideResNet的总体网络架构如下:[链接](https://arxiv.org/abs/1605.0714 - 下载数æ®é›†ï¼Œç›®å½•ç»“构如下: ```text -└─cifar-10-batches-bin +└─train ├─data_batch_1.bin # 训练数æ®é›† ├─data_batch_2.bin # 训练数æ®é›† ├─data_batch_3.bin # 训练数æ®é›† 
├─data_batch_4.bin # 训练数æ®é›† ├─data_batch_5.bin # 训练数æ®é›† └─test_batch.bin # 评估数æ®é›† +└─eval + └─test_batch.bin # 评估数æ®é›† ``` # 环境è¦æ±‚ @@ -82,15 +84,23 @@ WideResNet的总体网络架构如下:[链接](https://arxiv.org/abs/1605.0714 ```Shell # 分布å¼è®­ç»ƒ -用法:sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](å¯é€‰ï¼‰ +用法: +cd scripts +bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] # å•æœºè®­ç»ƒ -用法:sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](å¯é€‰ï¼‰ +用法: +cd scripts +bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] # è¿è¡Œè¯„估示例 -用法:sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] +用法: +cd scripts +bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART] ``` +若没有[PRETRAINED_CKPT_PATH],使用 “†作为å‚æ•°è¿è¡Œè„šæœ¬ã€‚ + # 脚本说明 ## 脚本åŠæ ·ä¾‹ä»£ç  @@ -149,13 +159,19 @@ WideResNet的总体网络架构如下:[链接](https://arxiv.org/abs/1605.0714 ```Shell # 分布å¼è®­ç»ƒ -用法:sh run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH](å¯é€‰ï¼‰ +用法: +cd scripts +bash run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] # å•æœºè®­ç»ƒ -用法:sh run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH](å¯é€‰ï¼‰ +用法: +cd scripts +bash run_standalone_train.sh [DATASET_PATH] [PRETRAINED_CKPT_PATH] [MODELART] ``` +若没有[PRETRAINED_CKPT_PATH],使用 “†作为å‚æ•°è¿è¡Œè„šæœ¬ã€‚ + 分布å¼è®­ç»ƒéœ€è¦æå‰åˆ›å»ºJSONæ ¼å¼çš„HCCLé…置文件。 具体æ“作,å‚è§[hccn_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)中的说明。 @@ -203,12 +219,16 @@ epoch: 4 step: 195, loss is 1.221174 ```Shell # 评估 -Usage: sh run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] +用法: +cd scripts +bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [MODELART] ``` ```Shell # 评估示例 -sh run_eval.sh /cifar10 WideResNet_best.ckpt +用法: +cd scripts +bash run_eval.sh /cifar10 WideResNet_best.ckpt ``` 训练过程中å¯ä»¥ç”Ÿæˆæ£€æŸ¥ç‚¹ã€‚ diff --git 
a/model_zoo/research/hpc/sponge/main.py b/model_zoo/research/hpc/sponge/main.py index 9f37635f6c8..503946d8370 100644 --- a/model_zoo/research/hpc/sponge/main.py +++ b/model_zoo/research/hpc/sponge/main.py @@ -16,14 +16,14 @@ import argparse import time -from src.simulation import Simulation -from src.mdnn import Mdnn, TransCrdToCV import mindspore.context as context from mindspore import Tensor from mindspore import load_checkpoint +from src.mdnn import Mdnn, TransCrdToCV +from src.simulation import Simulation parser = argparse.ArgumentParser(description='SPONGE Controller') -parser.add_argument('--i', type=str, default=None, help='Input file') +parser.add_argument('--i', type=str, default=None, help='Input .in file') parser.add_argument('--amber_parm', type=str, default=None, help='Paramter file in AMBER type') parser.add_argument('--c', type=str, default=None, help='Initial coordinates file') parser.add_argument('--r', type=str, default="restrt", help='') @@ -36,6 +36,7 @@ parser.add_argument('--checkpoint', type=str, default="", help='Checkpoint file' args_opt = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False) +# context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", device_id=args_opt.device_id, save_graphs=False) if __name__ == "__main__": simulation = Simulation(args_opt) @@ -53,7 +54,8 @@ if __name__ == "__main__": if steps == simulation.md_info.step_limit - 1: print_step = 0 temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, sigma_of_dihedral_ene, \ - nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _ = simulation(Tensor(steps), Tensor(print_step)) + nb14_lj_energy_sum, nb14_cf_energy_sum, LJ_energy_sum, ee_ene, _, _, _, _ = simulation(Tensor(steps), + Tensor(print_step)) if steps == 0: compiler_time = time.time() diff --git a/model_zoo/research/hpc/sponge/src/angle.py b/model_zoo/research/hpc/sponge/src/angle.py index 
38a1e4f3a79..a8e90dd4aae 100644 --- a/model_zoo/research/hpc/sponge/src/angle.py +++ b/model_zoo/research/hpc/sponge/src/angle.py @@ -13,12 +13,46 @@ # limitations under the License. # ============================================================================ '''Angle''' + + class Angle: '''Angle''' + def __init__(self, controller): + self.module_name = "angle" + self.h_atom_a = [] + self.h_atom_b = [] + self.h_atom_c = [] + self.h_angle_k = [] + self.h_angle_theta0 = [] + self.angle_numbers = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) + self.is_initialized = 1 + else: + self.read_in_file(controller) + + def read_in_file(self, controller): + """read_in_file""" + print("START INITIALIZING ANGLE:") + name = self.module_name + "_in_file" + if name in controller.Command_Set: + path = controller.Command_Set[name] + file = open(path, 'r') + context = file.readlines() + self.angle_numbers = int(context[0].strip()) + print(" angle_numbers is ", self.angle_numbers) + for i in range(self.angle_numbers): + val = list(map(float, context[i + 1].strip().split())) + self.h_atom_a.append(int(val[0])) + self.h_atom_b.append(int(val[1])) + self.h_atom_c.append(int(val[2])) + self.h_angle_k.append(val[3]) + self.h_angle_theta0.append(val[4]) + self.is_initialized = 1 + file.close() + print("END INITIALIZING ANGLE") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -64,9 +98,9 @@ class Angle: information.extend(value) count += len(value) for _ in range(self.angle_with_H_numbers): - self.h_atom_a[angle_count] = information[angle_count * 4 + 0] / 3 - self.h_atom_b[angle_count] = information[angle_count * 4 + 1] / 3 - self.h_atom_c[angle_count] = information[angle_count * 4 + 2] / 3 + self.h_atom_a[angle_count] = int(information[angle_count * 4 + 0] / 3) + self.h_atom_b[angle_count] = int(information[angle_count * 4 + 1] / 3) + self.h_atom_c[angle_count] = 
int(information[angle_count * 4 + 2] / 3) self.h_type[angle_count] = information[angle_count * 4 + 3] - 1 angle_count += 1 @@ -86,9 +120,9 @@ class Angle: information.extend(value) count += len(value) for _ in range(self.angle_without_H_numbers): - self.h_atom_a[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3 - self.h_atom_b[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3 - self.h_atom_c[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3 + self.h_atom_a[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 0] / 3) + self.h_atom_b[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 1] / 3) + self.h_atom_c[angle_count] = int(information[(angle_count - self.angle_with_H_numbers) * 4 + 2] / 3) self.h_type[angle_count] = information[(angle_count - self.angle_with_H_numbers) * 4 + 3] - 1 angle_count += 1 break diff --git a/model_zoo/research/hpc/sponge/src/bond.py b/model_zoo/research/hpc/sponge/src/bond.py index 4cc5b659bd4..e0287f115e5 100644 --- a/model_zoo/research/hpc/sponge/src/bond.py +++ b/model_zoo/research/hpc/sponge/src/bond.py @@ -13,15 +13,45 @@ # limitations under the License. 
# ============================================================================ '''Bond''' + + class Bond: '''Bond''' - def __init__(self, controller, md_info): - - self.atom_numbers = md_info.atom_numbers + def __init__(self, controller): + self.module_name = "bond" + self.h_atom_a = [] + self.h_atom_b = [] + self.h_k = [] + self.h_r0 = [] + self.bond_numbers = 0 + self.is_initialized = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) + self.is_initialized = 1 + else: + self.read_in_file(controller) + + def read_in_file(self, controller): + """read_in_file""" + print("START INITIALIZING BOND:") + name = self.module_name + "_in_file" + if name in controller.Command_Set: + path = controller.Command_Set[name] + file = open(path, 'r') + context = file.readlines() + self.bond_numbers = int(context[0].strip()) + print(" bond_numbers is ", self.bond_numbers) + for i in range(self.bond_numbers): + val = list(map(float, context[i + 1].strip().split())) + self.h_atom_a.append(int(val[0])) + self.h_atom_b.append(int(val[1])) + self.h_k.append(val[2]) + self.h_r0.append(val[3]) + self.is_initialized = 1 + file.close() + print("END INITIALIZING BOND") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -103,8 +133,8 @@ class Bond: count += len(value) for i in range(self.bond_with_hydrogen): - self.h_atom_a[i] = information[3 * i + 0] / 3 - self.h_atom_b[i] = information[3 * i + 1] / 3 + self.h_atom_a[i] = int(information[3 * i + 0] / 3) + self.h_atom_b[i] = int(information[3 * i + 1] / 3) tmpi = information[3 * i + 2] - 1 self.h_k[i] = self.bond_type_k[tmpi] self.h_r0[i] = self.bond_type_r[tmpi] @@ -126,8 +156,8 @@ class Bond: count += len(value) for i in range(self.bond_with_hydrogen, self.bond_numbers): - self.h_atom_a[i] = information[3 * (i - self.bond_with_hydrogen) + 0] / 3 - self.h_atom_b[i] = information[3 * (i - self.bond_with_hydrogen) + 1] / 3 + self.h_atom_a[i] = 
int(information[3 * (i - self.bond_with_hydrogen) + 0] / 3) + self.h_atom_b[i] = int(information[3 * (i - self.bond_with_hydrogen) + 1] / 3) tmpi = information[3 * (i - self.bond_with_hydrogen) + 2] - 1 self.h_k[i] = self.bond_type_k[tmpi] self.h_r0[i] = self.bond_type_r[tmpi] diff --git a/model_zoo/research/hpc/sponge/src/dihedral.py b/model_zoo/research/hpc/sponge/src/dihedral.py index 2d06c0e3b13..0eed5f9a8a0 100644 --- a/model_zoo/research/hpc/sponge/src/dihedral.py +++ b/model_zoo/research/hpc/sponge/src/dihedral.py @@ -18,11 +18,52 @@ import math class Dihedral: '''Dihedral''' + def __init__(self, controller): self.CONSTANT_Pi = 3.1415926535897932 + self.module_name = "dihedral" + self.h_atom_a = [] + self.h_atom_b = [] + self.h_atom_c = [] + self.h_atom_d = [] + self.h_ipn = [] + self.h_pn = [] + self.h_pk = [] + self.h_gamc = [] + self.h_gams = [] + self.dihedral_numbers = 0 if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) + self.is_initialized = 1 + else: + self.read_in_file(controller) + + def read_in_file(self, controller): + """read_in_file""" + print("START INITIALIZING DIHEDRAL:") + name = self.module_name + "_in_file" + if name in controller.Command_Set: + path = controller.Command_Set[name] + file = open(path, 'r') + context = file.readlines() + self.dihedral_numbers = int(context[0].strip()) + print(" dihedral_numbers is ", self.dihedral_numbers) + for i in range(self.dihedral_numbers): + val = list(map(float, context[i + 1].strip().split())) + self.h_atom_a.append(int(val[0])) + self.h_atom_b.append(int(val[1])) + self.h_atom_c.append(int(val[2])) + self.h_atom_d.append(int(val[3])) + self.h_ipn.append(val[4]) + self.h_pn.append(val[4]) + self.h_pk.append(val[5]) + self.h_gamc.append(math.cos(val[6]) * val[5]) + self.h_gams.append(math.sin(val[6]) * val[5]) + + self.is_initialized = 1 + file.close() + print("END INITIALIZING DIHEDRAL") def read_information_from_amberfile(self, 
file_path): '''read amber file''' @@ -108,11 +149,11 @@ class Dihedral: self.h_atom_b = [0] * self.dihedral_numbers self.h_atom_c = [0] * self.dihedral_numbers self.h_atom_d = [0] * self.dihedral_numbers - self.pk = [] - self.gamc = [] - self.gams = [] - self.pn = [] - self.ipn = [] + self.h_pk = [] + self.h_gamc = [] + self.h_gams = [] + self.h_pn = [] + self.h_ipn = [] for idx, val in enumerate(context): if "%FLAG DIHEDRALS_INC_HYDROGEN" in val: count = 0 @@ -132,20 +173,20 @@ class Dihedral: self.h_atom_c[i] = information[i * 5 + 2] / 3 self.h_atom_d[i] = abs(information[i * 5 + 3] / 3) tmpi = information[i * 5 + 4] - 1 - self.pk.append(self.pk_type[tmpi]) + self.h_pk.append(self.pk_type[tmpi]) tmpf = self.phase_type[tmpi] if abs(tmpf - self.CONSTANT_Pi) <= 0.001: tmpf = self.CONSTANT_Pi tmpf2 = math.cos(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.gamc.append(tmpf2 * self.pk[i]) + self.h_gamc.append(tmpf2 * self.h_pk[i]) tmpf2 = math.sin(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.gams.append(tmpf2 * self.pk[i]) - self.pn.append(abs(self.pn_type[tmpi])) - self.ipn.append(int(self.pn[i] + 0.001)) + self.h_gams.append(tmpf2 * self.h_pk[i]) + self.h_pn.append(abs(self.pn_type[tmpi])) + self.h_ipn.append(int(self.h_pn[i] + 0.001)) break for idx, val in enumerate(context): if "%FLAG DIHEDRALS_WITHOUT_HYDROGEN" in val: @@ -166,20 +207,20 @@ class Dihedral: self.h_atom_c[i] = information[(i - self.dihedral_with_hydrogen) * 5 + 2] / 3 self.h_atom_d[i] = abs(information[(i - self.dihedral_with_hydrogen) * 5 + 3] / 3) tmpi = information[(i - self.dihedral_with_hydrogen) * 5 + 4] - 1 - self.pk.append(self.pk_type[tmpi]) + self.h_pk.append(self.pk_type[tmpi]) tmpf = self.phase_type[tmpi] if abs(tmpf - self.CONSTANT_Pi) <= 0.001: tmpf = self.CONSTANT_Pi tmpf2 = math.cos(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.gamc.append(tmpf2 * self.pk[i]) + self.h_gamc.append(tmpf2 * self.h_pk[i]) tmpf2 = math.sin(tmpf) if abs(tmpf2) < 1e-6: tmpf2 = 0 - self.gams.append(tmpf2 * 
self.pk[i]) - self.pn.append(abs(self.pn_type[tmpi])) - self.ipn.append(int(self.pn[i] + 0.001)) + self.h_gams.append(tmpf2 * self.h_pk[i]) + self.h_pn.append(abs(self.pn_type[tmpi])) + self.h_ipn.append(int(self.h_pn[i] + 0.001)) break for i in range(self.dihedral_numbers): if self.h_atom_c[i] < 0: diff --git a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py index 0f25929f9d5..6552f2b23df 100644 --- a/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py +++ b/model_zoo/research/hpc/sponge/src/langevin_liujian_md.py @@ -20,37 +20,72 @@ import numpy as np class Langevin_Liujian: '''LagevinLiuJian''' + def __init__(self, controller, atom_numbers): + self.module_name = "langevin_liu" self.atom_numbers = atom_numbers + self.h_mass = [] + print("START INITIALIZING LANGEVIN_LIU DYNAMICS:") if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - + else: + self.read_mass_file(controller) self.CONSTANT_TIME_CONVERTION = 20.455 self.CONSTANT_kB = 0.00198716 self.target_temperature = 300.0 if "target_temperature" not in controller.Command_Set else float( controller.Command_Set["target_temperature"]) - self.gamma_ln = 1.0 if "langevin_gamma" not in controller.Command_Set else float( - controller.Command_Set["langevin_gamma"]) - self.rand_seed = 1 if "langevin_seed" not in controller.Command_Set else float( - controller.Command_Set["langevin_seed"]) - self.max_velocity = 10000.0 if "velocity_max" not in controller.Command_Set else float( - controller.Command_Set["velocity_max"]) - assert self.max_velocity > 0 - print("target temperature is ", self.target_temperature) - print("friction coefficient is ", self.gamma_ln, "ps^-1") - print("random seed is ", self.rand_seed) - self.dt = float(controller.Command_Set["dt"]) - self.dt *= self.CONSTANT_TIME_CONVERTION + self.gamma_ln = 1.0 + if "gamma" in controller.Command_Set: + self.gamma_ln = 
float(controller.Command_Set["gamma"]) + if "langevin_liu_gamma" in controller.Command_Set: + self.gamma_ln = float(controller.Command_Set["langevin_liu_gamma"]) + print(" langevin_liu_gamma is ", self.gamma_ln) + + self.random_seed = 1 if "seed" not in controller.Command_Set else int( + controller.Command_Set["seed"]) + + print(" target temperature is {} K".format(self.target_temperature)) + print(" friction coefficient is {} ps^-1".format(self.gamma_ln)) + print(" random seed is ", self.random_seed) + self.dt = 0.001 if "dt" not in controller.Command_Set else float( + controller.Command_Set["dt"]) * self.CONSTANT_TIME_CONVERTION self.half_dt = 0.5 * self.dt - self.rand_state = np.float32(np.zeros([math.ceil(3 * self.atom_numbers / 4.0) * 16,])) + + self.float4_numbers = math.ceil(3 * self.atom_numbers / 4.0) + self.rand_state = np.float32(np.zeros([self.float4_numbers * 16,])) self.gamma_ln = self.gamma_ln / self.CONSTANT_TIME_CONVERTION self.exp_gamma = math.exp(-1 * self.gamma_ln * self.dt) self.sqrt_gamma = math.sqrt((1. - self.exp_gamma * self.exp_gamma) * self.target_temperature * self.CONSTANT_kB) self.h_sqrt_mass = [0] * self.atom_numbers for i in range(self.atom_numbers): - self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i]) + self.h_sqrt_mass[i] = self.sqrt_gamma * math.sqrt(1. / self.h_mass[i]) if self.h_mass[i] != 0 else 0 + + self.max_velocity = 0 + if "velocity_max" in controller.Command_Set: + self.max_velocity = float(controller.Command_Set["velocity_max"]) + if "langevin_liu_velocity_max" in controller.Command_Set: + self.max_velocity = float(controller.Command_Set["langevin_liu_velocity_max"]) + print(" max velocity is ", self.max_velocity) + + self.h_mass_inverse = [0] * self.atom_numbers + for i in range(self.atom_numbers): + self.h_mass_inverse[i] = 1. 
/ self.h_mass[i] if self.h_mass[i] != 0 else 0 + + self.is_initialized = 1 + + print("END INITIALIZING LANGEVIN_LIU DYNAMICS") + + def read_mass_file(self, controller): + if "mass_in_file" in controller.Command_Set: + path = controller.Command_Set["mass_in_file"] + file = open(path, 'r') + context = file.readlines() + for idx, val in enumerate(context): + if idx > 0: + self.h_mass.append(float(val.strip())) + file.close() def read_information_from_amberfile(self, file_path): '''read amber file''' diff --git a/model_zoo/research/hpc/sponge/src/lennard_jones.py b/model_zoo/research/hpc/sponge/src/lennard_jones.py index b7617c11d66..4b92affb7c7 100644 --- a/model_zoo/research/hpc/sponge/src/lennard_jones.py +++ b/model_zoo/research/hpc/sponge/src/lennard_jones.py @@ -13,12 +13,95 @@ # limitations under the License. # ============================================================================ '''Lennard Jones''' +import mindspore.common.dtype as mstype +from mindspore import Tensor +from mindspore.ops import operations as P + + class Lennard_Jones_Information: '''Lennard Jones''' - def __init__(self, controller): + + def __init__(self, controller, cutoff, box_length): + self.module_name = "LJ" + self.is_initialized = 0 + self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0 + self.CONSTANT_Pi = 3.1415926535897932 + self.cutoff = cutoff + self.box_length = box_length + if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) + self.is_initialized = 1 + else: + self.read_in_file(controller) + + if self.is_initialized: + self.totalc6get = P.totalc6get(self.atom_numbers) + self.read_information() + + def read_in_file(self, controller): + """read_in_file""" + print("START INITIALIZING LENNADR JONES INFORMATION:") + name = self.module_name + "_in_file" + # print("read_in_file " + name) + if name in controller.Command_Set: + path = controller.Command_Set[name] + file = open(path, 'r') + context = file.readlines() + 
self.atom_numbers, self.atom_type_numbers = map(int, context[0].strip().split()) + print(" atom_numbers is ", self.atom_numbers) + print(" atom_LJ_type_number is ", self.atom_type_numbers) + self.pair_type_numbers = self.atom_type_numbers * (self.atom_type_numbers + 1) / 2 + self.h_LJ_A = [] + self.h_LJ_B = [] + self.h_atom_LJ_type = [] + startidx = 1 + count = 0 + print(startidx) + while count < self.atom_type_numbers: + if context[startidx].strip(): + val = list(map(float, context[startidx].strip().split())) + # print(val) + count += 1 + self.h_LJ_A.extend(val) + startidx += 1 + assert len(self.h_LJ_A) == self.pair_type_numbers + self.h_LJ_A = [x * 12.0 for x in self.h_LJ_A] + + count = 0 + print(startidx) + while count < self.atom_type_numbers: + if context[startidx].strip(): + val = list(map(float, context[startidx].strip().split())) + # print(val) + count += 1 + self.h_LJ_B.extend(val) + startidx += 1 + assert len(self.h_LJ_B) == self.pair_type_numbers + self.h_LJ_B = [x * 6.0 for x in self.h_LJ_B] + for idx, val in enumerate(context): + if idx > startidx: + self.h_atom_LJ_type.append(int(val.strip())) + file.close() + self.is_initialized = 1 + print("END INITIALIZING LENNADR JONES INFORMATION") + + def read_information(self): + """read_information""" + self.uint_dr_to_dr_cof = [1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[0], + 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[1], + 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[2]] + print("copy lj type to new crd") + self.atom_LJ_type = Tensor(self.h_atom_LJ_type, mstype.int32) + self.LJ_B = Tensor(self.h_LJ_B, mstype.float32) + self.factor = self.totalc6get(self.atom_LJ_type, self.LJ_B) + print(" factor is: ", self.factor) + self.long_range_factor = float(self.factor.asnumpy()) + self.long_range_factor *= -2.0 / 3.0 * self.CONSTANT_Pi / self.cutoff / self.cutoff / self.cutoff / 6.0 + self.volume = self.box_length[0] * self.box_length[1] * self.box_length[1] + print(" long range correction 
factor is: ", self.long_range_factor) + print(" End initializing long range LJ correction") def read_information_from_amberfile(self, file_path): '''read amber file''' @@ -35,9 +118,9 @@ class Lennard_Jones_Information: self.atom_numbers = value[0] self.atom_type_numbers = value[1] self.pair_type_numbers = int( - self.atom_type_numbers * (self.atom_type_numbers + 1) / 2) # TODO 这个地方有问题啊 + self.atom_type_numbers * (self.atom_type_numbers + 1) / 2) # TODO break - self.atom_LJ_type = [0] * self.atom_numbers + self.h_atom_LJ_type = [0] * self.atom_numbers for idx, val in enumerate(context): if "%FLAG ATOM_TYPE_INDEX" in val: count = 0 @@ -52,9 +135,9 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.atom_numbers): - self.atom_LJ_type[i] = information[i] - 1 + self.h_atom_LJ_type[i] = information[i] - 1 break - self.LJ_A = [0] * self.pair_type_numbers + self.h_LJ_A = [0] * self.pair_type_numbers for idx, val in enumerate(context): if "%FLAG LENNARD_JONES_ACOEF" in val: count = 0 @@ -69,9 +152,9 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.pair_type_numbers): - self.LJ_A[i] = 12.0 * information[i] + self.h_LJ_A[i] = 12.0 * information[i] break - self.LJ_B = [0] * self.pair_type_numbers + self.h_LJ_B = [0] * self.pair_type_numbers for idx, val in enumerate(context): if "%FLAG LENNARD_JONES_BCOEF" in val: count = 0 @@ -86,5 +169,5 @@ class Lennard_Jones_Information: information.extend(value) count += len(value) for i in range(self.pair_type_numbers): - self.LJ_B[i] = 6.0 * information[i] + self.h_LJ_B[i] = 6.0 * information[i] break diff --git a/model_zoo/research/hpc/sponge/src/md_information.py b/model_zoo/research/hpc/sponge/src/md_information.py index f4dc2e26f17..263608b8e99 100644 --- a/model_zoo/research/hpc/sponge/src/md_information.py +++ b/model_zoo/research/hpc/sponge/src/md_information.py @@ -14,53 +14,206 @@ # 
============================================================================ '''MD Information''' import numpy as np +from src.system_information import (periodic_box_condition_information, system_information, + non_bond_information, NVE_iteration, residue_information, trajectory_output) class md_information: '''MD Information''' + def __init__(self, controller): CONSTANT_TIME_CONVERTION = 20.455 - CONSTANT_UINT_MAX_FLOAT = 4294967296.0 + self.md_task = controller.md_task - self.mode = 0 if "mode" not in controller.Command_Set else int(controller.Command_Set["mode"]) - self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float( - controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION - self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"]) - self.trans_vec = [self.skin, self.skin, self.skin] - self.trans_vec_minus = -1 * self.trans_vec - self.step_limit = 1000 if "step_limit" not in controller.Command_Set else int( - controller.Command_Set["step_limit"]) + self.netfrc = 0 if "net_force" not in controller.Command_Set else int(controller.Command_Set["net_force"]) self.ntwx = 1000 if "write_information_interval" not in controller.Command_Set else int( controller.Command_Set["write_information_interval"]) - self.ntce = self.step_limit + 1 if "calculate_energy_interval" not in controller.Command_Set else int( - controller.Command_Set["calculate_energy_interval"]) self.atom_numbers = 0 self.residue_numbers = 0 self.density = 0.0 self.lin_serial = [] self.h_res_start = [] self.h_res_end = [] + + self.h_charge = [] self.h_mass = [] self.h_mass_inverse = [] self.h_charge = [] + self.coordinate = [] + self.box_length = [] + self.vel = [] + self.crd = [] + self.velocity = [] + + self.mode = self.read_mode(controller) + # read dt + self.dt = 0.001 * CONSTANT_TIME_CONVERTION if "dt" not in controller.Command_Set else float( + controller.Command_Set["dt"]) * CONSTANT_TIME_CONVERTION + self.dt_in_ps = 
0.001 if "dt" not in controller.Command_Set else float(controller.Command_Set["dt"]) if controller.amber_parm is not None: self.read_basic_system_information_from_amber_file(controller.amber_parm) - - if "amber_irest" in controller.Command_Set: - amber_irest = int(controller.Command_Set["amber_irest"]) if controller.initial_coordinates_file is not None: - self.read_basic_system_information_from_rst7(controller.initial_coordinates_file, amber_irest) + self.read_basic_system_information_from_rst7(controller.initial_coordinates_file) + else: + self.read_coordinate_and_velocity(controller) + self.read_mass(controller) + self.read_charge(controller) + self.crd = self.coordinate - self.crd_to_uint_crd_cof = [CONSTANT_UINT_MAX_FLOAT / self.box_length[0], - CONSTANT_UINT_MAX_FLOAT / self.box_length[1], - CONSTANT_UINT_MAX_FLOAT / self.box_length[2]] - self.uint_dr_to_dr_cof = [1.0 / self.crd_to_uint_crd_cof[0], 1.0 / self.crd_to_uint_crd_cof[1], - 1.0 / self.crd_to_uint_crd_cof[2]] - self.density *= 1e24 / 6.023e23 / (self.box_length[0] * self.box_length[1] * self.box_length[2]) + self.sys = system_information(controller, self) + self.nb = non_bond_information(controller, self) + self.output = trajectory_output(controller, self) + self.nve = NVE_iteration(controller, self) + self.res = residue_information(controller, self) + self.pbc = periodic_box_condition_information(controller, self.box_length) + + if not self.h_res_start: + self.h_res_start = self.res.h_res_start + self.h_res_end = self.res.h_res_end + self.residue_numbers = self.res.residue_numbers + + # Atom_Information_Initial + self.acc = np.zeros([self.atom_numbers, 3]) + self.frc = np.zeros([self.atom_numbers, 3]) + self.sys.freedom = 3 * self.atom_numbers + self.is_initialized = 1 self.velocity = np.reshape(np.asarray(self.velocity, np.float32), [self.atom_numbers, 3]) + self.step_limit = self.sys.step_limit + + def read_mode(self, controller): + """read_mode""" + if "mode" in controller.Command_Set: + if 
controller.Command_Set["mode"] in ["NVT", "nvt", "1"]: + print(" Mode set to NVT\n") + mode = 1 + elif controller.Command_Set["mode"] in ["NPT", "npt", "2"]: + print(" Mode set to NPT\n") + mode = 2 + elif controller.Command_Set["mode"] in ["Minimization", "minimization", "-1"]: + print(" Mode set to Energy Minimization\n") + mode = -1 + elif controller.Command_Set["mode"] in ["NVE", "nve", "0"]: + print(" Mode set to NVE\n") + mode = 0 + else: + print( + " Warning: Mode {} is not match. Set to NVE as default\n".format(controller.Command_Set["mode"])) + mode = 0 + else: + print(" Mode set to NVE as default\n") + mode = 0 + return mode + + def read_coordinate_in_file(self, path): + '''read coordinates file''' + file = open(path, 'r') + print(" Start reading coordinate_in_file:\n") + context = file.readlines() + atom_numbers = int(context[0].strip()) + if self.atom_numbers != 0: + if self.atom_numbers is not atom_numbers: + print(" Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers) + exit(1) + else: + self.atom_numbers = atom_numbers + print(" atom_numbers is ", self.atom_numbers) + + for idx in range(self.atom_numbers): + coord = list(map(float, context[idx + 1].strip().split())) + self.coordinate.append(coord) + + self.box_length = list(map(float, context[-1].strip().split()))[:3] + print(" box_length is: x: {}, y: {}, z: {}".format( + self.box_length[0], self.box_length[1], self.box_length[2])) + self.crd = self.coordinate + file.close() + + def read_velocity_in_file(self, path): + '''read velocity file''' + file = open(path, 'r') + print(" Start reading velocity_in_file:\n") + context = file.readlines() + for idx, val in enumerate(context): + if idx == 0: + atom_numbers = int(val.strip()) + if self.atom_numbers > 0 and atom_numbers != self.atom_numbers: + print(" Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers) + exit(1) + else: + self.atom_numbers = atom_numbers + else: + vel = list(map(float, val.strip().split())) + 
self.velocity.append(vel) + self.vel = self.velocity + file.close() + + def read_coordinate_and_velocity(self, controller): + """read_coordinate_and_velocity""" + if "coordinate_in_file" in controller.Command_Set: + self.read_coordinate_in_file(controller.Command_Set["coordinate_in_file"]) + if "velocity_in_file" in controller.Command_Set: + self.read_velocity_in_file(controller.Command_Set["velocity_in_file"]) + else: + print(" Velocity is set to zero as default\n") + self.velocity = [0] * 3 * self.atom_numbers + + def read_mass(self, controller): + """read_mass""" + print(" Start reading mass:") + if "mass_in_file" in controller.Command_Set: + path = controller.Command_Set["mass_in_file"] + file = open(path, 'r') + self.total_mass = 0 + context = file.readlines() + for idx, val in enumerate(context): + if idx == 0: + atom_numbers = int(val.strip()) + if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers): + print(" Error: atom_numbers is not equal: ", atom_numbers, self.atom_numbers) + exit(1) + else: + self.atom_numbers = atom_numbers + else: + mass = float(val.strip()) + self.h_mass.append(mass) + self.total_mass += mass + if mass == 0: + self.h_mass_inverse.append(0.0) + else: + self.h_mass_inverse.append(1 / mass) + file.close() + else: + print(" mass is set to 20 as default") + self.total_mass = 20 * self.atom_numbers + self.h_mass = [20] * self.atom_numbers + self.h_mass_inverse = [1 / 20] * self.atom_numbers + + print(" End reading mass") + + def read_charge(self, controller): + """read_charge""" + if "charge_in_file" in controller.Command_Set: + print(" Start reading charge:") + path = controller.Command_Set["charge_in_file"] + file = open(path, 'r') + context = file.readlines() + for idx, val in enumerate(context): + if idx == 0: + atom_numbers = int(val.strip()) + if self.atom_numbers > 0 and (atom_numbers != self.atom_numbers): + print(" Error: atom_numbers is not equal: %d %d\n", idx, self.atom_numbers) + exit(1) + else: + self.atom_numbers 
= atom_numbers + else: + self.h_charge.append(float(val.strip())) + file.close() + else: + self.h_charge = [0.0] * self.atom_numbers + print(" End reading charge") def read_basic_system_information_from_amber_file(self, path): '''read amber file''' @@ -137,11 +290,13 @@ class md_information: count += len(value) break - def read_basic_system_information_from_rst7(self, path, irest): + def read_basic_system_information_from_rst7(self, path): '''read rst7 file''' file = open(path, 'r') context = file.readlines() file.close() + x = context[1].strip().split() + irest = 1 if len(x) > 1 else 0 atom_numbers = int(context[1].strip().split()[0]) if atom_numbers != self.atom_numbers: print("ERROR") @@ -151,7 +306,7 @@ class md_information: count = 0 start_idx = 1 if irest == 1: - self.simulation_start_time = float(context[1].strip().split()[1]) + self.simulation_start_time = float(x[1]) while count <= 6 * self.atom_numbers + 3: start_idx += 1 value = list(map(float, context[start_idx].strip().split())) @@ -169,4 +324,6 @@ class md_information: self.coordinate = information[: 3 * self.atom_numbers] self.velocity = [0.0] * (3 * self.atom_numbers) self.box_length = information[3 * self.atom_numbers:3 * self.atom_numbers + 3] + self.coordinate = np.array(self.coordinate).reshape([-1, 3]) + self.velocity = np.array(self.velocity).reshape([-1, 3]) print("system size is ", self.box_length[0], self.box_length[1], self.box_length[2]) diff --git a/model_zoo/research/hpc/sponge/src/nb14.py b/model_zoo/research/hpc/sponge/src/nb14.py index 9c37ec79e02..b28f13645d8 100644 --- a/model_zoo/research/hpc/sponge/src/nb14.py +++ b/model_zoo/research/hpc/sponge/src/nb14.py @@ -13,21 +13,51 @@ # limitations under the License. 
# ============================================================================ '''NON BOND''' + + class NON_BOND_14: '''NON BOND''' - def __init__(self, controller, dihedral, atom_numbers): - self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen - self.dihedral_numbers = dihedral.dihedral_numbers - self.dihedral_type_numbers = dihedral.dihedral_type_numbers - self.atom_numbers = atom_numbers + def __init__(self, controller, dihedral, atom_numbers): + self.module_name = "nb14" + self.atom_numbers = atom_numbers + self.h_atom_a = [] + self.h_atom_b = [] + self.h_lj_scale_factor = [] + self.h_cf_scale_factor = [] + self.nb14_numbers = 0 + self.is_initialized = 0 if controller.amber_parm is not None: + self.dihedral_with_hydrogen = dihedral.dihedral_with_hydrogen + self.dihedral_numbers = dihedral.dihedral_numbers + self.dihedral_type_numbers = dihedral.dihedral_type_numbers file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - self.h_atom_a = self.h_atom_a[:self.nb14_numbers] - self.h_atom_b = self.h_atom_b[:self.nb14_numbers] - self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers] - self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers] + self.h_atom_a = self.h_atom_a[:self.nb14_numbers] + self.h_atom_b = self.h_atom_b[:self.nb14_numbers] + self.h_lj_scale_factor = self.h_lj_scale_factor[:self.nb14_numbers] + self.h_cf_scale_factor = self.h_cf_scale_factor[:self.nb14_numbers] + self.is_initialized = 1 + else: + self.read_in_file(controller) + + def read_in_file(self, controller): + """read_in_file""" + name = self.module_name + "_in_file" + if name in controller.Command_Set: + path = controller.Command_Set[name] + file = open(path, 'r') + context = file.readlines() + self.nb14_numbers = int(context[0].strip()) + print(" non-bond 14 numbers is", self.nb14_numbers) + for i in range(self.nb14_numbers): + val = list(map(float, context[i + 1].strip().split())) + self.h_atom_a.append(int(val[0])) + 
self.h_atom_b.append(int(val[1])) + self.h_lj_scale_factor.append(val[2]) + self.h_cf_scale_factor.append(val[3]) + self.is_initialized = 1 + file.close() def read_information_from_amberfile(self, file_path): '''read amber file''' diff --git a/model_zoo/research/hpc/sponge/src/neighbor_list.py b/model_zoo/research/hpc/sponge/src/neighbor_list.py index 607f6d258c2..81c5868bd56 100644 --- a/model_zoo/research/hpc/sponge/src/neighbor_list.py +++ b/model_zoo/research/hpc/sponge/src/neighbor_list.py @@ -13,17 +13,24 @@ # limitations under the License. # ============================================================================ '''Neighbor List''' + + class neighbor_list: '''Neighbor List''' + def __init__(self, controller, atom_numbers, box_length): - self.refresh_interval = 20 if "neighbor_list_refresh_interval" not in controller.Command_Set else int( - controller.Command_Set["neighbor_list_refresh_interval"]) + self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0 + print("START INITIALIZING NEIGHBOR LIST:") + self.module_name = "neighbor_list" + self.refresh_interval = 20 if "refresh_interval" not in controller.Command_Set else int( + controller.Command_Set["refresh_interval"]) self.max_atom_in_grid_numbers = 64 if "max_atom_in_grid_numbers" not in controller.Command_Set else int( controller.Command_Set["max_atom_in_grid_numbers"]) self.max_neighbor_numbers = 800 if "max_neighbor_numbers" not in controller.Command_Set else int( controller.Command_Set["max_neighbor_numbers"]) + self.skin = 2.0 if "skin" not in controller.Command_Set else float(controller.Command_Set["skin"]) - self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"]) + self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"]) self.cutoff_square = self.cutoff * self.cutoff self.cutoff_with_skin = self.cutoff + self.skin self.half_cutoff_with_skin = 0.5 * self.cutoff_with_skin @@ -31,15 +38,17 @@ class neighbor_list: 
self.half_skin_square = 0.25 * self.skin * self.skin self.atom_numbers = atom_numbers self.box_length = box_length + self.update_volume() + + self.initial_neighbor_grid() + self.not_first_time = 0 + self.is_initialized = 1 + self.refresh_count = [0] if controller.amber_parm is not None: file_path = controller.amber_parm self.read_information_from_amberfile(file_path) - self.Initial_Neighbor_Grid() - self.not_first_time = 0 - self.refresh_count = [0] - def read_information_from_amberfile(self, file_path): '''read amber file''' file = open(file_path, 'r') @@ -117,20 +126,23 @@ class neighbor_list: self.excluded_list.extend(tmp_list) break - def Initial_Neighbor_Grid(self): + def initial_neighbor_grid(self): '''init neighbor grid''' half_cutoff = self.half_cutoff_with_skin self.Nx = int(self.box_length[0] / half_cutoff) self.Ny = int(self.box_length[1] / half_cutoff) self.Nz = int(self.box_length[2] / half_cutoff) self.grid_N = [self.Nx, self.Ny, self.Nz] - self.grid_length = [self.box_length[0] / self.Nx, self.box_length[1] / self.Ny, self.box_length[2] / self.Nz] + self.grid_length = [self.box_length[0] / self.Nx, + self.box_length[1] / self.Ny, + self.box_length[2] / self.Nz] self.grid_length_inverse = [1.0 / self.grid_length[0], 1.0 / self.grid_length[1], 1.0 / self.grid_length[2]] + self.Nxy = self.Nx * self.Ny self.grid_numbers = self.Nz * self.Nxy - self.atom_numbers_in_grid_bucket = [0] * self.grid_numbers self.bucket = [-1] * (self.grid_numbers * self.max_atom_in_grid_numbers) + self.pointer = [] temp_grid_serial = [0] * 125 for i in range(self.grid_numbers): @@ -160,3 +172,11 @@ class neighbor_list: count += 1 temp_grid_serial = sorted(temp_grid_serial) self.pointer.extend(temp_grid_serial) + + def update_volume(self): + self.quarter_crd_to_uint_crd_cof = [0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[0], + 0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[1], + 0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length[2]] + self.uint_dr_to_dr_cof = 
[1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[0], + 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[1], + 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length[2]] diff --git a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py index fd7f20f0104..4b22137d045 100644 --- a/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py +++ b/model_zoo/research/hpc/sponge/src/particle_mesh_ewald.py @@ -19,23 +19,40 @@ import math class Particle_Mesh_Ewald(): '''PME''' def __init__(self, controller, md_info): - self.cutoff = 10.0 if "cut" not in controller.Command_Set else float(controller.Command_Set["cut"]) - self.tolerance = 0.00001 if "PME_Direct_Tolerance" not in controller.Command_Set else float( - controller.Command_Set["PME_Direct_Tolerance"]) + self.module_name = "PME" + self.CONSTANT_Pi = 3.1415926535897932 + self.cutoff = 10.0 if "cutoff" not in controller.Command_Set else float(controller.Command_Set["cutoff"]) + self.tolerance = 0.00001 if "Direct_Tolerance" not in controller.Command_Set else float( + controller.Command_Set["Direct_Tolerance"]) self.fftx = -1 if "fftx" not in controller.Command_Set else int(controller.Command_Set["fftx"]) self.ffty = -1 if "ffty" not in controller.Command_Set else int(controller.Command_Set["ffty"]) self.fftz = -1 if "fftz" not in controller.Command_Set else int(controller.Command_Set["fftz"]) self.atom_numbers = md_info.atom_numbers self.box_length = md_info.box_length + self.volume = self.box_length[0] * self.box_length[1] * self.box_length[1] + if self.fftx < 0: self.fftx = self.Get_Fft_Patameter(self.box_length[0]) if self.ffty < 0: self.ffty = self.Get_Fft_Patameter(self.box_length[1]) if self.fftz < 0: self.fftz = self.Get_Fft_Patameter(self.box_length[2]) + print(" fftx: ", self.fftx) + print(" ffty: ", self.ffty) + print(" fftz: ", self.fftz) + print("pme cutoff", self.cutoff) + print("pme tolerance", self.tolerance) + self.PME_Nall = self.fftx * 
self.ffty * self.fftz + self.PME_Nin = self.ffty * self.fftz + self.PME_Nfft = self.fftx * self.ffty * (int(self.fftz / 2) + 1) + self.PME_inverse_box_vector = [self.fftx / self.box_length[0], + self.ffty / self.box_length[1], + self.fftz / self.box_length[2]] self.beta = self.Get_Beta(self.cutoff, self.tolerance) + self.neutralizing_factor = -0.5 * self.CONSTANT_Pi / (self.beta * self.beta * self.volume) + self.is_initialized = 1 def Get_Beta(self, cutoff, tolerance): '''GET BETA''' diff --git a/model_zoo/research/hpc/sponge/src/simulation.py b/model_zoo/research/hpc/sponge/src/simulation.py index e5474806c61..e02c844c476 100644 --- a/model_zoo/research/hpc/sponge/src/simulation.py +++ b/model_zoo/research/hpc/sponge/src/simulation.py @@ -13,23 +13,29 @@ # limitations under the License. # ============================================================================ '''Simulation''' -import numpy as np -import mindspore.common.dtype as mstype -from mindspore import Tensor -from mindspore import nn -from mindspore.common.parameter import Parameter -from mindspore.ops import functional as F -from mindspore.ops import operations as P +import numpy as np from src.angle import Angle +from src.bd_baro import BD_BARO from src.bond import Bond +from src.crd_molecular_map import CoordinateMolecularMap from src.dihedral import Dihedral from src.langevin_liujian_md import Langevin_Liujian from src.lennard_jones import Lennard_Jones_Information +from src.mc_baro import MC_BARO from src.md_information import md_information from src.nb14 import NON_BOND_14 from src.neighbor_list import neighbor_list from src.particle_mesh_ewald import Particle_Mesh_Ewald +from src.restrain import Restrain_Information +from src.simple_constrain import Simple_Constarin +from src.vatom import Virtual_Information + +import mindspore.common.dtype as mstype +from mindspore import Tensor, nn +from mindspore.common.parameter import Parameter +from mindspore.ops import functional as F +from mindspore.ops 
import operations as P class controller: @@ -47,6 +53,7 @@ class controller: self.Command_Set = {} self.md_task = None self.commands_from_in_file() + self.punctuation = "," def commands_from_in_file(self): '''command from in file''' @@ -55,10 +62,12 @@ class controller: file.close() self.md_task = context[0].strip() for val in context: - if "=" in val: + val = val.strip() + if val and val[0] != '#' and ("=" in val): + val = val[:val.index(",")] if ',' in val else val assert len(val.strip().split("=")) == 2 flag, value = val.strip().split("=") - value = value.replace(",", '') + value = value.replace(" ", "") flag = flag.replace(" ", "") if flag not in self.Command_Set: self.Command_Set[flag] = value @@ -73,14 +82,99 @@ class Simulation(nn.Cell): super(Simulation, self).__init__() self.control = controller(args_opt) self.md_info = md_information(self.control) - self.bond = Bond(self.control, self.md_info) + self.mode = self.md_info.mode + self.bond = Bond(self.control) + self.bond_is_initialized = self.bond.is_initialized self.angle = Angle(self.control) + self.angle_is_initialized = self.angle.is_initialized self.dihedral = Dihedral(self.control) + self.dihedral_is_initialized = self.dihedral.is_initialized self.nb14 = NON_BOND_14(self.control, self.dihedral, self.md_info.atom_numbers) + self.nb14_is_initialized = self.nb14.is_initialized self.nb_info = neighbor_list(self.control, self.md_info.atom_numbers, self.md_info.box_length) - self.LJ_info = Lennard_Jones_Information(self.control) + self.LJ_info = Lennard_Jones_Information(self.control, self.md_info.nb.cutoff, self.md_info.sys.box_length) + self.LJ_info_is_initialized = self.LJ_info.is_initialized + self.liujian_info = Langevin_Liujian(self.control, self.md_info.atom_numbers) + self.liujian_info_is_initialized = self.liujian_info.is_initialized self.pme_method = Particle_Mesh_Ewald(self.control, self.md_info) + self.pme_is_initialized = self.pme_method.is_initialized + self.restrain = 
Restrain_Information(self.control, self.md_info.atom_numbers, self.md_info.crd) + self.restrain_is_initialized = self.restrain.is_initialized + self.simple_constrain_is_initialized = 0 + + self.simple_constrain = Simple_Constarin(self.control, self.md_info, self.bond, self.angle, self.liujian_info) + self.simple_constrain_is_initialized = self.simple_constrain.is_initialized + self.freedom = self.simple_constrain.system_freedom + + self.vatom = Virtual_Information(self.control, self.md_info, self.md_info.sys.freedom) + self.vatom_is_initialized = 1 + + self.random = P.UniformReal(seed=1) + self.pow = P.Pow() + + self.mol_map = CoordinateMolecularMap(self.md_info.atom_numbers, self.md_info.sys.box_length, self.md_info.crd, + self.md_info.nb.excluded_atom_numbers, self.md_info.nb.h_excluded_numbers, + self.md_info.nb.h_excluded_list_start, self.md_info.nb.h_excluded_list) + self.mol_map_is_initialized = 1 + self.init_params() + self.init_Tensor() + self.op_define() + self.op_define_2() + self.depend = P.Depend() + self.print = P.Print() + self.total_count = Parameter(Tensor(0, mstype.int32), requires_grad=False) + self.accept_count = Parameter(Tensor(0, mstype.int32), requires_grad=False) + self.is_molecule_map_output = self.md_info.output.is_molecule_map_output + self.target_pressure = self.md_info.sys.target_pressure + self.Nx = self.nb_info.Nx + self.Ny = self.nb_info.Ny + self.Nz = self.nb_info.Nz + self.PME_inverse_box_vector = Parameter(Tensor(self.pme_method.PME_inverse_box_vector, mstype.float32), + requires_grad=False) + self.mc_baro_is_initialized = 0 + self.bd_baro_is_initialized = 0 + + if self.mode == 2 and self.control.Command_Set["barostat"] == "monte_carlo": + self.mc_baro = MC_BARO(self.control, self.md_info.atom_numbers, self.md_info.sys.target_pressure, + self.md_info.sys.box_length, self.md_info.res.is_initialized, self.md_info.mode) + self.mc_baro_is_initialized = self.mc_baro.is_initialized + self.update_interval = self.mc_baro.update_interval + 
self.mc_baro_energy_old = Parameter(Tensor(0, mstype.float32), requires_grad=False) + self.potential = Parameter(Tensor(0, mstype.float32), requires_grad=False) + self.frc_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False) + self.crd_backup = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False) + self.crd_scale_factor = Parameter(Tensor(0.0, mstype.float32), requires_grad=False) + self.system_reinitializing_count = Parameter(Tensor(0, mstype.int32), requires_grad=False) + self.mc_baro_energy_new = Parameter(Tensor(0.0, mstype.float32), requires_grad=False) + self.scale_coordinate_by_residue = Parameter(Tensor(0, mstype.float32), requires_grad=False) + self.extra_term = Parameter(Tensor(0, mstype.float32), requires_grad=False) + self.DeltaV = Parameter(Tensor(0.0, mstype.float32), requires_grad=False) + self.target_temperature = self.md_info.sys.target_temperature + self.VDevided = Parameter(Tensor(0.0, mstype.float32), requires_grad=False) + self.log = P.Log() + self.mc_baro_accept_possibility = Parameter(Tensor(0, mstype.float32), requires_grad=False) + self.exp = P.Exp() + self.mc_baro_newV = self.mc_baro.newV + self.mc_baro_V0 = Parameter(Tensor(self.mc_baro.V0, mstype.float32), requires_grad=False) + self.mc_baro_newV = self.mc_baro.newV + self.check_interval = self.mc_baro.check_interval + + if self.mode == 2 and self.control.Command_Set["barostat"] == "berendsen": + self.bd_baro = BD_BARO(self.control, self.md_info.sys.target_pressure, self.md_info.sys.box_length, + self.md_info.mode) + self.bd_baro_is_initialized = self.bd_baro.is_initialized + self.update_interval = self.bd_baro.update_interval + self.pressure = Parameter(Tensor(self.md_info.sys.d_pressure, mstype.float32), requires_grad=False) + self.compressibility = self.bd_baro.compressibility + self.bd_baro_dt = self.bd_baro.dt + self.bd_baro_taup = self.bd_baro.taup + self.system_reinitializing_count = 
Parameter(Tensor(0, mstype.int32), requires_grad=False) + self.bd_baro_newV = Parameter(Tensor(self.bd_baro.newV, mstype.float32), requires_grad=False) + self.bd_baro_V0 = Parameter(Tensor(self.bd_baro.V0, mstype.float32), requires_grad=False) + + def init_params(self): + """init_params""" self.bond_energy_sum = Tensor(0, mstype.int32) self.angle_energy_sum = Tensor(0, mstype.int32) self.dihedral_energy_sum = Tensor(0, mstype.int32) @@ -101,7 +195,8 @@ class Simulation(nn.Cell): self.grid_numbers = self.nb_info.grid_numbers self.max_atom_in_grid_numbers = self.nb_info.max_atom_in_grid_numbers self.max_neighbor_numbers = self.nb_info.max_neighbor_numbers - self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers + # self.excluded_atom_numbers = self.nb_info.excluded_atom_numbers + self.excluded_atom_numbers = self.md_info.nb.excluded_atom_numbers self.refresh_count = Parameter(Tensor(self.nb_info.refresh_count, mstype.int32), requires_grad=False) self.refresh_interval = self.nb_info.refresh_interval self.skin = self.nb_info.skin @@ -115,24 +210,39 @@ class Simulation(nn.Cell): self.fftx = self.pme_method.fftx self.ffty = self.pme_method.ffty self.fftz = self.pme_method.fftz - self.random_seed = self.liujian_info.rand_seed + self.random_seed = self.liujian_info.random_seed self.dt = self.liujian_info.dt self.half_dt = self.liujian_info.half_dt self.exp_gamma = self.liujian_info.exp_gamma - self.init_Tensor() - self.op_define() self.update = False self.file = None self.datfile = None + self.max_velocity = self.liujian_info.max_velocity + + # bingshui + self.CONSTANT_kB = 0.00198716 def init_Tensor(self): '''init tensor''' + # MD_Reset_Atom_Energy_And_Virial + self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32), + requires_grad=False) + self.need_potential = Tensor(0, mstype.int32) + self.need_pressure = Tensor(0, mstype.int32) + # self.potential = Tensor(0, mstype.float32) + self.atom_energy = Parameter(Tensor([0] 
* self.atom_numbers, mstype.float32), requires_grad=False) + self.atom_virial = Parameter(Tensor([0] * self.atom_numbers, mstype.float32), requires_grad=False) + self.frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3]), mstype.float32), requires_grad=False) + self.crd = Parameter( - Tensor(np.float32(np.asarray(self.md_info.coordinate).reshape([self.atom_numbers, 3])), mstype.float32), + Tensor(np.array(self.md_info.coordinate).reshape([self.atom_numbers, 3]), mstype.float32), requires_grad=False) - self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.crd_to_uint_crd_cof, np.float32), mstype.float32) - self.uint_dr_to_dr_cof = Parameter( - Tensor(np.asarray(self.md_info.uint_dr_to_dr_cof, np.float32), mstype.float32), requires_grad=False) + self.crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.crd_to_uint_crd_cof, np.float32), mstype.float32) + self.quarter_crd_to_uint_crd_cof = Tensor(np.asarray(self.md_info.pbc.quarter_crd_to_uint_crd_cof, np.float32), + mstype.float32) + + self.uint_dr_to_dr_cof = Parameter(Tensor(self.md_info.pbc.uint_dr_to_dr_cof, mstype.float32), + requires_grad=False) self.box_length = Tensor(self.md_info.box_length, mstype.float32) self.charge = Parameter(Tensor(np.asarray(self.md_info.h_charge, dtype=np.float32), mstype.float32), requires_grad=False) @@ -140,12 +250,13 @@ class Simulation(nn.Cell): requires_grad=False) self.last_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32), requires_grad=False) - self.uint_crd = Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.uint32), mstype.uint32), - requires_grad=False) + self.mass = Tensor(self.md_info.h_mass, mstype.float32) self.mass_inverse = Tensor(self.md_info.h_mass_inverse, mstype.float32) + self.res_mass = Tensor(self.md_info.res.h_mass, mstype.float32) + self.res_mass_inverse = Tensor(self.md_info.res.h_mass_inverse, mstype.float32) + self.res_start = Tensor(self.md_info.h_res_start, mstype.int32) self.res_end = 
Tensor(self.md_info.h_res_end, mstype.int32) - self.mass = Tensor(self.md_info.h_mass, mstype.float32) self.velocity = Parameter(Tensor(self.md_info.velocity, mstype.float32), requires_grad=False) self.acc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32), requires_grad=False) self.bond_atom_a = Tensor(np.asarray(self.bond.h_atom_a, np.int32), mstype.int32) @@ -161,17 +272,19 @@ class Simulation(nn.Cell): self.dihedral_atom_b = Tensor(np.asarray(self.dihedral.h_atom_b, np.int32), mstype.int32) self.dihedral_atom_c = Tensor(np.asarray(self.dihedral.h_atom_c, np.int32), mstype.int32) self.dihedral_atom_d = Tensor(np.asarray(self.dihedral.h_atom_d, np.int32), mstype.int32) - self.pk = Tensor(np.asarray(self.dihedral.pk, np.float32), mstype.float32) - self.gamc = Tensor(np.asarray(self.dihedral.gamc, np.float32), mstype.float32) - self.gams = Tensor(np.asarray(self.dihedral.gams, np.float32), mstype.float32) - self.pn = Tensor(np.asarray(self.dihedral.pn, np.float32), mstype.float32) - self.ipn = Tensor(np.asarray(self.dihedral.ipn, np.int32), mstype.int32) + self.pk = Tensor(np.asarray(self.dihedral.h_pk, np.float32), mstype.float32) + self.gamc = Tensor(np.asarray(self.dihedral.h_gamc, np.float32), mstype.float32) + self.gams = Tensor(np.asarray(self.dihedral.h_gams, np.float32), mstype.float32) + self.pn = Tensor(np.asarray(self.dihedral.h_pn, np.float32), mstype.float32) + self.ipn = Tensor(np.asarray(self.dihedral.h_ipn, np.int32), mstype.int32) self.nb14_atom_a = Tensor(np.asarray(self.nb14.h_atom_a, np.int32), mstype.int32) self.nb14_atom_b = Tensor(np.asarray(self.nb14.h_atom_b, np.int32), mstype.int32) self.lj_scale_factor = Tensor(np.asarray(self.nb14.h_lj_scale_factor, np.float32), mstype.float32) self.cf_scale_factor = Tensor(np.asarray(self.nb14.h_cf_scale_factor, np.float32), mstype.float32) self.grid_N = Tensor(self.nb_info.grid_N, mstype.int32) - self.grid_length_inverse = Tensor(self.nb_info.grid_length_inverse, 
mstype.float32) + self.grid_length = Parameter(Tensor(self.nb_info.grid_length, mstype.float32), requires_grad=False) + self.grid_length_inverse = Parameter(Tensor(self.nb_info.grid_length_inverse, mstype.float32), + requires_grad=False) self.bucket = Parameter(Tensor( np.asarray(self.nb_info.bucket, np.int32).reshape([self.grid_numbers, self.max_atom_in_grid_numbers]), mstype.int32), requires_grad=False) @@ -187,24 +300,29 @@ class Simulation(nn.Cell): self.nl_atom_serial = Parameter( Tensor(np.zeros([self.atom_numbers, self.max_neighbor_numbers], np.int32), mstype.int32), requires_grad=False) - self.excluded_list_start = Tensor(np.asarray(self.nb_info.excluded_list_start, np.int32), mstype.int32) - self.excluded_list = Tensor(np.asarray(self.nb_info.excluded_list, np.int32), mstype.int32) - self.excluded_numbers = Tensor(np.asarray(self.nb_info.excluded_numbers, np.int32), mstype.int32) + self.excluded_list_start = Tensor(np.asarray(self.md_info.nb.h_excluded_list_start, np.int32), mstype.int32) + self.excluded_list = Tensor(np.asarray(self.md_info.nb.h_excluded_list, np.int32), mstype.int32) + self.excluded_numbers = Tensor(np.asarray(self.md_info.nb.h_excluded_numbers, np.int32), mstype.int32) + self.need_refresh_flag = Tensor(np.asarray([0], np.int32), mstype.int32) - self.atom_LJ_type = Tensor(np.asarray(self.LJ_info.atom_LJ_type, dtype=np.int32), mstype.int32) - self.LJ_A = Tensor(np.asarray(self.LJ_info.LJ_A, dtype=np.float32), mstype.float32) - self.LJ_B = Tensor(np.asarray(self.LJ_info.LJ_B, dtype=np.float32), mstype.float32) + self.atom_LJ_type = Tensor(self.LJ_info.atom_LJ_type, mstype.int32) + self.LJ_A = Tensor(self.LJ_info.h_LJ_A, mstype.float32) + self.LJ_B = Tensor(self.LJ_info.h_LJ_B, mstype.float32) self.sqrt_mass = Tensor(self.liujian_info.h_sqrt_mass, mstype.float32) self.rand_state = Parameter(Tensor(self.liujian_info.rand_state, mstype.float32)) self.zero_fp_tensor = Tensor(np.asarray([0,], np.float32)) + self.zero_frc = 
Parameter(Tensor(np.zeros([self.atom_numbers, 3], dtype=np.float32), mstype.float32), + requires_grad=False) def op_define(self): '''op define''' self.crd_to_uint_crd = P.CrdToUintCrd(self.atom_numbers) + self.crd_to_uint_crd_quarter = P.CrdToUintCrdQuarter(self.atom_numbers) self.mdtemp = P.MDTemperature(self.residue_numbers, self.atom_numbers) self.setup_random_state = P.MDIterationSetupRandState(self.atom_numbers, self.random_seed) - self.bond_force_with_atom_energy = P.BondForceWithAtomEnergy(bond_numbers=self.bond_numbers, - atom_numbers=self.atom_numbers) + + self.bond_force_with_atom_energy_virial = P.BondForceWithAtomEnergyAndVirial(bond_numbers=self.bond_numbers, + atom_numbers=self.atom_numbers) self.angle_force_with_atom_energy = P.AngleForceWithAtomEnergy(angle_numbers=self.angle_numbers) self.dihedral_force_with_atom_energy = P.DihedralForceWithAtomEnergy(dihedral_numbers=self.dihedral_numbers) self.nb14_force_with_atom_energy = P.Dihedral14LJCFForceWithAtomEnergy(nb14_numbers=self.nb14_numbers, @@ -215,7 +333,6 @@ class Simulation(nn.Cell): self.pme_reciprocal_force = P.PMEReciprocalForce(self.atom_numbers, self.beta, self.fftx, self.ffty, self.fftz, self.md_info.box_length[0], self.md_info.box_length[1], self.md_info.box_length[2]) - self.bond_energy = P.BondEnergy(self.bond_numbers, self.atom_numbers) self.angle_energy = P.AngleEnergy(self.angle_numbers) self.dihedral_energy = P.DihedralEnergy(self.dihedral_numbers) @@ -225,77 +342,204 @@ class Simulation(nn.Cell): self.pme_energy = P.PMEEnergy(self.atom_numbers, self.excluded_atom_numbers, self.beta, self.fftx, self.ffty, self.fftz, self.md_info.box_length[0], self.md_info.box_length[1], self.md_info.box_length[2]) - self.md_iteration_leap_frog_liujian = P.MDIterationLeapFrogLiujian(self.atom_numbers, self.half_dt, self.dt, self.exp_gamma) - self.neighbor_list_update_init = P.NeighborListUpdate(grid_numbers=self.grid_numbers, - atom_numbers=self.atom_numbers, not_first_time=0, - nxy=self.nxy, - 
excluded_atom_numbers=self.excluded_atom_numbers, - cutoff_square=self.cutoff_square, - half_skin_square=self.half_skin_square, - cutoff_with_skin=self.cutoff_with_skin, - half_cutoff_with_skin=self.half_cutoff_with_skin, - cutoff_with_skin_square=self.cutoff_with_skin_square, - refresh_interval=self.refresh_interval, - cutoff=self.cutoff, skin=self.skin, - max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, - max_neighbor_numbers=self.max_neighbor_numbers) + self.md_iteration_leap_frog_liujian_with_max_vel = P.MDIterationLeapFrogLiujianWithMaxVel(self.atom_numbers, + self.half_dt, self.dt, + self.exp_gamma, + self.max_velocity) + self.neighbor_list_update = \ + P.NeighborListUpdate(grid_numbers=self.grid_numbers, + atom_numbers=self.atom_numbers, + not_first_time=1, nxy=self.nxy, + excluded_atom_numbers=self.excluded_atom_numbers, + cutoff_square=self.cutoff_square, + half_skin_square=self.half_skin_square, + cutoff_with_skin=self.cutoff_with_skin, + half_cutoff_with_skin=self.half_cutoff_with_skin, + cutoff_with_skin_square=self.cutoff_with_skin_square, + refresh_interval=self.refresh_interval, cutoff=self.cutoff, + skin=self.skin, + max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, + max_neighbor_numbers=self.max_neighbor_numbers) + + self.neighbor_list_update_forced_update = \ + P.NeighborListUpdate(grid_numbers=self.grid_numbers, + atom_numbers=self.atom_numbers, + not_first_time=1, nxy=self.nxy, + excluded_atom_numbers=self.excluded_atom_numbers, + cutoff_square=self.cutoff_square, + half_skin_square=self.half_skin_square, + cutoff_with_skin=self.cutoff_with_skin, + half_cutoff_with_skin=self.half_cutoff_with_skin, + cutoff_with_skin_square=self.cutoff_with_skin_square, + refresh_interval=self.refresh_interval, + cutoff=self.cutoff, + skin=self.skin, + max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, + max_neighbor_numbers=self.max_neighbor_numbers, + forced_update=1) + + self.neighbor_list_update_nb = \ + 
P.NeighborListUpdate(grid_numbers=self.grid_numbers, + atom_numbers=self.atom_numbers, + not_first_time=1, nxy=self.nxy, + excluded_atom_numbers=self.excluded_atom_numbers, + cutoff_square=self.cutoff_square, + half_skin_square=self.half_skin_square, + cutoff_with_skin=self.cutoff_with_skin, + half_cutoff_with_skin=self.half_cutoff_with_skin, + cutoff_with_skin_square=self.cutoff_with_skin_square, + refresh_interval=self.refresh_interval, + cutoff=self.cutoff, + skin=self.skin, + max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, + max_neighbor_numbers=self.max_neighbor_numbers, + forced_update=1, forced_check=1) + + def op_define_2(self): + """op_define_2""" + self.neighbor_list_update_mc = P.NeighborListUpdate(grid_numbers=self.grid_numbers, + atom_numbers=self.atom_numbers, + not_first_time=1, nxy=self.nxy, + excluded_atom_numbers=self.excluded_atom_numbers, + cutoff_square=self.cutoff_square, + half_skin_square=self.half_skin_square, + cutoff_with_skin=self.cutoff_with_skin, + half_cutoff_with_skin=self.half_cutoff_with_skin, + cutoff_with_skin_square=self.cutoff_with_skin_square, + refresh_interval=self.refresh_interval, + cutoff=self.cutoff, + skin=self.skin, + max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, + max_neighbor_numbers=self.max_neighbor_numbers, + forced_update=0, forced_check=1) - self.neighbor_list_update = P.NeighborListUpdate(grid_numbers=self.grid_numbers, atom_numbers=self.atom_numbers, - not_first_time=1, nxy=self.nxy, - excluded_atom_numbers=self.excluded_atom_numbers, - cutoff_square=self.cutoff_square, - half_skin_square=self.half_skin_square, - cutoff_with_skin=self.cutoff_with_skin, - half_cutoff_with_skin=self.half_cutoff_with_skin, - cutoff_with_skin_square=self.cutoff_with_skin_square, - refresh_interval=self.refresh_interval, cutoff=self.cutoff, - skin=self.skin, - max_atom_in_grid_numbers=self.max_atom_in_grid_numbers, - max_neighbor_numbers=self.max_neighbor_numbers) self.random_force = 
Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32) + # simple_constrain + self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers + self.last_pair_dr = Parameter(Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32), + requires_grad=False) + if self.simple_constrain_is_initialized: + self.constrain_pair_numbers = self.simple_constrain.constrain_pair_numbers + self.last_crd_to_dr = P.lastcrdtodr(self.atom_numbers, self.constrain_pair_numbers) + self.constrain_pair = np.array(self.simple_constrain.h_constrain_pair) + self.atom_i_serials = Tensor(self.constrain_pair[:, 0], mstype.int32) + self.atom_j_serials = Tensor(self.constrain_pair[:, 1], mstype.int32) + self.constant_rs = Tensor(self.constrain_pair[:, 2], mstype.float32) + self.constrain_ks = Tensor(self.constrain_pair[:, 3], mstype.float32) + self.last_pair_dr = Parameter( + Tensor(np.zeros([self.constrain_pair_numbers, 3], np.float32), mstype.float32), requires_grad=False) + self.constrain_frc = Parameter(Tensor(np.zeros([self.atom_numbers, 3], np.float32), mstype.float32), + requires_grad=False) + self.iteration_numbers = self.simple_constrain.info.iteration_numbers + self.half_exp_gamma_plus_half = self.simple_constrain.half_exp_gamma_plus_half + self.refresh_uint_crd = P.refreshuintcrd(self.atom_numbers, self.half_exp_gamma_plus_half) + self.need_pressure = 0 + self.constrain_force_cycle_with_virial = P.constrainforcecyclewithvirial(self.atom_numbers, + self.constrain_pair_numbers) + self.constrain_force_cycle = P.ConstrainForceCycle(self.atom_numbers, self.constrain_pair_numbers) + self.dt_inverse = self.simple_constrain.dt_inverse + self.refresh_crd_vel = P.refreshcrdvel(self.atom_numbers, self.dt_inverse, self.dt, self.exp_gamma, + self.half_exp_gamma_plus_half) + + if self.mol_map_is_initialized: + self.refresh_boxmaptimes = P.refreshboxmaptimes(self.atom_numbers) + self.box_map_times = Parameter(Tensor(self.mol_map.h_box_map_times, 
mstype.int32), requires_grad=False) + self.residue_numbers = self.md_info.residue_numbers + self.getcenterofmass = P.GetCenterOfMass(self.residue_numbers) + self.mapcenterofmass = P.MapCenterOfMass(self.residue_numbers, scaler=1.0) + + self.md_iteration_leap_frog = P.MDIterationLeapFrog(self.atom_numbers, self.dt) + self.md_iteration_leap_frog_with_max_vel = P.MDIterationLeapFrogWithMaxVel(self.atom_numbers, self.dt, + self.max_velocity) + self.md_information_gradient_descent = P.MDIterationGradientDescent(self.atom_numbers, self.dt * self.dt) + def Simulation_Beforce_Caculate_Force(self): '''simulation before calculate force''' - crd_to_uint_crd_cof = 0.5 * self.crd_to_uint_crd_cof - uint_crd = self.crd_to_uint_crd(crd_to_uint_crd_cof, self.crd) - return uint_crd + self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd) + return self.uint_crd def Simulation_Caculate_Force(self, uint_crd, scaler, nl_atom_numbers, nl_atom_serial): '''simulation calculate force''' - bond_force, _ = self.bond_force_with_atom_energy(uint_crd, scaler, self.bond_atom_a, - self.bond_atom_b, self.bond_k, self.bond_r0) + uint_crd = self.Simulation_Beforce_Caculate_Force() + force = self.zero_frc + if self.LJ_info_is_initialized: + lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers, + nl_atom_serial, self.LJ_A, self.LJ_B) + force = force + lj_force - angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a, - self.angle_atom_b, self.angle_atom_c, - self.angle_k, self.angle_theta0) + if self.pme_is_initialized: + pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start, + self.excluded_list, self.excluded_numbers) - dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler, - self.dihedral_atom_a, - self.dihedral_atom_b, - self.dihedral_atom_c, - self.dihedral_atom_d, self.ipn, - self.pk, self.gamc, self.gams, - 
self.pn) + pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge) + force = force + pme_excluded_force + pme_reciprocal_force + if self.nb14_is_initialized: + nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge, + scaler, self.nb14_atom_a, self.nb14_atom_b, + self.lj_scale_factor, self.cf_scale_factor, + self.LJ_A, self.LJ_B) + force = force + nb14_force - nb14_force, _ = self.nb14_force_with_atom_energy(uint_crd, self.atom_LJ_type, self.charge, - scaler, self.nb14_atom_a, self.nb14_atom_b, - self.lj_scale_factor, self.cf_scale_factor, - self.LJ_A, self.LJ_B) + if self.bond_is_initialized: + bond_force, _, _ = self.bond_force_with_atom_energy_virial(uint_crd, scaler, self.bond_atom_a, + self.bond_atom_b, self.bond_k, self.bond_r0) + force = force + bond_force + if self.angle_is_initialized: + angle_force, _ = self.angle_force_with_atom_energy(uint_crd, scaler, self.angle_atom_a, + self.angle_atom_b, self.angle_atom_c, + self.angle_k, self.angle_theta0) + force = force + angle_force + if self.dihedral_is_initialized: + dihedral_force, _ = self.dihedral_force_with_atom_energy(uint_crd, scaler, + self.dihedral_atom_a, + self.dihedral_atom_b, + self.dihedral_atom_c, + self.dihedral_atom_d, self.ipn, + self.pk, self.gamc, self.gams, + self.pn) + force = force + dihedral_force + + if self.restrain_is_initialized: + _, _, restrain_frc = self.restrain_force_with_atom_energy_and_virial(self.restrain_list, + self.crd, + self.crd_ref, + self.box_length) + force = force + restrain_frc - lj_force = self.lj_force_pme_direct_force(uint_crd, self.atom_LJ_type, self.charge, scaler, nl_atom_numbers, - nl_atom_serial, self.LJ_A, self.LJ_B) - pme_excluded_force = self.pme_excluded_force(uint_crd, scaler, self.charge, self.excluded_list_start, - self.excluded_list, self.excluded_numbers) - pme_reciprocal_force = self.pme_reciprocal_force(uint_crd, self.charge) - force = P.AddN()( - [bond_force, angle_force, dihedral_force, 
nb14_force, lj_force, pme_excluded_force, pme_reciprocal_force]) return force def Simulation_Caculate_Energy(self, uint_crd, uint_dr_to_dr_cof): '''simulation calculate energy''' + + lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers, + self.nl_atom_serial, self.LJ_A, self.LJ_B) + + lj_energy_sum = P.ReduceSum(True)(lj_energy) + # lj_energy_sum = self.zero_fp_tensor + + reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge, + self.nl_atom_numbers, + self.nl_atom_serial, + uint_dr_to_dr_cof, + self.excluded_list_start, + self.excluded_list, + self.excluded_numbers) + ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy + # ee_ene = self.zero_fp_tensor + + nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, + self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A, + self.LJ_B) + nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, + self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor) + nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy) + nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy) + # nb14_lj_energy_sum = self.zero_fp_tensor + # nb14_cf_energy_sum = self.zero_fp_tensor bond_energy = self.bond_energy(uint_crd, uint_dr_to_dr_cof, self.bond_atom_a, self.bond_atom_b, self.bond_k, self.bond_r0) bond_energy_sum = P.ReduceSum(True)(bond_energy) @@ -309,26 +553,6 @@ class Simulation(nn.Cell): self.gams, self.pn) dihedral_energy_sum = P.ReduceSum(True)(dihedral_energy) - nb14_lj_energy = self.nb14_lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, - self.nb14_atom_a, self.nb14_atom_b, self.lj_scale_factor, self.LJ_A, - self.LJ_B) - nb14_cf_energy = self.nb14_cf_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, - self.nb14_atom_a, self.nb14_atom_b, self.cf_scale_factor) - 
nb14_lj_energy_sum = P.ReduceSum(True)(nb14_lj_energy) - nb14_cf_energy_sum = P.ReduceSum(True)(nb14_cf_energy) - - lj_energy = self.lj_energy(uint_crd, self.atom_LJ_type, self.charge, uint_dr_to_dr_cof, self.nl_atom_numbers, - self.nl_atom_serial, self.LJ_A, self.LJ_B) - lj_energy_sum = P.ReduceSum(True)(lj_energy) - - reciprocal_energy, self_energy, direct_energy, correction_energy = self.pme_energy(uint_crd, self.charge, - self.nl_atom_numbers, - self.nl_atom_serial, - uint_dr_to_dr_cof, - self.excluded_list_start, - self.excluded_list, - self.excluded_numbers) - ee_ene = reciprocal_energy + self_energy + direct_energy + correction_energy total_energy = P.AddN()( [bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum, lj_energy_sum, ee_ene]) @@ -336,19 +560,43 @@ class Simulation(nn.Cell): lj_energy_sum, ee_ene, total_energy def Simulation_Temperature(self): - '''caculate temperature''' + """calculate temperature""" res_ek_energy = self.mdtemp(self.res_start, self.res_end, self.velocity, self.mass) temperature = P.ReduceSum()(res_ek_energy) return temperature def Simulation_MDIterationLeapFrog_Liujian(self, inverse_mass, sqrt_mass_inverse, crd, frc, rand_state, random_frc): '''simulation leap frog iteration liujian''' - crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc, self.acc, - rand_state, random_frc) + if self.max_velocity <= 0: + crd = self.md_iteration_leap_frog_liujian(inverse_mass, sqrt_mass_inverse, self.velocity, crd, frc, + self.acc, + rand_state, random_frc) + else: + crd = self.md_iteration_leap_frog_liujian_with_max_vel(inverse_mass, sqrt_mass_inverse, self.velocity, crd, + frc, self.acc, + rand_state, random_frc) vel = F.depend(self.velocity, crd) acc = F.depend(self.acc, crd) return vel, crd, acc + def Simulation_MDIterationLeapFrog(self, force): + '''simulation leap frog''' + if self.max_velocity <= 0: + res = self.md_iteration_leap_frog(self.velocity, 
self.crd, force, self.acc, self.mass_inverse) + else: + res = self.md_iteration_leap_frog_with_max_vel(self.velocity, self.crd, force, self.acc, self.mass_inverse) + vel = F.depend(self.velocity, res) + crd = F.depend(self.crd, res) + return vel, crd, res + + def Simulation_MDInformationGradientDescent(self, force): + # print("Simulation_MDInformationGradientDescent") + res = self.md_information_gradient_descent(self.crd, force) + self.velocity = self.zero_frc + vel = F.depend(self.velocity, res) + crd = F.depend(self.crd, res) + return vel, crd, res + def Main_Print(self, *args): """compute the temperature""" steps, temperature, total_potential_energy, sigma_of_bond_ene, sigma_of_angle_ene, sigma_of_dihedral_ene, \ @@ -359,7 +607,7 @@ class Simulation(nn.Cell): temperature = temperature.asnumpy() total_potential_energy = total_potential_energy.asnumpy() - print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps, float(temperature), float(total_potential_energy)), + print("{:>7.0f} {:>7.3f} {:>11.3f}".format(steps + 1, float(temperature), float(total_potential_energy)), end=" ") if self.bond.bond_numbers > 0: sigma_of_bond_ene = sigma_of_bond_ene.asnumpy() @@ -405,34 +653,304 @@ class Simulation(nn.Cell): self.datfile.close() print("Save .dat file successfully!") + # æŽ§åŽ‹éƒ¨åˆ†ä»£ç  + def Volume_Change_Attempt(self, boxlength, DeltaV_max): + """Volume_Change_Attempt""" + nrand = self.random((1, 1)) + DeltaV = nrand * DeltaV_max + V = boxlength[0] * boxlength[1] * boxlength[2] + # crd_scale_factor = Tensor(np.crbt((V + DeltaV) / V), mstype.float32) + crd_scale_factor = self.pow((V + DeltaV) / V, -3) + return crd_scale_factor + + def Update_Volume(self, factor): + """Update_Volume""" + self.CONSTANT_UINT_MAX_FLOAT = 4294967296.0 + # f_inv = 1.0 / factor + self.box_length = factor * self.box_length + self.crd_to_uint_crd_cof = self.CONSTANT_UINT_MAX_FLOAT / self.box_length + self.quarter_crd_to_uint_crd_cof = 0.25 * self.crd_to_uint_crd_cof + self.uint_dr_to_dr_cof = 1.0 / 
self.crd_to_uint_crd_cof + self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd) + + def Neighbor_List_Update_Volume(self, box_length): + """Neighbor_List_Update_Volume""" + self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / box_length + self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * box_length + self.grid_length[0] = box_length[0] / self.Nx + self.grid_length[1] = box_length[1] / self.Ny + self.grid_length[2] = box_length[1] / self.Nz + self.grid_length_inverse = 1.0 / self.grid_length + + def LJ_Update_Volume(self): + """main destroy""" + if self.LJ_info_is_initialized: + # self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length + self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2] + + def PME_Update_Volume(self, factor): + """PME_Update_Volume""" + factor_inverse = 1.0 / factor + self.PME_inverse_box_vector[0] = self.fftx / self.box_length[0] + self.PME_inverse_box_vector[1] = self.ffty / self.box_length[1] + self.PME_inverse_box_vector[2] = self.fftz / self.box_length[2] + self.PME_inverse_box_vector = factor_inverse * self.PME_inverse_box_vector + self.beta = self.beta * factor + # self.PME_BC = self.PME_BC * factor_inverse #scale list + self.neutralizing_factor = self.pow(factor, 5.0) + + def Simple_Constrain_Update_Volume(self): + """Simple_Constrain_Update_Volume""" + if self.simple_constrain_is_initialized: + self.quarter_crd_to_uint_crd_cof = 0.25 * self.CONSTANT_UINT_MAX_FLOAT / self.box_length + self.uint_dr_to_dr_cof = 1.0 / self.CONSTANT_UINT_MAX_FLOAT * self.box_length + self.volume = self.box_length[0] * self.box_length[1] * self.box_length[2] + + def Main_Volume_Change(self, factor): + """Main_Volume_Change""" + self.Update_Volume(factor) + self.Neighbor_List_Update_Volume(self.box_length) + _ = self.neighbor_list_update_nb(self.atom_numbers_in_grid_bucket, self.bucket, + self.crd, self.box_length, self.grid_N, + 
self.grid_length_inverse, self.atom_in_grid_serial, + self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, + self.pointer, self.nl_atom_numbers, self.nl_atom_serial, + self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list, + self.excluded_numbers, self.need_refresh_flag, self.refresh_count) # Done + self.LJ_Update_Volume() + self.PME_Update_Volume(factor) + self.Simple_Constrain_Update_Volume() + # self.mol_map.Update_Volume(self.md_info.sys.box_length) + + def Main_Volume_Change_Largely(self): + """Main_Volume_Change_Largely""" + # re-initialize neighbor_list and pme + _ = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket, self.bucket, + self.crd, self.box_length, self.grid_N, + self.grid_length_inverse, self.atom_in_grid_serial, + self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, + self.pointer, self.nl_atom_numbers, self.nl_atom_serial, + self.uint_dr_to_dr_cof, self.excluded_list_start, + self.excluded_list, + self.excluded_numbers, self.need_refresh_flag, + self.refresh_count) + + def Check_MC_Barostat_Accept(self): + """Check_MC_Barostat_Accept""" + self.total_count = self.total_count + 1 + rand_num = self.random((1, 1)) + if rand_num[0] < self.mc_baro_accept_possibility: + self.reject = 0 + self.accept_count += 1 + else: + self.reject = 1 + return self.reject + + def Delta_V_Max_Update(self): + """Delta_V_Max_Update""" + if self.total_count % self.check_interval == 0: + self.accept_rate = 100.0 * self.accept_count / self.total_count + if self.accept_rate < self.accept_rate_low: + self.total_count = 0 + self.accept_count = 0 + self.DeltaV_max = self.DeltaV_max * 0.9 + if self.accept_rate > self.accept_rate_high: + self.total_count = 0 + self.accept_count = 0 + self.DeltaV_max = self.DeltaV_max * 1.1 + + def Main_iteration_presssure(self, steps, force): + """Main_iteration_presssure""" + if self.mc_baro_is_initialized and steps % self.mc_baro.update_interval == 0: + # old energy + self.mc_baro_energy_old = 
self.potential + self.frc_backup = self.frc + self.crd_backup = self.crd + self.Volume_Change_Attempt(self.box_length, 200) + + # change coordinates + if self.is_molecule_map_output: + nowrap_crd = self.Calculate_No_Wrap_Crd() + self.crd, _ = self.Residue_Crd_Map(nowrap_crd) + _ = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times) + else: + self.crd = self.crd * self.crd_scale_factor # scale list + + # change volume + self.Main_Volume_Change(self.crd_scale_factor) + self.system_reinitializing_count += 1 + + # new energy + _ = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, + self.nl_atom_serial) + + self.energy_new = self.potential + + # calculate accepted rate + if self.scale_coordinate_by_residue: + self.extra_term = self.target_pressure * self.DeltaV - \ + self.residue_numbers * self.CONSTANT_kB * \ + self.target_temperature * self.log(self.VDevided) + else: + self.extra_term = self.target_pressure * self.DeltaV - \ + self.atom_numbers * self.CONSTANT_kB * \ + self.target_temperature * self.log(self.VDevided) + + self.mc_baro_accept_possibility = self.mc_baro_energy_new - self.mc_baro_energy_old + self.extra_term + self.mc_baro.mc_baro_accept_possibility = self.exp( + -self.mc_baro_accept_possibility / (self.CONSTANT_kB * self.target_temperature)) + + # check if accepted + if self.Check_MC_Barostat_Accept(): + # if accept, refresh + self.crd_scale_factor = 1.0 / self.crd_scale_factor + self.crd = self.crd_backup + self.Main_Volume_Change(self.crd_scale_factor) + self.system_reinitializing_count += 1 + _ = self.neighbor_list_update_mc(self.atom_numbers_in_grid_bucket, self.bucket, + self.crd, self.box_length, self.grid_N, + self.grid_length_inverse, self.atom_in_grid_serial, + self.old_crd, self.crd_to_uint_crd_cof, self.uint_crd, + self.pointer, self.nl_atom_numbers, self.nl_atom_serial, + self.uint_dr_to_dr_cof, self.excluded_list_start, self.excluded_list, + 
self.excluded_numbers, self.need_refresh_flag, + self.refresh_count) + self.frc = force + self.frc = self.frc_backup + + # reinitialized + if self.system_reinitializing_count >= 20000 or (not self.reject and ( + self.mc_baro_newV > 1.331 * self.mc_baro_V0 or self.mc_baro_newV < 0.729 * self.mc_baro.V0)): + self.Main_Volume_Change_Largely() + self.mc_baro_V0 = self.mc_baro_newV + self.system_reinitializing_count = self.zero_fp_tensor + self.Delta_V_Max_Update() + + def Constrain(self): + """Constrain""" + constrain_frc = self.zero_frc + for _ in range(self.iteration_numbers): + test_uint_crd = self.refresh_uint_crd(self.crd, self.quarter_crd_to_uint_crd_cof, constrain_frc, + self.mass_inverse) + if self.need_pressure: + force, _ = self.constrain_force_cycle_with_virial(test_uint_crd, self.uint_dr_to_dr_cof, + self.last_pair_dr, self.atom_i_serials, + self.atom_j_serials, self.constant_rs, + self.constrain_ks) + else: + force = self.constrain_force_cycle(test_uint_crd, self.uint_dr_to_dr_cof, self.last_pair_dr, + self.atom_i_serials, + self.atom_j_serials, self.constant_rs, self.constrain_ks) + constrain_frc = constrain_frc + force + + res = self.refresh_crd_vel(self.crd, self.velocity, constrain_frc, self.mass_inverse) + crd = self.depend(self.crd, res) + vel = self.depend(self.velocity, res) + + return crd, vel, res + + def Main_Iteration(self, steps, force): + '''Main_Iteration''' + # self.Main_iteration_presssure(steps, force) + # Remember_Last_Coordinates + # pressure control 1 + if self.simple_constrain_is_initialized: + self.last_pair_dr = self.last_crd_to_dr(self.crd, self.quarter_crd_to_uint_crd_cof, self.uint_dr_to_dr_cof, + self.atom_i_serials, + self.atom_j_serials, self.constant_rs, self.constrain_ks) + + if self.mode == 0: # NVE + self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog(force) + elif self.mode == -1: # Minimization + _ = self.Simulation_MDInformationGradientDescent(force) + else: + if self.liujian_info_is_initialized: + 
self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse, + self.sqrt_mass, self.crd, + force, + self.rand_state, + self.random_force) + + if self.simple_constrain_is_initialized: + self.crd, self.velocity, res1 = self.Constrain() + else: + res1 = self.zero_fp_tensor + + # MD_Information_Crd_To_Uint_Crd + self.uint_crd = self.crd_to_uint_crd_quarter(self.quarter_crd_to_uint_crd_cof, self.crd) + res2 = self.neighbor_list_update(self.atom_numbers_in_grid_bucket, + self.bucket, + self.crd, + self.box_length, + self.grid_N, + self.grid_length_inverse, + self.atom_in_grid_serial, + self.old_crd, + self.crd_to_uint_crd_cof, + self.uint_crd, + self.pointer, + self.nl_atom_numbers, + self.nl_atom_serial, + self.uint_dr_to_dr_cof, + self.excluded_list_start, + self.excluded_list, + self.excluded_numbers, + self.need_refresh_flag, + self.refresh_count) + + res3 = self.refresh_boxmaptimes(self.crd, self.old_crd, 1.0 / self.box_length, self.box_map_times) + + return self.velocity, self.crd, res1, res2, res3 + + def Calculate_No_Wrap_Crd(self): + """Calculate_No_Wrap_Crd""" + nowrap_crd = self.box_map_times * self.box_length + self.crd + return nowrap_crd + + def Residue_Crd_Map(self, nowrap_crd): + """Residue_Crd_Map""" + center_of_mass = self.getcenterofmass(self.res_start, self.res_end, nowrap_crd, self.mass, + self.res_mass_inverse) + + res = self.mapcenterofmass(self.res_start, self.res_end, center_of_mass, self.box_length, nowrap_crd, self.crd) + + return self.crd, res + def construct(self, step, print_step): '''construct''' - self.last_crd = self.crd - res = self.neighbor_list_update(self.atom_numbers_in_grid_bucket, - self.bucket, - self.crd, - self.box_length, - self.grid_N, - self.grid_length_inverse, - self.atom_in_grid_serial, - self.old_crd, - self.crd_to_uint_crd_cof, - self.uint_crd, - self.pointer, - self.nl_atom_numbers, - self.nl_atom_serial, - self.uint_dr_to_dr_cof, - self.excluded_list_start, - self.excluded_list, - 
self.excluded_numbers, - self.need_refresh_flag, - self.refresh_count) - uint_crd = self.Simulation_Beforce_Caculate_Force() - force = self.Simulation_Caculate_Force(uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, + # self.last_crd = self.crd + if step == 0: + res = self.neighbor_list_update_forced_update(self.atom_numbers_in_grid_bucket, + self.bucket, + self.crd, + self.box_length, + self.grid_N, + self.grid_length_inverse, + self.atom_in_grid_serial, + self.old_crd, + self.crd_to_uint_crd_cof, + self.uint_crd, + self.pointer, + self.nl_atom_numbers, + self.nl_atom_serial, + self.uint_dr_to_dr_cof, + self.excluded_list_start, + self.excluded_list, + self.excluded_numbers, + self.need_refresh_flag, + self.refresh_count) + else: + res = self.zero_fp_tensor + force = self.Simulation_Caculate_Force(self.uint_crd, self.uint_dr_to_dr_cof, self.nl_atom_numbers, self.nl_atom_serial) + if step == 0: + self.rand_state = self.setup_random_state() + + self.velocity, self.crd, res1, res2, res3 = self.Main_Iteration(step + 1, force) + temperature = self.Simulation_Temperature() if print_step == 0: bond_energy_sum, angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, nb14_cf_energy_sum, \ - lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(uint_crd, self.uint_dr_to_dr_cof) + lj_energy_sum, ee_ene, total_energy = self.Simulation_Caculate_Energy(self.uint_crd, self.uint_dr_to_dr_cof) else: bond_energy_sum = self.zero_fp_tensor angle_energy_sum = self.zero_fp_tensor @@ -442,12 +960,5 @@ class Simulation(nn.Cell): lj_energy_sum = self.zero_fp_tensor ee_ene = self.zero_fp_tensor total_energy = self.zero_fp_tensor - temperature = self.Simulation_Temperature() - if step == 0: - self.rand_state = self.setup_random_state() - self.velocity, self.crd, _ = self.Simulation_MDIterationLeapFrog_Liujian(self.mass_inverse, - self.sqrt_mass, self.crd, force, - self.rand_state, - self.random_force) return temperature, total_energy, bond_energy_sum, 
angle_energy_sum, dihedral_energy_sum, nb14_lj_energy_sum, \ - nb14_cf_energy_sum, lj_energy_sum, ee_ene, res + nb14_cf_energy_sum, lj_energy_sum, ee_ene, res, res1, res2, res3 diff --git a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py index 63ac1af76df..60073bb1320 100644 --- a/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py +++ b/model_zoo/research/nlp/gpt2/src/gpt2_for_finetune.py @@ -160,12 +160,9 @@ class GPT2FinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class GPT2LM(nn.Cell): diff --git a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py index 8d153ea3c67..c1edff1ada2 100644 --- a/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py +++ b/model_zoo/research/nlp/seq2seq/src/seq2seq_model/seq2seq_for_train.py @@ -296,7 +296,6 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell): dtype=mstype.float32), name="loss_scale") self.add_flags(has_effect=True) - self.loss_scalar = P.ScalarSummary() def construct(self, source_eos_ids, @@ -365,12 +364,7 @@ class Seq2seqTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) + if not overflow: + self.optimizer(grads) - self.loss_scalar("loss", loss) - - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + return (loss, cond, scaling_sens) diff --git a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py index 996ac637001..92651e2e600 100644 --- 
a/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py +++ b/model_zoo/research/nlp/seq2seq/src/utils/optimizer.py @@ -229,7 +229,6 @@ class Adam(Optimizer): self.one = Tensor(np.array([1.0]).astype(np.float32)) self.realdiv = P.RealDiv() - self.lr_scalar = P.ScalarSummary() def construct(self, gradients): """Adam optimizer.""" @@ -240,8 +239,6 @@ class Adam(Optimizer): gradients = self.scale_grad(gradients) lr = self.get_lr() - self.lr_scalar("learning_rate", lr) - beta1_power = self.beta1_power * self.beta1 self.beta1_power = beta1_power beta2_power = self.beta2_power * self.beta2 diff --git a/model_zoo/research/nlp/seq2seq/train.py b/model_zoo/research/nlp/seq2seq/train.py index 529a01e5e19..23c9e0fb9d8 100644 --- a/model_zoo/research/nlp/seq2seq/train.py +++ b/model_zoo/research/nlp/seq2seq/train.py @@ -44,7 +44,7 @@ parser = argparse.ArgumentParser(description='Seq2seq train entry point.') parser.add_argument("--is_modelarts", type=ast.literal_eval, default=False, help="model config json file path.") parser.add_argument("--data_url", type=str, default=None, help="pre-train dataset address.") -parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.') +parser.add_argument('--train_url', type=str, default=None, help='Location of training outputs.') parser.add_argument("--config", type=str, required=True, help="model config json file path.") parser.add_argument("--pre_train_dataset", type=str, required=True, help="pre-train dataset address.") args = parser.parse_args() @@ -217,7 +217,7 @@ def _build_training_pipeline(config: Seq2seqConfig, scale_update_cell=scale_manager.get_update_cell() ) net_with_grads.set_train(True) - model = Model(net_with_grads, amp_level="O2") + model = Model(net_with_grads) loss_monitor = LossCallBack(config) dataset_size = dataset.get_dataset_size() time_cb = TimeMonitor(data_size=dataset_size) diff --git a/model_zoo/research/nlp/skipgram/src/dataset.py 
b/model_zoo/research/nlp/skipgram/src/dataset.py index b16d0de4fe4..bba2b2014f7 100644 --- a/model_zoo/research/nlp/skipgram/src/dataset.py +++ b/model_zoo/research/nlp/skipgram/src/dataset.py @@ -177,6 +177,8 @@ def load_eval_data(data_dir): if not os.path.isfile(data_path): continue with open(data_path, 'r') as f: + k = "capital-common-countries" + samples[k] = list() for line in f: if ':' in line: strs = line.strip().split(' ') diff --git a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py index 3de30f1a3b3..715c02ff1bf 100644 --- a/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py +++ b/model_zoo/research/recommend/Fat-DeepFFM/src/fat_deepffm.py @@ -21,7 +21,6 @@ from mindspore.common.initializer import initializer import mindspore.ops as P from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore import Parameter, ParameterTuple from mindspore import Tensor @@ -351,7 +350,8 @@ class TrainStepWrap(nn.Cell): grads = self.grad(self.network, weights)(cats_vals, num_vals, label, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class ModelBuilder: diff --git a/model_zoo/research/recommend/autodis/src/autodis.py b/model_zoo/research/recommend/autodis/src/autodis.py index a0fcd3a2799..57c775d8f57 100644 --- a/model_zoo/research/recommend/autodis/src/autodis.py +++ b/model_zoo/research/recommend/autodis/src/autodis.py @@ -18,7 +18,6 @@ import os import numpy as np from sklearn.metrics import roc_auc_score import mindspore.common.dtype as mstype -from mindspore.ops import functional as F from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore.nn import Dropout @@ -333,7 +332,8 @@ class TrainStepWrap(nn.Cell): loss = self.network(batch_ids, batch_wts, label) sens = P.Fill()(P.DType()(loss), P.Shape()(loss), 
self.sens) # grads = self.grad(self.network, weights)(batch_ids, batch_wts, label, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class PredictWithSigmoid(nn.Cell): @@ -346,7 +346,7 @@ class PredictWithSigmoid(nn.Cell): self.sigmoid = P.Sigmoid() def construct(self, batch_ids, batch_wts, labels): - logits, _, _, = self.network(batch_ids, batch_wts) + logits, _, _, _, _, = self.network(batch_ids, batch_wts) pred_probs = self.sigmoid(logits) return logits, pred_probs, labels diff --git a/model_zoo/utils/hccl_tools/hccl_tools.py b/model_zoo/utils/hccl_tools/hccl_tools.py index f019f179bd8..2df333b5efc 100644 --- a/model_zoo/utils/hccl_tools/hccl_tools.py +++ b/model_zoo/utils/hccl_tools/hccl_tools.py @@ -110,13 +110,9 @@ def main(): # construct hccn_table device_ips: Dict[Any, Any] = {} - with open('/etc/hccn.conf', 'r') as fin: - for hccn_item in fin.readlines(): - if hccn_item.strip().startswith('address_'): - device_id, device_ip = hccn_item.split('=') - device_id = device_id.split('_')[1] - device_ips[device_id] = device_ip.strip() - + for device_id in device_num_list: + ret = os.popen("hccn_tool -i %d -ip -g" % device_id).readlines() + device_ips[str(device_id)] = ret[0].split(":")[1].replace('\n', '') hccn_table = {'version': '1.0', 'server_count': '1', 'server_list': []} diff --git a/tests/st/auto_monad/test_auto_monad_mindtester.py b/tests/st/auto_monad/test_auto_monad_mindtester.py index 796ad620c40..8dc7af94920 100644 --- a/tests/st/auto_monad/test_auto_monad_mindtester.py +++ b/tests/st/auto_monad/test_auto_monad_mindtester.py @@ -675,10 +675,9 @@ class SideEffectControlFlowAssignDependWhileNet(Cell): return grad_out -# Now the case can't pass because the GPU RT problem, so only run on Ascend current time. 
@pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_side_effect_grad_control_flow_assign_depend_while_net(): context.set_context(mode=context.GRAPH_MODE) diff --git a/tests/st/control/inner/test_002_single_for.py b/tests/st/control/inner/test_002_single_for.py index 1da99eed429..2f8a49e92a1 100644 --- a/tests/st/control/inner/test_002_single_for.py +++ b/tests/st/control/inner/test_002_single_for.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ import numpy as np +import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -165,7 +166,7 @@ def test_single_for_03(): assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res - +@pytest.mark.skip(reason="not supported side effect") def test_single_for_04(): class SingleForNet(nn.Cell): def __init__(self): diff --git a/tests/st/control/inner/test_010_if_in_if.py b/tests/st/control/inner/test_010_if_in_if.py index 2d83bd15b65..a4fc529581b 100644 --- a/tests/st/control/inner/test_010_if_in_if.py +++ b/tests/st/control/inner/test_010_if_in_if.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ +import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -143,11 +144,13 @@ def test_if_in_if(): control_flow_if_in_if(IfInIfNet, x) +@pytest.mark.skip(reason="not supported side effect") def test_if_in_if_01(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet1, x) +@pytest.mark.skip(reason="not supported side effect") def test_if_in_if_02(): x = Tensor(2, mstype.int32) control_flow_if_in_if(IfInIfNet2, x) diff --git a/tests/st/control/inner/test_012_if_in_for.py b/tests/st/control/inner/test_012_if_in_for.py index c4c8ec057ae..aca6bb0e4eb 100644 --- a/tests/st/control/inner/test_012_if_in_for.py +++ b/tests/st/control/inner/test_012_if_in_for.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================ import numpy as np +import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -52,6 +53,7 @@ class BackwardNet(nn.Cell): return grads +@pytest.mark.skip(reason="not supported side effect") def test_forward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -66,6 +68,7 @@ def test_forward(): assert graph_mode_out == pynative_mode_out +@pytest.mark.skip(reason="not supported side effect") def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_032_for_in_for.py b/tests/st/control/inner/test_032_for_in_for.py index d57a5807660..dd7094e54aa 100644 --- a/tests/st/control/inner/test_032_for_in_for.py +++ b/tests/st/control/inner/test_032_for_in_for.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ import numpy as np +import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.common.parameter import Parameter @@ -21,7 +22,8 @@ from mindspore.ops import operations as P from mindspore.common import dtype as mstype grad_all = C.GradOperation(get_all=True) -context.set_context(device_target="Ascend") +context.set_context(device_target="GPU") + def test_for_in_for_01(): class ForInForNet(nn.Cell): @@ -75,7 +77,9 @@ def test_for_in_for_01(): assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res - +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard def test_for_in_for_02(): class ForInForNet(nn.Cell): def __init__(self): @@ -87,10 +91,10 @@ def test_for_in_for_02(): self.param_b = Parameter(Tensor(11, mstype.int32), name='b') def construct(self, x): - for _ in range(0, 10): + for _ in range(0, 3): x = x * 2 self.assign(self.param_a, x + self.param_a) - for _ in range(0, 5): + for _ in range(0, 2): x = self.add(x, x) self.param_b += 1 y = self.sub(x, self.param_b + self.param_a) diff --git a/tests/st/control/inner/test_101_if_after_while.py b/tests/st/control/inner/test_101_if_after_while.py index fdddfbef036..3b322db3d1e 100644 --- a/tests/st/control/inner/test_101_if_after_while.py +++ b/tests/st/control/inner/test_101_if_after_while.py @@ -13,6 +13,7 @@ # limitations under the License. 
# ============================================================================ import numpy as np +import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -73,6 +74,7 @@ def test_forward(): assert graph_mode_out == pynative_mode_out +@pytest.mark.skip(reason="not supported side effect") def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_110_if_after_if_in_if.py b/tests/st/control/inner/test_110_if_after_if_in_if.py index 12e269f0a6f..e0ce1edab70 100644 --- a/tests/st/control/inner/test_110_if_after_if_in_if.py +++ b/tests/st/control/inner/test_110_if_after_if_in_if.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ +import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -19,7 +20,7 @@ from mindspore.common import dtype as mstype from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) -context.set_context(device_target="Ascend") +context.set_context(device_target="GPU") class IfAfterIfInIfNet(nn.Cell): @@ -145,22 +146,27 @@ def control_flow_if_after_if_in_if(input_net, x): assert graph_forward_res == pynative_forward_res assert graph_backward_res == pynative_backward_res - +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard def test_if_after_if_in_if(): x = Tensor(2, mstype.int32) control_flow_if_after_if_in_if(IfAfterIfInIfNet, x) +@pytest.mark.skip(reason="not supported side effect") def test_if_after_if_in_if_01(): x = Tensor(2, mstype.int32) control_flow_if_after_if_in_if(IfAfterIfInIfNet1, x) +@pytest.mark.skip(reason="not supported side effect") def test_if_after_if_in_if_02(): x = Tensor(2, mstype.int32) 
control_flow_if_after_if_in_if(IfAfterIfInIfNet2, x) +@pytest.mark.skip(reason="not supported side effect") def test_if_after_if_in_if_03(): x = Tensor(2, mstype.int32) control_flow_if_after_if_in_if(IfAfterIfInIfNet3, x) diff --git a/tests/st/control/inner/test_121_if_after_while_in_while.py b/tests/st/control/inner/test_121_if_after_while_in_while.py index 32f41a8fb19..9f3feb6a16c 100644 --- a/tests/st/control/inner/test_121_if_after_while_in_while.py +++ b/tests/st/control/inner/test_121_if_after_while_in_while.py @@ -14,6 +14,7 @@ # ============================================================================ import numpy as np +import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -21,7 +22,7 @@ from mindspore.ops import composite as C from mindspore import context from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU") class ForwardNet(nn.Cell): @@ -73,6 +74,7 @@ def test_forward(): assert graph_mode_out == pynative_mode_out +@pytest.mark.skip(reason="not supported side effect") def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -122,6 +124,9 @@ class BackwardNetNoAssign(nn.Cell): # This test case has a problem of evaluator endless loop. 
+@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard def test_backward_no_assign(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) diff --git a/tests/st/control/inner/test_122_if_after_while_in_for.py b/tests/st/control/inner/test_122_if_after_while_in_for.py index 4ecee12be36..5c572faeb85 100644 --- a/tests/st/control/inner/test_122_if_after_while_in_for.py +++ b/tests/st/control/inner/test_122_if_after_while_in_for.py @@ -14,6 +14,7 @@ # ============================================================================ import numpy as np +import pytest from mindspore.common import dtype as mstype from mindspore import nn from mindspore import Tensor @@ -21,7 +22,7 @@ from mindspore.ops import composite as C from mindspore import context from mindspore.common.parameter import Parameter -context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="Ascend") +context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target="GPU") class ForwardNet(nn.Cell): @@ -69,6 +70,7 @@ def test_forward(): assert graph_mode_out == pynative_mode_out +@pytest.mark.skip(reason="not supported side effect") def test_backward(): x = Tensor(np.array(1), mstype.int32) y = Tensor(np.array(3), mstype.int32) @@ -83,3 +85,52 @@ def test_backward(): pynative_backward_net = BackwardNet(pynative_forward_net) pynative_mode_grads = pynative_backward_net(x, y) assert graph_mode_grads == pynative_mode_grads + + +class ForwardNetNoAssign(nn.Cell): + def __init__(self, max_cycles=10): + super(ForwardNetNoAssign, self).__init__() + self.max_cycles = max_cycles + self.zero = Tensor(np.array(0), mstype.int32) + self.weight = Parameter(Tensor(np.array(0), mstype.int32)) + + def construct(self, x, y): + out = self.zero + for _ in range(0, self.max_cycles): + while x < y: + out = x * y + out + x = x + 1 + #self.weight = x + if out > 20: + self.weight = out + out = out - 20 + return out, self.weight + +class 
BackwardNetNoAssign(nn.Cell): + def __init__(self, net): + super(BackwardNetNoAssign, self).__init__(auto_prefix=False) + self.forward_net = net + self.grad = C.GradOperation(get_all=True) + + def construct(self, *inputs): + grads = self.grad(self.forward_net)(*inputs) + return grads + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_backward_no_assign(): + x = Tensor(np.array(1), mstype.int32) + y = Tensor(np.array(3), mstype.int32) + # Graph Mode + context.set_context(mode=context.GRAPH_MODE) + graph_forward_net = ForwardNetNoAssign(max_cycles=3) + graph_backward_net = BackwardNetNoAssign(graph_forward_net) + graph_mode_grads = graph_backward_net(x, y) + # Pynative Mode + context.set_context(mode=context.PYNATIVE_MODE) + pynative_forward_net = ForwardNetNoAssign(max_cycles=3) + pynative_backward_net = BackwardNetNoAssign(pynative_forward_net) + pynative_mode_grads = pynative_backward_net(x, y) + assert graph_mode_grads == pynative_mode_grads diff --git a/tests/st/control/inner/test_330_for_after_for_in_if.py b/tests/st/control/inner/test_330_for_after_for_in_if.py index d3246758f25..c05d387fc34 100644 --- a/tests/st/control/inner/test_330_for_after_for_in_if.py +++ b/tests/st/control/inner/test_330_for_after_for_in_if.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ +import pytest from mindspore import context from mindspore import Tensor, nn from mindspore.ops import composite as C @@ -21,6 +22,7 @@ from mindspore.common.parameter import Parameter grad_all = C.GradOperation(get_all=True) context.set_context(device_target="Ascend") +@pytest.mark.skip(reason="not supported side effect") def test_for_after_for_in_if(): class ForAfterForInIfNet(nn.Cell): def __init__(self): diff --git a/tests/st/control/test_cont_grad.py b/tests/st/control/test_cont_grad.py index 9b598ea4b8b..45ccc095f67 100644 --- a/tests/st/control/test_cont_grad.py +++ b/tests/st/control/test_cont_grad.py @@ -23,6 +23,7 @@ from mindspore import nn from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.ops import composite as C from mindspore.ops import operations as P + # from tests.vm_impl.math_ops_vm_impl import * # from tests.vm_impl.vm_interface import * # from tests.vm_impl import * @@ -54,8 +55,9 @@ def test_while_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -63,15 +65,16 @@ def test_while_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 
@pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_const_param_grad(): class MyWhileNet(nn.Cell): @@ -93,7 +96,8 @@ def test_while_with_const_param_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor([1.1], dtype=ms.float32) @@ -104,9 +108,10 @@ def test_while_with_const_param_grad(): assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_variable_grad(): class MyWhileNet(nn.Cell): @@ -128,7 +133,8 @@ def test_while_with_variable_grad(): def construct(self, *inputs): return grad_all(self.net)(*inputs) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor([1.1], dtype=ms.float32) @@ -139,9 +145,10 @@ def test_while_with_variable_grad(): assert np.allclose(graph_output[0].asnumpy(), expect_one, 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), expect_two, 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_forward(): class MyWhileNet(nn.Cell): @@ -160,8 +167,9 @@ def test_while_with_param_forward(): out = out + x + self.param idx = idx + 1 return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + 
context.set_context(mode=context.GRAPH_MODE) net = MyWhileNet() idx = Tensor(np.array(0), dtype=ms.int32) end = Tensor(np.array(2), dtype=ms.int32) @@ -170,12 +178,14 @@ def test_while_with_param_forward(): expect = np.array([[[6, 8], [10, 12]], [[19, 22], [25, 28]]], dtype=np.int32) assert np.allclose(graph_output.asnumpy(), expect, 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_endless_case(): """endless case when optimization""" + class MyWhileNet(nn.Cell): def __init__(self): super().__init__() @@ -190,21 +200,23 @@ def test_while_endless_case(): out = out + part idx = idx + 1 return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) net = MyWhileNet() idx = Tensor(np.array(0), dtype=ms.int32) end = Tensor(np.array(2), dtype=ms.int32) x = Tensor(np.arange(8).reshape(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_grad(): class MyWhileNet(nn.Cell): @@ -232,7 +244,8 @@ def test_while_with_param_grad(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -242,9 +255,10 @@ def test_while_with_param_grad(): 
expect = np.array([[[2, 2], [2, 2]], [[2, 2], [2, 2]]], dtype=np.int32) assert np.allclose(graph_output[0].asnumpy(), expect, 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_forward_with_const_branch(): class MyWhileNet(nn.Cell): @@ -264,8 +278,9 @@ def test_while_with_param_forward_with_const_branch(): out = out + idx + self.param idx = idx + 1 return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = while_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -273,16 +288,18 @@ def test_while_with_param_forward_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_opt_endless(): """endless during optimization case""" + class MyWhileNet(nn.Cell): def __init__(self): super().__init__() @@ -308,8 +325,9 @@ def test_while_opt_endless(): def construct(self, *inputs): return grad_all(self.net)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -317,7 +335,7 @@ def test_while_opt_endless(): x = Tensor(np.ones([2, 2, 2]).astype(np.float32) * 3, dtype=ms.float32) graph_output = net(idx, end, x) # pynative 
mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) @@ -343,8 +361,9 @@ def test_no_while_call(): else: out = out + idx + self.param return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = while_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -352,13 +371,14 @@ def test_no_while_call(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_grad_with_const_branch(): class MyWhileNet(nn.Cell): @@ -387,8 +407,9 @@ def test_while_with_param_grad_with_const_branch(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -396,10 +417,11 @@ def test_while_with_param_grad_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert 
np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.skip(reason="not supported yet") @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @@ -435,8 +457,9 @@ def test_for_while_with_param_grad_with_const_branch(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -444,13 +467,14 @@ def test_for_while_with_param_grad_with_const_branch(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_for_while_with_param_grad_basic(): class MyWhileNet(nn.Cell): @@ -479,8 +503,9 @@ def test_for_while_with_param_grad_basic(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -488,13 +513,14 @@ def test_for_while_with_param_grad_basic(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert 
np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_for_while_with_param_grad_normal(): class MyWhileNet(nn.Cell): @@ -523,8 +549,9 @@ def test_for_while_with_param_grad_normal(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -532,13 +559,14 @@ def test_for_while_with_param_grad_normal(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_basic_grad(): class MyWhileNet(nn.Cell): @@ -564,8 +592,9 @@ def test_while_with_param_basic_grad(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -573,13 +602,14 @@ def test_while_with_param_basic_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, 
device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_mul(): class MyWhileNet(nn.Cell): @@ -605,8 +635,9 @@ def test_while_with_param_basic_grad_mul(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -614,13 +645,14 @@ def test_while_with_param_basic_grad_mul(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_two(): class MyWhileNet(nn.Cell): @@ -647,8 +679,9 @@ def test_while_with_param_basic_grad_two(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -656,14 +689,15 @@ def test_while_with_param_basic_grad_two(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), 
dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_with_param_basic_grad_three(): class MyWhileNet(nn.Cell): @@ -691,8 +725,9 @@ def test_while_with_param_basic_grad_three(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -700,15 +735,16 @@ def test_while_with_param_basic_grad_three(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_if_with_param_grad(): class MyWhileNet(nn.Cell): @@ -737,8 +773,9 @@ def test_while_if_with_param_grad(): def construct(self, a, b, c): return 
grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -746,10 +783,11 @@ def test_while_if_with_param_grad(): x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.skip(reason="not supported yet") @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training @@ -778,8 +816,9 @@ def test_while_with_param_grad_not_enter_while(): def construct(self, a, b, c): return grad_by_list(self.net, self.weights)(a, b, c) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) while_net = MyWhileNet() net = GradNet(while_net) idx = Tensor(np.array(3), dtype=ms.int32) @@ -787,13 +826,14 @@ def test_while_with_param_grad_not_enter_while(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_with_param_if_by_if_forward(): class MyIfByIfNet(nn.Cell): @@ -810,12 +850,13 @@ def test_with_param_if_by_if_forward(): else: out = out + x if a == b: - out = out + x*3 + self.param + out 
= out + x * 3 + self.param else: - out = out + x*2 + out = out + x * 2 return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(0), dtype=ms.int32) @@ -823,13 +864,14 @@ def test_with_param_if_by_if_forward(): x = Tensor(np.ones([2, 2, 2]).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_inputs(): class MyIfByIfNet(nn.Cell): @@ -844,7 +886,7 @@ def test_with_param_if_by_if_grad_inputs(): if a < b: out = out + x + self.param * 4 if a == b: - out = out + x*3 + self.param * 3 + out = out + x * 3 + self.param * 3 return out class GradNet(nn.Cell): @@ -854,8 +896,9 @@ def test_with_param_if_by_if_grad_inputs(): def construct(self, *inputs): return grad_all(self.net)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -863,15 +906,16 @@ def test_with_param_if_by_if_grad_inputs(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) assert 
np.allclose(graph_output[1].asnumpy(), pynative_output[1].asnumpy(), 0.0001, 0.0001) assert np.allclose(graph_output[2].asnumpy(), pynative_output[2].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_parameter(): class MyIfByIfNet(nn.Cell): @@ -886,7 +930,7 @@ def test_with_param_if_by_if_grad_parameter(): if a < b: out = out + x + self.param * 2 if a == b: - out = out + x*3 + self.param + out = out + x * 3 + self.param return out class GradNet(nn.Cell): @@ -897,8 +941,9 @@ def test_with_param_if_by_if_grad_parameter(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(0), dtype=ms.int32) @@ -906,13 +951,14 @@ def test_with_param_if_by_if_grad_parameter(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_with_param_if_by_if_grad_param_excute_null(): class MyIfByIfNet(nn.Cell): @@ -936,8 +982,9 @@ def test_with_param_if_by_if_grad_param_excute_null(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) 
if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(4), dtype=ms.int32) @@ -945,13 +992,14 @@ def test_with_param_if_by_if_grad_param_excute_null(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_if_return_inside_grad(): class MyIfByIfNet(nn.Cell): @@ -977,8 +1025,9 @@ def test_if_by_if_return_inside_grad(): def construct(self, *inputs): return grad_by_list(self.net, self.weights)(*inputs) + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = GradNet(if_net) idx = Tensor(np.array(1), dtype=ms.int32) @@ -986,13 +1035,14 @@ def test_if_by_if_return_inside_grad(): x = Tensor(np.random.randn(2, 2, 2).astype(np.float32), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output[0].asnumpy(), pynative_output[0].asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_if_forward(): class MyIfByIfNet(nn.Cell): @@ -1019,8 +1069,9 @@ def test_if_by_if_forward(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + 
context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1028,16 +1079,18 @@ def test_if_by_if_forward(): x = Tensor(np.array(4), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_if_forward_control_tuple_switch(): """tuple_get from switch op will generate new switch inside to eliminate tuple_get""" + class Branch3Net(nn.Cell): def __init__(self): super().__init__() @@ -1052,6 +1105,7 @@ def test_if_by_if_forward_control_tuple_switch(): else: b = self.add(a, x) return a, b, x + class Branch2Net(nn.Cell): def __init__(self): super().__init__() @@ -1086,8 +1140,9 @@ def test_if_by_if_forward_control_tuple_switch(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1095,13 +1150,14 @@ def test_if_by_if_forward_control_tuple_switch(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level0 @pytest.mark.platform_arm_ascend_training -@pytest.mark.platform_x86_ascend_training +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def 
test_if_by_if_forward_control_inside_net(): class Branch3Net(nn.Cell): @@ -1120,6 +1176,7 @@ def test_if_by_if_forward_control_inside_net(): a = a * b out = a + b + x return out + class Branch2Net(nn.Cell): def __init__(self): super().__init__() @@ -1152,8 +1209,9 @@ def test_if_by_if_forward_control_inside_net(): a = self.sub(a, b) out = self.net(a, b, x) return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1161,10 +1219,11 @@ def test_if_by_if_forward_control_inside_net(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1194,8 +1253,9 @@ def test_if_by_if_forward_use_namespace(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1203,10 +1263,11 @@ def test_if_by_if_forward_use_namespace(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1240,8 +1301,9 @@ def test_if_by_if_forward_use_global_op(): a = a * b out = 
a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1249,10 +1311,11 @@ def test_if_by_if_forward_use_global_op(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1273,8 +1336,9 @@ def test_for_with_if_by_if_forward(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1282,10 +1346,11 @@ def test_for_with_if_by_if_forward(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1308,8 +1373,9 @@ def test_for_with_if_by_if_forward_namespace(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1317,7 +1383,7 @@ def test_for_with_if_by_if_forward_namespace(): x = Tensor(np.array(0), dtype=ms.float32) graph_output 
= net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) @@ -1355,8 +1421,9 @@ def test_if_by_if_forward_const_branch_inner(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1364,10 +1431,11 @@ def test_if_by_if_forward_const_branch_inner(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) + @pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @@ -1401,8 +1469,9 @@ def test_if_by_if_forward_all_const_branch(): a = a * b out = a + b + x return out + # graph mode - context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + context.set_context(mode=context.GRAPH_MODE) if_net = MyIfByIfNet() net = if_net idx = Tensor(np.array(2), dtype=ms.float32) @@ -1410,13 +1479,14 @@ def test_if_by_if_forward_all_const_branch(): x = Tensor(np.array(0), dtype=ms.float32) graph_output = net(idx, end, x) # pynative mode - context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend") + context.set_context(mode=context.PYNATIVE_MODE) pynative_output = net(idx, end, x) assert np.allclose(graph_output.asnumpy(), pynative_output.asnumpy(), 0.0001, 0.0001) @pytest.mark.level0 @pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_const_grad(): class MyNet(nn.Cell): 
@@ -1452,6 +1522,7 @@ def test_if_const_grad(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_if_const_grad(): class MyNet(nn.Cell): @@ -1491,6 +1562,7 @@ def test_if_by_if_const_grad(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_while_const_grad(): class MyNet(nn.Cell): @@ -1524,6 +1596,7 @@ def test_while_const_grad(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu +@pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard def test_if_by_while_const_grad(): class MyNet(nn.Cell): diff --git a/tests/st/dump/test_data_dump.py b/tests/st/dump/test_data_dump.py index 29056acf1ae..f1b637084d7 100644 --- a/tests/st/dump/test_data_dump.py +++ b/tests/st/dump/test_data_dump.py @@ -55,7 +55,7 @@ def change_current_dump_json(file_name, dump_path): json.dump(data, f) -@pytest.mark.level0 +@pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/fl/albert/src/cell_wrapper.py b/tests/st/fl/albert/src/cell_wrapper.py index 477ddba3eba..187792c0543 100644 --- a/tests/st/fl/albert/src/cell_wrapper.py +++ b/tests/st/fl/albert/src/cell_wrapper.py @@ -295,5 +295,5 @@ class NetworkNoClientTrainCell(nn.Cell): self.cast(F.tuple_to_array((self.sens,)), mstype.float32)) grads = self.hyper_map(F.partial(clip_grad, self.clip_type, self.clip_value), grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss diff --git a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py index f3527397549..fcc3be0fd08 100644 --- a/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py +++ b/tests/st/model_zoo_tests/resnet50/test_resnet50_cifar10.py @@ -33,7 +33,7 @@ def test_resnet50_cifar10_ascend(): new_list = 
["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") exec_network_shell = "cd resnet/scripts; bash run_distribute_train.sh {} {} {}"\ .format(utils.rank_table_path, dataset_path, config_path) os.system(exec_network_shell) @@ -64,7 +64,7 @@ def test_resnet50_cifar10_gpu(): new_list = ["total_epochs=10", "10"] utils.exec_sed_command(old_list, new_list, os.path.join(cur_model_path, "train.py")) dataset_path = os.path.join(utils.data_root, "cifar-10-batches-bin") - config_path = os.path.join(cur_model_path, "resnet50_cifar10_config.yaml") + config_path = os.path.join(cur_model_path, "config", "resnet50_cifar10_config.yaml") os.system("nvidia-smi") exec_network_shell = "cd resnet/scripts; sh run_distribute_train_gpu.sh {} {}" \ .format(dataset_path, config_path) diff --git a/tests/st/model_zoo_tests/transformer/test_transformer.py b/tests/st/model_zoo_tests/transformer/test_transformer.py index 8ace3c49c2d..cfcb049398a 100644 --- a/tests/st/model_zoo_tests/transformer/test_transformer.py +++ b/tests/st/model_zoo_tests/transformer/test_transformer.py @@ -145,7 +145,7 @@ class TimeMonitor(Callback): self.per_step_mseconds_list.append(epoch_mseconds / self.data_size) -@pytest.mark.level0 +@pytest.mark.level1 @pytest.mark.platform_arm_ascend_training @pytest.mark.platform_x86_ascend_training @pytest.mark.env_onecard diff --git a/tests/st/model_zoo_tests/yolov3/src/yolov3.py b/tests/st/model_zoo_tests/yolov3/src/yolov3.py index 7ddf3ae695f..643fe0be1d7 100644 --- a/tests/st/model_zoo_tests/yolov3/src/yolov3.py +++ b/tests/st/model_zoo_tests/yolov3/src/yolov3.py @@ -671,7 +671,8 @@ class TrainingWrapper(nn.Cell): if self.reducer_flag: # apply grad reducer on grads grads = 
self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss class YoloBoxScores(nn.Cell): diff --git a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py index 273a45302e9..8a073f0fb40 100644 --- a/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py +++ b/tests/st/model_zoo_tests/yolov3_darknet53/src/yolo.py @@ -59,7 +59,7 @@ class YoloBlock(nn.Cell): Args: in_channels: Integer. Input channel. - out_chls: Interger. Middle channel. + out_chls: Integer. Middle channel. out_channels: Integer. Output channel. Returns: @@ -108,7 +108,7 @@ class YOLOv3(nn.Cell): Args: backbone_shape: List. Darknet output channels shape. backbone: Cell. Backbone Network. - out_channel: Interger. Output channel. + out_channel: Integer. Output channel. Returns: Tensor, output tensor. @@ -436,4 +436,5 @@ class TrainingWrapper(nn.Cell): grads = self.grad(self.network, weights)(*args, sens) if self.reducer_flag: grads = self.grad_reducer(grads) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss diff --git a/tests/st/networks/models/bert/src/bert_for_pre_training.py b/tests/st/networks/models/bert/src/bert_for_pre_training.py index 0125875fd4f..a76ae7808f3 100644 --- a/tests/st/networks/models/bert/src/bert_for_pre_training.py +++ b/tests/st/networks/models/bert/src/bert_for_pre_training.py @@ -321,8 +321,8 @@ class BertTrainOneStepCell(nn.Cell): # apply grad reducer on grads grads = self.grad_reducer(grads) - succ = self.optimizer(grads) - return F.depend(loss, succ) + self.optimizer(grads) + return loss grad_scale = C.MultitypeFuncGraph("grad_scale") @@ -431,9 +431,6 @@ class BertTrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not 
overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) diff --git a/tests/st/networks/models/bert/src/utils.py b/tests/st/networks/models/bert/src/utils.py index f76604ecfcf..2114dd12896 100644 --- a/tests/st/networks/models/bert/src/utils.py +++ b/tests/st/networks/models/bert/src/utils.py @@ -122,12 +122,9 @@ class BertFinetuneCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond) class BertCLSModel(nn.Cell): """ diff --git a/tests/st/ops/cpu/test_softplus_grad_op.py b/tests/st/ops/cpu/test_softplus_grad_op.py index 5dc8cc5a3e9..76879689960 100644 --- a/tests/st/ops/cpu/test_softplus_grad_op.py +++ b/tests/st/ops/cpu/test_softplus_grad_op.py @@ -48,7 +48,7 @@ class Grad(nn.Cell): @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_softplus_grad_1d_fp32(): +def test_softplus_grad(): x = np.array([0.58401114, 0.68800163, 0.9760397, 0.14702141, 0.46563736, 0.9607501, 0.14567593, 0.12261796, 0.37054458, 0.46421242]).astype(np.float32) dy = np.array([0.5559598, 0.96994054, 0.24770357, 0.34646875, 0.2984393, 0.03287048, @@ -67,7 +67,7 @@ def test_softplus_grad_1d_fp32(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_softplus_grad_3d_fp16(): +def test_softplus_grad_fp16(): np.random.seed(42) x_np = np.random.randn(5, 3, 6).astype(np.float16) dy_np = np.random.randn(5, 3, 6).astype(np.float16) @@ -76,17 +76,3 @@ def test_softplus_grad_3d_fp16(): output = grad(Tensor(x_np), Tensor(dy_np)) expect = dy_np * np.exp(x_np) / (1 + np.exp(x_np)) assert np.allclose(output[0].asnumpy(), expect, rtol=1e-2) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard -def test_softplus_grad_7d_fp32(): - np.random.seed(20) - x_np = 
np.random.randn(5, 3, 6, 3, 4, 5, 6).astype(np.float32) - dy_np = np.random.randn(5, 3, 6, 3, 4, 5, 6).astype(np.float32) - net = SoftplusNet() - grad = Grad(net) - output = grad(Tensor(x_np), Tensor(dy_np)) - expect = dy_np * np.exp(x_np) / (1 + np.exp(x_np)) - assert np.allclose(output[0].asnumpy(), expect, rtol=1e-2) diff --git a/tests/st/ops/cpu/test_softplus_op.py b/tests/st/ops/cpu/test_softplus_op.py index 87aada0feb8..19af2a20762 100644 --- a/tests/st/ops/cpu/test_softplus_op.py +++ b/tests/st/ops/cpu/test_softplus_op.py @@ -40,21 +40,7 @@ def SoftplusCompute(x): @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_softplus_0d_fp32(): - x_np = np.array(1.2, np.float32) - y_np = SoftplusCompute(x_np) - - x_ms = Tensor(x_np) - net = SoftplusNet() - y_ms = net(x_ms) - - assert np.allclose(y_np, y_ms.asnumpy()) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard -def test_softplus_1d_fp32(): +def test_softplus_1d(): x_np = np.random.random((50,)).astype(np.float32) y_np = SoftplusCompute(x_np) @@ -68,7 +54,7 @@ def test_softplus_1d_fp32(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_softplus_2d_fp32(): +def test_softplus_2d(): x_np = np.random.random((50, 40)).astype(np.float32) y_np = SoftplusCompute(x_np) @@ -82,7 +68,7 @@ def test_softplus_2d_fp32(): @pytest.mark.level0 @pytest.mark.platform_x86_cpu @pytest.mark.env_onecard -def test_softplus_4d_fp32(): +def test_softplus_4d(): x_np = np.random.random((32, 3, 224, 224)).astype(np.float32) y_np = SoftplusCompute(x_np) @@ -119,17 +105,3 @@ def test_softplus_4d_fp16(): y_ms = net(x_ms) assert np.allclose(y_np, y_ms.asnumpy(), rtol=5e-3) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_cpu -@pytest.mark.env_onecard -def test_softplus_7d_fp32(): - x_np = np.random.random((32, 3, 20, 20, 20, 10, 10)).astype(np.float32) - y_np = SoftplusCompute(x_np) - - x_ms = Tensor(x_np) - net = SoftplusNet() - y_ms = 
net(x_ms) - - assert np.allclose(y_np, y_ms.asnumpy(), rtol=5e-3) diff --git a/tests/ut/cpp/CMakeLists.txt b/tests/ut/cpp/CMakeLists.txt index 58288960327..86d21eef618 100644 --- a/tests/ut/cpp/CMakeLists.txt +++ b/tests/ut/cpp/CMakeLists.txt @@ -68,6 +68,7 @@ if(ENABLE_MINDDATA) ./ps/*.cc ./fl/*.cc ./cxx_api/*.cc + ./tbe/*.cc ) if(NOT ENABLE_PYTHON) diff --git a/tests/ut/cpp/dataset/CMakeLists.txt b/tests/ut/cpp/dataset/CMakeLists.txt index 3fba58918d0..1f33a1b4e3f 100644 --- a/tests/ut/cpp/dataset/CMakeLists.txt +++ b/tests/ut/cpp/dataset/CMakeLists.txt @@ -12,6 +12,8 @@ SET(DE_UT_SRCS btree_test.cc buddy_test.cc build_vocab_test.cc + c_api_audio_a_to_q_test.cc + c_api_audio_r_to_z_test.cc c_api_cache_test.cc c_api_dataset_album_test.cc c_api_audio_a_to_q_test.cc @@ -20,6 +22,7 @@ SET(DE_UT_SRCS c_api_dataset_coco_test.cc c_api_dataset_config_test.cc c_api_dataset_csv_test.cc + c_api_dataset_flickr_test.cc c_api_dataset_iterator_test.cc c_api_dataset_manifest_test.cc c_api_dataset_minddata_test.cc diff --git a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc index 7dc03926a17..089029ffd13 100644 --- a/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc +++ b/tests/ut/cpp/dataset/c_api_audio_a_to_q_test.cc @@ -30,6 +30,65 @@ class MindDataTestPipeline : public UT::DatasetOpTesting { protected: }; +TEST_F(MindDataTestPipeline, TestAmplitudeToDBPipeline) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto amplitude_to_db_op = audio::AmplitudeToDB(); + + ds = ds->Map({amplitude_to_db_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + 
ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestAmplitudeToDBWrongArgs) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto amplitude_to_db_op = audio::AmplitudeToDB(ScaleType::kPower, 1.0, -1e-10, 80.0); + + ds = ds->Map({amplitude_to_db_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + // Expect failure + EXPECT_EQ(iter, nullptr); +} + TEST_F(MindDataTestPipeline, Level0_TestBandBiquad001) { MS_LOG(INFO) << "Basic Function Test"; // Original waveform @@ -96,4 +155,335 @@ TEST_F(MindDataTestPipeline, Level0_TestBandBiquad002) { std::shared_ptr iter02 = ds02->CreateIterator(); EXPECT_EQ(iter02, nullptr); -} \ No newline at end of file +} + +TEST_F(MindDataTestPipeline, Level0_TestAllpassBiquad001) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto AllpassBiquadOp = audio::AllpassBiquad(44100, 200.0); + + ds = ds->Map({AllpassBiquadOp}); + EXPECT_NE(ds, nullptr); + + // Filtered waveform by allpassbiquad + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + 
ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, Level0_TestAllpassBiquad002) { + MS_LOG(INFO) << "Wrong Arg."; + std::shared_ptr schema = Schema(); + // Original waveform + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); + std::shared_ptr ds = RandomData(50, schema); + std::shared_ptr ds01; + std::shared_ptr ds02; + EXPECT_NE(ds, nullptr); + + // Check sample_rate + MS_LOG(INFO) << "Sample_rate_ is zero."; + auto allpass_biquad_op_01 = audio::AllpassBiquad(0, 200.0, 0.707); + ds01 = ds->Map({allpass_biquad_op_01}); + EXPECT_NE(ds01, nullptr); + + std::shared_ptr iter01 = ds01->CreateIterator(); + EXPECT_EQ(iter01, nullptr); + + // Check Q_ + MS_LOG(INFO) << "Q_ is zero."; + auto allpass_biquad_op_02 = audio::AllpassBiquad(44100, 200, 0); + ds02 = ds->Map({allpass_biquad_op_02}); + EXPECT_NE(ds02, nullptr); + + std::shared_ptr iter02 = ds02->CreateIterator(); + EXPECT_EQ(iter02, nullptr); +} + +TEST_F(MindDataTestPipeline, Level0_TestBandpassBiquad001) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto BandpassBiquadOp = audio::BandpassBiquad(44100, 200.0); + + ds = ds->Map({BandpassBiquadOp}); + EXPECT_NE(ds, nullptr); + + // Filtered waveform by bandpassbiquad + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + 
ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, Level0_TestBandpassBiquad002) { + MS_LOG(INFO) << "Wrong Arg."; + std::shared_ptr schema = Schema(); + // Original waveform + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); + std::shared_ptr ds = RandomData(50, schema); + std::shared_ptr ds01; + std::shared_ptr ds02; + EXPECT_NE(ds, nullptr); + + // Check sample_rate + MS_LOG(INFO) << "sample_rate is zero."; + auto bandpass_biquad_op_01 = audio::BandpassBiquad(0, 200); + ds01 = ds->Map({bandpass_biquad_op_01}); + EXPECT_NE(ds01, nullptr); + + std::shared_ptr iter01 = ds01->CreateIterator(); + EXPECT_EQ(iter01, nullptr); + + // Check Q_ + MS_LOG(INFO) << "Q_ is zero."; + auto bandpass_biquad_op_02 = audio::BandpassBiquad(44100, 200, 0); + ds02 = ds->Map({bandpass_biquad_op_02}); + EXPECT_NE(ds02, nullptr); + + std::shared_ptr iter02 = ds02->CreateIterator(); + EXPECT_EQ(iter02, nullptr); +} + +TEST_F(MindDataTestPipeline, Level0_TestBandrejectBiquad001) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto BandrejectBiquadOp = audio::BandrejectBiquad(44100, 200.0); + + ds = ds->Map({BandrejectBiquadOp}); + EXPECT_NE(ds, nullptr); + + // Filtered waveform by bandrejectbiquad + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + 
ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, Level0_TestBandrejectBiquad002) { + MS_LOG(INFO) << "Wrong Arg."; + std::shared_ptr schema = Schema(); + // Original waveform + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); + std::shared_ptr ds = RandomData(50, schema); + std::shared_ptr ds01; + std::shared_ptr ds02; + EXPECT_NE(ds, nullptr); + + // Check sample_rate + MS_LOG(INFO) << "sample_rate is zero."; + auto bandreject_biquad_op_01 = audio::BandrejectBiquad(0, 200); + ds01 = ds->Map({bandreject_biquad_op_01}); + EXPECT_NE(ds01, nullptr); + + std::shared_ptr iter01 = ds01->CreateIterator(); + EXPECT_EQ(iter01, nullptr); + + // Check Q_ + MS_LOG(INFO) << "Q_ is zero."; + auto bandreject_biquad_op_02 = audio::BandrejectBiquad(44100, 200, 0); + ds02 = ds->Map({bandreject_biquad_op_02}); + EXPECT_NE(ds02, nullptr); + + std::shared_ptr iter02 = ds02->CreateIterator(); + EXPECT_EQ(iter02, nullptr); +} + +TEST_F(MindDataTestPipeline, Level0_TestBassBiquad001) { + MS_LOG(INFO) << "Basic Function Test"; + // Original waveform + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto BassBiquadOp = audio::BassBiquad(44100, 50, 200.0); + + ds = ds->Map({BassBiquadOp}); + EXPECT_NE(ds, nullptr); + + // Filtered waveform by bassbiquad + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + 
ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2, 200}; + + int i = 0; + while (row.size() != 0) { + auto col = row["inputData"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 2); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, Level0_TestBassBiquad002) { + MS_LOG(INFO) << "Wrong Arg."; + std::shared_ptr schema = Schema(); + // Original waveform + ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 2})); + std::shared_ptr ds = RandomData(50, schema); + std::shared_ptr ds01; + std::shared_ptr ds02; + EXPECT_NE(ds, nullptr); + + // Check sample_rate + MS_LOG(INFO) << "sample_rate is zero."; + auto bass_biquad_op_01 = audio::BassBiquad(0, 50, 200.0); + ds01 = ds->Map({bass_biquad_op_01}); + EXPECT_NE(ds01, nullptr); + + std::shared_ptr iter01 = ds01->CreateIterator(); + EXPECT_EQ(iter01, nullptr); + + // Check Q_ + MS_LOG(INFO) << "Q_ is zero."; + auto bass_biquad_op_02 = audio::BassBiquad(44100, 50, 200.0, 0); + ds02 = ds->Map({bass_biquad_op_02}); + EXPECT_NE(ds02, nullptr); + + std::shared_ptr iter02 = ds02->CreateIterator(); + EXPECT_EQ(iter02, nullptr); +} + +TEST_F(MindDataTestPipeline, TestAnglePipeline) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipeline"; + + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {2, 2})); + std::shared_ptr ds = RandomData(50, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto angle_op = audio::Angle(); + + ds = ds->Map({angle_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(ds, nullptr); + + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::vector expected = {2}; + + int i = 0; + while (row.size() != 0) { + auto col = 
row["complex"]; + ASSERT_EQ(col.Shape(), expected); + ASSERT_EQ(col.Shape().size(), 1); + ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + EXPECT_EQ(i, 50); + + iter->Stop(); +} + +TEST_F(MindDataTestPipeline, TestAnglePipelineError) { + MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAnglePipelineError"; + + std::shared_ptr schema = Schema(); + ASSERT_OK(schema->add_column("complex", mindspore::DataType::kNumberTypeFloat32, {3, 2, 1})); + std::shared_ptr ds = RandomData(4, schema); + EXPECT_NE(ds, nullptr); + + ds = ds->SetNumWorkers(4); + EXPECT_NE(ds, nullptr); + + auto angle_op = audio::Angle(); + + ds = ds->Map({angle_op}); + EXPECT_NE(ds, nullptr); + + std::shared_ptr iter = ds->CreateIterator(); + std::unordered_map row; + EXPECT_ERROR(iter->GetNextRow(&row)); +} diff --git a/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc b/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc index cc833a53654..902f906a5c2 100644 --- a/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc +++ b/tests/ut/cpp/dataset/c_api_audio_r_to_z_test.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include "common/common.h" #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/include/dataset/datasets.h" @@ -24,73 +23,12 @@ using mindspore::LogStream; using mindspore::ExceptionType::NoExceptionType; using mindspore::MsLogLevel::INFO; -class MindDataTestPipeline : public UT::DatasetOpTesting { - protected: +class MindDataTestPipeline : public UT::Common { + public: }; -TEST_F(MindDataTestPipeline, TestTimeMaskingPipeline) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeMaskingPipeline."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 200})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto timemasking = audio::TimeMasking(true, 6); - - ds = ds->Map({timemasking}); - EXPECT_NE(ds, nullptr); - - // Filtered waveform by bandbiquad - std::shared_ptr iter = ds->CreateIterator(); - EXPECT_NE(ds, nullptr); - - std::unordered_map row; - ASSERT_OK(iter->GetNextRow(&row)); - - std::vector expected = {2, 200}; - - int i = 0; - while (row.size() != 0) { - auto col = row["inputData"]; - ASSERT_EQ(col.Shape(), expected); - ASSERT_EQ(col.Shape().size(), 2); - ASSERT_EQ(col.DataType(), mindspore::DataType::kNumberTypeFloat32); - ASSERT_OK(iter->GetNextRow(&row)); - i++; - } - EXPECT_EQ(i, 50); - - iter->Stop(); -} - -TEST_F(MindDataTestPipeline, TestTimeMaskingWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeMaskingWrongArgs."; - // Original waveform - std::shared_ptr schema = Schema(); - ASSERT_OK(schema->add_column("inputData", mindspore::DataType::kNumberTypeFloat32, {2, 20})); - std::shared_ptr ds = RandomData(50, schema); - EXPECT_NE(ds, nullptr); - - ds = ds->SetNumWorkers(4); - EXPECT_NE(ds, nullptr); - - auto timemasking = audio::TimeMasking(true, -100); - - ds = ds->Map({timemasking}); - EXPECT_NE(ds, nullptr); - - // Filtered 
waveform by bandbiquad - std::shared_ptr iter = ds->CreateIterator(); - // Expect failure - EXPECT_EQ(iter, nullptr); -} - TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeStretchPipeline."; + MS_LOG(INFO) << "Doing test TimeStretchOp with custom param value. Pipeline."; // op param int freq = 1025; int hop_length = 512; @@ -116,7 +54,7 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) { std::unordered_map row; ASSERT_OK(iter->GetNextRow(&row)); - std::vector expected = {2, freq, static_cast(std::ceil(400 / rate)), 2}; + std::vector expected = {2, freq, int(std::ceil(400 / rate)), 2}; int i = 0; while (row.size() != 0) { @@ -132,7 +70,7 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipeline) { } TEST_F(MindDataTestPipeline, TestTimeStretchPipelineWrongArgs) { - MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTimeStretchPipelineWrongArgs."; + MS_LOG(INFO) << "Doing test TimeStretchOp with wrong param value. Pipeline."; // op param int freq = 1025; int hop_length = 512; @@ -155,4 +93,4 @@ TEST_F(MindDataTestPipeline, TestTimeStretchPipelineWrongArgs) { std::shared_ptr iter = ds->CreateIterator(); // Expect failure EXPECT_EQ(iter, nullptr); -} +} \ No newline at end of file diff --git a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc index 33149fd8520..0647ae7bdd0 100644 --- a/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc +++ b/tests/ut/cpp/dataset/c_api_vision_a_to_q_test.cc @@ -27,6 +27,102 @@ class MindDataTestPipeline : public UT::DatasetOpTesting { // Tests for vision C++ API A to Q TensorTransform Operations (in alphabetical order) +TEST_F(MindDataTestPipeline, TestAdjustGammaSuccess1) { + // pipeline 3-channel + MS_LOG(INFO) << "Pipeline Test."; + std::string MindDataPath = "data/dataset"; + std::string folder_path = MindDataPath + "/testImageNetData/train/"; + std::shared_ptr ds1 = ImageFolder(folder_path, true, std::make_shared(false, 
2)); + EXPECT_NE(ds1, nullptr); + std::shared_ptr ds2 = ImageFolder(folder_path, true, std::make_shared(false, 2)); + EXPECT_NE(ds2, nullptr); + + auto adjustgamma_op = vision::AdjustGamma(10.0); + + ds1 = ds1->Map({adjustgamma_op}); + EXPECT_NE(ds1, nullptr); + + std::shared_ptr iter1 = ds1->CreateIterator(); + EXPECT_NE(iter1, nullptr); + std::unordered_map row1; + iter1->GetNextRow(&row1); + + std::shared_ptr iter2 = ds2->CreateIterator(); + EXPECT_NE(iter2, nullptr); + std::unordered_map row2; + iter2->GetNextRow(&row2); + + uint64_t i = 0; + while (row1.size() != 0) { + i++; + auto image = row1["image"]; + iter1->GetNextRow(&row1); + iter2->GetNextRow(&row2); + } + EXPECT_EQ(i, 2); + + iter1->Stop(); + iter2->Stop(); +} + +TEST_F(MindDataTestPipeline, TestAdjustGammaSuccess2) { + // pipeline 1-channel + MS_LOG(INFO) << "Pipeline Test."; + std::string MindDataPath = "data/dataset"; + std::string folder_path = MindDataPath + "/testImageNetData/train/"; + std::shared_ptr ds1 = ImageFolder(folder_path, true, std::make_shared(false, 2)); + EXPECT_NE(ds1, nullptr); + std::shared_ptr ds2 = ImageFolder(folder_path, true, std::make_shared(false, 2)); + EXPECT_NE(ds2, nullptr); + + auto adjustgamma_op = vision::AdjustGamma(10.0); + auto rgb2gray_op = vision::RGB2GRAY(); + + ds1 = ds1->Map({rgb2gray_op, adjustgamma_op}); + EXPECT_NE(ds1, nullptr); + + std::shared_ptr iter1 = ds1->CreateIterator(); + EXPECT_NE(iter1, nullptr); + std::unordered_map row1; + iter1->GetNextRow(&row1); + + std::shared_ptr iter2 = ds2->CreateIterator(); + EXPECT_NE(iter2, nullptr); + std::unordered_map row2; + iter2->GetNextRow(&row2); + + uint64_t i = 0; + while (row1.size() != 0) { + i++; + auto image = row1["image"]; + iter1->GetNextRow(&row1); + iter2->GetNextRow(&row2); + } + EXPECT_EQ(i, 2); + + iter1->Stop(); + iter2->Stop(); +} + +TEST_F(MindDataTestPipeline, TestAdjustGammaParamCheck) { + // pipeline 3-channel + MS_LOG(INFO) << "Pipeline Test."; + std::string MindDataPath = 
"data/dataset"; + std::string folder_path = MindDataPath + "/testImageNetData/train/"; + std::shared_ptr ds = ImageFolder(folder_path, true, std::make_shared(false, 2)); + EXPECT_NE(ds, nullptr); + + // Case 1: Negative gamma + // Create objects for the tensor ops + std::shared_ptr adjust_gamma(new vision::AdjustGamma(-1, 1.0)); + auto ds1 = ds->Map({adjust_gamma}); + EXPECT_NE(ds1, nullptr); + // Create an iterator over the result of the above dataset + std::shared_ptr iter1 = ds1->CreateIterator(); + // Expect failure: invalid value of AdjustGamma + EXPECT_EQ(iter1, nullptr); +} + TEST_F(MindDataTestPipeline, TestAutoContrastSuccess1) { MS_LOG(INFO) << "Doing MindDataTestPipeline-TestAutoContrastSuccess1."; diff --git a/tests/ut/cpp/dataset/cmu_arctic_test.cc b/tests/ut/cpp/dataset/cmu_arctic_test.cc new file mode 100644 index 00000000000..f799ebc897e --- /dev/null +++ b/tests/ut/cpp/dataset/cmu_arctic_test.cc @@ -0,0 +1,145 @@ +/** + * Copyright 2019-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include + + +#include +#include +#include + +#include "utils/ms_utils.h" +#include "common/common.h" +#include "minddata/dataset/core/client.h" +#include "minddata/dataset/core/global_context.h" +#include "minddata/dataset/engine/datasetops/source/cmu_arctic_op.h" +#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/pk_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/subset_random_sampler.h" +#include "minddata/dataset/engine/datasetops/source/sampler/weighted_random_sampler.h" +#include "minddata/dataset/include/dataset/datasets.h" +#include "minddata/dataset/util/path.h" +#include "minddata/dataset/util/status.h" +#include "gtest/gtest.h" +#include "utils/log_adapter.h" +#include "securec.h" + +namespace common = mindspore::common; +using namespace mindspore::dataset; +using mindspore::LogStream; +using mindspore::ExceptionType::NoExceptionType; +using mindspore::MsLogLevel::ERROR; + +std::shared_ptr Repeat(int repeat_cnt); + +std::shared_ptr Build(std::vector> ops); + +class MindDataTestCmuArcticSampler : public UT::DatasetOpTesting { + protected: +}; + +TEST_F(MindDataTestCmuArcticSampler, TestSequentialCmuArcticWithRepeat) { + std::string folder_path = datasets_root_path_ + "/testCmuArcticData/"; + int64_t num_samples = 10; + int64_t start_index = 0; + std::shared_ptr ds = + CmuArctic(folder_path, "aew", std::make_shared(start_index, num_samples)); + EXPECT_NE(ds, nullptr); + ds = ds->Repeat(2); + EXPECT_NE(ds, nullptr); + std::shared_ptr iter = ds->CreateIterator(); + EXPECT_NE(iter, nullptr); + std::unordered_map row; + ASSERT_OK(iter->GetNextRow(&row)); + + std::string_view 
utterance; + std::string_view utterance_id; + uint32_t rate; + + uint64_t i = 0; + while (row.size() != 0) { + + auto waveform = row["waveform"]; + auto sample_rate = row["sample_rate"]; + auto utterance_ = row["utterance"]; + auto utterance_id_ = row["utterance_id"]; + + MS_LOG(ERROR) << "Tensor image shape: " << waveform.Shape(); + + std::shared_ptr t_rate; + ASSERT_OK(Tensor::CreateFromMSTensor(sample_rate, &t_rate)); + ASSERT_OK(t_rate->GetItemAt(&rate, {})); + MS_LOG(ERROR) << "Tensor rate: " << rate; + + std::shared_ptr t_utterance; + ASSERT_OK(Tensor::CreateFromMSTensor(utterance_, &t_utterance)); + ASSERT_OK(t_utterance->GetItemAt(&utterance, {})); + MS_LOG(ERROR) << "Tensor utterance value: " << utterance; + + std::shared_ptr t_utterance_id; + ASSERT_OK(Tensor::CreateFromMSTensor(utterance_id_, &t_utterance_id)); + ASSERT_OK(t_utterance_id->GetItemAt(&utterance_id, {})); + MS_LOG(ERROR) << "Tensor utterance_id value: " << utterance_id; + + + ASSERT_OK(iter->GetNextRow(&row)); + i++; + } + + EXPECT_EQ(i, 20); + iter->Stop(); +} + +// TEST_F(MindDataTestMnistSampler, TestSequentialImageFolderWithRepeatBatch) { +// std::string folder_path = datasets_root_path_ + "/testMnistData/"; +// int64_t num_samples = 10; +// int64_t start_index = 0; +// std::shared_ptr ds = +// Mnist(folder_path, "all", std::make_shared(start_index, num_samples)); +// EXPECT_NE(ds, nullptr); +// ds = ds->Repeat(2); +// EXPECT_NE(ds, nullptr); +// ds = ds->Batch(5); +// EXPECT_NE(ds, nullptr); +// std::shared_ptr iter = ds->CreateIterator(); +// EXPECT_NE(iter, nullptr); +// std::vector> expected = {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}}; +// std::unordered_map row; +// ASSERT_OK(iter->GetNextRow(&row)); +// uint64_t i = 0; +// while (row.size() != 0) { +// auto image = row["image"]; +// auto label = row["label"]; +// MS_LOG(INFO) << "Tensor image shape: " << image.Shape(); +// TEST_MS_LOG_MSTENSOR(INFO, "Tensor label: ", label); +// std::shared_ptr 
de_expected_label; +// ASSERT_OK(Tensor::CreateFromVector(expected[i % 4], &de_expected_label)); +// mindspore::MSTensor expected_label = +// mindspore::MSTensor(std::make_shared(de_expected_label)); +// EXPECT_MSTENSOR_EQ(label, expected_label); +// ASSERT_OK(iter->GetNextRow(&row)); +// i++; +// } +// EXPECT_EQ(i, 4); +// iter->Stop(); +// } + + diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc index 0b612a653e7..70c794856c0 100644 --- a/tests/ut/cpp/dataset/common/bboxop_common.cc +++ b/tests/ut/cpp/dataset/common/bboxop_common.cc @@ -164,8 +164,8 @@ void BBoxOpCommon::CompareActualAndExpected(const std::string &op_name) { EXPECT_TRUE(remove(actual_path.c_str()) == 0); // compare using ==operator by Tensor std::shared_ptr expect_img_t, actual_img_t; - CVTensor::CreateFromMat(expect_img, &expect_img_t); - CVTensor::CreateFromMat(actual_img, &actual_img_t); + CVTensor::CreateFromMat(expect_img, 3, &expect_img_t); + CVTensor::CreateFromMat(actual_img, 3, &actual_img_t); if (actual_img.data) { EXPECT_EQ(*expect_img_t == *actual_img_t, true); } else { diff --git a/tests/ut/cpp/dataset/common/cvop_common.cc b/tests/ut/cpp/dataset/common/cvop_common.cc index adddb1ad41d..ec2016bf543 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.cc +++ b/tests/ut/cpp/dataset/common/cvop_common.cc @@ -55,7 +55,7 @@ void CVOpCommon::GetInputImage(std::string filename) { Tensor::CreateFromFile(filename, &raw_input_tensor_); raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR); std::shared_ptr input_cv_tensor; - CVTensor::CreateFromMat(raw_cv_image_, &input_cv_tensor); + CVTensor::CreateFromMat(raw_cv_image_, 3, &input_cv_tensor); input_tensor_ = std::dynamic_pointer_cast(input_cv_tensor); SwapRedAndBlue(input_tensor_, &input_tensor_); if (raw_cv_image_.data) { @@ -134,6 +134,10 @@ void CVOpCommon::CheckImageShapeAndData(const std::shared_ptr &output_te expect_image_path = dir_path + 
"imagefolder/apple_expect_randomaffine.jpg"; actual_image_path = dir_path + "imagefolder/apple_actual_randomaffine.jpg"; break; + case kAdjustGamma: + expect_image_path = dir_path + "imagefolder/apple_expect_adjustgamma.png"; + actual_image_path = dir_path + "imagefolder/apple_actual_adjustgamma.png"; + break; case kAutoContrast: expect_image_path = dir_path + "imagefolder/apple_expect_autocontrast.jpg"; actual_image_path = dir_path + "imagefolder/apple_actual_autocontrast.jpg"; diff --git a/tests/ut/cpp/dataset/common/cvop_common.h b/tests/ut/cpp/dataset/common/cvop_common.h index 5dbb5ea98cd..1effc6360af 100644 --- a/tests/ut/cpp/dataset/common/cvop_common.h +++ b/tests/ut/cpp/dataset/common/cvop_common.h @@ -44,6 +44,7 @@ class CVOpCommon : public Common { kRandomAffine, kRandomPosterize, kAutoContrast, + kAdjustGamma, kEqualize }; diff --git a/tests/ut/cpp/dataset/deserialize_test.cc b/tests/ut/cpp/dataset/deserialize_test.cc index b333660171c..61b8ada1371 100644 --- a/tests/ut/cpp/dataset/deserialize_test.cc +++ b/tests/ut/cpp/dataset/deserialize_test.cc @@ -13,13 +13,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include #include "common/common.h" #include "minddata/dataset/core/global_context.h" #include "minddata/dataset/engine/serdes.h" #include "minddata/dataset/include/dataset/datasets.h" #include "minddata/dataset/include/dataset/vision.h" #include "minddata/dataset/include/dataset/transforms.h" +#include "minddata/dataset/kernels/ir/data/transforms_ir.h" using namespace mindspore::dataset; using mindspore::dataset::DatasetNode; @@ -33,14 +33,15 @@ class MindDataTestDeserialize : public UT::DatasetOpTesting { void compare_dataset(std::shared_ptr ds) { nlohmann::json out_json; - std::make_shared()->SaveToJSON(ds, "dataset_pipeline.json", &out_json); + ASSERT_OK(Serdes::SaveToJSON(ds, "dataset_pipeline.json", &out_json)); // output the deserialized out_json to ds1 and then out_json1 std::shared_ptr ds1; ASSERT_OK(Serdes::Deserialize("dataset_pipeline.json", &ds1)); EXPECT_NE(ds1, nullptr); + // check original and deserialized dataset are the same nlohmann::json out_json1; - std::make_shared()->SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1); + ASSERT_OK(Serdes::SaveToJSON(ds1, "dataset_pipeline_1.json", &out_json1)); std::stringstream json_ss; json_ss << out_json; std::stringstream json_ss1; @@ -305,6 +306,21 @@ TEST_F(MindDataTestDeserialize, TestDeserializeManifest) { std::shared_ptr cache = nullptr; std::shared_ptr ds = std::make_shared(data_file, "train", sampler, class_indexing, false, cache); + std::vector coordinates = {50, 50}; + std::vector size = {224, 224}; + std::shared_ptr operation1 = std::make_shared(coordinates, size); + std::shared_ptr operation2 = std::make_shared(); + std::shared_ptr operation3 = std::make_shared(); + std::shared_ptr operation4 = + std::make_shared(5, 5, SliceMode::kDrop, 1); + std::shared_ptr operation5 = std::make_shared(); + std::vector> operations; + operations.push_back(operation1); + operations.push_back(operation2); + operations.push_back(operation3); + operations.push_back(operation4); + 
operations.push_back(operation5); + ds = std::make_shared(ds, operations); ds = std::make_shared(ds, 2, false); compare_dataset(ds); } @@ -433,4 +449,36 @@ TEST_F(MindDataTestDeserialize, TestDeserializeInvalidJson) { // check the invalid json object would return error ASSERT_ERROR(Serdes::Deserialize("./data/dataset/testDataset1/datasetTestInvalidJson.json", &ds)); EXPECT_EQ(ds, nullptr); -} \ No newline at end of file +} + +TEST_F(MindDataTestDeserialize, TestDeserializeFill) { + MS_LOG(INFO) << "Doing MindDataTestDeserialize-Fill."; + std::vector dataset_files = {"./data/dataset/testTextFileDataset/1.txt"}; + std::shared_ptr cache = nullptr; + std::shared_ptr ds = std::make_shared(dataset_files, 2, ShuffleMode::kFiles, 1, 0, cache); + std::shared_ptr fill_value; + ASSERT_OK(Tensor::CreateScalar(true, &fill_value)); + std::shared_ptr operation1 = std::make_shared(fill_value); + std::shared_ptr operation2 = std::make_shared("int32_t"); + std::vector> ops = {operation1, operation2}; + ds = std::make_shared(ds, ops); + compare_dataset(ds); +} + +TEST_F(MindDataTestDeserialize, TestDeserializeTensor) { + MS_LOG(INFO) << "Doing MindDataTestDeserialize-Tensor."; + std::shared_ptr test_tensor; + std::vector input = {1.1, 0.2, 0.3, 0.4, 0.5, 0.6, 1.2, 0.7, 0.8, 0.9, 1.0, 2.0, 1.3, 3.0, 4.0}; + ASSERT_OK(Tensor::CreateFromVector(input, TensorShape{3, 5}, &test_tensor)); + nlohmann::json json_obj; + ASSERT_OK(test_tensor->to_json(&json_obj)); + std::shared_ptr test_tensor1; + ASSERT_OK(Tensor::from_json(json_obj, &test_tensor1)); + nlohmann::json json_obj1; + ASSERT_OK(test_tensor1->to_json(&json_obj1)); + std::stringstream json_ss; + json_ss << json_obj; + std::stringstream json_ss1; + json_ss1 << json_obj1; + EXPECT_EQ(json_ss.str(), json_ss1.str()); +} diff --git a/tests/ut/cpp/dataset/execute_test.cc b/tests/ut/cpp/dataset/execute_test.cc index 19654c3c816..c7069a5b2f2 100644 --- a/tests/ut/cpp/dataset/execute_test.cc +++ b/tests/ut/cpp/dataset/execute_test.cc @@ -19,7 
+19,9 @@ #include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/execute.h" #include "minddata/dataset/include/dataset/transforms.h" +#include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/vision.h" +#include "minddata/dataset/include/dataset/audio.h" #include "minddata/dataset/include/dataset/text.h" #include "utils/log_adapter.h" @@ -32,6 +34,132 @@ class MindDataTestExecute : public UT::DatasetOpTesting { protected: }; +TEST_F(MindDataTestExecute, TestAllpassBiquadWithEager) { + MS_LOG(INFO) << "Basic Function Test With Eager."; + // Original waveform + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr allpass_biquad_01 = std::make_shared(44100, 200); + mindspore::dataset::Execute Transform01({allpass_biquad_01}); + // Filtered waveform by allpassbiquad + Status s01 = Transform01(input_02, &input_02); + EXPECT_TRUE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestAllpassBiquadWithWrongArg) { + MS_LOG(INFO) << "Wrong Arg."; + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 
1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + // Check Q + MS_LOG(INFO) << "Q is zero."; + std::shared_ptr allpass_biquad_op = std::make_shared(44100, 200, 0); + mindspore::dataset::Execute Transform01({allpass_biquad_op}); + Status s01 = Transform01(input_02, &input_02); + EXPECT_FALSE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestAdjustGammaEager1) { + // 3-channel eager + MS_LOG(INFO) << "3-channel image test"; + // Read images + auto image = ReadFileToTensor("data/dataset/apple.jpg"); + + // Transform params + auto decode = vision::Decode(); + auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0); + + auto transform = Execute({decode, adjust_gamma_op}); + Status rc = transform(image, &image); + EXPECT_EQ(rc, Status::OK()); +} + +TEST_F(MindDataTestExecute, TestAdjustGammaEager2) { + // 1-channel eager + MS_LOG(INFO) << "1-channel image test"; + auto m1 = ReadFileToTensor("data/dataset/apple.jpg"); + // Transform params + auto decode = vision::Decode(); + auto rgb2gray = vision::RGB2GRAY(); + auto adjust_gamma_op = vision::AdjustGamma(0.1, 1.0); + + auto transform = Execute({decode, rgb2gray, adjust_gamma_op}); + Status rc = transform(m1, &m1); + EXPECT_EQ(rc, Status::OK()); +} + +TEST_F(MindDataTestExecute, TestAmplitudeToDB) { + MS_LOG(INFO) << "Basic Function Test With Eager."; + // Original waveform + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 
1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 2, 2, 3}), &input)); + auto input_ms = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr amplitude_to_db_op = std::make_shared(); + // apply amplitude_to_db + mindspore::dataset::Execute trans({amplitude_to_db_op}); + Status status = trans(input_ms, &input_ms); + EXPECT_TRUE(status.IsOk()); +} + +TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongArgs) { + MS_LOG(INFO) << "Wrong Arg."; + // Original waveform + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_ms = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr amplitude_to_db_op = + std::make_shared(ScaleType::kPower, 1.0, -1e-10, 80.0); + // apply amplitude_to_db + mindspore::dataset::Execute trans({amplitude_to_db_op}); + Status status = trans(input_ms, &input_ms); + EXPECT_FALSE(status.IsOk()); +} + +TEST_F(MindDataTestExecute, TestAmplitudeToDBWrongInput) { + MS_LOG(INFO) << "Wrong Input."; + // 
Original waveform + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({20}), &input)); + auto input_ms = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr amplitude_to_db_op = std::make_shared(); + // apply amplitude_to_db + mindspore::dataset::Execute trans({amplitude_to_db_op}); + Status status = trans(input_ms, &input_ms); + EXPECT_FALSE(status.IsOk()); +} + TEST_F(MindDataTestExecute, TestComposeTransforms) { MS_LOG(INFO) << "Doing TestComposeTransforms."; @@ -69,6 +197,65 @@ TEST_F(MindDataTestExecute, TestCrop) { EXPECT_EQ(image.Shape()[1], 15); } +TEST_F(MindDataTestExecute, TestTimeStretchEager) { + MS_LOG(INFO) << "Doing test TimeStretchOp with custom param value. 
Eager."; + std::shared_ptr input_tensor_; + // op param + int freq = 4; + int hop_length = 20; + float rate = 1.3; + int frame_num = 10; + // create tensor + TensorShape s = TensorShape({2, freq, frame_num, 2}); + // init input vec + std::vector input_vec(2 * freq * frame_num * 2); + for (int ind = 0; ind < input_vec.size(); ind++) { + input_vec[ind] = std::rand() % (1000) / (1000.0f); + } + ASSERT_OK(Tensor::CreateFromVector(input_vec, s, &input_tensor_)); + auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); + std::shared_ptr time_stretch_op = std::make_shared(hop_length, freq, rate); + + // apply timestretch + mindspore::dataset::Execute Transform({time_stretch_op}); + Status status = Transform(input_ms, &input_ms); + EXPECT_TRUE(status.IsOk()); +} + +TEST_F(MindDataTestExecute, TestTimeStretchParamCheck1) { + MS_LOG(INFO) << "Doing MindDataTestTimeStretch-TestTimeStretchParamCheck with invalid parameters."; + // Create an input + std::shared_ptr input_tensor_; + std::shared_ptr output_tensor; + TensorShape s = TensorShape({1, 4, 3, 2}); + ASSERT_OK(Tensor::CreateFromVector( + std::vector({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), + s, &input_tensor_)); + auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); + std::shared_ptr timestretch = std::make_shared(4, 512, -2); + mindspore::dataset::Execute Transform({timestretch}); + Status status = Transform(input_ms, &input_ms); + EXPECT_FALSE(status.IsOk()); +} + +TEST_F(MindDataTestExecute, TestTimeStretchParamCheck2) { + MS_LOG(INFO) << "Doing MindDataTestTimeStretch-TestTimeStretchParamCheck with invalid parameters."; + // Create an input + std::shared_ptr input_tensor_; + std::shared_ptr output_tensor; + TensorShape s = TensorShape({1, 4, 3, 2}); + ASSERT_OK(Tensor::CreateFromVector( + std::vector({1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, + 
1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}), + s, &input_tensor_)); + auto input_ms = mindspore::MSTensor(std::make_shared(input_tensor_)); + std::shared_ptr timestretch = std::make_shared(4, -512, 2); + mindspore::dataset::Execute Transform({timestretch}); + Status status = Transform(input_ms, &input_ms); + EXPECT_FALSE(status.IsOk()); +} + TEST_F(MindDataTestExecute, TestTransformInput1) { MS_LOG(INFO) << "Doing MindDataTestExecute-TestTransformInput1."; // Test Execute with transform op input using API constructors, with std::shared_ptr labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr bandpass_biquad_01 = std::make_shared(44100, 200); + mindspore::dataset::Execute Transform01({bandpass_biquad_01}); + // Filtered waveform by bandpassbiquad + Status s01 = Transform01(input_02, &input_02); + EXPECT_TRUE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestBandpassBiquadWithWrongArg) { + MS_LOG(INFO) << "Wrong Arg."; + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 
1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + // Check Q + MS_LOG(INFO) << "Q is zero."; + std::shared_ptr bandpass_biquad_op = std::make_shared(44100, 200, 0); + mindspore::dataset::Execute Transform01({bandpass_biquad_op}); + Status s01 = Transform01(input_02, &input_02); + EXPECT_FALSE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestBandrejectBiquadWithEager) { + MS_LOG(INFO) << "Basic Function Test With Eager."; + // Original waveform + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + std::shared_ptr bandreject_biquad_01 = std::make_shared(44100, 200); + mindspore::dataset::Execute Transform01({bandreject_biquad_01}); + // Filtered waveform by bandrejectbiquad + Status s01 = Transform01(input_02, &input_02); + EXPECT_TRUE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestBandrejectBiquadWithWrongArg) { + MS_LOG(INFO) << "Wrong Arg."; + std::vector labels = { + 2.716064453125000000e-03, 6.347656250000000000e-03, 9.246826171875000000e-03, 1.089477539062500000e-02, + 
1.138305664062500000e-02, 1.156616210937500000e-02, 1.394653320312500000e-02, 1.550292968750000000e-02, + 1.614379882812500000e-02, 1.840209960937500000e-02, 1.718139648437500000e-02, 1.599121093750000000e-02, + 1.647949218750000000e-02, 1.510620117187500000e-02, 1.385498046875000000e-02, 1.345825195312500000e-02, + 1.419067382812500000e-02, 1.284790039062500000e-02, 1.052856445312500000e-02, 9.368896484375000000e-03}; + std::shared_ptr input; + ASSERT_OK(Tensor::CreateFromVector(labels, TensorShape({2, 10}), &input)); + auto input_02 = mindspore::MSTensor(std::make_shared(input)); + // Check Q + MS_LOG(INFO) << "Q is zero."; + std::shared_ptr bandreject_biquad_op = std::make_shared(44100, 200, 0); + mindspore::dataset::Execute Transform01({bandreject_biquad_op}); + Status s01 = Transform01(input_02, &input_02); + EXPECT_FALSE(s01.IsOk()); +} + +TEST_F(MindDataTestExecute, TestAngleEager) { + MS_LOG(INFO) << "Doing MindDataTestExecute-TestAngleEager"; + std::vector origin = {1.143, 1.3123, 2.632, 2.554, -1.213, 1.3, 0.456, 3.563}; + TensorShape input_shape({4, 2}); + std::shared_ptr de_tensor; + Tensor::CreateFromVector(origin, input_shape, &de_tensor); + + std::shared_ptr angle = std::make_shared(); + auto input = mindspore::MSTensor(std::make_shared(de_tensor)); + mindspore::dataset::Execute Transform({angle}); + Status s = Transform(input, &input); + + ASSERT_TRUE(s.IsOk()); +} diff --git a/tests/ut/cpp/dataset/random_color_op_test.cc b/tests/ut/cpp/dataset/random_color_op_test.cc index 144174a49d8..0ad25711ca8 100644 --- a/tests/ut/cpp/dataset/random_color_op_test.cc +++ b/tests/ut/cpp/dataset/random_color_op_test.cc @@ -43,7 +43,7 @@ class MindDataTestRandomColorOp : public UT::CVOP::CVOpCommon { cv::Mat cv_out; cv::merge(temp, 3, cv_out); std::shared_ptr cvt_out; - CVTensor::CreateFromMat(cv_out, &cvt_out); + CVTensor::CreateFromMat(cv_out, 3, &cvt_out); gray_tensor = std::static_pointer_cast(cvt_out); } TensorShape shape; @@ -96,4 +96,4 @@ 
TEST_F(MindDataTestRandomColorOp, TestOp3) { auto s = op.Compute(input_tensor, &output_tensor); EXPECT_TRUE(s.IsOk()); } -} \ No newline at end of file +} diff --git a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc index 470e18eaee7..2520c2f65d5 100644 --- a/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc +++ b/tests/ut/cpp/dataset/rgba_to_bgr_op_test.cc @@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToBgrOp, TestOp1) { // create new tensor to test conversion std::shared_ptr rgba_input; std::shared_ptr input_cv_tensor; - CVTensor::CreateFromMat(rgba_image, &input_cv_tensor); + CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor); rgba_input = std::dynamic_pointer_cast(input_cv_tensor); Status s = op->Compute(rgba_input, &output_tensor_); diff --git a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc index a50c8047a0b..b9902302361 100644 --- a/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc +++ b/tests/ut/cpp/dataset/rgba_to_rgb_op_test.cc @@ -48,7 +48,7 @@ TEST_F(MindDataTestRgbaToRgbOp, TestOp1) { // create new tensor to test conversion std::shared_ptr rgba_input; std::shared_ptr input_cv_tensor; - CVTensor::CreateFromMat(rgba_image, &input_cv_tensor); + CVTensor::CreateFromMat(rgba_image, 3, &input_cv_tensor); rgba_input = std::dynamic_pointer_cast(input_cv_tensor); Status s = op->Compute(rgba_input, &output_tensor_); diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 1a872ecd85e..25f03ebccd8 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -303,7 +303,8 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) { m.at(1, 0) = 30; m.at(1, 1) = 40; std::shared_ptr cvt; - CVTensor::CreateFromMat(m, &cvt); + TensorShape shape{2, 2}; + CVTensor::CreateFromMat(m, 2, &cvt); std::shared_ptr t; Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t); t->SetItemAt({0, 0}, 10); @@ -318,7 +319,7 @@ 
TEST_F(MindDataTestTensorDE, CVTensorFromMat) { m2.at(2) = 30; m2.at(3) = 40; std::shared_ptr cvt2; - CVTensor::CreateFromMat(m2, &cvt2); + CVTensor::CreateFromMat(m2, 2, &cvt2); std::shared_ptr t2; Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &t2); t2->SetItemAt({0}, 10); @@ -360,7 +361,7 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) { m.at(1, 1) = 50; m.at(1, 2) = 60; std::shared_ptr cvt; - CVTensor::CreateFromMat(m, &cvt); + CVTensor::CreateFromMat(m, 2, &cvt); cv::Mat mat; cvt->MatAtIndex({1}, &mat); cv::Mat m2(3, 1, CV_32S); @@ -368,17 +369,17 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) { m2.at(1) = 50; m2.at(2) = 60; std::shared_ptr cvt2; - CVTensor::CreateFromMat(mat, &cvt2); + CVTensor::CreateFromMat(mat, 2, &cvt2); std::shared_ptr cvt3; - CVTensor::CreateFromMat(m2, &cvt3); + CVTensor::CreateFromMat(m2, 2, &cvt3); ASSERT_TRUE(*cvt2 == *cvt3); cvt->MatAtIndex({0}, &mat); m2.at(0) = 10; m2.at(1) = 20; m2.at(2) = 30; - CVTensor::CreateFromMat(mat, &cvt2); - CVTensor::CreateFromMat(m2, &cvt3); + CVTensor::CreateFromMat(mat, 2, &cvt2); + CVTensor::CreateFromMat(m2, 2, &cvt3); ASSERT_TRUE(*cvt2 == *cvt3); } @@ -536,44 +537,3 @@ TEST_F(MindDataTestTensorDE, TensorEmpty) { t2->Invalidate(); ASSERT_TRUE(!t2->HasData()); } - -TEST_F(MindDataTestTensorDE, TestTensorJson) { - MS_LOG(INFO) << "Doing TestTensor."; - std::vector labels = {1, 1, 2}; - std::shared_ptr input; - Tensor::CreateFromVector(labels, &input); - nlohmann::json out_json; - input->to_json(&out_json); - - std::shared_ptr check; - std::stringstream ss; - ss << out_json["shape"]; - std::string shape = ss.str(); - ss.str(""); - ss << out_json["type"]; - std::string type = ss.str(); - ss.str(""); - ss << out_json["data"]; - std::string data = ss.str(); - ss.str(""); - - ASSERT_TRUE('"' + input->shape().ToString() + '"' == shape); - ASSERT_TRUE('"' + input->type().ToString() + '"' == type); - - std::string input_data; - input_data.push_back('"'); - 
input_data.push_back('['); - for (int i = 0; i < labels.size(); i++) { - input_data += std::to_string(labels[i]); - if (i < labels.size() - 1) { - input_data.push_back(','); - } - } - input_data.push_back(']'); - input_data.push_back('"'); - - std::cout << input_data << std::endl; - std::cout << data << std::endl; - - ASSERT_TRUE(input_data == data); -} diff --git a/tests/ut/cpp/runtest.sh b/tests/ut/cpp/runtest.sh index e4c5f6cdf2f..df1f81e9bd2 100755 --- a/tests/ut/cpp/runtest.sh +++ b/tests/ut/cpp/runtest.sh @@ -32,6 +32,8 @@ ${PROJECT_PATH}/graphengine/third_party/prebuild/aarch64:${LD_LIBRARY_PATH} export PYTHONPATH=${PROJECT_PATH}/tests/ut/cpp/python_input:$PYTHONPATH:${PROJECT_PATH} export GLOG_v=2 export GC_COLLECT_IN_CELL=1 +## set op info config path +export MINDSPORE_OP_INFO_PATH=${PROJECT_PATH}/config/op_info.config ## prepare data for dataset & mindrecord cp -fr $PROJECT_PATH/tests/ut/data ${PROJECT_PATH}/build/mindspore/tests/ut/cpp/ diff --git a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc index aab00605814..83f6e95cc91 100644 --- a/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc +++ b/tests/ut/cpp/stub/dynamic_shape/dynamic_shape_stub.cc @@ -18,7 +18,6 @@ #include "runtime/device/ascend/executor/rts/memcpy_rts_dynamic_kernel.h" #include "runtime/device/ascend/executor/rts/profiling_rts_dynamic_kernel.h" #include "runtime/device/ascend/executor/ai_core_dynamic_kernel.h" -#include "runtime/device/ascend/executor/tiling/op_tiling_calculater.h" #include "backend/kernel_compiler/host/host_kernel_metadata.h" #include "backend/kernel_compiler/host/host_kernel_build.h" @@ -38,11 +37,6 @@ void AiCoreDynamicKernel::Execute() {} void AiCoreDynamicKernel::UpdateArgs() {} void AiCoreDynamicKernel::Initialize() {} void AiCoreDynamicKernel::PostExecute() {} - -void OpTilingCalculater::Init() {} -void OpTilingCalculater::CalculateTiling(const NotNull &cnode, const optiling::OpCompileInfo 
&op_compile_info, - const std::map &depend_tensor_map, - NotNull op_run_info) {} } // namespace ascend } // namespace device } // namespace mindspore diff --git a/tests/ut/cpp/stub/ge/ge_mock.cc b/tests/ut/cpp/stub/ge/ge_mock.cc index 2a405c20073..ed32606bb32 100644 --- a/tests/ut/cpp/stub/ge/ge_mock.cc +++ b/tests/ut/cpp/stub/ge/ge_mock.cc @@ -53,10 +53,8 @@ Status Graph::SaveToFile(const string& file_name) const { return ge::GRAPH_SUCCE } // namespace ge namespace gelc { -extern "C" { uint32_t GetOptInfo(uint32_t mode, const std::string &soc_ver, std::map &opt_info_map) { return 0; } -} // extern C } // namespace gelc #endif diff --git a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc index 0e3477976c0..7be74ba8d73 100644 --- a/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc +++ b/tests/ut/cpp/stub/ge/ge_task_launch_stub.cc @@ -22,6 +22,7 @@ HcclAdapter &HcclAdapter::GetInstance() { static HcclAdapter instance; return instance; } +bool HcclAdapter::InitHccl() { return true; } bool HcclAdapter::InitHccl(uint32_t, std::string_view, std::string_view) { return true; } bool HcclAdapter::FinalizeHccl() { return true; } HcclResult HcclAdapter::HcclCreateGroup(const std::string &, uint32_t, uint32_t *) const { return HCCL_SUCCESS; } @@ -35,7 +36,21 @@ std::string HcclAdapter::GetHcclType(const AnfNodePtr &) { return ""; } HcclResult HcclAdapter::HcclBroadcast(void *, uint64_t, HcclDataType, uint32_t, aclrtStream) const { return HCCL_SUCCESS; } -HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream) const { +HcclResult HcclAdapter::HcclAllReduce(void *, void *, uint64_t, HcclDataType, HcclReduceOp, aclrtStream, + const std::string &) const { + return HCCL_SUCCESS; +} +HcclResult HcclAdapter::HcclAllGather(void *, void *, uint64_t, HcclDataType, aclrtStream, const std::string &) const { + return HCCL_SUCCESS; +} +HcclResult HcclAdapter::HcclReduceScatter(void *, void *, uint64_t, 
HcclDataType, HcclReduceOp, aclrtStream, + const std::string &) const { + return HCCL_SUCCESS; +} +HcclResult HcclAdapter::HcclSend(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const { + return HCCL_SUCCESS; +} +HcclResult HcclAdapter::HcclRecv(void *, uint64_t, HcclDataType, uint32_t, aclrtStream, const std::string &) const { return HCCL_SUCCESS; } HcclResult HcclAdapter::HcclExecEnqueueOp(const ::HcomOperation &op_info, const HExecCallBack &callback) const { diff --git a/tests/ut/python/dataset/test_adjustgamma.py b/tests/ut/python/dataset/test_adjustgamma.py index 32363f76b84..61e91fdc5f5 100644 --- a/tests/ut/python/dataset/test_adjustgamma.py +++ b/tests/ut/python/dataset/test_adjustgamma.py @@ -31,6 +31,8 @@ MNIST_DATA_DIR = "../data/dataset/testMnistData" DATA_DIR_2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] SCHEMA_DIR = "../data/dataset/test_tf_file_3_images/datasetSchema.json" +GENERATE_GOLDEN = False + def generate_numpy_random_rgb(shape): """ @@ -88,20 +90,26 @@ def test_adjust_gamma_invalid_gamma_param_c(): logger.info("Test AdjustGamma C Op with invalid ignore parameter") try: data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])], + data_set = data_set.map(operations=[C.Decode(), + C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid gamma - data_set = data_set.map(operations=C.AdjustGamma(gamma=-10.0, gain=1.0), + data_set = data_set.map(operations=C.AdjustGamma(gamma=-10.0, + gain=1.0), input_columns="image") except ValueError as error: logger.info("Got an exception in AdjustGamma: {}".format(str(error))) assert "Input is not within the required interval of " in str(error) try: data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: 
np.array(img[:, :, 0])], + data_set = data_set.map(operations=[C.Decode(), + C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid gamma - data_set = data_set.map(operations=C.AdjustGamma(gamma=[1, 2], gain=1.0), + data_set = data_set.map(operations=C.AdjustGamma(gamma=[1, 2], + gain=1.0), input_columns="image") except TypeError as error: logger.info("Got an exception in AdjustGamma: {}".format(str(error))) @@ -121,7 +129,8 @@ def test_adjust_gamma_invalid_gamma_param_py(): F.AdjustGamma(gamma=-10.0), F.ToTensor() ]) - data_set = data_set.map(operations=[trans], input_columns=["image"]) + data_set = data_set.map(operations=[trans], + input_columns=["image"]) except ValueError as error: logger.info("Got an exception in AdjustGamma: {}".format(str(error))) assert "Input is not within the required interval of " in str(error) @@ -133,7 +142,8 @@ def test_adjust_gamma_invalid_gamma_param_py(): F.AdjustGamma(gamma=[1, 2]), F.ToTensor() ]) - data_set = data_set.map(operations=[trans], input_columns=["image"]) + data_set = data_set.map(operations=[trans], + input_columns=["image"]) except TypeError as error: logger.info("Got an exception in AdjustGamma: {}".format(str(error))) assert "is not of type [, ], but got" in str(error) @@ -146,10 +156,13 @@ def test_adjust_gamma_invalid_gain_param_c(): logger.info("Test AdjustGamma C Op with invalid gain parameter") try: data_set = ds.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) - data_set = data_set.map(operations=[C.Decode(), C.Resize((224, 224)), lambda img: np.array(img[:, :, 0])], + data_set = data_set.map(operations=[C.Decode(), + C.Resize((224, 224)), + lambda img: np.array(img[:, :, 0])], input_columns=["image"]) # invalid gain - data_set = data_set.map(operations=C.AdjustGamma(gamma=10.0, gain=[1, 10]), + data_set = data_set.map(operations=C.AdjustGamma(gamma=10.0, + gain=[1, 10]), input_columns="image") except TypeError as error: logger.info("Got an exception in 
AdjustGamma: {}".format(str(error))) @@ -169,7 +182,8 @@ def test_adjust_gamma_invalid_gain_param_py(): F.AdjustGamma(gamma=10.0, gain=[1, 10]), F.ToTensor() ]) - data_set = data_set.map(operations=[trans], input_columns=["image"]) + data_set = data_set.map(operations=[trans], + input_columns=["image"]) except TypeError as error: logger.info("Got an exception in AdjustGamma: {}".format(str(error))) assert "is not of type [, ], but got " in str(error) diff --git a/tests/ut/python/dataset/test_allpass_biquad.py b/tests/ut/python/dataset/test_allpass_biquad.py index 29805ab6df3..e3cadece4f5 100644 --- a/tests/ut/python/dataset/test_allpass_biquad.py +++ b/tests/ut/python/dataset/test_allpass_biquad.py @@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio from mindspore import log as logger -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): + assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". 
\ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_allpass_biquad_eager(): @@ -35,11 +37,12 @@ def test_func_allpass_biquad_eager(): # Original waveform waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) # Expect waveform - expect_waveform = np.array([[0.96049707, 1.0, 1.0], [1.0, 1.0, 1.0]], dtype=np.float64) + expect_waveform = np.array([[0.96049707, 1.0, 1.0], + [1.0, 1.0, 1.0]], dtype=np.float64) allpass_biquad_op = audio.AllpassBiquad(44100, 200.0, 0.707) # Filtered waveform by allpassbiquad output = allpass_biquad_op(waveform) - count_unequal_element(expect_waveform, output, 0.0001, 0.0001) + _count_unequal_element(expect_waveform, output, 0.0001, 0.0001) def test_func_allpass_biquad_pipeline(): @@ -48,57 +51,56 @@ def test_func_allpass_biquad_pipeline(): # Original waveform waveform = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.float64) # Expect waveform - expect_waveform = np.array([[0.96049707, 1.0, 1.0], [1.0, 1.0, 1.0]], dtype=np.float64) + expect_waveform = np.array([[0.96049707, 1.0, 1.0], + [1.0, 1.0, 1.0]], dtype=np.float64) label = np.random.sample((2, 1)) data = (waveform, label) dataset = ds.NumpySlicesDataset(data, ["channel", "sample"], shuffle=False) allpass_biquad_op = audio.AllpassBiquad(44100, 200.0) # Filtered waveform by allpassbiquad - dataset = dataset.map(input_columns=["channel"], operations=allpass_biquad_op, num_parallel_workers=8) + dataset = dataset.map( + input_columns=["channel"], operations=allpass_biquad_op, num_parallel_workers=8) i = 0 - for item in dataset.create_dict_iterator(output_numpy=True): - count_unequal_element(expect_waveform[i, :], item['channel'], 0.0001, 0.0001) + for _ in dataset.create_dict_iterator(output_numpy=True): + _count_unequal_element(expect_waveform[i, :], + _['channel'], 0.0001, 0.0001) i += 1 - def test_invalid_input_all(): waveform = np.random.rand(2, 1000) - def test_invalid_input(test_name, sample_rate, central_freq, Q, error, error_msg): 
logger.info("Test Allpassallpassiquad with bad input: {0}".format(test_name)) with pytest.raises(error) as error_info: audio.AllpassBiquad(sample_rate, central_freq, Q)(waveform) assert error_msg in str(error_info.value) - test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, TypeError, "Argument sample_rate with value 44100.5 is not of type []," - + " but got .") + +" but got .") test_invalid_input("invalid sample_rate parameter type as a String", "44100", 200, 0.707, TypeError, - "Argument sample_rate with value 44100 is not of type []," + + "Argument sample_rate with value 44100 is not of type [],"+ " but got .") test_invalid_input("invalid contral_freq parameter type as a String", 44100, "200", 0.707, TypeError, "Argument central_freq with value 200 is not of type [, ]," - + " but got .") + +" but got .") test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", TypeError, "Argument Q with value 0.707 is not of type [, ]," - + " but got .") + +" but got .") test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate is not within the required interval of [-2147483648, 2147483647].") test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, ValueError, "Input central_freq is not within the required interval of [-16777216, 16777216].") test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, TypeError, "Argument sample_rate with value None is not of type []," - + " but got .") + +" but got .") test_invalid_input("invalid central_rate parameter value", 44100, None, 0.707, TypeError, "Argument central_freq with value None is not of type [, ]," - + " but got .") + +" but got .") test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, ValueError, - "Input sample_rate is not 
within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate can not be 0.") test_invalid_input("invalid Q parameter value", 44100, 200, 1.707, ValueError, "Input Q is not within the required interval of (0, 1].") - if __name__ == '__main__': - test_func_allpass_biquad_eager() - test_func_allpass_biquad_pipeline() + test_eager_allpassbiquad_mindspore_001() + test_pipeline_allpass_biquad_001() test_invalid_input_all() diff --git a/tests/ut/python/dataset/test_amplitude_to_db.py b/tests/ut/python/dataset/test_amplitude_to_db.py index 9fba2ed07b9..448b8b09ef4 100644 --- a/tests/ut/python/dataset/test_amplitude_to_db.py +++ b/tests/ut/python/dataset/test_amplitude_to_db.py @@ -23,6 +23,7 @@ import mindspore.dataset.audio.transforms as c_audio from mindspore import log as logger from mindspore.dataset.audio.utils import ScaleType + CHANNEL = 1 FREQ = 20 TIME = 15 @@ -31,18 +32,19 @@ TIME = 15 def gen(shape): np.random.seed(0) data = np.random.random(shape) - yield (np.array(data, dtype=np.float32),) + yield(np.array(data, dtype=np.float32),) -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): """ Precision calculation func """ assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". 
\ + format(data_expected[greater], data_me[greater], error[greater]) def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): @@ -50,7 +52,9 @@ def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): if np.any(np.isnan(data_expected)): assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan) elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan): - count_unequal_element(data_expected, data_me, rtol, atol) + _count_unequal_element(data_expected, data_me, rtol, atol) + else: + assert True def test_func_amplitude_to_db_eager(): @@ -87,7 +91,9 @@ def test_func_amplitude_to_db_pipeline(): data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"]) - transforms = [c_audio.AmplitudeToDB()] + transforms = [ + c_audio.AmplitudeToDB() + ] data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): @@ -96,6 +102,7 @@ def test_func_amplitude_to_db_pipeline(): def test_amplitude_to_db_invalid_input(): + def test_invalid_input(test_name, stype, ref_value, amin, top_db, error, error_msg): logger.info("Test AmplitudeToDB with bad input: {0}".format(test_name)) with pytest.raises(error) as error_info: diff --git a/tests/ut/python/dataset/test_angle.py b/tests/ut/python/dataset/test_angle.py index 1de8e2fd0a2..6c366b6a41e 100755 --- a/tests/ut/python/dataset/test_angle.py +++ b/tests/ut/python/dataset/test_angle.py @@ -19,28 +19,28 @@ import pytest import mindspore.dataset as ds import mindspore.dataset.audio.transforms as a_c_trans +def _count_unequal_element(data_expected, data_me, rtol, atol): -def count_unequal_element(data_expected, data_me, rtol, atol): assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) 
- assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) - + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_angle_001(): """ Eager Test """ - arr = np.array([[73.04, -13.00], [57.49, 13.20], [-57.64, 6.51], [-52.25, 30.67], [-30.11, -18.34], + arr = np.array([[73.04, -13.00], [57.49, 13.20], [-57.64, 6.51], [-52.25, 30.67], [-30.11, -18.34], \ [-63.32, 99.33], [95.82, -24.76]], dtype=np.double) - expected = np.array([-0.17614017, 0.22569334, 3.02912684, 2.6107975, -2.59450886, 2.13831337, -0.25286988], + expected = np.array([-0.17614017, 0.22569334, 3.02912684, 2.6107975, -2.59450886, 2.13831337, -0.25286988], \ dtype=np.double) angle_op = a_c_trans.Angle() output = angle_op(arr) - count_unequal_element(expected, output, 0.0001, 0.0001) + _count_unequal_element(expected, output, 0.0001, 0.0001) def test_func_angle_002(): @@ -48,9 +48,9 @@ def test_func_angle_002(): Pipeline Test """ np.random.seed(6) - arr = np.array([[[84.25, -85.92], [-92.23, 23.06], [-7.33, -44.17], [-62.95, -14.73]], + arr = np.array([[[84.25, -85.92], [-92.23, 23.06], [-7.33, -44.17], [-62.95, -14.73]], \ [[93.09, 38.18], [-81.94, 71.34], [71.33, -39.00], [95.25, -32.94]]], dtype=np.double) - expected = np.array([[-0.79521156, 2.89658848, -1.73524737, -2.91173309], + expected = np.array([[-0.79521156, 2.89658848, -1.73524737, -2.91173309], \ [0.3892177, 2.42523905, -0.50034807, -0.33295219]], dtype=np.double) label = np.random.sample((2, 4, 1)) data = (arr, label) @@ -58,8 +58,7 @@ def test_func_angle_002(): angle_op = a_c_trans.Angle() dataset = dataset.map(operations=angle_op, input_columns=["col1"]) for item1, item2 in zip(dataset.create_dict_iterator(output_numpy=True), expected): - count_unequal_element(item2, item1['col1'], 0.0001, 0.0001) - + 
_count_unequal_element(item2, item1['col1'], 0.0001, 0.0001) def test_func_angle_003(): """ @@ -73,7 +72,7 @@ def test_func_angle_003(): angle_op = a_c_trans.Angle() dataset = dataset.map(operations=angle_op, input_columns=["col1"]) num_itr = 0 - with pytest.raises(RuntimeError, match="input tensor type should be int, float or double"): + with pytest.raises(RuntimeError, match="The input type should be numbers"): for _ in dataset.create_dict_iterator(output_numpy=True): num_itr += 1 diff --git a/tests/ut/python/dataset/test_bandpass_biquad.py b/tests/ut/python/dataset/test_bandpass_biquad.py index caa8277dc35..90a8ddc78b1 100644 --- a/tests/ut/python/dataset/test_bandpass_biquad.py +++ b/tests/ut/python/dataset/test_bandpass_biquad.py @@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio from mindspore import log as logger -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): + assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". 
\ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_bandpass_biquad_eager(): @@ -40,7 +42,7 @@ def test_func_bandpass_biquad_eager(): bandpass_biquad_op = audio.BandpassBiquad(44000, 200.0, 0.707, False) # Filtered waveform by bandpassbiquad output = bandpass_biquad_op(waveform) - count_unequal_element(expect_waveform, output, 0.0001, 0.0001) + _count_unequal_element(expect_waveform, output, 0.0001, 0.0001) def test_func_bandpass_biquad_pipeline(): @@ -56,10 +58,12 @@ def test_func_bandpass_biquad_pipeline(): dataset = ds.NumpySlicesDataset(data, ["channel", "sample"], shuffle=False) bandpass_biquad_op = audio.BandpassBiquad(44000, 200.0) # Filtered waveform by bandpassbiquad - dataset = dataset.map(input_columns=["channel"], operations=bandpass_biquad_op, num_parallel_workers=8) + dataset = dataset.map( + input_columns=["channel"], operations=bandpass_biquad_op, num_parallel_workers=8) i = 0 - for item in dataset.create_dict_iterator(output_numpy=True): - count_unequal_element(expect_waveform[i, :], item['channel'], 0.0001, 0.0001) + for _ in dataset.create_dict_iterator(output_numpy=True): + _count_unequal_element(expect_waveform[i, :], + _['channel'], 0.0001, 0.0001) i += 1 @@ -68,7 +72,8 @@ def test_bandpass_biquad_invalid_input(): logger.info( "Test BandpassBiquad with bad input: {0}".format(test_name)) with pytest.raises(error) as error_info: - audio.BandpassBiquad(sample_rate, central_freq, Q, const_skirt_gain) + audio.BandpassBiquad( + sample_rate, central_freq, Q, const_skirt_gain) assert error_msg in str(error_info.value) test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, True, TypeError, @@ -80,7 +85,7 @@ def test_bandpass_biquad_invalid_input(): "Argument central_freq with value 200 is not of type [, ]," " but got .") test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, True, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) 
and (0, 2147483647].") + "Input sample_rate can not be 0.") test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, True, ValueError, "Input central_freq is not within the required interval of [-16777216, 16777216].") test_invalid_input("invalid Q parameter type as a String", 44100, 200, "0.707", True, TypeError, @@ -91,7 +96,7 @@ def test_bandpass_biquad_invalid_input(): test_invalid_input("invalid Q parameter value", 44100, 200, 0, True, ValueError, "Input Q is not within the required interval of (0, 1].") test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, True, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate is not within the required interval of [-2147483648, 2147483647].") test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, True, TypeError, "Argument sample_rate with value None is not of type []," " but got .") diff --git a/tests/ut/python/dataset/test_bandreject_biquad.py b/tests/ut/python/dataset/test_bandreject_biquad.py index af04d34de25..3c799c6f827 100644 --- a/tests/ut/python/dataset/test_bandreject_biquad.py +++ b/tests/ut/python/dataset/test_bandreject_biquad.py @@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio from mindspore import log as logger -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): + assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + 
"\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". \ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_bandreject_biquad_eager(): @@ -41,7 +43,7 @@ def test_func_bandreject_biquad_eager(): bandreject_biquad_op = audio.BandrejectBiquad(44100, 200.0, 0.707) # Filtered waveform by bandrejectbiquad output = bandreject_biquad_op(waveform) - count_unequal_element(expect_waveform, output, 0.0001, 0.0001) + _count_unequal_element(expect_waveform, output, 0.0001, 0.0001) def test_func_bandreject_biquad_pipeline(): @@ -61,9 +63,9 @@ def test_func_bandreject_biquad_pipeline(): dataset = dataset.map( input_columns=["channel"], operations=bandreject_biquad_op, num_parallel_workers=8) i = 0 - for item in dataset.create_dict_iterator(output_numpy=True): - count_unequal_element(expect_waveform[i, :], - item['channel'], 0.0001, 0.0001) + for _ in dataset.create_dict_iterator(output_numpy=True): + _count_unequal_element(expect_waveform[i, :], + _['channel'], 0.0001, 0.0001) i += 1 @@ -74,7 +76,6 @@ def test_bandreject_biquad_invalid_input(): with pytest.raises(error) as error_info: audio.BandrejectBiquad(sample_rate, central_freq, Q) assert error_msg in str(error_info.value) - test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 200, 0.707, TypeError, "Argument sample_rate with value 44100.5 is not of type []," " but got .") @@ -84,7 +85,7 @@ def test_bandreject_biquad_invalid_input(): "Argument central_freq with value 200 is not of type [, ]," " but got .") test_invalid_input("invalid sample_rate parameter value", 0, 200, 0.707, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate can not be 0.") test_invalid_input("invalid contral_freq parameter value", 44100, 32434324324234321, 0.707, ValueError, "Input central_freq is not within the required interval of [-16777216, 16777216].") test_invalid_input("invalid Q parameter type as a String", 
44100, 200, "0.707", TypeError, @@ -95,7 +96,7 @@ def test_bandreject_biquad_invalid_input(): test_invalid_input("invalid Q parameter value", 44100, 200, 0, ValueError, "Input Q is not within the required interval of (0, 1].") test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 200, 0.707, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate is not within the required interval of [-2147483648, 2147483647].") test_invalid_input("invalid sample_rate parameter value", None, 200, 0.707, TypeError, "Argument sample_rate with value None is not of type []," " but got .") @@ -105,6 +106,6 @@ def test_bandreject_biquad_invalid_input(): if __name__ == "__main__": - test_func_bandreject_biquad_eager() - test_func_bandreject_biquad_pipeline() - test_bandreject_biquad_invalid_input() + test_func_band_biquad_eager() + test_func_band_biquad_pipeline() + test_band_biquad_invalid_input() diff --git a/tests/ut/python/dataset/test_bass_biquad.py b/tests/ut/python/dataset/test_bass_biquad.py index 41f1e7c87cf..c06470db271 100644 --- a/tests/ut/python/dataset/test_bass_biquad.py +++ b/tests/ut/python/dataset/test_bass_biquad.py @@ -19,14 +19,16 @@ import mindspore.dataset.audio.transforms as audio from mindspore import log as logger -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): + assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". 
\ + format(data_expected[greater], data_me[greater], error[greater]) def test_func_bass_biquad_eager(): @@ -40,7 +42,7 @@ def test_func_bass_biquad_eager(): bass_biquad_op = audio.BassBiquad(44100, 50.0, 100.0, 0.707) # Filtered waveform by bassbiquad output = bass_biquad_op(waveform) - count_unequal_element(expect_waveform, output, 0.0001, 0.0001) + _count_unequal_element(expect_waveform, output, 0.0001, 0.0001) def test_func_bass_biquad_pipeline(): @@ -59,9 +61,9 @@ def test_func_bass_biquad_pipeline(): dataset = dataset.map( input_columns=["channel"], operations=bass_biquad_op, num_parallel_workers=8) i = 0 - for item in dataset.create_dict_iterator(output_numpy=True): - count_unequal_element(expect_waveform[i, :], - item['channel'], 0.0001, 0.0001) + for _ in dataset.create_dict_iterator(output_numpy=True): + _count_unequal_element(expect_waveform[i, :], + _['channel'], 0.0001, 0.0001) i += 1 @@ -71,7 +73,6 @@ def test_invalid_invalid_input(): with pytest.raises(error) as error_info: audio.BassBiquad(sample_rate, gain, central_freq, Q) assert error_msg in str(error_info.value) - test_invalid_input("invalid sample_rate parameter type as a float", 44100.5, 50.0, 200, 0.707, TypeError, "Argument sample_rate with value 44100.5 is not of type []," " but got .") @@ -89,7 +90,7 @@ def test_invalid_invalid_input(): " but got .") test_invalid_input("invalid sample_rate parameter value", 441324343243242342345300, 50.0, 200, 0.707, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate is not within the required interval of [-2147483648, 2147483647].") test_invalid_input("invalid gain parameter value", 44100, 32434324324234321, 200, 0.707, ValueError, "Input gain is not within the required interval of [-16777216, 16777216].") test_invalid_input("invalid contral_freq parameter value", 44100, 50, 32434324324234321, 0.707, ValueError, @@ -106,11 +107,10 @@ def test_invalid_invalid_input(): " but 
got .") test_invalid_input("invalid sample_rate parameter value", 0, 50.0, 200, 0.707, ValueError, - "Input sample_rate is not within the required interval of [-2147483648, 0) and (0, 2147483647].") + "Input sample_rate can not be 0.") test_invalid_input("invalid Q parameter value", 44100, 50.0, 200, 1.707, ValueError, "Input Q is not within the required interval of (0, 1].") - if __name__ == '__main__': test_func_bass_biquad_eager() test_func_bass_biquad_pipeline() diff --git a/tests/ut/python/dataset/test_datasets_cmuarctic.py b/tests/ut/python/dataset/test_datasets_cmuarctic.py new file mode 100644 index 00000000000..8dc36bddd91 --- /dev/null +++ b/tests/ut/python/dataset/test_datasets_cmuarctic.py @@ -0,0 +1,203 @@ +""" +Test CmuArctic dataset operators +""" +import os +import pytest +import numpy as np +import matplotlib.pyplot as plt +import mindspore.dataset as ds +import mindspore.dataset.vision.c_transforms as vision +from mindspore import log as logger + +DATA_DIR = "/home/user06/zjm/data/cmu_arctic/" + +def test_cmuarctic_basic(): + """ + Validate CmuarcticDataset + """ + logger.info("Test CmuArcticDataset Op") + + # case 1: test loading fault dataset + data1 = ds.CmuArcticDataset(DATA_DIR) + num_iter1 = 0 + for _ in data1.create_dict_iterator( output_numpy=True,num_epochs=1): + num_iter1 += 1 + assert num_iter1 == 1132 + + # case 2: test num_samples + data2 = ds.CmuArcticDataset(DATA_DIR, num_samples=500) + num_iter2 = 0 + for _ in data2.create_dict_iterator( output_numpy=True,num_epochs=1): + num_iter2 += 1 + assert num_iter2 == 500 + + # case 3: test repeat + data3 = ds.CmuArcticDataset(DATA_DIR, num_samples=200) + data3 = data3.repeat(5) + num_iter3 = 0 + for _ in data3.create_dict_iterator( output_numpy=True,num_epochs=1): + num_iter3 += 1 + assert num_iter3 == 1000 + + # case 4: test batch with drop_remainder=False + data4 = ds.CmuArcticDataset(DATA_DIR, num_samples=100) + assert data4.get_dataset_size() == 100 + assert data4.get_batch_size() == 1 
+ data4 = data4.batch(batch_size=7) # drop_remainder is default to be False + assert data4.get_dataset_size() == 15 + assert data4.get_batch_size() == 7 + # num_iter4 = 0 + # for _ in data4.create_dict_iterator( output_numpy=True,num_epochs=1): + # num_iter4 += 1 + # assert num_iter4 == 15 + + # case 5: test batch with drop_remainder=True + data5 = ds.CmuArcticDataset(DATA_DIR, num_samples=100) + assert data5.get_dataset_size() == 100 + assert data5.get_batch_size() == 1 + data5 = data5.batch(batch_size=7, drop_remainder=True) # the rest of incomplete batch will be dropped + assert data5.get_dataset_size() == 14 + assert data5.get_batch_size() == 7 + # num_iter5 = 0 + # for _ in data5.create_dict_iterator( output_numpy=True,num_epochs=1): + # num_iter5 += 1 + # assert num_iter5 == 14 + + + +def test_cmu_arctic_sequential_sampler(): + """ + Test CmuArcticDataset with SequentialSampler + """ + logger.info("Test CmuArcticDataset Op with SequentialSampler") + num_samples = 50 + sampler = ds.SequentialSampler(num_samples=num_samples) + data1 = ds.CmuArcticDataset(DATA_DIR, sampler=sampler) + data2 = ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_samples=num_samples) + label_list1, label_list2 = [], [] + num_iter = 0 + for item1, item2 in zip(data1.create_dict_iterator( output_numpy=True,num_epochs=1), data2.create_dict_iterator( output_numpy=True,num_epochs=1)): + label_list1.append(item1["utterance"]) + label_list2.append(item2["utterance"]) + num_iter += 1 + np.testing.assert_array_equal(label_list1, label_list2) + assert num_iter == num_samples + + +def test_cmu_arctic_exception(): + """ + Test error cases for CmuArcticDataset + """ + logger.info("Test error cases for CmuArcticDataset") + error_msg_1 = "sampler and shuffle cannot be specified at the same time" + with pytest.raises(RuntimeError, match=error_msg_1): + ds.CmuArcticDataset(DATA_DIR, shuffle=False, sampler=ds.PKSampler(3)) + + error_msg_2 = "sampler and sharding cannot be specified at the same time" + 
with pytest.raises(RuntimeError, match=error_msg_2): + ds.CmuArcticDataset(DATA_DIR, sampler=ds.PKSampler(3), num_shards=2, shard_id=0) + + error_msg_3 = "num_shards is specified and currently requires shard_id as well" + with pytest.raises(RuntimeError, match=error_msg_3): + ds.CmuArcticDataset(DATA_DIR, num_shards=10) + + error_msg_4 = "shard_id is specified but num_shards is not" + with pytest.raises(RuntimeError, match=error_msg_4): + ds.CmuArcticDataset(DATA_DIR, shard_id=0) + + error_msg_5 = "Input shard_id is not within the required interval" + with pytest.raises(ValueError, match=error_msg_5): + ds.CmuArcticDataset(DATA_DIR, num_shards=5, shard_id=-1) + with pytest.raises(ValueError, match=error_msg_5): + ds.CmuArcticDataset(DATA_DIR, num_shards=5, shard_id=5) + with pytest.raises(ValueError, match=error_msg_5): + ds.CmuArcticDataset(DATA_DIR, num_shards=2, shard_id=5) + + error_msg_6 = "num_parallel_workers exceeds" + with pytest.raises(ValueError, match=error_msg_6): + ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=0) + with pytest.raises(ValueError, match=error_msg_6): + ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=256) + with pytest.raises(ValueError, match=error_msg_6): + ds.CmuArcticDataset(DATA_DIR, shuffle=False, num_parallel_workers=-2) + + error_msg_7 = "Argument shard_id" + with pytest.raises(TypeError, match=error_msg_7): + ds.CmuArcticDataset(DATA_DIR, num_shards=2, shard_id="0") + + def exception_func(item): + raise Exception("Error occur!") + + error_msg_8 = "The corresponding data files" + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.CmuArcticDataset(DATA_DIR) + data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.CmuArcticDataset(DATA_DIR) + data = data.map(operations=vision.Decode(), input_columns=["waveform"], num_parallel_workers=1) + 
data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + with pytest.raises(RuntimeError, match=error_msg_8): + data = ds.CmuArcticDataset(DATA_DIR) + data = data.map(operations=exception_func, input_columns=["waveform"], num_parallel_workers=1) + for _ in data.__iter__(): + pass + + +def test_cmu_arctic_visualize(plot=False): + """ + Visualize CmuArcticDataset results + """ + logger.info("Test CmuArcticDataset visualization") + + data1 = ds.CmuArcticDataset(DATA_DIR, num_samples=10, shuffle=False) + num_iter = 0 + for item in data1.create_dict_iterator( num_epochs=1, output_numpy=True): + audio = item["waveform"] + sample_rate = item["sample_rate"] + assert isinstance(audio, np.ndarray) + assert audio.dtype == np.float64 + assert sample_rate.dtype == np.uint32 + num_iter += 1 + assert num_iter == 10 + + +def test_cmu_arctic_usage(): + """ + Validate CmuArcticDataset audio readings + """ + logger.info("Test CmuArcticDataset usage flag") + + def test_config(usage, cmu_arctic_path=None): + cmu_arctic_path = DATA_DIR if cmu_arctic_path is None else cmu_arctic_path + try: + data = ds.CmuArcticDataset(cmu_arctic_path, usage=usage, shuffle=False) + num_rows = 0 + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): + num_rows += 1 + except (ValueError, TypeError, RuntimeError) as e: + return str(e) + return num_rows + + assert test_config("aew") == 1132 + assert test_config("ahw") == 593 + assert "Input usage is not within the valid set of ['aew', 'ahw', 'aup', 'awb', 'axb', 'bdl', 'clb', 'eey', 'fem', 'gka', 'jmk', 'ksp', 'ljm', 'lnh', 'rms', 'rxr', 'slp', 'slt']." 
in test_config("invalid") + assert "Argument usage with value ['list'] is not of type []" in test_config(["list"]) + + all_files_path = None + if all_files_path is not None: + assert test_config("aew", all_files_path) == 1132 + assert test_config("ahw", all_files_path) == 593 + assert ds.cmu_arcticDataset(all_files_path, usage="aew").get_dataset_size() == 1132 + assert ds.cmu_arcticDataset(all_files_path, usage="ahw").get_dataset_size() == 593 + + +if __name__ == '__main__': + test_cmuarctic_basic() + test_cmu_arctic_sequential_sampler() + test_cmu_arctic_exception() + test_cmu_arctic_visualize(plot=True) + test_cmu_arctic_usage() diff --git a/tests/ut/python/dataset/test_datasets_sbd.py b/tests/ut/python/dataset/test_datasets_sbd.py index db7c3b9fd05..3801cfa669b 100644 --- a/tests/ut/python/dataset/test_datasets_sbd.py +++ b/tests/ut/python/dataset/test_datasets_sbd.py @@ -22,6 +22,7 @@ import mindspore.dataset as ds from mindspore import log as logger import mindspore.dataset.vision.c_transforms as c_vision + DATASET_DIR = "../data/dataset/testSBData/sbd" @@ -192,7 +193,6 @@ def test_sbd_usage(): """ Validate SBDataset image readings """ - def test_config(usage): try: data = ds.SBDataset(DATASET_DIR, task='Segmentation', usage=usage) diff --git a/tests/ut/python/dataset/test_schema.py b/tests/ut/python/dataset/test_schema.py index f31400dffe5..84ff09f498e 100644 --- a/tests/ut/python/dataset/test_schema.py +++ b/tests/ut/python/dataset/test_schema.py @@ -48,7 +48,7 @@ def test_schema_exception(): with pytest.raises(TypeError) as info: ds.Schema(1) - assert "Argument schema_file with value 1 is not of type []" in str(info.value) + assert "path: 1 is not string" in str(info.value) with pytest.raises(RuntimeError) as info: schema = ds.Schema(SCHEMA_FILE) diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index ef69671d250..a6a1fcee4ea 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ 
b/tests/ut/python/dataset/test_serdes_dataset.py @@ -59,7 +59,8 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): resize_op = vision.Resize((resize_height, resize_width), Inter.LINEAR) data1 = data1.map(operations=[rescale_op, resize_op], input_columns=["image"]) - data1 = data1.batch(2) + data1_1 = ds.TFRecordDataset(["../data/dataset/testTFTestAllTypes/test.data"], num_samples=6).batch(2).repeat(10) + data1 = data1.zip(data1_1) # Serialize the dataset pre-processing pipeline. # data1 should still work after saving. @@ -78,6 +79,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): ds.serialize(data2, "imagenet_dataset_pipeline_1.json") assert validate_jsonfile("imagenet_dataset_pipeline_1.json") is True assert filecmp.cmp('imagenet_dataset_pipeline.json', 'imagenet_dataset_pipeline_1.json') + assert data1.get_dataset_size() == data2.get_dataset_size() # Deserialize the latest json file again data3 = ds.deserialize(json_filepath="imagenet_dataset_pipeline_1.json") @@ -97,7 +99,7 @@ def test_serdes_imagefolder_dataset(remove_json_files=True): num_samples += 1 logger.info("Number of data in data1: {}".format(num_samples)) - assert num_samples == 6 + assert num_samples == 11 # Remove the generated json file if remove_json_files: @@ -169,8 +171,8 @@ def test_serdes_cifar10_dataset(remove_json_files=True): data1 = data1.map(operations=trans, input_columns="image") data1 = data1.batch(3, drop_remainder=True) data1 = data1.repeat(1) - data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", remove_json_files) - + # json files are needed for create iterator, remove_json_files = False + data2 = util_check_serialize_deserialize_file(data1, "cifar10_dataset_pipeline", False) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), @@ -183,6 +185,8 @@ def 
test_serdes_cifar10_dataset(remove_json_files=True): # Restore configuration num_parallel_workers ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + if remove_json_files: + delete_json_files() def test_serdes_celeba_dataset(remove_json_files=True): @@ -196,7 +200,8 @@ def test_serdes_celeba_dataset(remove_json_files=True): center_crop = vision.CenterCrop((80, 80)) pad_op = vision.Pad(20, fill_value=(20, 20, 20)) data1 = data1.map(operations=[center_crop, pad_op], input_columns=["image"], num_parallel_workers=8) - data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", remove_json_files) + # json files are needed for create iterator, remove_json_files = False + data2 = util_check_serialize_deserialize_file(data1, "celeba_dataset_pipeline", False) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -206,6 +211,8 @@ def test_serdes_celeba_dataset(remove_json_files=True): num_samples += 1 assert num_samples == 8 + if remove_json_files: + delete_json_files() def test_serdes_csv_dataset(remove_json_files=True): @@ -220,7 +227,8 @@ def test_serdes_csv_dataset(remove_json_files=True): shuffle=False) columns = ["col1", "col4", "col2"] data1 = data1.project(columns=columns) - data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", remove_json_files) + # json files are needed for create iterator, remove_json_files = False + data2 = util_check_serialize_deserialize_file(data1, "csv_dataset_pipeline", False) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -232,6 +240,8 @@ def test_serdes_csv_dataset(remove_json_files=True): num_samples += 1 assert num_samples == 3 + if remove_json_files: + delete_json_files() def test_serdes_voc_dataset(remove_json_files=True): @@ -251,7 +261,8 @@ def test_serdes_voc_dataset(remove_json_files=True): 
data1 = data1.map(operations=random_color_adjust_op, input_columns=["image"]) data1 = data1.map(operations=random_rotation_op, input_columns=["image"]) data1 = data1.skip(2) - data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", remove_json_files) + # json files are needed for create iterator, remove_json_files = False + data2 = util_check_serialize_deserialize_file(data1, "voc_dataset_pipeline", False) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) @@ -265,6 +276,8 @@ def test_serdes_voc_dataset(remove_json_files=True): # Restore configuration num_parallel_workers ds.config.set_seed(original_seed) ds.config.set_num_parallel_workers(original_num_parallel_workers) + if remove_json_files: + delete_json_files() def test_serdes_zip_dataset(remove_json_files=True): @@ -380,8 +393,8 @@ def test_serdes_pyvision(remove_json_files=True): try: util_check_serialize_deserialize_file(data1, "pyvision_dataset_pipeline", remove_json_files) assert False - except NotImplementedError as e: - assert "python function is not yet supported" in str(e) + except RuntimeError as e: + assert "python operation is not yet supported" in str(e) def test_serdes_uniform_augment(remove_json_files=True): @@ -420,7 +433,6 @@ def skip_test_serdes_fill(remove_json_files=True): for data_row in data: np.testing.assert_array_equal(data_row[0].asnumpy(), expected) - # FIXME - need proper serdes support for Fill's fill_value parameter util_check_serialize_deserialize_file(data, "fill_pipeline", remove_json_files) @@ -434,8 +446,10 @@ def test_serdes_exception(): data1 = data1.filter(input_columns=["image", "label"], predicate=lambda data: data < 11, num_parallel_workers=4) data1_json = ds.serialize(data1) with pytest.raises(RuntimeError) as msg: - ds.deserialize(input_dict=data1_json) - assert "Filter is not yet supported by ds.engine.deserialize" in str(msg) + data2 = ds.deserialize(input_dict=data1_json) + 
ds.serialize(data2, "filter_dataset_fail.json") + assert "Filter operation is not supported" in str(msg) + delete_json_files() def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files): @@ -456,7 +470,7 @@ def util_check_serialize_deserialize_file(data_orig, filename, remove_json_files data_changed = ds.deserialize(json_filepath=file1) ds.serialize(data_changed, file2) assert validate_jsonfile(file2) is True - assert filecmp.cmp(file1, file2) + assert filecmp.cmp(file1, file2, shallow=False) # Remove the generated json file if remove_json_files: diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index 187239895a1..a75e88e7bad 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -17,7 +17,6 @@ import pytest import mindspore.dataset as ds import mindspore.dataset.vision.c_transforms as vision -from mindspore import log as logger DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"] @@ -208,9 +207,8 @@ def test_skip_exception_1(): for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 - except RuntimeError as e: - logger.info("Got an exception in DE: {}".format(str(e))) - assert "skip_count should not be negative, skip_count: -1" in str(e) + except ValueError as e: + assert "Input count is not within the required interval" in str(e) def test_skip_exception_2(): diff --git a/tests/ut/python/dataset/test_slice_patches.py b/tests/ut/python/dataset/test_slice_patches.py index 9a681a3be5d..159d994a812 100644 --- a/tests/ut/python/dataset/test_slice_patches.py +++ b/tests/ut/python/dataset/test_slice_patches.py @@ -140,6 +140,54 @@ def test_slice_patches_exception_01(): logger.info("Got an exception in SlicePatches: {}".format(str(e))) assert "Input fill_value is not within" in str(e) +def test_slice_patches_06(): + image = np.random.randint(0, 255, (158, 126, 1)).astype(np.int32) + slice_patches_op = 
c_vision.SlicePatches(2, 8) + patches = slice_patches_op(image) + assert len(patches) == 16 + assert patches[0].shape == (79, 16, 1) + +def test_slice_patches_07(): + image = np.random.randint(0, 255, (158, 126)).astype(np.int32) + slice_patches_op = c_vision.SlicePatches(2, 8) + patches = slice_patches_op(image) + assert len(patches) == 16 + assert patches[0].shape == (79, 16) + +def test_slice_patches_08(): + np_data = np.random.randint(0, 255, (1, 56, 82, 256)).astype(np.uint8) + dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) + slice_patches_op = c_vision.SlicePatches(2, 2) + dataset = dataset.map(input_columns=["image"], output_columns=["img0", "img1", "img2", "img3"], + column_order=["img0", "img1", "img2", "img3"], + operations=slice_patches_op) + for item in dataset.create_dict_iterator(output_numpy=True): + patch_shape = item['img0'].shape + assert patch_shape == (28, 41, 256) + +def test_slice_patches_09(): + image = np.random.randint(0, 255, (56, 82, 256)).astype(np.uint8) + slice_patches_op = c_vision.SlicePatches(4, 3, mode.SliceMode.PAD) + patches = slice_patches_op(image) + assert len(patches) == 12 + assert patches[0].shape == (14, 28, 256) + +def skip_test_slice_patches_10(): + image = np.random.randint(0, 255, (7000, 7000, 255)).astype(np.uint8) + slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP) + patches = slice_patches_op(image) + assert patches[0].shape == (700, 538, 255) + +def skip_test_slice_patches_11(): + np_data = np.random.randint(0, 255, (1, 7000, 7000, 256)).astype(np.uint8) + dataset = ds.NumpySlicesDataset(np_data, column_names=["image"]) + slice_patches_op = c_vision.SlicePatches(10, 13, mode.SliceMode.DROP) + cols = ['img' + str(x) for x in range(10*13)] + dataset = dataset.map(input_columns=["image"], output_columns=cols, + column_order=cols, operations=slice_patches_op) + for item in dataset.create_dict_iterator(output_numpy=True): + patch_shape = item['img0'].shape + assert patch_shape == 
(700, 538, 256) def slice_patches(image, num_h, num_w, pad_or_drop, fill_value): """ help function which slice patches with numpy """ @@ -174,4 +222,8 @@ if __name__ == "__main__": test_slice_patches_03(plot=True) test_slice_patches_04(plot=True) test_slice_patches_05(plot=True) + test_slice_patches_06() + test_slice_patches_07() + test_slice_patches_08() + test_slice_patches_09() test_slice_patches_exception_01() diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py index 3754aba0f87..96c79ef9c87 100644 --- a/tests/ut/python/dataset/test_take.py +++ b/tests/ut/python/dataset/test_take.py @@ -351,7 +351,7 @@ def test_take_19(): data1 = data1.batch(2) data1 = data1.take(0) - assert "positive integer" in str(info.value) + assert "within the required interval" in str(info.value) if __name__ == '__main__': test_take_01() diff --git a/tests/ut/python/dataset/test_time_stretch.py b/tests/ut/python/dataset/test_time_stretch.py index 577c40ebdbf..52a796c7ad6 100644 --- a/tests/ut/python/dataset/test_time_stretch.py +++ b/tests/ut/python/dataset/test_time_stretch.py @@ -31,24 +31,27 @@ COMPLEX = 2 def gen(shape): np.random.seed(0) data = np.random.random(shape) - yield (np.array(data, dtype=np.float32),) + yield(np.array(data, dtype=np.float32),) -def count_unequal_element(data_expected, data_me, rtol, atol): +def _count_unequal_element(data_expected, data_me, rtol, atol): assert data_expected.shape == data_me.shape total_count = len(data_expected.flatten()) error = np.abs(data_expected - data_me) greater = np.greater(error, atol + np.abs(data_expected) * rtol) loss_count = np.count_nonzero(greater) - assert (loss_count / total_count) < rtol, "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}".format( - data_expected[greater], data_me[greater], error[greater]) + assert (loss_count / total_count) < rtol, \ + "\ndata_expected_std:{0}\ndata_me_error:{1}\nloss:{2}". 
\ + format(data_expected[greater], data_me[greater], error[greater]) def allclose_nparray(data_expected, data_me, rtol, atol, equal_nan=True): if np.any(np.isnan(data_expected)): assert np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan) elif not np.allclose(data_me, data_expected, rtol, atol, equal_nan=equal_nan): - count_unequal_element(data_expected, data_me, rtol, atol) + _count_unequal_element(data_expected, data_me, rtol, atol) + else: + assert True def test_time_stretch_pipeline(): @@ -57,14 +60,18 @@ def test_time_stretch_pipeline(): """ logger.info("test TimeStretch op") generator = gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX]) - data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"]) + data1 = ds.GeneratorDataset(source=generator, column_names=[ + "multi_dimensional_data"]) - transforms = [c_audio.TimeStretch(512, FREQ, 1.3)] - data1 = data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) + transforms = [ + c_audio.TimeStretch(512, FREQ, 1.3) + ] + data1 = data1.map(operations=transforms, input_columns=[ + "multi_dimensional_data"]) for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): out_put = item["multi_dimensional_data"] - assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX) + assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX) def test_time_stretch_pipeline_invalid_param(): @@ -73,15 +80,19 @@ def test_time_stretch_pipeline_invalid_param(): """ logger.info("test TimeStretch op with invalid values") generator = gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX]) - data1 = ds.GeneratorDataset(source=generator, column_names=["multi_dimensional_data"]) + data1 = ds.GeneratorDataset(source=generator, column_names=[ + "multi_dimensional_data"]) with pytest.raises(ValueError, match=r"Input fixed_rate is not within the required interval of \(0, 16777216\]."): - transforms = [c_audio.TimeStretch(512, FREQ, -1.3)] - data1 = 
data1.map(operations=transforms, input_columns=["multi_dimensional_data"]) + transforms = [ + c_audio.TimeStretch(512, FREQ, -1.3) + ] + data1 = data1.map(operations=transforms, input_columns=[ + "multi_dimensional_data"]) for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): out_put = item["multi_dimensional_data"] - assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX) + assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX) def test_time_stretch_eager(): @@ -91,7 +102,7 @@ def test_time_stretch_eager(): logger.info("test TimeStretch op with customized parameter values") spectrogram = next(gen([CHANNEL_NUM, FREQ, FRAME_NUM, COMPLEX]))[0] out_put = c_audio.TimeStretch(512, FREQ, 1.3)(spectrogram) - assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM / 1.3), COMPLEX) + assert out_put.shape == (CHANNEL_NUM, FREQ, np.ceil(FRAME_NUM/1.3), COMPLEX) def test_percision_time_stretch_eager(): diff --git a/tests/ut/python/exec/test_train_with_lars.py b/tests/ut/python/exec/test_train_with_lars.py index 04087cb0f0a..beec5d21b90 100644 --- a/tests/ut/python/exec/test_train_with_lars.py +++ b/tests/ut/python/exec/test_train_with_lars.py @@ -20,7 +20,6 @@ from mindspore.common.parameter import ParameterTuple, Parameter from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim import Momentum from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P @@ -67,10 +66,11 @@ class TrainOneStepWithLarsCell(nn.Cell): bias_grads = grads[self.slice_index: self.params_len] lars_grads = self.lars(non_bias_weights, non_bias_grads, self.weight_decay) new_grads = lars_grads + bias_grads - return F.depend(loss, self.optimizer(new_grads)) + self.optimizer(new_grads) + return loss -# fn is a funcation use i as input +# fn is a function use i as input def lr_gen(fn, epoch_size): for i in range(epoch_size): yield fn(i) diff --git 
a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index be3c5f16432..b21e85500bc 100755 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -2119,6 +2119,11 @@ test_case_nn_ops = [ 'block': P.L2Loss(), 'desc_inputs': [Tensor(np.array([[1, 1], [2, 2], [3, 3], [4, 4]]), mstype.float16)], 'desc_bprop': []}), + ('SoftMarginLoss', { + 'block': P.SoftMarginLoss(reduction="none"), + 'desc_inputs': [Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]).astype(np.float32)), + Tensor(np.array([[-1, 1], [1, -1]]).astype(np.float32))], + 'desc_bprop': [Tensor(np.array([[1, 1], [1, 1]]).astype(np.float32))]}), ('BCEWithLogitsLoss', { 'block': P.BCEWithLogitsLoss(), 'desc_inputs': [[3, 3], [3, 3], [3, 3], [3, 3]], @@ -2204,6 +2209,16 @@ test_case_nn_ops = [ 'desc_inputs': [Tensor(np.array([[-4, 4, 1]]), mstype.float32)], 'desc_bprop': [Tensor(np.array([[0, 1, 0.6666]]), mstype.float32)], 'skip': ['backward']}), + ('HardShrink', { + 'block': P.HShrink(), + 'desc_inputs': [Tensor(np.array([[0.5, 1, 2.0], [0.0533, 0.0776, -2.1233]]), mstype.float32)], + 'desc_bprop': [], + 'skip': ['backward']}), + ('HShrinkGrad', { + 'block': G.HShrinkGrad(), + 'desc_inputs': [Tensor(np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]), mstype.float16), + Tensor(np.array([[-4, -3, -2], [1, 2, 4]]), mstype.float16)], + 'skip': ['backward']}), ] test_case_array_ops = [ diff --git a/tests/ut/python/optimizer/test_auto_grad.py b/tests/ut/python/optimizer/test_auto_grad.py index 3314472176a..ca5e7a85f00 100644 --- a/tests/ut/python/optimizer/test_auto_grad.py +++ b/tests/ut/python/optimizer/test_auto_grad.py @@ -252,3 +252,112 @@ def test_limit_lift_fv_scope(): grad_net = GradNet(net) grad_net.add_flags_recursive(defer_inline=True) grad_net(x, y) + + +def test_same_primal_used_by_multi_j(): + class Net(nn.Cell): + def __init__(self): + super(Net, self).__init__() + + def construct(self, x): + return x + + class GradNet(nn.Cell): + def __init__(self, net): + super(GradNet, 
self).__init__() + self.net = net + self.grad = ops.GradOperation() + + def construct(self, x): + out = self.net(x) + gout = self.grad(self.net)(x) + gout1 = self.grad(self.net)(x) + return out, gout, gout1 + + x = Tensor(np.array([1.0], dtype=np.float32)) + net = Net() + grad = GradNet(net) + grad(x) + + +def test_same_primal_used_by_multi_j_with_monad1(): + class AdamNet(nn.Cell): + def __init__(self, var, m, v): + super(AdamNet, self).__init__() + self.apply_adam = P.Adam() + self.var = Parameter(var, name="var") + self.m = Parameter(m, name="m") + self.v = Parameter(v, name="v") + + def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): + self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) + return self.var + + class AdamGradNet(nn.Cell): + def __init__(self, network): + super(AdamGradNet, self).__init__() + self.grad_fn = ops.GradOperation(sens_param=True) + self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))] + self.network = network + + def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): + out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) + gout1 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0]) + gout2 = self.grad_fn(self.network)(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1]) + return out, gout1, gout2 + + var = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + m = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + v = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + beta1_power = Tensor(np.array([0.9], dtype=np.float32)) + beta2_power = Tensor(np.array([0.999], dtype=np.float32)) + lr = Tensor(np.array([0.001], dtype=np.float32)) + beta1 = Tensor(np.array([0.9], dtype=np.float32)) + beta2 = Tensor(np.array([0.999], dtype=np.float32)) + epsilon = Tensor(np.array([1e-8], dtype=np.float32)) + grad = 
Tensor(np.random.rand(3, 3, 3).astype(np.float32)) + net = AdamNet(var, m, v) + grad_net = AdamGradNet(net) + grad_net(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) + + +def test_same_primal_used_by_multi_j_with_monad2(): + class AdamNet(nn.Cell): + def __init__(self, var, m, v): + super(AdamNet, self).__init__() + self.apply_adam = P.Adam() + self.var = Parameter(var, name="var") + self.m = Parameter(m, name="m") + self.v = Parameter(v, name="v") + + def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): + self.apply_adam(self.var, self.m, self.v, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) + return self.var + + class AdamGradNet(nn.Cell): + def __init__(self, network): + super(AdamGradNet, self).__init__() + self.grad = ops.GradOperation(sens_param=True) + self.sens = [Tensor(np.ones([3, 3, 3]).astype(np.float32)), Tensor(np.ones([3, 3, 3]).astype(np.float32))] + self.network = network + + def construct(self, beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad): + out = self.network(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad) + grad_fn = self.grad(self.network) + gout1 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[0]) + gout2 = grad_fn(beta1_power, beta2_power, lr, beta1, beta2, epsilon, grad, self.sens[1]) + return out, gout1, gout2 + + var = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + m = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + v = Tensor(np.ones([3, 3, 3]).astype(np.float32)) + beta1_power = Tensor(np.array([0.9], dtype=np.float32)) + beta2_power = Tensor(np.array([0.999], dtype=np.float32)) + lr = Tensor(np.array([0.001], dtype=np.float32)) + beta1 = Tensor(np.array([0.9], dtype=np.float32)) + beta2 = Tensor(np.array([0.999], dtype=np.float32)) + epsilon = Tensor(np.array([1e-8], dtype=np.float32)) + grad = Tensor(np.random.rand(3, 3, 3).astype(np.float32)) + net = AdamNet(var, m, v) + grad_net = AdamGradNet(net) + grad_net(beta1_power, 
beta2_power, lr, beta1, beta2, epsilon, grad) diff --git a/tests/ut/python/parallel/test_conv2d.py b/tests/ut/python/parallel/test_conv2d.py index 4309b707513..1ef971a0587 100644 --- a/tests/ut/python/parallel/test_conv2d.py +++ b/tests/ut/python/parallel/test_conv2d.py @@ -39,6 +39,8 @@ class Net(Cell): _x = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32) +_w2 = Tensor(np.ones([8, 16, 3, 3]), dtype=ms.float32) +_w3 = Tensor(np.ones([8, 16, 5, 5]), dtype=ms.float32) _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) @@ -75,6 +77,31 @@ def test_conv2d_model_parallel2(): compile_net(net) +def test_conv2d_model_parallel3(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) + strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) + strategy2 = ((2, 1, 1, 4),) + net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) + compile_net(net) + + +def test_conv2d_model_parallel4(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=32, global_rank=0) + strategy1 = ((2, 2, 1, 4), (2, 2, 1, 1)) + strategy2 = ((2, 2, 1, 4),) + net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) + compile_net(net) + + +def test_conv2d_left_and_right_no_need_to_send(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) + strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) + strategy2 = ((2, 1, 1, 4),) + net = Net(_w2, out_channel=8, kernel_size=3, pad_mode="same", stride=2, strategy1=strategy1, strategy2=strategy2) + with pytest.raises(RuntimeError): + compile_net(net) + + def test_conv2d_output_can_not_divisible_by_strategy(): context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) strategy1 = ((1, 1, 1, 8), (1, 1, 1, 1)) diff --git 
a/tests/ut/python/parallel/test_conv2d_transpose.py b/tests/ut/python/parallel/test_conv2d_transpose.py index e5cc5d12027..46b65a2ea86 100644 --- a/tests/ut/python/parallel/test_conv2d_transpose.py +++ b/tests/ut/python/parallel/test_conv2d_transpose.py @@ -36,8 +36,24 @@ class Net(Cell): return out +class Net2(Cell): + def __init__(self, conv2d_weight, out_channel, kernel_size, pad_mode, stride, + strategy1=None, strategy2=None): + super().__init__() + self.conv2d_transpose = P.Conv2DTranspose(out_channel=out_channel, kernel_size=kernel_size, + pad_mode=pad_mode, stride=stride).shard(strategy1) + self.neg = P.Neg().shard(strategy2) + self.weight = Parameter(conv2d_weight, "w1") + + def construct(self, x, b): + out = self.conv2d_transpose(x, self.weight, (32, 16, 16, 16)) + out = self.neg(out) + return out + + _x = Tensor(np.ones([32, 8, 8, 8]), dtype=ms.float32) _w1 = Tensor(np.ones([8, 16, 2, 2]), dtype=ms.float32) +_w2 = Tensor(np.ones([8, 16, 4, 4]), dtype=ms.float32) _b = Tensor(np.ones([32, 16, 8, 8]), dtype=ms.float32) @@ -64,3 +80,21 @@ def test_conv2d_transpose_model_parallel1(): strategy2 = ((8, 1, 1, 1),) net = Net(_w1, out_channel=8, kernel_size=2, pad_mode="same", stride=1, strategy1=strategy1, strategy2=strategy2) compile_net(net) + + +def test_conv2d_transpose_model_parallel2(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=8, global_rank=0) + strategy1 = ((2, 1, 1, 4), (1, 1, 1, 1)) + strategy2 = ((2, 1, 1, 4),) + net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2, + strategy1=strategy1, strategy2=strategy2) + compile_net(net) + + +def test_conv2d_transpose_model_parallel3(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = ((2, 2, 1, 4), (2, 1, 1, 1)) + strategy2 = ((2, 2, 1, 4),) + net = Net2(_w2, out_channel=8, kernel_size=(4, 4), pad_mode="same", stride=2, + strategy1=strategy1, strategy2=strategy2) + compile_net(net) 
diff --git a/tests/ut/python/parallel/test_dataset_interface.py b/tests/ut/python/parallel/test_dataset_interface.py index fbe8a7b0480..a662ff81567 100644 --- a/tests/ut/python/parallel/test_dataset_interface.py +++ b/tests/ut/python/parallel/test_dataset_interface.py @@ -21,7 +21,7 @@ from mindspore import context from mindspore.common.parameter import Parameter, ParameterTuple from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum -from mindspore.ops import composite as C, functional as F, operations as P +from mindspore.ops import composite as C, operations as P from mindspore.train import Model from mindspore.context import ParallelMode from mindspore.train.loss_scale_manager import DynamicLossScaleManager @@ -114,7 +114,8 @@ class TrainOneStepCell(nn.Cell): weights = self.weights loss = self.network(data) grads = self.grad(self.network, weights)(data, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def loss_scale_manager_sens(strategy1, sens): diff --git a/tests/ut/python/parallel/test_gather_v2_primitive.py b/tests/ut/python/parallel/test_gather_v2_primitive.py index ab6a2a6283b..d307fb7a57e 100644 --- a/tests/ut/python/parallel/test_gather_v2_primitive.py +++ b/tests/ut/python/parallel/test_gather_v2_primitive.py @@ -25,7 +25,6 @@ from mindspore.nn import Dense, Cell from mindspore.nn.loss.loss import LossBase from mindspore.nn.optim import Momentum from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.train import Model from mindspore.context import ParallelMode @@ -121,7 +120,8 @@ class TrainOneStepCell(Cell): sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(data, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def net_trains(criterion, rank): diff --git 
a/tests/ut/python/parallel/test_gatherd.py b/tests/ut/python/parallel/test_gatherd.py index 2ee2a9c7964..abdcdd69391 100644 --- a/tests/ut/python/parallel/test_gatherd.py +++ b/tests/ut/python/parallel/test_gatherd.py @@ -65,6 +65,14 @@ def test_gathernd_dim2(): compile_net(net) +def test_gathernd_dim2_default_batch_parallel(): + context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16, global_rank=0) + strategy1 = None + strategy2 = ((2, 8, 1),) + net = Net(2, _w1, strategy1, strategy2) + compile_net(net) + + def test_gathernd_auto_parallel(): context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=16, global_rank=0) net = Net(1, _w1) diff --git a/tests/ut/python/parallel/test_loss_scale.py b/tests/ut/python/parallel/test_loss_scale.py index c707e1bedf4..ebf10b68141 100644 --- a/tests/ut/python/parallel/test_loss_scale.py +++ b/tests/ut/python/parallel/test_loss_scale.py @@ -105,12 +105,9 @@ class TrainOneStepWithLossScaleCell(nn.Cell): overflow = cond if sens is None: overflow = self.loss_scaling_manager(self.loss_scale, cond) - if overflow: - succ = False - else: - succ = self.optimizer(grads) - ret = (loss, cond, scaling_sens) - return F.depend(ret, succ) + if not overflow: + self.optimizer(grads) + return (loss, cond, scaling_sens) class DatasetLenet(MindData): diff --git a/tests/ut/python/parallel/test_reshape.py b/tests/ut/python/parallel/test_reshape.py index 5db1eb409e2..9f1b81b057b 100644 --- a/tests/ut/python/parallel/test_reshape.py +++ b/tests/ut/python/parallel/test_reshape.py @@ -24,7 +24,6 @@ from mindspore.common.parameter import ParameterTuple from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits from mindspore.nn.optim.momentum import Momentum from mindspore.ops import composite as C -from mindspore.ops import functional as F from mindspore.ops import operations as P from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell from mindspore.parallel import set_algo_parameters @@ -419,7 
+418,8 @@ class TrainOneStepCell(nn.Cell): sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) grads = self.grad(self.network, weights)(data, sens) - return F.depend(loss, self.optimizer(grads)) + self.optimizer(grads) + return loss def reshape_common2(parallel_mode, net): diff --git a/third_party/patch/icu4c/icu4c.patch01 b/third_party/patch/icu4c/icu4c.patch01 index 4b002c024ae..19378ec36cc 100644 --- a/third_party/patch/icu4c/icu4c.patch01 +++ b/third_party/patch/icu4c/icu4c.patch01 @@ -5,8 +5,8 @@ THE_OS="Linux" THE_COMP="the clang or else GNU C++" - RELEASE_CFLAGS='-O3' -+ RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now' ++ RELEASE_CFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s' - RELEASE_CXXFLAGS='-O3' -+ RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now' ++ RELEASE_CXXFLAGS='-fstack-protector -D_FORTIFY_SOURCE=2 -O3 -Wl,-z,relro,-z,now -s' DEBUG_CFLAGS='-g' DEBUG_CXXFLAGS='-g' diff --git a/third_party/patch/sqlite/sqlite.patch001 b/third_party/patch/sqlite/sqlite.patch001 index d40825a1488..bd3210dbaf7 100644 --- a/third_party/patch/sqlite/sqlite.patch001 +++ b/third_party/patch/sqlite/sqlite.patch001 @@ -1,6 +1,6 @@ -diff -Npur sqlite-version-3.32.2/src/expr.c sqlite-version-3.32.2-patched/src/expr.c ---- sqlite-version-3.32.2/src/expr.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/expr.c 2021-04-29 04:06:04.544208700 -0400 +diff -Npur sqlite-version-3.32.2-new/src/expr.c sqlite-version-3.32.2/src/expr.c +--- sqlite-version-3.32.2-new/src/expr.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/expr.c 2021-08-04 11:57:45.029230992 -0400 @@ -3813,6 +3813,7 @@ expr_code_doover: AggInfo *pAggInfo = pExpr->pAggInfo; struct AggInfo_col *pCol; @@ -32,9 +32,9 @@ diff -Npur sqlite-version-3.32.2/src/expr.c sqlite-version-3.32.2-patched/src/ex int i; struct SrcCount *p = pWalker->u.pSrcCount; SrcList *pSrc = p->pSrc; -diff 
-Npur sqlite-version-3.32.2/src/global.c sqlite-version-3.32.2-patched/src/global.c ---- sqlite-version-3.32.2/src/global.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/global.c 2021-04-29 04:06:04.544208700 -0400 +diff -Npur sqlite-version-3.32.2-new/src/global.c sqlite-version-3.32.2/src/global.c +--- sqlite-version-3.32.2-new/src/global.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/global.c 2021-08-04 11:57:45.033230992 -0400 @@ -300,6 +300,11 @@ sqlite3_uint64 sqlite3NProfileCnt = 0; int sqlite3PendingByte = 0x40000000; #endif @@ -47,9 +47,9 @@ diff -Npur sqlite-version-3.32.2/src/global.c sqlite-version-3.32.2-patched/src/ #include "opcodes.h" /* ** Properties of opcodes. The OPFLG_INITIALIZER macro is -diff -Npur sqlite-version-3.32.2/src/resolve.c sqlite-version-3.32.2-patched/src/resolve.c ---- sqlite-version-3.32.2/src/resolve.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/resolve.c 2021-04-29 04:06:04.545208700 -0400 +diff -Npur sqlite-version-3.32.2-new/src/resolve.c sqlite-version-3.32.2/src/resolve.c +--- sqlite-version-3.32.2-new/src/resolve.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/resolve.c 2021-08-04 11:57:45.033230992 -0400 @@ -1715,6 +1715,14 @@ static int resolveSelectStep(Walker *pWa return WRC_Abort; } @@ -65,9 +65,9 @@ diff -Npur sqlite-version-3.32.2/src/resolve.c sqlite-version-3.32.2-patched/src } #endif -diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/select.c ---- sqlite-version-3.32.2/src/select.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/select.c 2021-04-29 04:07:21.458212191 -0400 +diff -Npur sqlite-version-3.32.2-new/src/select.c sqlite-version-3.32.2/src/select.c +--- sqlite-version-3.32.2-new/src/select.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/select.c 2021-08-04 12:27:34.737267443 -0400 @@ -15,20 +15,6 @@ #include "sqliteInt.h" @@ 
-89,7 +89,27 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ ** An instance of the following object is used to record information about ** how to process the DISTINCT keyword, to simplify passing that information ** into the selectInnerLoop() routine. -@@ -4426,11 +4412,14 @@ static int pushDownWhereTerms( +@@ -2717,9 +2703,7 @@ static int multiSelect( + selectOpName(p->op))); + rc = sqlite3Select(pParse, p, &uniondest); + testcase( rc!=SQLITE_OK ); +- /* Query flattening in sqlite3Select() might refill p->pOrderBy. +- ** Be sure to delete p->pOrderBy, therefore, to avoid a memory leak. */ +- sqlite3ExprListDelete(db, p->pOrderBy); ++ assert( p->pOrderBy==0 ); + pDelete = p->pPrior; + p->pPrior = pPrior; + p->pOrderBy = 0; +@@ -4105,7 +4089,7 @@ static int flattenSubquery( + ** We look at every expression in the outer query and every place we see + ** "a" we substitute "x*3" and every place we see "b" we substitute "y+10". + */ +- if( pSub->pOrderBy ){ ++ if( pSub->pOrderBy && (pParent->selFlags & SF_NoopOrderBy)==0 ){ + /* At this point, any non-zero iOrderByCol values indicate that the + ** ORDER BY column expression is identical to the iOrderByCol'th + ** expression returned by SELECT statement pSub. 
Since these values +@@ -4426,11 +4410,14 @@ static int pushDownWhereTerms( ){ Expr *pNew; int nChng = 0; @@ -105,7 +125,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ #endif #ifdef SQLITE_DEBUG -@@ -5553,7 +5542,9 @@ static void explainSimpleCount( +@@ -5553,7 +5540,9 @@ static void explainSimpleCount( static int havingToWhereExprCb(Walker *pWalker, Expr *pExpr){ if( pExpr->op!=TK_AND ){ Select *pS = pWalker->u.pSelect; @@ -116,7 +136,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ sqlite3 *db = pWalker->pParse->db; Expr *pNew = sqlite3Expr(db, TK_INTEGER, "1"); if( pNew ){ -@@ -5766,6 +5757,9 @@ int sqlite3Select( +@@ -5766,6 +5755,9 @@ int sqlite3Select( } if( sqlite3AuthCheck(pParse, SQLITE_SELECT, 0, 0, 0) ) return 1; memset(&sAggInfo, 0, sizeof(sAggInfo)); @@ -126,7 +146,15 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ #if SELECTTRACE_ENABLED SELECTTRACE(1,pParse,p, ("begin processing:\n", pParse->addrExplain)); if( sqlite3SelectTrace & 0x100 ){ -@@ -5804,19 +5798,6 @@ int sqlite3Select( +@@ -5787,6 +5779,7 @@ int sqlite3Select( + sqlite3ExprListDelete(db, p->pOrderBy); + p->pOrderBy = 0; + p->selFlags &= ~SF_Distinct; ++ p->selFlags |= SF_NoopOrderBy; + } + sqlite3SelectPrep(pParse, p, 0); + if( pParse->nErr || db->mallocFailed ){ +@@ -5804,19 +5797,6 @@ int sqlite3Select( generateColumnNames(pParse, p); } @@ -146,7 +174,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ pTabList = p->pSrc; isAgg = (p->selFlags & SF_Aggregate)!=0; memset(&sSort, 0, sizeof(sSort)); -@@ -6144,7 +6125,7 @@ int sqlite3Select( +@@ -6144,7 +6124,7 @@ int sqlite3Select( if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0 #ifndef SQLITE_OMIT_WINDOWFUNC @@ -155,7 +183,7 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ #endif ){ p->selFlags 
&= ~SF_Distinct; -@@ -6791,6 +6772,14 @@ int sqlite3Select( +@@ -6791,6 +6771,14 @@ int sqlite3Select( select_end: sqlite3ExprListDelete(db, pMinMaxOrderBy); sqlite3DbFree(db, sAggInfo.aCol); @@ -170,9 +198,9 @@ diff -Npur sqlite-version-3.32.2/src/select.c sqlite-version-3.32.2-patched/src/ sqlite3DbFree(db, sAggInfo.aFunc); #if SELECTTRACE_ENABLED SELECTTRACE(0x1,pParse,p,("end processing\n")); -diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/src/sqliteInt.h ---- sqlite-version-3.32.2/src/sqliteInt.h 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/sqliteInt.h 2021-04-29 04:06:04.547208700 -0400 +diff -Npur sqlite-version-3.32.2-new/src/sqliteInt.h sqlite-version-3.32.2/src/sqliteInt.h +--- sqlite-version-3.32.2-new/src/sqliteInt.h 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/sqliteInt.h 2021-08-04 12:28:22.825268422 -0400 @@ -976,7 +976,12 @@ typedef INT16_TYPE LogEst; */ #if defined(SQLITE_ENABLE_SELECTTRACE) @@ -211,7 +239,15 @@ diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/s ** The datatype ynVar is a signed integer, either 16-bit or 32-bit. ** Usually it is 16-bits. But if SQLITE_MAX_VARIABLE_NUMBER is greater ** than 32767 we have to make it 32-bit. 16-bit is preferred because -@@ -4546,10 +4566,11 @@ extern const unsigned char sqlite3UpperT +@@ -3105,6 +3125,7 @@ struct Select { + #define SF_WhereBegin 0x0080000 /* Really a WhereBegin() call. 
Debug Only */ + #define SF_WinRewrite 0x0100000 /* Window function rewrite accomplished */ + #define SF_View 0x0200000 /* SELECT statement is a view */ ++#define SF_NoopOrderBy 0x0400000 /* ORDER BY is ignored for this query */ + + /* + ** The results of a SELECT can be distributed in several ways, as defined +@@ -4546,10 +4567,11 @@ extern const unsigned char sqlite3UpperT extern const unsigned char sqlite3CtypeMap[]; extern SQLITE_WSD struct Sqlite3Config sqlite3Config; extern FuncDefHash sqlite3BuiltinFunctions; @@ -224,9 +260,9 @@ diff -Npur sqlite-version-3.32.2/src/sqliteInt.h sqlite-version-3.32.2-patched/s #ifdef VDBE_PROFILE extern sqlite3_uint64 sqlite3NProfileCnt; #endif -diff -Npur sqlite-version-3.32.2/src/test1.c sqlite-version-3.32.2-patched/src/test1.c ---- sqlite-version-3.32.2/src/test1.c 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/src/test1.c 2021-04-29 04:06:04.548208700 -0400 +diff -Npur sqlite-version-3.32.2-new/src/test1.c sqlite-version-3.32.2/src/test1.c +--- sqlite-version-3.32.2-new/src/test1.c 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/src/test1.c 2021-08-04 11:57:45.037230992 -0400 @@ -8164,7 +8164,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp) #endif #endif @@ -236,9 +272,9 @@ diff -Npur sqlite-version-3.32.2/src/test1.c sqlite-version-3.32.2-patched/src/t #endif for(i=0; idb; Select *pSub = 0; /* The subquery */ -diff -Npur sqlite-version-3.32.2/test/having.test sqlite-version-3.32.2-patched/test/having.test ---- sqlite-version-3.32.2/test/having.test 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/test/having.test 2021-04-29 04:08:11.785214475 -0400 +diff -Npur sqlite-version-3.32.2-new/test/having.test sqlite-version-3.32.2/test/having.test +--- sqlite-version-3.32.2-new/test/having.test 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/test/having.test 2021-08-04 11:57:45.041230992 -0400 @@ -154,5 +154,24 @@ do_execsql_test 4.3 { SELECT a, sum(b) FROM 
t3 WHERE nondeter(a) GROUP BY a } {1 4 2 2} - + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 5.0 { @@ -274,11 +310,41 @@ diff -Npur sqlite-version-3.32.2/test/having.test sqlite-version-3.32.2-patched/ + SELECT x FROM t2 WHERE a=2 GROUP BY y HAVING 0 + ) FROM t1; +} {b {}} - + finish_test -diff -Npur sqlite-version-3.32.2/test/window1.test sqlite-version-3.32.2-patched/test/window1.test ---- sqlite-version-3.32.2/test/window1.test 2020-06-04 08:58:43.000000000 -0400 -+++ sqlite-version-3.32.2-patched/test/window1.test 2021-04-29 04:06:04.549208700 -0400 +diff -Npur sqlite-version-3.32.2-new/test/selectA.test sqlite-version-3.32.2/test/selectA.test +--- sqlite-version-3.32.2-new/test/selectA.test 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/test/selectA.test 2021-08-04 12:29:43.021270055 -0400 +@@ -1446,5 +1446,26 @@ do_execsql_test 6.1 { + SELECT * FROM (SELECT a FROM t1 UNION SELECT b FROM t2) WHERE a=a; + } {12345} + ++# 2020-06-15 ticket 8f157e8010b22af0 ++# ++reset_db ++do_execsql_test 7.1 { ++ CREATE TABLE t1(c1); INSERT INTO t1 VALUES(12),(123),(1234),(NULL),('abc'); ++ CREATE TABLE t2(c2); INSERT INTO t2 VALUES(44),(55),(123); ++ CREATE TABLE t3(c3,c4); INSERT INTO t3 VALUES(66,1),(123,2),(77,3); ++ CREATE VIEW t4 AS SELECT c3 FROM t3; ++ CREATE VIEW t5 AS SELECT c3 FROM t3 ORDER BY c4; ++} ++do_execsql_test 7.2 { ++ SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t4) AND c1=123; ++} {123 123} ++do_execsql_test 7.3 { ++ SELECT * FROM t1, t2 WHERE c1=(SELECT 123 INTERSECT SELECT c2 FROM t5) AND c1=123; ++} {123 123} ++do_execsql_test 7.4 { ++ CREATE TABLE a(b); ++ CREATE VIEW c(d) AS SELECT b FROM a ORDER BY b; ++ SELECT sum(d) OVER( PARTITION BY(SELECT 0 FROM c JOIN a WHERE b =(SELECT b INTERSECT SELECT d FROM c) AND b = 123)) FROM c; ++} {} + + finish_test +diff -Npur sqlite-version-3.32.2-new/test/window1.test sqlite-version-3.32.2/test/window1.test +--- 
sqlite-version-3.32.2-new/test/window1.test 2020-06-04 08:58:43.000000000 -0400 ++++ sqlite-version-3.32.2/test/window1.test 2021-08-04 11:57:45.041230992 -0400 @@ -1743,5 +1743,47 @@ do_execsql_test 53.0 { WHERE a.c); } {4 4 4 4} diff --git a/version.txt b/version.txt index 589268e6fed..e21e727f96f 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.3.0 \ No newline at end of file +1.4.0 \ No newline at end of file