diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index 20fb8320baf..05f3f3ec345 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -6,6 +6,7 @@ mindspore/mindspore/lite/src/ops/primitive_c.cc:mindspore::lite::PrimitiveC::Create mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc:mindspore::dataset::CsvOp::CsvParser::InitCsvParser mindspore/mindspore/lite/tools/converter/graphdef_transform.cc:mindspore::lite::GraphDefTransform::Transform +mindspore/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc:mindspore::proposal::Rpn mindspore/mindspore/core/abstract/primitive_infer_map.cc:mindspore::abstract::GetPrimitiveToEvalImplMap mindspore/mindspore/ccsrc/frontend/optimizer/irpass.cc:mindspore::opt::irpass::OptimizeIRPassLib::OptimizeIRPassLib mindspore/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc:mindspore::parallel::GatherV2PInfo::CheckStrategy diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index fbe1e7d767d..ecfd29ce0de 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -11,6 +11,7 @@ set(TEST_CASE_DIR ${TOP_DIR}/mindspore/lite/test/build) set(RUNTIME_DIR ${RUNTIME_PKG_NAME}/runtime) set(RUNTIME_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include) set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/runtime/lib) +set(PROVIDERS_LIB_DIR ${RUNTIME_PKG_NAME}/providers) set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include/dataset) set(TURBO_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/libjpeg-turbo) set(GLOG_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/glog) @@ -18,6 +19,10 @@ set(SECUREC_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/securec) set(MINDSPORE_LITE_LIB_NAME libmindspore-lite) set(MINDSPORE_CORE_LIB_NAME libmindspore_core) set(BENCHMARK_NAME benchmark) +set(MSLITE_NNIE_LIB_NAME libmslite_nnie) +set(MSLITE_PROPOSAL_LIB_NAME libmslite_proposal) +set(MICRO_NNIE_LIB_NAME libmicro_nnie) 
+set(DPICO_ACL_ADAPTER_LIB_NAME libdpico_acl_adapter) set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark) set(MINDSPORE_LITE_TRAIN_LIB_NAME libmindspore-lite-train) @@ -227,11 +232,31 @@ if(PLATFORM_ARM64) COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ops*" EXCLUDE) install(DIRECTORY ${TOP_DIR}/include/c_api/ DESTINATION ${RUNTIME_INC_DIR}/c_api COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") - if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER) + if(NOT TARGET_MIX210) __install_micro_wrapper() endif() if(MSLITE_ENABLE_TOOLS) - install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(NOT BUILD_FIRST) + install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(TARGET_HIMIX) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3559A") + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES + ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() + elseif(TARGET_MIX210) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403") + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/dpico/${DPICO_ACL_ADAPTER_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() + endif() + endif() if(SUPPORT_TRAIN) install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) @@ -310,7 +335,27 @@ elseif(PLATFORM_ARM32) COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") __install_micro_wrapper() if(MSLITE_ENABLE_TOOLS AND NOT TARGET_OHOS_LITE) - install(TARGETS ${BENCHMARK_NAME} RUNTIME 
DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(NOT BUILD_FIRST) + install(TARGETS ${BENCHMARK_NAME} RUNTIME + DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(TARGET_HIMIX) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D" OR ${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3519A") + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES + ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D") + install(FILES + ${TOP_DIR}/mindspore/lite/tools/benchmark/nnie/third_patry/${MICRO_NNIE_LIB_NAME}.so + DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() + endif() + endif() + endif() if(SUPPORT_TRAIN) install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) @@ -516,7 +561,10 @@ else() __install_micro_codegen() endif() if(MSLITE_ENABLE_TOOLS) - install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) + if(NOT BUILD_FIRST) + install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} + COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() if(SUPPORT_TRAIN) install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/optimize/CMakeLists.txt b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/optimize/CMakeLists.txt index 67c775ecc97..406274030ad 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/optimize/CMakeLists.txt +++ 
b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/optimize/CMakeLists.txt @@ -35,13 +35,10 @@ if(NOT PLATFORM_ARM32 AND NOT TARGET_HIMIX AND NOT MACHINE_LINUX_ARM64) list(APPEND SDOT_FILES ${SDOT_SRC}) add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES}) add_dependencies(nnacl_optimize_mid fbs_src) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") -endif() - -if(TARGET_MIX210) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16") + if(NOT TARGET_MIX210) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") + endif() endif() if(MSLITE_ENABLE_FP16) diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index 87c213d798c..1868047a138 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -10,7 +10,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_link_option.cmake) set(MSLITE_GPU_BACKEND "" CACHE STRING "enable gpu backend, \ opencl only support arm64 and x86_64 , tensorrt only support x86_64, opencl/cuda/tensorrt/off") set(MSLITE_REGISTRY_DEVICE "off" CACHE STRING "Compile Mindspore Lite that supports specific devices, \ - currently supported devices: Hi3516D/Hi3519A/Hi3559A/sd3403") + currently supported devices: Hi3516D/Hi3519A/Hi3559A/SD3403") option(MSLITE_ENABLE_NPU "enable npu, only arm64 or arm32 support" off) option(MSLITE_ENABLE_TRAIN "enable train" on) option(MSLITE_ENABLE_SSE "enable SSE instruction set, only x86_64 support" off) @@ -53,13 +53,6 @@ if(DEFINED ENV{MSLITE_GPU_BACKEND}) endif() if(DEFINED ENV{MSLITE_REGISTRY_DEVICE}) set(MSLITE_REGISTRY_DEVICE $ENV{MSLITE_REGISTRY_DEVICE}) - if(MSLITE_REGISTRY_DEVICE STREQUAL sd3403) - if(NOT PLATFORM_ARM64) - set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on) - else() - set(MSLITE_ENABLE_DPICO_ACL_ADAPTER on) - 
endif() - endif() endif() if(DEFINED ENV{MSLITE_ENABLE_NPU}) set(MSLITE_ENABLE_NPU $ENV{MSLITE_ENABLE_NPU}) @@ -190,6 +183,9 @@ elseif(PLATFORM_ARM32) elseif(WIN32) set(MSLITE_GPU_BACKEND "off") else() + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403") + set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on) + endif() if(MSLITE_GPU_BACKEND STREQUAL "") set(MSLITE_GPU_BACKEND "off") endif() @@ -379,10 +375,6 @@ else() set(RUNTIME_COMPONENT_NAME "linux-x64") endif() -if(MSLITE_ENABLE_DPICO_ACL_ADAPTER) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark/dpico) -endif() - string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(CORE_DIR ${TOP_DIR}/mindspore/core) set(CCSRC_DIR ${TOP_DIR}/mindspore/ccsrc) @@ -567,16 +559,16 @@ if(BUILD_MINDDATA STREQUAL "lite_cv") add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata) endif() -if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/ops) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) - add_subdirectory(${CCSRC_DIR}/backend/kernel_compiler/cpu/nnacl build) -endif() +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/ops) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder) +add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src) +add_subdirectory(${CCSRC_DIR}/backend/kernel_compiler/cpu/nnacl build) if(MSLITE_ENABLE_TOOLS) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) + if(NOT BUILD_FIRST) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark) + endif() if(SUPPORT_TRAIN) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train) endif() diff --git a/mindspore/lite/build_lite.sh b/mindspore/lite/build_lite.sh index 2cbdd47f722..d1c8609d9a5 100755 --- a/mindspore/lite/build_lite.sh +++ b/mindspore/lite/build_lite.sh @@ -25,6 +25,15 @@ checkndk() { fi } +check_Hi35xx() { + if [[ "X${HI35XX_SDK_PATH}" == "X" ]]; then + echo "error: to compile the runtime package of Hi35XX, you 
need to set HI35XX_SDK_PATH to declare the path of Hi35XX sdk." + exit 1 + else + cp -r ${HI35XX_SDK_PATH}/third_patry ${BASEPATH}/mindspore/lite/tools/benchmark/nnie/ + fi +} + get_version() { VERSION_MAJOR=$(grep "const int ms_version_major =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]") VERSION_MINOR=$(grep "const int ms_version_minor =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]") @@ -142,16 +151,19 @@ build_lite() { CMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake fi - BRANCH_NAME=nnie_3516_master_dev + BRANCH_NAME=nnie_3516_master if [[ ("${MSLITE_REGISTRY_DEVICE}" == "Hi3516D" || "${TOOLCHAIN_NAME}" == "himix200") && "${local_lite_platform}" == "arm32" ]]; then TOOLCHAIN_NAME="himix200" MSLITE_REGISTRY_DEVICE=Hi3516D + check_Hi35xx elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3559A" && "${local_lite_platform}" == "arm64" ]]; then TOOLCHAIN_NAME="himix100" - elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" && "${local_lite_platform}" == "arm64" ]]; then + check_Hi35xx + elif [[ "${MSLITE_REGISTRY_DEVICE}" == "SD3403" && "${local_lite_platform}" == "arm64" ]]; then TOOLCHAIN_NAME="mix210" elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3519A" && "${local_lite_platform}" == "arm32" ]]; then TOOLCHAIN_NAME="himix200" + check_Hi35xx elif [[ ("${MSLITE_ENABLE_NNIE}" == "on" || "${MSLITE_REGISTRY_DEVICE}" == "Hi3516D") && "${local_lite_platform}" == "x86_64" ]]; then MSLITE_REGISTRY_DEVICE=Hi3516D fi @@ -190,13 +202,11 @@ build_lite() { LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=himix100" LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off" LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off" - LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off" elif [[ "${TOOLCHAIN_NAME}" == "mix210" ]]; then CMAKE_TOOLCHAIN_FILE=${BASEPATH}/mindspore/lite/cmake/mix210.toolchain.cmake LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=mix210" 
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off" - LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off" - LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off" + LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=on -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off" else if [[ "${machine}" == "aarch64" ]]; then LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMACHINE_LINUX_ARM64=on" @@ -228,26 +238,25 @@ build_lite() { if [[ "X$CMAKE_TOOLCHAIN_FILE" != "X" ]]; then LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}" fi - if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then + if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]]; then LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}" fi if [[ "${local_lite_platform}" == "arm64" || "${local_lite_platform}" == "arm32" ]]; then echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so" fi - echo "cmake ${LITE_CMAKE_ARGS} ${BASEPATH}/mindspore/lite" - if [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then - export MSLITE_REGISTRY_DEVICE="" - cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite" - export MSLITE_REGISTRY_DEVICE=sd3403 - else - cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite" - fi + + echo "cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON ${BASEPATH}/mindspore/lite" + cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON "${BASEPATH}/mindspore/lite" if [[ "$(uname)" == "Darwin" && "${local_lite_platform}" != "x86_64" ]]; then xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk iphoneos -quiet elif [[ "$(uname)" == "Darwin" && "${local_lite_platform}" == "x86_64" ]]; then xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk 
iphonesimulator -quiet else + make -j$THREAD_NUM && make install + cp -r ${BASEPATH}/output/tmp/mindspore*/runtime ${BASEPATH}/mindspore/lite/tools/benchmark + cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=off --target benchmark "${BASEPATH}/mindspore/lite" + make -j$THREAD_NUM && make install && make package if [[ "${local_lite_platform}" == "x86_64" ]]; then if [ "${JAVA_HOME}" ]; then @@ -288,37 +297,16 @@ build_lite() { fi [ -n "${BASEPATH}" ] && rm -rf ${BASEPATH}/output/tmp/ - if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then + if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "SD3403" ]]; then local compile_nnie_script=${BASEPATH}/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh cd ${BASEPATH}/../ if [[ "${local_lite_platform}" == "x86_64" ]]; then bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -j $THREAD_NUM - else - bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -t ${TOOLCHAIN_NAME} -d ${MSLITE_REGISTRY_DEVICE} -j $THREAD_NUM fi if [[ $? -ne 0 ]]; then echo "compile ${local_lite_platform} for nnie failed." exit 1 fi - elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then - LITE_CMAKE_ARGS=$(echo ${LITE_CMAKE_ARGS} | sed -e "s/MSLITE_ENABLE_TOOLS=off/MSLITE_ENABLE_TOOLS=on/g") - LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}" - cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite" - cd ${BASEPATH} - compile_dpico_script=${BASEPATH}/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh - bash ${compile_dpico_script} -t prepare_third_party - if [[ $? -ne 0 ]]; then - echo "prepare for dpico failed." - exit 1 - fi - cd ${BASEPATH}/mindspore/lite/build - make -j$THREAD_NUM - cd ${BASEPATH} - sh ${compile_dpico_script} - if [[ $? -ne 0 ]]; then - echo "second compile arm64 for dpico failed." 
- exit 1 - fi fi echo "---------------- mindspore lite: build success ----------------" fi diff --git a/mindspore/lite/cmake/compile_link_option.cmake b/mindspore/lite/cmake/compile_link_option.cmake index 80d85fd6c11..00489c7c23e 100644 --- a/mindspore/lite/cmake/compile_link_option.cmake +++ b/mindspore/lite/cmake/compile_link_option.cmake @@ -21,6 +21,8 @@ else() if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") string(REPLACE "-O2" "-O0" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "-O2" "-O0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") endif() set(CMAKE_SHARED_LINKER_FLAGS "${SECURE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${SECURE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}") diff --git a/mindspore/lite/cmake/merge.cmake b/mindspore/lite/cmake/merge.cmake index 5dce635c956..7241e43adaa 100644 --- a/mindspore/lite/cmake/merge.cmake +++ b/mindspore/lite/cmake/merge.cmake @@ -3,6 +3,9 @@ function(merge_parser CL_SRC_DIR OUT_FILE_NAME) if(NOT EXISTS ${CL_SRC_DIR}) return() endif() + if(DEFINED BUILD_FIRST AND NOT BUILD_FIRST) + return() + endif() file(GLOB_RECURSE CL_LIST ${CL_SRC_DIR}/*.cc) list(SORT CL_LIST) set(out_file ${OUT_FILE_NAME}) diff --git a/mindspore/lite/cmake/mix210.toolchain.cmake b/mindspore/lite/cmake/mix210.toolchain.cmake index c73e576cba1..125f174f46c 100644 --- a/mindspore/lite/cmake/mix210.toolchain.cmake +++ b/mindspore/lite/cmake/mix210.toolchain.cmake @@ -19,6 +19,9 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) #set(CMAKE_CXX_FLAGS "-march= -mfloat-abi=softfp -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16") + # cache flags set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ 
flags") diff --git a/mindspore/lite/micro/cmake/package_cmsis.cmake b/mindspore/lite/micro/cmake/package_cmsis.cmake deleted file mode 100644 index 283b64abec1..00000000000 --- a/mindspore/lite/micro/cmake/package_cmsis.cmake +++ /dev/null @@ -1,20 +0,0 @@ -set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis) -message("build cmsis kernels") -include_directories(${CMSIS_DIR}/CMSIS/Core/Include) -include_directories(${CMSIS_DIR}/CMSIS/DSP/Include) -include_directories(${CMSIS_DIR}/CMSIS/NN/Include) - -file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c) - -file(GLOB CMSIS_OPS - ${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c - ${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c - ) - diff --git a/mindspore/lite/micro/coder/wrapper/CMakeLists.txt b/mindspore/lite/micro/coder/wrapper/CMakeLists.txt index f354192d2ab..4cc6f817558 100644 --- a/mindspore/lite/micro/coder/wrapper/CMakeLists.txt +++ b/mindspore/lite/micro/coder/wrapper/CMakeLists.txt @@ -16,7 +16,24 @@ if(PLATFORM_ARM64) elseif(PLATFORM_ARM32) add_compile_definitions(ENABLE_ARM32) else() - include(${MICRO_DIR}/cmake/package_cmsis.cmake) + set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis) + message("build cmsis kernels") + include_directories(${CMSIS_DIR}/CMSIS/Core/Include) + include_directories(${CMSIS_DIR}/CMSIS/DSP/Include) + include_directories(${CMSIS_DIR}/CMSIS/NN/Include) + + file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c) + file(GLOB CMSIS_OPS + ${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c 
+ ${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c + ${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c + ) add_library(cmsis_nn STATIC ${CMSIS_OPS}) endif() diff --git a/mindspore/lite/test/st/scripts/dpico/run_converter_3403.sh b/mindspore/lite/test/st/scripts/dpico/run_converter_3403.sh index 1fbad940f33..af14f830867 100644 --- a/mindspore/lite/test/st/scripts/dpico/run_converter_3403.sh +++ b/mindspore/lite/test/st/scripts/dpico/run_converter_3403.sh @@ -2,7 +2,7 @@ # Build x86 tar.gz file for dpico function Run_Build_x86() { - export MSLITE_REGISTRY_DEVICE=sd3403 + export MSLITE_REGISTRY_DEVICE=SD3403 unset JAVA_HOME bash ${mindspore_top_dir}/build.sh -I x86_64 -j 80 if [ $? = 0 ]; then @@ -19,7 +19,7 @@ function Run_Build_x86() { # Build arm32 tar.gz file for dpico function Run_Build_arm64() { - export MSLITE_REGISTRY_DEVICE=sd3403 + export MSLITE_REGISTRY_DEVICE=SD3403 unset JAVA_HOME bash ${mindspore_top_dir}/build.sh -I arm64 -j 80 if [ $? 
= 0 ]; then diff --git a/mindspore/lite/tools/benchmark/CMakeLists.txt b/mindspore/lite/tools/benchmark/CMakeLists.txt index 7b5004d14a0..4f234fc1b59 100644 --- a/mindspore/lite/tools/benchmark/CMakeLists.txt +++ b/mindspore/lite/tools/benchmark/CMakeLists.txt @@ -1,4 +1,48 @@ -# add shared link library +cmake_minimum_required(VERSION 3.14) +project(Lite_benchmark) + +set(BENCHMARK_LINK_LIB mindspore-lite) +if(TARGET_HIMIX) + add_subdirectory(nnie) + add_subdirectory(nnie_proposal) + set(CMAKE_SKIP_BUILD_RPATH on) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} pthread + mslite_proposal mslite_nnie dl nnie mpi VoiceEngine upvqe dnvqe securec) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk/lib) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json) + elseif(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3519A") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk/lib) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json) + elseif(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3559A") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk/lib) + add_compile_definitions(BENCHMARK_CLIP_JSON) + endif() +elseif(TARGET_MIX210) + set(CMAKE_SKIP_BUILD_RPATH on) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json pthread + dpico_acl_adapter svp_acl dl securec protobuf-c stdc++) + if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403") + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/dpico) + message("34xx_sdk_SOURCE_DIR:${34xx_sdk_SOURCE_DIR}.") + include_directories(${34xx_sdk_SOURCE_DIR}/include) + include_directories(${34xx_sdk_SOURCE_DIR}) + link_directories(${34xx_sdk_SOURCE_DIR}/lib) + endif() +else() + 
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json) + if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64) + if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static") + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} c++_shared) + endif() + elseif(NOT MSVC) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} pthread) + endif() +endif() + include_directories(${CCSRC_DIR}/backend/kernel_compiler/cpu) set(COMMON_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc @@ -7,67 +51,32 @@ set(COMMON_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc ${CMAKE_CURRENT_SOURCE_DIR}/../../../ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_common.c ) -if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER) - if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL) - set(COMMON_SRC ${COMMON_SRC} ../common/opengl_util.cc) - endif() - add_executable(benchmark - ${CMAKE_CURRENT_SOURCE_DIR}/main.cc - ${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc - ${COMMON_SRC}) +if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL) + set(COMMON_SRC ${COMMON_SRC} ../common/opengl_util.cc) +endif() - add_dependencies(benchmark fbs_src) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include/third_party) +link_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/lib) - if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL) - list(APPEND opengl_lib EGL GLESv3) - target_link_libraries(benchmark ${opengl_lib}) - endif() +if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL) + list(APPEND opengl_lib EGL GLESv3) + set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} ${opengl_lib}) +endif() - 
if((PLATFORM_ARM32 OR PLATFORM_ARM64) AND NOT TARGET_HIMIX - AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64 AND NOT TARGET_MIX210) - if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static") - target_link_libraries(benchmark mindspore-lite mindspore::json c++_shared) - else() - target_link_libraries(benchmark mindspore-lite mindspore::json) - endif() - elseif(MSVC) - target_link_libraries(benchmark mindspore-lite mindspore::json) - else() - target_link_libraries(benchmark mindspore-lite mindspore::json pthread) - endif() -else() - __download_pkg(34xx_sdk - http://mindspore-repo.csi.rnd.huawei.com/mindspore/enterprise/dpico/34xx_sdk.tar.gz - f64a9129615b3b41b63debe17c6785af) +add_executable(benchmark + ${CMAKE_CURRENT_SOURCE_DIR}/main.cc + ${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc + ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc + ${COMMON_SRC}) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include) - include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include/third_party) +add_dependencies(benchmark fbs_src) - include_directories(${34xx_sdk_SOURCE_DIR}/include) - include_directories(${34xx_sdk_SOURCE_DIR}) - link_directories(${34xx_sdk_SOURCE_DIR}/lib) - link_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/lib) - set(CMAKE_SKIP_BUILD_RPATH on) - - add_executable(benchmark - ${CMAKE_CURRENT_SOURCE_DIR}/main.cc - ${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc - ${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc - 
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc - ${COMMON_SRC}) - - add_dependencies(benchmark fbs_src) - - target_link_libraries(benchmark mindspore-lite mindspore::json pthread - dpico_acl_adapter dl svp_acl securec protobuf-c stdc++) -endif() \ No newline at end of file +target_link_libraries(benchmark ${BENCHMARK_LINK_LIB}) diff --git a/mindspore/lite/tools/benchmark/benchmark.cc b/mindspore/lite/tools/benchmark/benchmark.cc index e66050adc48..4d05acd6efd 100644 --- a/mindspore/lite/tools/benchmark/benchmark.cc +++ b/mindspore/lite/tools/benchmark/benchmark.cc @@ -35,6 +35,12 @@ #include #include #endif +#ifdef SUPPORT_NNIE +#include "include/hi_common.h" +#include "include/hi_comm_vb.h" +#include "include/mpi_sys.h" +#include "include/mpi_vb.h" +#endif namespace mindspore { namespace lite { @@ -344,18 +350,48 @@ int Benchmark::InitContext(const std::shared_ptr &context) { return RET_OK; } +tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector &node_shape) { + std::vector match_tensors; + std::vector shape_vector; + (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector), + [](const size_t &value) { return static_cast(value); }); + auto tensors = session_->GetOutputs(); + for (auto &out_tensor_pair : tensors) { + if (out_tensor_pair.second->shape() == shape_vector) { + match_tensors.emplace_back(out_tensor_pair.second); + } + } + if (match_tensors.empty() || match_tensors.size() != 1) { + MS_LOG(ERROR) << "get tensor by node shape failed"; + return nullptr; + } + return match_tensors.front(); +} + +tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name, + const std::vector &dims) { + tensor::MSTensor *tensor = session_->GetOutputByTensorName(node_or_tensor_name); + if (tensor == nullptr) { + MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name + << " or node has more than one output tensor, switch to GetOutputByTensorName"; + auto tensors = 
session_->GetOutputsByNodeName(node_or_tensor_name); + if (!tensors.empty() && tensors.size() == 1) { + tensor = tensors.front(); + } else { + return GetTensorByNodeShape(dims); + } + } + return tensor; +} + int Benchmark::CompareOutput() { std::cout << "================ Comparing Output data ================" << std::endl; float total_bias = 0; int total_size = 0; - // check the output tensor name. - if (this->benchmark_tensor_names_ != session_->GetOutputTensorNames()) { - MS_LOG(ERROR) << "The output tensor name is wrong."; - return RET_ERROR; - } + for (const auto &calib_tensor : benchmark_data_) { std::string tensor_name = calib_tensor.first; - tensor::MSTensor *tensor = session_->GetOutputByTensorName(tensor_name); + tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, calib_tensor.second->shape); if (tensor == nullptr) { MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name; return RET_ERROR; @@ -940,7 +976,7 @@ std::string DumpMSTensor(tensor::MSTensor *tensor) { } return oss.str(); } - +#ifndef BENCHMARK_CLIP_JSON std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string &op_name, const std::string &file_type, const size_t &idx) { std::string file_name = op_name; @@ -962,6 +998,7 @@ std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string & } return file_name; } +#endif } // namespace int Benchmark::InitPrintTensorDataCallbackParameter() { @@ -990,6 +1027,7 @@ int Benchmark::InitPrintTensorDataCallbackParameter() { return RET_OK; } int Benchmark::InitDumpTensorDataCallbackParameter() { +#ifndef BENCHMARK_CLIP_JSON // before callback before_call_back_ = [&](const std::vector &before_inputs, const std::vector &before_outputs, @@ -1035,6 +1073,7 @@ int Benchmark::InitDumpTensorDataCallbackParameter() { } return true; }; +#endif return RET_OK; } diff --git a/mindspore/lite/tools/benchmark/benchmark.h b/mindspore/lite/tools/benchmark/benchmark.h index 5746ceb2c21..9ef25e0d4ec 100644 --- 
a/mindspore/lite/tools/benchmark/benchmark.h +++ b/mindspore/lite/tools/benchmark/benchmark.h @@ -29,7 +29,9 @@ #include #include #include +#ifndef BENCHMARK_CLIP_JSON #include +#endif #include "tools/benchmark/benchmark_base.h" #include "include/model.h" #include "tools/common/flag_parser.h" @@ -96,6 +98,8 @@ class MS_API Benchmark : public BenchmarkBase { int CompareDataGetTotalCosineDistanceAndSize(const std::string &name, tensor::MSTensor *tensor, float *total_cosine_distance, int *total_size); + tensor::MSTensor *GetTensorByNodeShape(const std::vector &node_shape); + tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector &dims); private: #ifdef ENABLE_OPENGL_TEXTURE diff --git a/mindspore/lite/tools/benchmark/benchmark_base.cc b/mindspore/lite/tools/benchmark/benchmark_base.cc index 55750f0d975..34045f61ff5 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.cc +++ b/mindspore/lite/tools/benchmark/benchmark_base.cc @@ -34,6 +34,12 @@ #include #include #endif +#ifdef SUPPORT_NNIE +#include "include/hi_common.h" +#include "include/hi_comm_vb.h" +#include "include/mpi_sys.h" +#include "include/mpi_vb.h" +#endif namespace mindspore { namespace lite { @@ -57,6 +63,10 @@ constexpr int16_t kInputDataInt8Min = -127; constexpr int16_t kInputDataInt8Max = 127; constexpr int16_t kInputDataUint8Min = 0; constexpr int16_t kInputDataUint8Max = 254; +#ifdef SUPPORT_NNIE +constexpr int kNNIEMaxPoolCnt = 2; +constexpr int kNNIEBlkSize = 768 * 576 * 2; +#endif const std::unordered_map kTypeIdMap{ {kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"}, @@ -294,6 +304,7 @@ int BenchmarkBase::CheckDeviceTypeValid() { } int BenchmarkBase::InitDumpConfigFromJson(char *path) { +#ifndef BENCHMARK_CLIP_JSON auto real_path = RealPath(path); std::ifstream ifs(real_path); if (!ifs.good()) { @@ -354,7 +365,7 @@ int BenchmarkBase::InitDumpConfigFromJson(char *path) { MS_LOG(ERROR) << "create data 
output directory failed."; return RET_ERROR; } - +#endif return RET_OK; } @@ -623,6 +634,72 @@ int BenchmarkBase::PrintPerfResult(const std::vector &title, } #endif +#ifdef SUPPORT_NNIE +// Shuts down the MPI SYS and VB modules, then brings them back up with a VB pool configuration. +// If HI_MPI_VB_Exit fails, only HI_MPI_SYS_Init is re-run and the VB configuration step is skipped. +// Returns RET_OK on success and RET_ERROR when a mandatory MPI call fails. +int SvpSysInit() { + HI_S32 ret = HI_SUCCESS; + VB_CONFIG_S struVbConf; + ret = HI_MPI_SYS_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!"; + return RET_ERROR; + } + + ret = HI_MPI_VB_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!"; + ret = HI_MPI_SYS_Init(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!"; + return RET_ERROR; + } + return RET_OK; + } + + // Only common pool 1 is filled in (one block of size kNNIEBlkSize); every other field stays zeroed. + memset(&struVbConf, 0, sizeof(VB_CONFIG_S)); + struVbConf.u32MaxPoolCnt = kNNIEMaxPoolCnt; + struVbConf.astCommPool[1].u64BlkSize = kNNIEBlkSize; + struVbConf.astCommPool[1].u32BlkCnt = 1; + + ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!"; + return RET_ERROR; + } + + ret = HI_MPI_VB_Init(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!"; + return RET_ERROR; + } + + ret = HI_MPI_SYS_Init(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!"; + return RET_ERROR; + } + + return RET_OK; +} + +// Shuts down the MPI SYS module and then the VB module. +// A SYS exit failure returns RET_ERROR; a VB exit failure is only logged as a warning and still returns RET_OK. +int SvpSysExit() { + HI_S32 ret = HI_SUCCESS; + + ret = HI_MPI_SYS_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!"; + return RET_ERROR; + } + + ret = HI_MPI_VB_Exit(); + if (HI_SUCCESS != ret) { + MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!"; + return RET_OK; + } + + return RET_OK; +} +#endif + BenchmarkBase::~BenchmarkBase() { for (auto &iter : this->benchmark_data_) { iter.second->shape.clear(); @@ -631,6 +708,9 @@ BenchmarkBase::~BenchmarkBase() { iter.second = nullptr; } this->benchmark_data_.clear(); +#ifdef SUPPORT_NNIE + SvpSysExit(); +#endif } } // namespace lite } // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/benchmark_base.h
b/mindspore/lite/tools/benchmark/benchmark_base.h index 617ef55852f..4e7de4bf06f 100644 --- a/mindspore/lite/tools/benchmark/benchmark_base.h +++ b/mindspore/lite/tools/benchmark/benchmark_base.h @@ -29,7 +29,9 @@ #include #include #include +#ifndef BENCHMARK_CLIP_JSON #include +#endif #include "include/model.h" #include "include/api/types.h" #include "include/api/format.h" @@ -419,9 +421,10 @@ class MS_API BenchmarkBase { float op_cost_total_ = 0.0f; std::map> op_times_by_type_; std::map> op_times_by_name_; - +#ifndef BENCHMARK_CLIP_JSON // dump data nlohmann::json dump_cfg_json_; +#endif std::string dump_file_output_dir_; #ifdef ENABLE_ARM64 int perf_fd = 0; @@ -432,6 +435,10 @@ class MS_API BenchmarkBase { #endif std::mt19937 random_engine_; }; +#ifdef SUPPORT_NNIE +int SvpSysInit(); +int SvpSysExit(); +#endif } // namespace mindspore::lite #endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_ diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc index a0656bdadb0..cc6cefd52d1 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.cc +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.cc @@ -36,6 +36,12 @@ #include #include #endif +#ifdef SUPPORT_NNIE +#include "include/hi_common.h" +#include "include/hi_comm_vb.h" +#include "include/mpi_sys.h" +#include "include/mpi_vb.h" +#endif namespace mindspore { constexpr size_t kDataToStringMaxNum = 40; @@ -1081,7 +1087,7 @@ std::string DumpMSTensor(mindspore::MSTensor *tensor) { } return oss.str(); } - +#ifndef BENCHMARK_CLIP_JSON std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name, const std::string &file_type, const size_t &idx) { std::string file_name = op_name; @@ -1105,6 +1111,7 @@ std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::strin file_name += +".bin"; return file_name; } +#endif } // namespace int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() { @@ 
-1132,6 +1139,7 @@ int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() { return RET_OK; } int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() { +#ifndef BENCHMARK_CLIP_JSON // before callback ms_before_call_back_ = [&](const std::vector &before_inputs, const std::vector &before_outputs, @@ -1177,6 +1185,7 @@ int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() { } return true; }; +#endif return RET_OK; } diff --git a/mindspore/lite/tools/benchmark/benchmark_unified_api.h b/mindspore/lite/tools/benchmark/benchmark_unified_api.h index 289015acfe6..fa7ba3d2ac7 100644 --- a/mindspore/lite/tools/benchmark/benchmark_unified_api.h +++ b/mindspore/lite/tools/benchmark/benchmark_unified_api.h @@ -29,7 +29,9 @@ #include #include #include +#ifndef BENCHMARK_CLIP_JSON #include +#endif #include "tools/benchmark/benchmark_base.h" #include "include/model.h" #include "tools/common/flag_parser.h" diff --git a/mindspore/lite/tools/benchmark/dpico/CMakeLists.txt b/mindspore/lite/tools/benchmark/dpico/CMakeLists.txt index e4ba71f5cfe..3eacdd1f558 100644 --- a/mindspore/lite/tools/benchmark/dpico/CMakeLists.txt +++ b/mindspore/lite/tools/benchmark/dpico/CMakeLists.txt @@ -7,9 +7,9 @@ __download_pkg(34xx_sdk include_directories(${CMAKE_CURRENT_SOURCE_DIR}) include_directories(${34xx_sdk_SOURCE_DIR}) include_directories(${34xx_sdk_SOURCE_DIR}/include) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include/third_party) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party) link_directories(${34xx_sdk_SOURCE_DIR}/lib) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3) diff --git 
a/mindspore/lite/tools/benchmark/dpico/src/custom_infer.cc b/mindspore/lite/tools/benchmark/dpico/src/custom_infer.cc index 52bb1d22e34..48db5496ea6 100644 --- a/mindspore/lite/tools/benchmark/dpico/src/custom_infer.cc +++ b/mindspore/lite/tools/benchmark/dpico/src/custom_infer.cc @@ -27,7 +27,7 @@ using mindspore::schema::PrimitiveType_Custom; namespace mindspore { namespace dpico { namespace { -constexpr int kBaseValue = 10; +constexpr int kDecimal = 10; constexpr auto kInputShape = "inputs_shape"; constexpr auto kOutputShape = "outputs_shape"; constexpr auto kOutputsFormat = "outputs_format"; @@ -66,13 +66,13 @@ Status GetCustomShape(const std::map &attrs, const std char *save_ptr = nullptr; res = strtok_r(attr.data(), delims, &save_ptr); while (res != nullptr) { - int64_t ndims = strtol(res, &res, kBaseValue); + int64_t ndims = strtol(res, &res, kDecimal); int j = 0; std::vector shape; shape.resize(ndims); for (; j < ndims; j++) { res = strtok_r(NULL, delims, &save_ptr); - shape[j] = static_cast(strtol(res, &res, kBaseValue)); + shape[j] = static_cast(strtol(res, &res, kDecimal)); } shapes->push_back(shape); diff --git a/mindspore/lite/tools/benchmark/nnie/CMakeLists.txt b/mindspore/lite/tools/benchmark/nnie/CMakeLists.txt new file mode 100644 index 00000000000..1c5d20713f6 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.14) +project(NNIE_Custom) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) +# Select the SDK headers and libraries matching the target device. The variable is quoted so the +# comparison stays well-formed when MSLITE_REGISTRY_DEVICE is unset or empty. +if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3516D") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/lib) +elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3519A") + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/lib) +elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3559A") +
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/) + link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/lib) +endif() +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party) + + +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3) + +set(MSLITE_NNIE_LINK_LIB nnie mpi VoiceEngine upvqe dnvqe) + +add_library(mslite_nnie SHARED + ${COMMON_SRC3}) +target_link_libraries(mslite_nnie ${MSLITE_NNIE_LINK_LIB} securec) + +if(DEFINED HIMIX_STRIP) + set(NDK_STRIP ${HIMIX_STRIP}) +else() + set(NDK_STRIP "arm-himix200-linux-strip") +endif() + +# Strip the built library in Release builds; $<TARGET_FILE:...> follows the target's real output path. +if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") + add_custom_command(TARGET mslite_nnie POST_BUILD COMMAND ${NDK_STRIP} + $<TARGET_FILE:mslite_nnie> VERBATIM) +endif() diff --git a/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.cc b/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.cc new file mode 100644 index 00000000000..f3630c02b96 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.cc @@ -0,0 +1,178 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "src/custom_fp32.h" +#include +#include +#include "schema/model_generated.h" +#include "include/registry/register_kernel.h" +#include "include/errorcode.h" +#include "src/nnie_manager.h" +#include "src/nnie_print.h" +#include "src/nnie_cfg_parser.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore { +namespace nnie { +bool CustomCPUKernel::load_model_ = false; + +int CustomCPUKernel::run_seg_ = 0; +bool CustomCPUKernel::roi_used_ = false; +// Loads the model through NNIEManager on first use (the last input tensor is passed to Init) and caches +// the current output shapes. Returns RET_ERROR when the configuration step fails or there are no inputs. +int CustomCPUKernel::Prepare() { + if (!load_model_) { + // The last input is indexed below, so an empty input list must be rejected first. + if (inputs_.empty()) { + LOGE("Custom kernel has no inputs"); + return RET_ERROR; + } + Flags flags; + flags.Init(); + if (nnie::NNIEManager::GetInstance()->CfgInit(flags.max_roi_num_, flags.time_step_, flags.core_ids_) != RET_OK) { + LOGE("Nnie init cfg fail"); + return RET_ERROR; + } + + // NOTE(review): a failed Init is reported as RET_OK here and load_model_ stays false, so Execute() + // fails later; this looks deliberate, confirm before changing. + if (nnie::NNIEManager::GetInstance()->Init(reinterpret_cast(inputs_[inputs_.size() - 1].MutableData()), + static_cast(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) { + // LOGW("Load WK Model Fail"); + return RET_OK; + } + load_model_ = true; + } + outputs_shapes_.resize(outputs_.size()); + for (size_t i = 0; i < outputs_.size(); i++) { + outputs_shapes_[i] = outputs_[i].Shape(); + } + return RET_OK; +} + +// Releases a loaded model, if any, and runs Prepare() again. +int CustomCPUKernel::ReSize() { + if (load_model_) { + nnie::NNIEManager::GetInstance()->Release(); + load_model_ = false; + } + + return Prepare(); +} + +// Fills the inputs and runs the segment selected by seg_id_. Requires a successfully loaded model. +int CustomCPUKernel::Execute() { + if (!load_model_) { + LOGE("WK Model is not load."); + return RET_ERROR; + } + run_seg_ = seg_id_; + + if (nnie::NNIEManager::GetInstance()->FillData(&inputs_, run_seg_)) { + LOGE("Fail Fill Data"); + return RET_ERROR; + } + + if (nnie::NNIEManager::GetInstance()->Run(&outputs_, run_seg_, outputs_shapes_)) { + LOGE("Fail WK Run"); + return RET_ERROR; + } + run_seg_++; + return RET_OK; +} + +CustomCPUKernel::~CustomCPUKernel() { + if (load_model_) { + nnie::NNIEManager::GetInstance()->Release(); + load_model_ = false; + } +} + +// Copies the data of the attribute named `attr` into `buf` as a NUL-terminated string. +// Returns false when the attribute is absent, has no data, or does not fit into buf_size bytes. +bool GetCustomAttr(char *buf, int buf_size,
const mindspore::schema::Custom *op, const std::string &attr) { + if (buf == nullptr || buf_size <= 0 || op == nullptr || op->attr() == nullptr) { + return false; + } + auto attrs = op->attr(); + for (size_t i = 0; i < attrs->size(); i++) { + auto entry = attrs->Get(i); + if (entry == nullptr || entry->name() == nullptr || entry->name()->str() != attr) { + continue; + } + auto output_info = entry->data(); + if (output_info == nullptr) { + LOGE("attr data is nullptr"); + return false; + } + int attr_size = static_cast(output_info->size()); + if (attr_size >= buf_size) { + LOGE("attr size too big"); + return false; + } + for (int j = 0; j < attr_size; j++) { + buf[j] = static_cast(output_info->Get(j)); + } + buf[attr_size] = 0; + return true; + } + return false; +} + +// Factory registered for the NNIE custom op: reads the mandatory "id" attribute and the optional +// "ForwardWithBbox" attribute and builds a CustomCPUKernel from them. Returns nullptr on any invalid input. +std::shared_ptr CustomCreateKernel(const std::vector &inputs, + const std::vector &outputs, + const mindspore::schema::Primitive *primitive, + const mindspore::Context *ctx) { + if (primitive == nullptr || primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return nullptr; + } + + auto op = primitive->value_as_Custom(); + if (op == nullptr || op->attr() == nullptr || op->attr()->size() < 1) { + LOGE("There are at least 1 attribute of Custom"); + return nullptr; + } + + int64_t ndims; + bool forward_bbox = false; + char *res = nullptr; + char buf[kMaxSize]; + if (GetCustomAttr(buf, kMaxSize, op, "id")) { + res = nullptr; + ndims = strtol(buf, &res, kDecimal); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Custom op should have id"); + return nullptr; + } + + if (GetCustomAttr(buf, kMaxSize, op, "ForwardWithBbox")) { + res = nullptr; + int64_t temp_val = strtol(buf, &res, kDecimal); + if ((*res) != 0) { + LOGE("Get attr ForwardWithBbox data fail"); + return nullptr; + } + if (temp_val > 0) { + forward_bbox = true; + } + } + auto kernel = std::make_shared(ndims, forward_bbox, inputs, outputs, primitive, ctx); + if (kernel == nullptr) { + LOGE("new custom kernel is nullptr"); + return nullptr; + } + return kernel; +} +} // namespace nnie +} // namespace mindspore +namespace mindspore { +namespace registry { +namespace { +const auto kFloat32 = DataType::kNumberTypeFloat32; +const auto kInt8 =
DataType::kNumberTypeInt8; +const auto kUint8 = DataType::kNumberTypeUInt8; +} // namespace +REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, NNIE, nnie::CustomCreateKernel) +REGISTER_CUSTOM_KERNEL(CPU, NNIE, kInt8, NNIE, nnie::CustomCreateKernel) +REGISTER_CUSTOM_KERNEL(CPU, NNIE, kUint8, NNIE, nnie::CustomCreateKernel) +} // namespace registry +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.h b/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.h new file mode 100644 index 00000000000..679f847070c --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/custom_fp32.h @@ -0,0 +1,66 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_ +#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_ + +#include +#include +#include "include/schema/model_generated.h" +#include "include/context.h" +#include "include/api/kernel.h" +#include "src/custom_infer.h" + +using mindspore::kernel::Kernel; +using mindspore::tensor::MSTensor; +namespace mindspore { +namespace nnie { +// Kernel implementation for the NNIE custom op. Each instance carries a segment id and a forward_bbox flag; +// constructing any instance with forward_bbox == true also sets the class-wide roi_used_ flag. +class CustomCPUKernel : public Kernel { + public: + CustomCPUKernel(int seg_id, bool forward_bbox, const std::vector &inputs, + const std::vector &outputs, const mindspore::schema::Primitive *primitive, + const mindspore::Context *ctx) + : Kernel(inputs, outputs, primitive, ctx), seg_id_(seg_id), forward_bbox_(forward_bbox) { + if (forward_bbox) { + roi_used_ = true; + } + } + + ~CustomCPUKernel() override; + + int Prepare() override; + int ReSize() override; + int Execute() override; + + int seg_id(void) const { return seg_id_; } + + void set_seg_id(int id) { seg_id_ = id; } + + // NOTE(review): returns int although the stored flag is a bool. + int forward_bbox(void) const { return forward_bbox_; } + + void set_forward_bbox(bool flag) { forward_bbox_ = flag; } + + private: + // Static state shared by every kernel instance. + static bool load_model_; + static int run_seg_; + static bool roi_used_; + int seg_id_ = 0; + bool forward_bbox_ = false; + std::vector> outputs_shapes_; +}; +} // namespace nnie +} // namespace mindspore +#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_ diff --git a/mindspore/lite/tools/benchmark/nnie/src/custom_infer.cc b/mindspore/lite/tools/benchmark/nnie/src/custom_infer.cc new file mode 100644 index 00000000000..c96b4b83a7b --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/custom_infer.cc @@ -0,0 +1,160 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/custom_infer.h" +#include +#include +#include "include/errorcode.h" +#include "src/nnie_print.h" +#include "include/api/format.h" +#include "include/registry/register_kernel_interface.h" + +using mindspore::kernel::KernelInterface; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore { +namespace nnie { +std::shared_ptr CustomInferCreater() { + auto infer = new (std::nothrow) CustomInterface(); + if (infer == nullptr) { + LOGE("new custom infer is nullptr"); + return nullptr; + } + return std::shared_ptr(infer); +} + +// Parses the attribute named `attr` of `op` as a comma-separated list of groups "ndims,d0,...,d(ndims-1)" +// and appends one shape per group to `shapes`. Returns RET_ERROR when the attribute is missing, does not +// fit into the local buffer, or is malformed (negative rank, or fewer dims than the rank announces). +int GetCustomShape(const mindspore::schema::Custom *op, const std::string &attr, + std::vector> *shapes) { + if (op == nullptr || op->attr() == nullptr || shapes == nullptr) { + LOGE("Custom op, its attr list or shapes is nullptr."); + return RET_ERROR; + } + char buf[kMaxSize]; + bool has_outputs_shape = false; + + for (size_t i = 0; i < op->attr()->size(); i++) { + if (op->attr()->Get(i)->name()->str() == attr) { + auto output_info = op->attr()->Get(i)->data(); + int attr_size = static_cast(output_info->size()); + if (attr_size >= kMaxSize) { + LOGE("attr size too big"); + return RET_ERROR; + } + for (int j = 0; j < attr_size; j++) { + buf[j] = static_cast(output_info->Get(j)); + } + buf[attr_size] = 0; + has_outputs_shape = true; + break; + } + } + + if (!has_outputs_shape) { + LOGE("Custom op don't have %s attr.", attr.c_str()); + return RET_ERROR; + } + + char delims[] = ","; + char *res = nullptr; + char *save_ptr = nullptr; + res = strtok_r(buf, delims, &save_ptr); + while (res != nullptr) { +
// TODO(review): the original note here says "to be completed"; per-output format and data type are not set in this function. + char *num_end = nullptr; + int64_t ndims = strtol(res, &num_end, kDecimal); + if (ndims < 0) { + LOGE("shape dims num is invalid."); + return RET_ERROR; + } + std::vector shape; + // Dims are appended one by one, so a bogus rank cannot trigger a huge allocation up front. + for (int64_t j = 0; j < ndims; j++) { + res = strtok_r(NULL, delims, &save_ptr); + if (res == nullptr) { + LOGE("shape attr has fewer dims than declared."); + return RET_ERROR; + } + shape.push_back(static_cast(strtol(res, &num_end, kDecimal))); + } + shapes->push_back(shape); + + res = strtok_r(NULL, delims, &save_ptr); + } + return RET_OK; +} + +// Infers the output shapes of the NNIE custom op from its "inputs_shape" / "outputs_shape" attributes. +// Only the batch dimension (dim 0) of the first input may differ from the recorded shape; in that case +// dim 0 of every output shape is replaced by the actual batch. Outputs are set to Float32 / NCHW. +Status CustomInterface::Infer(std::vector *inputs, std::vector *outputs, + const mindspore::schema::Primitive *primitive) { + if (inputs == nullptr || outputs == nullptr || primitive == nullptr) { + LOGE("inputs, outputs or primitive is nullptr"); + return kLiteError; + } + if (inputs->empty()) { + LOGE("Inputs size 0"); + return kLiteError; + } + if (outputs->empty()) { + LOGE("Outputs size 0"); + return kLiteError; + } + if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return kLiteError; + } + + auto op = primitive->value_as_Custom(); + if (op == nullptr || op->attr() == nullptr || op->attr()->size() < 1) { + LOGE("There are at least 1 attribute of Custom"); + return kLiteError; + } + std::vector> inputs_shape; + if (GetCustomShape(op, "inputs_shape", &inputs_shape) != RET_OK) { + LOGE("parser inputs_shape attribute err."); + return kLiteError; + } + std::vector> outputs_shape; + if (GetCustomShape(op, "outputs_shape", &outputs_shape) != RET_OK) { + LOGE("parser outputs_shape attribute err."); + return kLiteError; + } + // inputs_shape[0] is read below, so an empty list must be rejected as well. + if (inputs_shape.empty() || inputs_shape.size() != (inputs->size() - 1)) { + LOGE("inputs num diff inputs_shape num."); + return kLiteError; + } + if (outputs_shape.size() < outputs->size()) { + LOGE("outputs num is larger than outputs_shape num."); + return kLiteError; + } + if (inputs_shape[0].size() != (*inputs)[0].Shape().size()) { + LOGE("shape size err."); + return kLiteError; + } + bool resize_flag = false; + int resize_num = 1; + for (size_t i = 0; i < inputs_shape[0].size(); i++) { + if (inputs_shape[0][i] != (*inputs)[0].Shape()[i]) { + if (i == 0) { + resize_flag = true; + resize_num = (*inputs)[0].Shape()[i]; + } else { + LOGE("Custom of NNIE only support batch_num resize."); + return kLiteError; + } + } + } + if (resize_flag) { + for (auto &output_shape : outputs_shape) { + if (output_shape.empty()) { + LOGE("output shape is empty, can not resize batch."); + return kLiteError; + } + output_shape[0] = resize_num; + } + } + for
(size_t i = 0; i < outputs->size(); i++) { + (*outputs)[i].SetShape(outputs_shape[i]); + (*outputs)[i].SetDataType(DataType::kNumberTypeFloat32); + (*outputs)[i].SetFormat(Format::NCHW); + } + return kSuccess; +} +} // namespace nnie +} // namespace mindspore +namespace mindspore { +namespace kernel { +REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, NNIE, nnie::CustomInferCreater); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie/src/custom_infer.h b/mindspore/lite/tools/benchmark/nnie/src/custom_infer.h new file mode 100644 index 00000000000..938b7a19128 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/custom_infer.h @@ -0,0 +1,35 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#ifndef MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_ +#define MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_ +#include +#include +#include "include/kernel_interface.h" + +namespace mindspore { +namespace nnie { +// Shape-inference interface for the NNIE custom op. Infer() is implemented in custom_infer.cc. +class CustomInterface : public mindspore::kernel::KernelInterface { + public: + CustomInterface() {} + + ~CustomInterface() = default; + + // Returns a Status; inputs and outputs are passed by pointer and outputs are updated in place. + Status Infer(std::vector *inputs, std::vector *outputs, + const mindspore::schema::Primitive *primitive) override; +}; +} // namespace nnie +} // namespace mindspore +#endif // MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_ diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.cc b/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.cc new file mode 100644 index 00000000000..d4e413ea503 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.cc @@ -0,0 +1,101 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include "src/nnie_cfg_parser.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include "include/errorcode.h" +#include "src/nnie_manager.h" +#include "src/nnie_print.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +namespace mindspore { +namespace nnie { +namespace { +constexpr auto ENV_TIME_STEP = "TIME_STEP"; +constexpr auto ENV_MAX_ROI_NUM = "MAX_ROI_NUM"; +constexpr auto ENV_CORE_IDS = "CORE_IDS"; +constexpr auto DELIM = ","; +constexpr int MAX_CORE_ID = 7; +} // namespace +// Reads TIME_STEP, MAX_ROI_NUM and CORE_IDS from the environment and overrides the matching defaults. +// Values are parsed from private copies: the buffer returned by getenv() is never written to, so a later +// Init() call still sees the complete variables. +void Flags::Init() { + const char *time_step = std::getenv(ENV_TIME_STEP); + if (time_step != nullptr) { + // Only the leading run of decimal digits is used. + const char *digits_end = + std::find_if(time_step, time_step + strlen(time_step), [](char val) { return val < '0' || val > '9'; }); + if (digits_end != time_step) { + this->time_step_ = atoi(std::string(time_step, digits_end).c_str()); + } else { + LOGE("TIME_STEP ENV is invalid, now set to default value %d", this->time_step_); + } + } else { + LOGW("TIME_STEP ENV is not set, now set to default value %d", this->time_step_); + } + const char *max_roi_num = std::getenv(ENV_MAX_ROI_NUM); + if (max_roi_num != nullptr) { + const char *digits_end = + std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; }); + if (digits_end != max_roi_num) { + this->max_roi_num_ = atoi(std::string(max_roi_num, digits_end).c_str()); + } else { + LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", this->max_roi_num_); + } + } else { + LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", this->max_roi_num_); + } + const char *ids = std::getenv(ENV_CORE_IDS); + if (ids != nullptr) { + const char *ids_end = std::find_if(ids, ids + strlen(ids), [](char val) { return (val < '0' || val > '9') && val != ','; }); + std::vector core_ids; + if (ids_end != ids) { + // strtok_r rewrites its input, so tokenize a copy of the valid prefix instead of the environment string. + std::string id_list(ids, ids_end); + char *saveptr = nullptr; + char *p = strtok_r(&id_list[0], DELIM, &saveptr); + while (p != nullptr) { + int id = atoi(p); + p = strtok_r(NULL, DELIM, &saveptr); + if (id > MAX_CORE_ID || id < 0) { + LOGE("id is out of range");
+ continue; + } + if (std::find(core_ids.begin(), core_ids.end(), id) != core_ids.end()) { + continue; + } + core_ids.push_back(id); + } + } + if (!core_ids.empty()) { + this->core_ids_ = core_ids; + } else { + std::string message = + "CORE_IDS ENV is invalid, now set to default value {" + std::to_string(this->core_ids_.front()) + "}"; + LOGW("%s", message.c_str()); + } + } else { + std::string message = + "CORE_IDS ENV is not set, now set to default value {" + std::to_string(this->core_ids_.front()) + "}"; + LOGW("%s", message.c_str()); + } +} +} // namespace nnie +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.h b/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.h new file mode 100644 index 00000000000..238189bfe41 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_cfg_parser.h @@ -0,0 +1,44 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_ +#include + +namespace mindspore { +namespace nnie { +/** + * Flags is a config container. + * Member objects: + * 1.time_step_: step num only for rnn or lstm model. Default is 1. + * 2.max_roi_num_: maximum number of ROI areas supported for a single picture, must be greater than 0. Default is 300.
+ * 3.core_ids_: ids of the cores to run on; multi-core is supported, with ids separated by commas when setting, such as {0, 1, 2}. + * each element must be an integer that satisfies 0 <= val < 8. + * Default is {0}. + */ +class Flags { + public: + Flags() = default; + ~Flags() = default; + void Init(); + + public: + int time_step_{1}; + int max_roi_num_{300}; + std::vector core_ids_{0}; +}; +} // namespace nnie +} // namespace mindspore +#endif diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_common.cc b/mindspore/lite/tools/benchmark/nnie/src/nnie_common.cc new file mode 100644 index 00000000000..3b4459d1adc --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_common.cc @@ -0,0 +1,943 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include "src/nnie_common.h" +#include "include/mpi_nnie.h" +#include "include/hi_type.h" +#include "include/errorcode.h" +#include "src/nnie_print.h" +#include "src/nnie_memory.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +constexpr int kSleepUs = 100; + +namespace mindspore { +namespace nnie { +// Frees task_buf_ and step_buf_ of nnie_param when both their physical and virtual addresses are non-zero, +// then zeroes the addresses. A null nnie_param is ignored. +static void NnieParamRelease(NnieParam *nnie_param) { + if (nnie_param == nullptr) { + return; + } + + if (nnie_param->task_buf_.u64PhyAddr != 0 && nnie_param->task_buf_.u64VirAddr != 0) { + NNIE_MEM_FREE(nnie_param->task_buf_.u64PhyAddr, nnie_param->task_buf_.u64VirAddr); + nnie_param->task_buf_.u64PhyAddr = 0; + nnie_param->task_buf_.u64VirAddr = 0; + } + + if (nnie_param->step_buf_.u64PhyAddr != 0 && nnie_param->step_buf_.u64VirAddr != 0) { + NNIE_MEM_FREE(nnie_param->step_buf_.u64PhyAddr, nnie_param->step_buf_.u64VirAddr); + nnie_param->step_buf_.u64PhyAddr = 0; + nnie_param->step_buf_.u64VirAddr = 0; + } +} + +// Searches the destination nodes of every segment for `name`. On the first match the node is flagged in +// mem_cfg_ (dst_node_ = true) and true is returned; otherwise returns false. +bool CheckNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param) { + for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) + if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) { + nnie_param->mem_cfg_.seg_[i].dst_node_[j] = true; + return true; + } + } + return false; +} + +// Searches the destination nodes of every segment for `name`. On the first match the physical and virtual +// addresses of that destination blob are copied into `blob` and true is returned; otherwise returns false. +bool ConnectNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param, SVP_SRC_BLOB_S *blob) { + for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) + if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) { + blob->u64PhyAddr = nnie_param->seg_data_[i].dst_[j].u64PhyAddr; + blob->u64VirAddr = nnie_param->seg_data_[i].dst_[j].u64VirAddr; + return true; + } + } + return false; +} + +static void FillForwardInfo(NnieCfg *nnie_cfg, NnieParam *nnie_param) { + HI_U32 i, j; + HI_U32 num; +
memset(&nnie_param->mem_cfg_, false, sizeof(NNIEMemCfg)); + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) { + nnie_param->forward_with_bbox_ctrl_[i].enNnieId = nnie_cfg->nnie_core_id_[i]; + nnie_param->forward_with_bbox_ctrl_[i].u32SrcNum = nnie_param->model_->astSeg[i].u16SrcNum; + nnie_param->forward_with_bbox_ctrl_[i].u32DstNum = nnie_param->model_->astSeg[i].u16DstNum; + nnie_param->forward_with_bbox_ctrl_[i].u32ProposalNum = 1; + nnie_param->forward_with_bbox_ctrl_[i].u32NetSegId = i; + } else if (SVP_NNIE_NET_TYPE_CNN == nnie_param->model_->astSeg[i].enNetType || + SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) { + nnie_param->forward_ctrl_[i].enNnieId = nnie_cfg->nnie_core_id_[i]; + nnie_param->forward_ctrl_[i].u32SrcNum = nnie_param->model_->astSeg[i].u16SrcNum; + nnie_param->forward_ctrl_[i].u32DstNum = nnie_param->model_->astSeg[i].u16DstNum; + nnie_param->forward_ctrl_[i].u32NetSegId = i; + } + + for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) { + if (i > 0) { + if (CheckNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param)) { + nnie_param->mem_cfg_.seg_[i].src_node_[j] = true; + } + } + + if (SVP_BLOB_TYPE_SEQ_S32 == nnie_param->model_->astSeg[i].astSrcNode[j].enType) { + nnie_param->seg_data_[i].src_[j].enType = nnie_param->model_->astSeg[i].astSrcNode[j].enType; + nnie_param->seg_data_[i].src_[j].unShape.stSeq.u32Dim = + nnie_param->model_->astSeg[i].astSrcNode[j].unShape.u32Dim; + nnie_param->seg_data_[i].src_[j].u32Num = nnie_cfg->max_input_num_; + nnie_param->seg_data_[i].src_[j].unShape.stSeq.u64VirAddrStep = + nnie_cfg->step_vir_addr_[i * NNIE_EACH_SEG_STEP_ADDR_NUM]; + } else { + nnie_param->seg_data_[i].src_[j].enType = nnie_param->model_->astSeg[i].astSrcNode[j].enType; + nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Chn = + nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Chn; + 
nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Height = + nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Height; + nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Width = + nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Width; + nnie_param->seg_data_[i].src_[j].u32Num = nnie_cfg->max_input_num_; + } + } + + if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) { + num = nnie_cfg->max_roi_num_ * nnie_cfg->max_input_num_; + } else { + num = nnie_cfg->max_input_num_; + } + + for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) { + if (SVP_BLOB_TYPE_SEQ_S32 == nnie_param->model_->astSeg[i].astDstNode[j].enType) { + nnie_param->seg_data_[i].dst_[j].enType = nnie_param->model_->astSeg[i].astDstNode[j].enType; + nnie_param->seg_data_[i].dst_[j].unShape.stSeq.u32Dim = + nnie_param->model_->astSeg[i].astDstNode[j].unShape.u32Dim; + nnie_param->seg_data_[i].dst_[j].u32Num = num; + nnie_param->seg_data_[i].dst_[j].unShape.stSeq.u64VirAddrStep = + nnie_cfg->step_vir_addr_[i * NNIE_EACH_SEG_STEP_ADDR_NUM + 1]; + } else { + nnie_param->seg_data_[i].dst_[j].enType = nnie_param->model_->astSeg[i].astDstNode[j].enType; + nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Chn = + nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Chn; + nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Height = + nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Height; + nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Width = + nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Width; + nnie_param->seg_data_[i].dst_[j].u32Num = num; + } + } + } +} + +static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32 total_step, SVP_BLOB_S blob[], + HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool *mem_alloc = nullptr) { + HI_U32 i = 0; + HI_U32 size; + HI_U32 stride; + + for (i = 0; i < node_num; i++) { + if (SVP_BLOB_TYPE_S32 == nnie_node[i].enType || SVP_BLOB_TYPE_VEC_S32 == 
nnie_node[i].enType || + SVP_BLOB_TYPE_SEQ_S32 == nnie_node[i].enType) { + size = sizeof(HI_U32); + } else { + size = sizeof(HI_U8); + } + if (SVP_BLOB_TYPE_SEQ_S32 == nnie_node[i].enType) { + if (NNIE_ALIGN_16 == align32) { + stride = NNIE_ALIGN16(nnie_node[i].unShape.u32Dim * size); + } else { + stride = NNIE_ALIGN32(nnie_node[i].unShape.u32Dim * size); + } + blob_size[i] = total_step * stride; + } else { + if (NNIE_ALIGN_16 == align32) { + stride = NNIE_ALIGN16(nnie_node[i].unShape.stWhc.u32Width * size); + } else { + stride = NNIE_ALIGN32(nnie_node[i].unShape.stWhc.u32Width * size); + } + blob_size[i] = blob[i].u32Num * stride * nnie_node[i].unShape.stWhc.u32Height * nnie_node[i].unShape.stWhc.u32Chn; + } + if (mem_alloc != nullptr) { + if (mem_alloc[i]) { + blob_size[i] = 0; + } + } + *total_size += blob_size[i]; + blob[i].u32Stride = stride; + } +} + +static int GetTaskAndBlobBufSize(NnieCfg *nnie_cfg, NnieParam *nnie_param, HI_U32 *total_task_buf_size, + HI_U32 *tmp_buf_size, NnieBlobSize blob_size[], HI_U32 *total_size) { + HI_S32 ret = HI_SUCCESS; + HI_U32 i, j; + HI_U32 total_step = 0; + + ret = HI_MPI_SVP_NNIE_GetTskBufSize(nnie_cfg->max_input_num_, nnie_cfg->max_roi_num_, nnie_param->model_, + nnie_param->task_buf_size_, nnie_param->model_->u32NetSegNum); + if (HI_SUCCESS != ret) { + LOGE("HI_MPI_SVP_NNIE_GetTskBufSize"); + return RET_ERROR; + } + + *total_task_buf_size = 0; + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + *total_task_buf_size += nnie_param->task_buf_size_[i]; + } + + *tmp_buf_size = nnie_param->model_->u32TmpBufSize; + *total_size += *total_task_buf_size + *tmp_buf_size; + + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + if (SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) { + for (j = 0; j < nnie_param->seg_data_[i].src_[0].u32Num; j++) { + total_step += *(reinterpret_cast( + static_cast(nnie_param->seg_data_[i].src_[0].unShape.stSeq.u64VirAddrStep)) + + j); + } + } + 
GetBlobMemSize(&(nnie_param->model_->astSeg[i].astSrcNode[0]), nnie_param->model_->astSeg[i].u16SrcNum, total_step, + &(nnie_param->seg_data_[i].src_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].src_size_[0]), + &(nnie_param->mem_cfg_.seg_[i].src_node_[0])); + + GetBlobMemSize(&(nnie_param->model_->astSeg[i].astDstNode[0]), nnie_param->model_->astSeg[i].u16DstNum, total_step, + &(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0])); + } + return RET_OK; +} + +static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) { + HI_U32 i, j; + HI_U32 total_size = 0; + HI_U32 total_task_buf_size = 0; + HI_U32 tmp_buf_size_ = 0; + HI_S32 ret = HI_SUCCESS; + HI_U32 off_set = 0; + HI_U64 phy_addr = 0; + HI_U8 *vir_addr = nullptr; + NnieBlobSize blob_size[SVP_NNIE_MAX_NET_SEG_NUM] = {0}; + + FillForwardInfo(nnie_cfg, nnie_param); + + ret = GetTaskAndBlobBufSize(nnie_cfg, nnie_param, &total_task_buf_size, &tmp_buf_size_, blob_size, &total_size); + if (HI_SUCCESS != ret) { + LOGE("Error,Malloc memory failed! 
"); + return RET_ERROR; + } + bool has_roi = false; + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) { + has_roi = true; + } + } + if (has_roi) { + nnie_param->rpn_bbox_.enType = SVP_BLOB_TYPE_S32; + nnie_param->rpn_bbox_.unShape.stWhc.u32Chn = 1; + nnie_param->rpn_bbox_.unShape.stWhc.u32Height = nnie_cfg->max_roi_num_; + nnie_param->rpn_bbox_.unShape.stWhc.u32Width = NNIE_COORDI_NUM; + nnie_param->rpn_bbox_.u32Stride = NNIE_ALIGN16(NNIE_COORDI_NUM * sizeof(HI_U32)); + nnie_param->rpn_bbox_.u32Num = nnie_cfg->max_input_num_; + total_size += + nnie_param->rpn_bbox_.u32Num * nnie_param->rpn_bbox_.unShape.stWhc.u32Height * nnie_param->rpn_bbox_.u32Stride; + } + + ret = NnieMemMallocCached(std::string("NNIE_NNIE_TASK").data(), nullptr, reinterpret_cast(&phy_addr), + reinterpret_cast(&vir_addr), total_size); + if (HI_SUCCESS != ret) { + LOGE("Error,Malloc memory failed! "); + return RET_ERROR; + } + memset(vir_addr, 0, total_size); + NnieMemFlushCache(phy_addr, reinterpret_cast(vir_addr), total_size); + + nnie_param->task_buf_.u32Size = total_task_buf_size; + nnie_param->task_buf_.u64PhyAddr = phy_addr; + nnie_param->task_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr; + + nnie_param->tmp_buf_.u32Size = tmp_buf_size_; + nnie_param->tmp_buf_.u64PhyAddr = phy_addr + total_task_buf_size; + nnie_param->tmp_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr + total_task_buf_size; + + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) { + nnie_param->forward_with_bbox_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_; + nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set; + nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set; + nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i]; + } else if 
(SVP_NNIE_NET_TYPE_CNN == nnie_param->model_->astSeg[i].enNetType || + SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) { + nnie_param->forward_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_; + nnie_param->forward_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set; + nnie_param->forward_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set; + nnie_param->forward_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i]; + } + off_set += nnie_param->task_buf_size_[i]; + } + + phy_addr = phy_addr + total_task_buf_size + tmp_buf_size_; + vir_addr = vir_addr + total_task_buf_size + tmp_buf_size_; + for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) { + for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) { + if (j != 0) { + phy_addr += blob_size[i].src_size_[j - 1]; + vir_addr += blob_size[i].src_size_[j - 1]; + } + if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) { + if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param, + &(nnie_param->seg_data_[i].src_[j]))) { + LOGE("ConnectNnieInnerNode failed! 
"); + return RET_ERROR; + } + } else { + nnie_param->seg_data_[i].src_[j].u64PhyAddr = phy_addr; + nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr; + } + } + phy_addr += blob_size[i].src_size_[j - 1]; + vir_addr += blob_size[i].src_size_[j - 1]; + + for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) { + if (j != 0) { + phy_addr += blob_size[i].dst_size_[j - 1]; + vir_addr += blob_size[i].dst_size_[j - 1]; + } + nnie_param->seg_data_[i].dst_[j].u64PhyAddr = phy_addr; + nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr; + } + phy_addr += blob_size[i].dst_size_[j - 1]; + vir_addr += blob_size[i].dst_size_[j - 1]; + } + if (has_roi) { + nnie_param->rpn_bbox_.u64PhyAddr = phy_addr; + nnie_param->rpn_bbox_.u64VirAddr = (HI_U64)((HI_UL)vir_addr); + } + return RET_OK; +} + +static int NnieLoadModel(char *model_buf, int size, NnieModel *nnie_model) { + HI_S32 ret = HI_INVALID_VALUE; + HI_U64 phy_addr = 0; + HI_U8 *vir_addr = nullptr; + ret = NnieMemMalloc(std::string("NNIE_NNIE_MODEL").data(), nullptr, reinterpret_cast(&phy_addr), + reinterpret_cast(&vir_addr), size); + if (HI_SUCCESS != ret) { + LOGE("Error,Malloc memory failed! 
"); + return RET_ERROR; + } + nnie_model->model_buf_.u32Size = (HI_U32)size; + nnie_model->model_buf_.u64PhyAddr = phy_addr; + nnie_model->model_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr; + memcpy(vir_addr, model_buf, size); + ret = HI_MPI_SVP_NNIE_LoadModel(&nnie_model->model_buf_, &nnie_model->model_); + if (HI_SUCCESS != ret) { + NNIE_MEM_FREE(nnie_model->model_buf_.u64PhyAddr, nnie_model->model_buf_.u64VirAddr); + nnie_model->model_buf_.u32Size = 0; + LOGE("HI_MPI_SVP_NNIE_LoadModel failed!"); + return RET_ERROR; + } + return RET_OK; +} + +static void NnieUnloadModel(NnieModel *nnie_model) { + if (nnie_model == nullptr) { + return; + } + + if (nnie_model->model_buf_.u64PhyAddr != 0 && nnie_model->model_buf_.u64VirAddr != 0) { + NNIE_MEM_FREE(nnie_model->model_buf_.u64PhyAddr, nnie_model->model_buf_.u64VirAddr); + nnie_model->model_buf_.u64PhyAddr = 0; + nnie_model->model_buf_.u64VirAddr = 0; + } +} + +static int NnieForward(NnieParam *nnie_param, NnieDataIndex *input_data_idx, HI_BOOL instant) { + HI_S32 ret = HI_SUCCESS; + HI_U32 i, j; + HI_BOOL finish = HI_FALSE; + SVP_NNIE_HANDLE svp_nnie_handle = 0; + HI_U32 total_step_num = 0; + SVP_NNIE_FORWARD_CTRL_S *forward_handle = &nnie_param->forward_ctrl_[input_data_idx->seg_idx_]; + NnieSegData *seg_data = &nnie_param->seg_data_[input_data_idx->seg_idx_]; + + NnieMemFlushCache(forward_handle->stTskBuf.u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR(HI_VOID, forward_handle->stTskBuf.u64VirAddr), + forward_handle->stTskBuf.u32Size); + + for (i = 0; i < forward_handle->u32DstNum; i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == seg_data->dst_[i].enType) { + for (j = 0; j < seg_data->dst_[i].u32Num; j++) { + total_step_num += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, seg_data->dst_[i].unShape.stSeq.u64VirAddrStep) + j); + } + NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr), + total_step_num * seg_data->dst_[i].u32Stride); + } else { + NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, 
NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr), + seg_data->dst_[i].u32Num * seg_data->dst_[i].unShape.stWhc.u32Chn * + seg_data->dst_[i].unShape.stWhc.u32Height * seg_data->dst_[i].u32Stride); + } + } + + ret = HI_MPI_SVP_NNIE_Forward(&svp_nnie_handle, seg_data->src_, nnie_param->model_, seg_data->dst_, forward_handle, + instant); + if (HI_SUCCESS != ret) { + LOGE("HI_MPI_SVP_NNIE_Forward failed!"); + return RET_ERROR; + } + + if (instant) { + while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT == + (ret = HI_MPI_SVP_NNIE_Query(forward_handle->enNnieId, svp_nnie_handle, &finish, HI_TRUE))) { + usleep(kSleepUs); + } + } + + total_step_num = 0; + for (i = 0; i < forward_handle->u32DstNum; i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == seg_data->dst_[i].enType) { + for (j = 0; j < seg_data->dst_[i].u32Num; j++) { + total_step_num += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, seg_data->dst_[i].unShape.stSeq.u64VirAddrStep) + j); + } + NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr), + total_step_num * seg_data->dst_[i].u32Stride); + } else { + NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr), + seg_data->dst_[i].u32Num * seg_data->dst_[i].unShape.stWhc.u32Chn * + seg_data->dst_[i].unShape.stWhc.u32Height * seg_data->dst_[i].u32Stride); + } + } + + return RET_OK; +} + +static HI_S32 NNIE_ForwardWithBbox(NnieParam *pstNnieParam, NnieDataIndex *pstInputDataIdx, SVP_SRC_BLOB_S astBbox[], + HI_BOOL bInstant) { + HI_S32 ret = HI_SUCCESS; + HI_BOOL finish = HI_FALSE; + SVP_NNIE_HANDLE svp_nnie_handle = 0; + HI_U32 total_step_num = 0; + HI_U32 i, j; + + NnieMemFlushCache(pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR( + HI_VOID, pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u64VirAddr), + pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u32Size); + + for 
(i = 0; i < pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].u32DstNum; i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].enType) { + for (j = 0; j < pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num; j++) { + total_step_num += + *(NNIE_CONVERT_64BIT_ADDR( + HI_U32, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stSeq.u64VirAddrStep) + + j); + } + NnieMemFlushCache( + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr), + total_step_num * pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride); + } else { + NnieMemFlushCache( + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr), + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Chn * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Height * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride); + } + } + + ret = + HI_MPI_SVP_NNIE_ForwardWithBbox(&svp_nnie_handle, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].src_, astBbox, + pstNnieParam->model_, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_, + &pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_], bInstant); + if (HI_SUCCESS != ret) { + LOGE("HI_MPI_SVP_NNIE_ForwardWithBbox failed!"); + return RET_ERROR; + } + + if (bInstant) { + while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT == + (ret = HI_MPI_SVP_NNIE_Query(pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].enNnieId, + svp_nnie_handle, &finish, HI_TRUE))) { + usleep(kSleepUs); + LOGE("HI_MPI_SVP_NNIE_Query Query timeout!"); + } + } + + total_step_num = 0; + + for (i = 0; i < 
pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].u32DstNum; i++) { + if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].enType) { + for (j = 0; j < pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num; j++) { + total_step_num += + *(NNIE_CONVERT_64BIT_ADDR( + HI_U32, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stSeq.u64VirAddrStep) + + j); + } + NnieMemFlushCache( + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr), + total_step_num * pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride); + } else { + NnieMemFlushCache( + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr, + NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr), + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Chn * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Height * + pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride); + } + } + + return ret; +} + +int FillByUnsignedChar(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_U8 *src, HI_U8 *dst) { + HI_U32 i, j; + if (input_size != num * width) { + LOGE("input size error:%d <-> %d.", input_size, num * width); + return RET_ERROR; + } + for (i = 0; i < num; i++) { + for (j = 0; j < width; j++) { + dst[j] = src[j]; + } + dst += stride; + src += width; + } + return RET_OK; +} + +int FillByFloat(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_FLOAT *src, HI_S32 *dst, HI_U8 *dst_u8) { + HI_U32 i, j; + if (input_size != num * width) { + LOGE("input size error:%d <-> %d.", input_size, num * width); + return RET_ERROR; + } + for (i = 0; i < num; i++) { + for (j = 0; j < width; j++) { + 
dst[j] = (src[j] * NNIE_QUANT_BASE); + } + dst_u8 += stride; + dst = reinterpret_cast(dst_u8); + src += width; + } + return RET_OK; +} + +static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape, + int size) { + HI_U32 i, j, n, ret; + HI_U32 height, width, channel, stride, dim; + HI_U8 *input_addr_u8 = nullptr; + HI_S32 *input_addr_s32 = nullptr; + HI_U32 *step_addr_u32 = nullptr; + HI_FLOAT *float_src_data = nullptr; + HI_U8 *u8_src_data = nullptr; + HI_U32 total_step_num = 0; + HI_U32 input_size = 1; + SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_]; + for (n = 0; n < (HI_U32)size; n++) { + input_size *= shape[n]; + } + input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr); + input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr); + float_src_data = reinterpret_cast(nnie_cfg->data_ptr_); + u8_src_data = reinterpret_cast(nnie_cfg->data_ptr_); + if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) { + step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep); + dim = blob->unShape.stSeq.u32Dim; + stride = blob->u32Stride; + + for (n = 0; n < blob->u32Num; n++) { + total_step_num += *(step_addr_u32 + n); + } + + if (input_size != total_step_num * dim) { + LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim); + return RET_ERROR; + } + for (n = 0; n < blob->u32Num; n++) { + for (i = 0; i < *(step_addr_u32 + n); i++) { + for (j = 0; j < dim; j++) { + input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE); + } + input_addr_u8 += stride; + input_addr_s32 = reinterpret_cast(input_addr_u8); + float_src_data += dim; + } + } + NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride); + } else { + height = blob->unShape.stWhc.u32Height; + width = blob->unShape.stWhc.u32Width; + channel = blob->unShape.stWhc.u32Chn; + stride = blob->u32Stride; + if 
(SVP_BLOB_TYPE_YVU420SP == blob->enType) { + ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast(channel * height / 2), width, stride, + u8_src_data, input_addr_u8); + } else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) { + ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8); + } else { + if (SVP_BLOB_TYPE_U8 == blob->enType) { + ret = + FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8); + } else { + ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32, + input_addr_u8); + } + } + if (ret != RET_OK) { + return ret; + } + NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), + blob->u32Num * channel * height * stride); + } + + return RET_OK; +} + +static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape, + int size) { + HI_U32 i, j, n; + HI_U32 height, width, channel, stride, dim; + HI_U8 *output_addr_u8 = nullptr; + HI_S32 *output_addr_s32 = nullptr; + HI_U32 *step_addr_u32 = nullptr; + HI_FLOAT *float_dst_data = nullptr; + HI_U32 total_step_num = 0; + HI_U32 input_num = 1; + SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_]; + for (n = 0; n < (HI_U32)size; n++) { + input_num *= shape[n]; + } + + if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) { + LOGE("Nnie output type error"); + return RET_ERROR; + } + + output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr); + output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr); + float_dst_data = reinterpret_cast(nnie_cfg->data_ptr_); + + if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) { + dim = blob->unShape.stSeq.u32Dim; + stride = blob->u32Stride; + step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep); + + for (n = 0; n 
< blob->u32Num; n++) { + total_step_num += *(step_addr_u32 + n); + } + if (input_num != total_step_num * dim) { + LOGE("input shape"); + return RET_ERROR; + } + for (n = 0; n < blob->u32Num; n++) { + for (i = 0; i < *(step_addr_u32 + n); i++) { + for (j = 0; j < dim; j++) { + float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE; + } + output_addr_u8 += stride; + output_addr_s32 = reinterpret_cast(output_addr_u8); + float_dst_data += dim; + } + } + } else { + height = blob->unShape.stWhc.u32Height; + width = blob->unShape.stWhc.u32Width; + channel = blob->unShape.stWhc.u32Chn; + stride = blob->u32Stride; + if (input_num != height * channel * width * blob->u32Num) { + LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num); + return RET_ERROR; + } + for (n = 0; n < blob->u32Num; n++) { + for (i = 0; i < channel * height; i++) { + for (j = 0; j < width; j++) { + float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE; + } + output_addr_u8 += stride; + output_addr_s32 = reinterpret_cast(output_addr_u8); + float_dst_data += width; + } + } + } + return RET_OK; +} + +int CheckMsShapeN(NnieRunCfg *nnie_run_cfg, const std::vector &input_shape, const SVP_NNIE_NODE_S &nnie_node) { + size_t ms_input_size = 1, i; + for (i = 1; i < input_shape.size(); i++) { + ms_input_size *= input_shape[i]; + } + + size_t nnie_input_size; + if (SVP_BLOB_TYPE_SEQ_S32 == nnie_node.enType) { + if (nnie_run_cfg->cfg_.step_ == 0) { + LOGE("request time_step set! 
Please export NNIE_RUNTIME_CONFIG_PATH"); + return RET_ERROR; + } + if (ms_input_size != nnie_node.unShape.u32Dim) { + LOGE("The input data does not meet the required size %d <-> %d.", static_cast(ms_input_size), + nnie_node.unShape.u32Dim); + return RET_ERROR; + } + if ((input_shape[0] < static_cast(nnie_run_cfg->cfg_.step_)) || + (input_shape[0] % nnie_run_cfg->cfg_.step_ != 0)) { + LOGW("The num value(%d) of input must be an integer multiple of time_step(%d)", static_cast(input_shape[0]), + nnie_run_cfg->cfg_.step_); + return RET_ERROR; + } + nnie_input_size = nnie_node.unShape.u32Dim * nnie_run_cfg->cfg_.step_; + } else { + auto height = nnie_node.unShape.stWhc.u32Height; + auto width = nnie_node.unShape.stWhc.u32Width; + auto channel = nnie_node.unShape.stWhc.u32Chn; + if (SVP_BLOB_TYPE_YVU420SP == nnie_node.enType) { + nnie_input_size = static_cast(channel * height / 2) * width; + } else if (SVP_BLOB_TYPE_YVU422SP == nnie_node.enType) { + nnie_input_size = height * 2 * width; + } else { + nnie_input_size = channel * height * width; + } + if (ms_input_size != nnie_input_size) { + LOGE("The input data does not meet the required size %d <-> %d.", static_cast(ms_input_size), + static_cast(nnie_input_size)); + return RET_ERROR; + } + } + nnie_run_cfg->cfg_.max_input_num_ = (ms_input_size * input_shape[0]) / nnie_input_size; + fprintf(stdout, "The input num is %d.", nnie_run_cfg->cfg_.max_input_num_); + return RET_OK; +} + +size_t GetFillIndex(const std::vector &inputs, size_t input_size, const HI_CHAR *name) { + size_t j; + for (j = 0; j < input_size; j++) { + auto input_str = inputs[j].Name(); + if (input_str.length() > 4) { + if (input_str.substr(input_str.length() - 4) == "_pre") { + input_str = input_str.substr(0, input_str.length() - 4); + } else if (input_str.length() > 5) { + if (input_str.substr(input_str.length() - 5) == "_post") { + input_str = input_str.substr(0, input_str.length() - 5); + } + } + } + + if (strcmp(input_str.c_str(), name) == 0) { + 
break; + } + } + if (j == input_size) { + for (j = 0; j < input_size; j++) { + auto input_str = inputs[j].Name(); + if (input_str.length() > 4) { + if (input_str.substr(input_str.length() - 4) == "_pre") { + input_str = input_str.substr(0, input_str.length() - 4); + } else if (input_str.length() > 5) { + if (input_str.substr(input_str.length() - 5) == "_post") { + input_str = input_str.substr(0, input_str.length() - 5); + } + } + } + + if (strncmp(input_str.c_str(), name, input_str.length()) == 0) { + break; + } + } + } + return j; +} + +int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size, + const std::vector &inputs) { + HI_U8 *vir_addr = nullptr; + HI_U32 seg_num; + HI_U32 off_set; + HI_U32 total_size; + HI_U32 i, j; + HI_S32 ret = HI_SUCCESS; + NnieModel *model = &nnie_run_cfg->model_; + NnieParam *param = &nnie_run_cfg->param_; + NnieCfg *cfg = &nnie_run_cfg->cfg_; + HI_U32 step = cfg->step_; // time step + + ret = NnieLoadModel(model_buf, size, model); + if (ret != RET_OK) { + LOGE("NnieLoadModel failed!"); + return RET_ERROR; + } + if (inputs.size() <= 1) { + LOGE("inputs size need greater than 1!"); + return RET_ERROR; + } + if (inputs[0].Shape().size() <= 1) { + LOGE("input shape size need greater than 1!"); + return RET_ERROR; + } + + j = GetFillIndex(inputs, inputs.size() - 1, model->model_.astSeg[0].astSrcNode[0].szName); + if (j == (inputs.size() - 1)) { + j = 0; + // LOGW("input tensor name(%s) can't match wk node name(%s).", inputs[0].Name().c_str(), + // model->model_.astSeg[0].astSrcNode[0].szName); + } + if (CheckMsShapeN(nnie_run_cfg, inputs[j].Shape(), model->model_.astSeg[0].astSrcNode[0]) != RET_OK) { + return RET_ERROR; + } + + bool has_roi = false; + for (i = 0; i < model->model_.u32NetSegNum; i++) { + if (SVP_NNIE_NET_TYPE_ROI == model->model_.astSeg[i].enNetType) { + has_roi = true; + } + } + if (has_roi) { + if (cfg->max_roi_num_ == 0) { + LOGE("NNIE_RUNTIME_CONFIG_PATH: max_roi_num(0) should greater than 0!"); + return 
RET_ERROR; + } + } else { + if (cfg->max_roi_num_ != 0) { + LOGW("NNIE_RUNTIME_CONFIG_PATH: max_roi_num should euqal to 0!"); + cfg->max_roi_num_ = 0; + } + } + + if (model->model_.astSeg[0].enNetType == SVP_NNIE_NET_TYPE_RECURRENT) { + if (step == 0) { + LOGE("request time_step set! No NNIE_RUNTIME_CONFIG_PATH, please export NNIE_RUNTIME_CONFIG_PATH"); + return RET_ERROR; + } + seg_num = model->model_.u32NetSegNum; + total_size = cfg->max_input_num_ * sizeof(HI_S32) * seg_num * 2; + ret = NnieMemMalloc(std::string("SVP_NNIE_STEP").data(), nullptr, + reinterpret_cast(¶m->step_buf_.u64PhyAddr), reinterpret_cast(&vir_addr), + total_size); + if (HI_SUCCESS != ret) { + LOGE("Malloc memory failed:"); + return RET_ERROR; + } + + param->step_buf_.u64VirAddr = (HI_U64)((HI_UL)vir_addr); + for (i = 0; i < seg_num * NNIE_EACH_SEG_STEP_ADDR_NUM; i++) { + cfg->step_vir_addr_[i] = param->step_buf_.u64VirAddr + i * cfg->max_input_num_ * sizeof(HI_S32); + } + + for (i = 0; i < seg_num; i++) { + off_set = i * NNIE_EACH_SEG_STEP_ADDR_NUM; + for (j = 0; j < cfg->max_input_num_; j++) { + *(reinterpret_cast(static_cast(cfg->step_vir_addr_[off_set])) + j) = + step; // step of input x_t + *(reinterpret_cast(static_cast(cfg->step_vir_addr_[off_set + 1])) + j) = + step; // step of output h_t + } + } + } + param->model_ = &(model->model_); + ret = NnieParamInit(cfg, param); + if (ret != RET_OK) { + LOGE("NnieParamInit failed!"); + return RET_ERROR; + } + nnie_run_cfg->run_idx_.seg_idx_ = 0; + return RET_OK; +} + +void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model) { + NnieParamRelease(pstNnieParamm); + NnieUnloadModel(nnie_model); +} + +int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index) { + if (nnie_run_cfg->run_idx_.seg_idx_ <= 0) { + LOGE("output seg index error."); + return RET_ERROR; + } + HI_U32 ret = 0; + int id = tensor_index; + + nnie_run_cfg->run_idx_.node_idx_ = id; + nnie_run_cfg->cfg_.data_ptr_ = data; + 
ret = NnieGetDstData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size); + if (ret != RET_OK) { + LOGE("NnieGetDstData failed!"); + return RET_ERROR; + } + return RET_OK; +} + +int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size, + int tensor_index) { + HI_U32 ret = 0; + int id = tensor_index; + HI_U32 seg_idx = nnie_run_cfg->run_idx_.seg_idx_; + + if (id >= nnie_run_cfg->param_.model_->astSeg[seg_idx].u16SrcNum) { + LOGE("Nnie input node index error!"); + return RET_ERROR; + } + SVP_BLOB_TYPE_E src_type = nnie_run_cfg->param_.seg_data_[seg_idx].src_[id].enType; + if (SVP_BLOB_TYPE_U8 <= src_type && src_type <= SVP_BLOB_TYPE_YVU422SP) { + if (!(dtype == DataType::kNumberTypeUInt8 || dtype == DataType::kNumberTypeInt8)) { + LOGE("Nnie input node type error!"); + return RET_ERROR; + } + } else { + if (dtype != DataType::kNumberTypeFloat32) { + LOGE("Nnie input node type error!"); + return RET_ERROR; + } + } + nnie_run_cfg->run_idx_.node_idx_ = id; + nnie_run_cfg->cfg_.data_ptr_ = data; + ret = NnieFillSrcData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size); + if (ret != RET_OK) { + LOGE("NnieFillSrcData failed!"); + return RET_ERROR; + } + return RET_OK; +} + +int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box) { + HI_U32 segidx = nnie_run_cfg->run_idx_.seg_idx_; + HI_U32 ret = 0; + + if (segidx >= nnie_run_cfg->param_.model_->u32NetSegNum) { + LOGE("seg num err!\n"); + return RET_ERROR; + } + // NniePrintReportResultInputSeg(&nnie_run_cfg->param_, segidx); + nnie_run_cfg->run_idx_.node_idx_ = 0; + if (run_box) { + ret = + NNIE_ForwardWithBbox(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, &nnie_run_cfg->param_.rpn_bbox_, HI_TRUE); + if (HI_SUCCESS != ret) { + LOGE("NnieForward failed!"); + return RET_ERROR; + } + } else { + ret = NnieForward(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, HI_TRUE); + if (HI_SUCCESS != ret) { + 
LOGE("NnieForward failed!"); + return RET_ERROR; + } + } + + nnie_run_cfg->run_idx_.seg_idx_ = ++segidx; + return RET_OK; +} +} // namespace nnie +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_common.h b/mindspore/lite/tools/benchmark/nnie/src/nnie_common.h new file mode 100644 index 00000000000..2c54e39e3a4 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_common.h @@ -0,0 +1,115 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
+// NOTE(review): the three system header names below were stripped by the extraction that produced this text.
+// <string> and <vector> follow from std::string / std::vector usage; <iostream> is a guess -- confirm.
+#include <iostream>
+#include <string>
+#include <vector>
+#include "include/api/types.h"
+#include "include/mpi_vb.h"
+#include "include/hi_comm_svp.h"
+#include "include/hi_nnie.h"
+#include "include/mpi_nnie.h"
+#include "include/ir/dtype/type_id.h"
+
+namespace mindspore {
+namespace nnie {
+// Round u32Num up to the next multiple of 16 / 32. The argument is parenthesised so that expressions with
+// operators of lower precedence than `+` (e.g. `a | b`, `c ? x : y`) are rounded as a whole.
+#define NNIE_ALIGN_16 16
+#define NNIE_ALIGN16(u32Num) (((u32Num) + NNIE_ALIGN_16 - 1) / NNIE_ALIGN_16 * NNIE_ALIGN_16)
+
+#define NNIE_ALIGN_32 32
+#define NNIE_ALIGN32(u32Num) (((u32Num) + NNIE_ALIGN_32 - 1) / NNIE_ALIGN_32 * NNIE_ALIGN_32)
+
+// Reinterprets a 64-bit address value as a `Type *`.
+#define NNIE_CONVERT_64BIT_ADDR(Type, Addr) reinterpret_cast<Type *>((HI_UL)(Addr))
+// Scale between float values and the S32 fixed-point values stored in NNIE blobs.
+#define NNIE_QUANT_BASE 4096
+
+#define NNIE_COORDI_NUM 4
+#define NNIE_EACH_SEG_STEP_ADDR_NUM 2
+#define NNIE_REPORT_NAME_LENGTH 64
+
+typedef struct {
+  SVP_NNIE_MODEL_S model_;
+  SVP_MEM_INFO_S model_buf_;  // store Model file
+} NnieModel;
+
+// Source and destination blobs of one network segment.
+typedef struct {
+  SVP_SRC_BLOB_S src_[SVP_NNIE_MAX_INPUT_NUM];
+  SVP_DST_BLOB_S dst_[SVP_NNIE_MAX_OUTPUT_NUM];
+} NnieSegData;
+
+// Per-segment flags marking nodes that connect one segment's output to a later segment's input.
+typedef struct {
+  bool src_node_[SVP_NNIE_MAX_INPUT_NUM];
+  bool dst_node_[SVP_NNIE_MAX_OUTPUT_NUM];
+} NNIEMemSegInfo;
+
+typedef struct {
+  NNIEMemSegInfo seg_[SVP_NNIE_MAX_NET_SEG_NUM];
+} NNIEMemCfg;
+
+typedef struct {
+  SVP_NNIE_MODEL_S *model_;
+  HI_U32 task_buf_size_[SVP_NNIE_MAX_NET_SEG_NUM];
+  SVP_MEM_INFO_S task_buf_;
+  SVP_MEM_INFO_S tmp_buf_;
+  SVP_MEM_INFO_S step_buf_;  // store Lstm step info
+  SVP_SRC_BLOB_S rpn_bbox_;
+  NnieSegData seg_data_[SVP_NNIE_MAX_NET_SEG_NUM];  // each seg's input and output blob
+  SVP_NNIE_FORWARD_CTRL_S forward_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
+  SVP_NNIE_FORWARD_WITHBBOX_CTRL_S forward_with_bbox_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
+  NNIEMemCfg mem_cfg_;
+} NnieParam;
+
+typedef struct {
+  HI_VOID *data_ptr_;
+  HI_U32 max_input_num_;
+  HI_U32 max_roi_num_;
+  HI_U32 step_;
+  HI_U64 step_vir_addr_[NNIE_EACH_SEG_STEP_ADDR_NUM *
+                        SVP_NNIE_MAX_NET_SEG_NUM];  // virtual addr of LSTM's or RNN's step buffer
+  SVP_NNIE_ID_E nnie_core_id_[SVP_NNIE_MAX_NET_SEG_NUM];
+} NnieCfg;
+
+typedef struct {
+  HI_U32 seg_idx_;
+  HI_U32 node_idx_;
+} NnieDataIndex;
+
+// Byte sizes of the source and destination blobs of one segment.
+typedef struct {
+  HI_U32 src_size_[SVP_NNIE_MAX_INPUT_NUM];
+  HI_U32 dst_size_[SVP_NNIE_MAX_OUTPUT_NUM];
+} NnieBlobSize;
+
+typedef struct {
+  NnieModel model_;
+  NnieParam param_;
+  NnieCfg cfg_;
+  NnieDataIndex run_idx_;
+} NnieRunCfg;
+
+int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
+
+size_t GetFillIndex(const std::vector<mindspore::MSTensor> &inputs, size_t input_size, const HI_CHAR *name);
+
+void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model);
+
+int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box);
+
+int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size,
+                     int tensor_index);
+
+int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index);
+}  // namespace nnie
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.cc b/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.cc
new file mode 100644
index 00000000000..a9ca88f6a22
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.cc
@@ -0,0 +1,222 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+#include "src/nnie_manager.h"
+#include "src/nnie_common.h"
+#include "src/nnie_print.h"
+#include "src/nnie_memory.h"
+
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+constexpr int kNumInput2 = 2;
+
+namespace mindspore {
+namespace nnie {
+// Reset the whole run configuration and store the ROI limit, step count and per-segment core ids.
+// Only the first SVP_NNIE_MAX_NET_SEG_NUM entries of core_id are used.
+// Returns RET_ERROR when a core id is not below SVP_NNIE_ID_BUTT.
+int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector &core_id) {
+  memset(&nnie_cfg_, 0, sizeof(NnieRunCfg));
+
+  nnie_cfg_.cfg_.max_roi_num_ = max_roi_num;
+
+  nnie_cfg_.cfg_.step_ = step;
+  for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < core_id.size(); i++) {
+    if (core_id[i] < SVP_NNIE_ID_BUTT) {
+      nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[i];
+    } else {
+      LOGE("nnie core num toobig.\n");
+      return RET_ERROR;
+    }
+  }
+  return RET_OK;
+}
+void NNIEManager::SetInputNum(int max_input_num) { nnie_cfg_.cfg_.max_input_num_ = max_input_num; }
+
+// Create the NNIE resources for the model; on failure everything created so far is released again.
+int NNIEManager::Init(char *model_buf, int size, const std::vector &inputs) {
+  if (NnieCommCreate(&nnie_cfg_, model_buf, size, inputs) != RET_OK) {
+    NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+// Run segment seg_id and copy its results into outputs.
+// Returns RET_ERROR when seg_id is out of range, the forward pass fails, or the outputs cannot be read.
+int NNIEManager::Run(std::vector *outputs, unsigned int seg_id,
+                     const std::vector> &outputs_shape) {
+  // Same range check as FillData(): seg_id indexes astSeg[] directly below.
+  if (seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
+    LOGE("seg num err!");
+    return RET_ERROR;
+  }
+  bool run_box = false;
+  nnie_cfg_.run_idx_.seg_idx_ = seg_id;
+  if (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI) {
+    run_box = true;
+  }
+
+  if (NnieCommRun(&nnie_cfg_, run_box)) {
+    LOGE("Nnie Run Fail!");
+    return RET_ERROR;
+  }
+  if (GetOutputData(outputs, outputs_shape, run_box)) {
+    LOGE("Get Output Data Fail!");
+    return RET_ERROR;
+  }
+  return RET_OK;
+}
+
+void NNIEManager::Release() {
+  // NniePrintReportResult(&nnie_cfg_.param_);
+  NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
+}
+
+// Copy the destination blobs of segment (run_idx_.seg_idx_ - 1) into the output tensors.
+// NOTE(review): the "- 1" presumes NnieCommRun() advanced seg_idx_ past the segment it ran - confirm.
+// For ROI segments (run_box) each float32 output is first reshaped to [roi_num, outputs_shape[i]...].
+int NNIEManager::GetOutputData(std::vector *outputs,
+                               const std::vector> &outputs_shape, bool run_box) {
+  int i, j, output_size = outputs->size();
+  // seg_idx_ is unsigned: without this guard "seg_idx_ - 1" would wrap to a huge index.
+  if (nnie_cfg_.run_idx_.seg_idx_ == 0) {
+    LOGE("seg num err!");
+    return RET_ERROR;
+  }
+  HI_U32 seg_idx = nnie_cfg_.run_idx_.seg_idx_ - 1;
+  if (output_size != nnie_cfg_.param_.model_->astSeg[seg_idx].u16DstNum) {
+    LOGE("seg%d: %d output tensors are required, but there are %d outputs.", seg_idx,
+         nnie_cfg_.param_.model_->astSeg[seg_idx].u16DstNum, output_size);
+    return RET_ERROR;
+  }
+
+  if (run_box) {
+    // outputs_shape[i] is read for every output below, so it must provide one entry per output.
+    if (outputs_shape.size() < outputs->size()) {
+      LOGE("Output shape num err!");
+      return RET_ERROR;
+    }
+    for (i = 0; i < output_size; i++) {
+      auto input_data_type = (*outputs)[i].DataType();
+      if (input_data_type == DataType::kNumberTypeFloat32) {
+        auto ptr_shape = outputs_shape[i];
+        int max_roi_num = nnie_cfg_.param_.seg_data_[seg_idx].dst_[0].u32Num;
+        ptr_shape.insert(ptr_shape.begin(), max_roi_num);
+        (*outputs)[i].SetShape(ptr_shape);
+      } else {
+        LOGE("Unsupported DataType!");
+        return RET_ERROR;
+      }
+    }
+  }
+  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_idx].u16DstNum; i++) {
+    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_idx].dst_node_[i]) {
+      continue;
+    }
+
+    j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_idx].astDstNode[i].szName);
+    if (j == output_size) {
+      // No tensor matched the node name: fall back to positional matching.
+      j = i;
+      // LOGW("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
+      //      nnie_cfg_.param_.model_->astSeg[seg_idx].astDstNode[i].szName);
+    }
+
+    auto input_data_type = (*outputs)[j].DataType();
+    if (input_data_type == DataType::kNumberTypeFloat32) {
+      auto ptr_shape = (*outputs)[j].Shape();
+      auto ptr = reinterpret_cast((*outputs)[j].MutableData());
+      if (NnieCommGetOutputData(&nnie_cfg_, ptr, ptr_shape.data(), ptr_shape.size(), i) != RET_OK) {
+        return RET_ERROR;
+      }
+    } else {
+      LOGE("Unsupported DataType!");
+      return RET_ERROR;
+    }
+  }
+
+  return RET_OK;
+}
+
+// Quantize the ROI tensor (rows of NNIE_COORDI_NUM floats) into rpn_bbox_ and flush the cache for that blob.
+// Returns RET_ERROR when the tensor is not [roi_num, NNIE_COORDI_NUM], has more rows than max_roi_num_,
+// or has no data.
+int NNIEManager::FillRoiPooling(mindspore::MSTensor *input) {
+  constexpr size_t kRoiRank = 2;
+  auto roi_shape = input->Shape();
+  // Check the rank before indexing, and reject a negative row count before it is stored as unsigned.
+  if (roi_shape.size() < kRoiRank || roi_shape[0] < 0 || roi_shape[1] != NNIE_COORDI_NUM) {
+    LOGE("Roi shape err!");
+    return RET_ERROR;
+  }
+
+  if (roi_shape[0] > static_cast(nnie_cfg_.cfg_.max_roi_num_)) {
+    LOGE("NNIE_RUNTIME_CONFIG_PATH: The maximum [max_roi_num] value set is less than the actual value: %d < %d.",
+         nnie_cfg_.cfg_.max_roi_num_, static_cast(roi_shape[0]));
+    return RET_ERROR;
+  }
+  auto float_src_data = reinterpret_cast(input->MutableData());
+  if (float_src_data == nullptr) {
+    LOGE("Roi data is nullptr!");
+    return RET_ERROR;
+  }
+  nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height = roi_shape[0];
+  HI_U32 dst_stride = nnie_cfg_.param_.rpn_bbox_.u32Stride;
+  auto proposal_result = NNIE_CONVERT_64BIT_ADDR(HI_S32, nnie_cfg_.param_.rpn_bbox_.u64VirAddr);
+
+  // One destination row per ROI; rows are dst_stride bytes apart.
+  for (size_t j = 0; j < nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height; j++) {
+    proposal_result[dst_stride / sizeof(HI_U32) * j] = *(float_src_data++) * NNIE_QUANT_BASE;
+    proposal_result[dst_stride / sizeof(HI_U32) * j + 1] = *(float_src_data++) * NNIE_QUANT_BASE;
+    proposal_result[dst_stride / sizeof(HI_U32) * j + 2] = *(float_src_data++) * NNIE_QUANT_BASE;
+    proposal_result[dst_stride / sizeof(HI_U32) * j + 3] = *(float_src_data++) * NNIE_QUANT_BASE;
+  }
+  NnieMemFlushCache(nnie_cfg_.param_.rpn_bbox_.u64PhyAddr,
+                    NNIE_CONVERT_64BIT_ADDR(HI_VOID, nnie_cfg_.param_.rpn_bbox_.u64VirAddr),
+                    dst_stride * nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height);
+
+  return RET_OK;
+}
+
+// Copy the input tensors of segment seg_id into the NNIE source blobs.
+// The last element of inputs is never used as data here (every search and count uses input_size - 1).
+// For ROI segments the tensor named "proposal" is written through FillRoiPooling().
+int NNIEManager::FillData(std::vector *inputs, unsigned int seg_id) {
+  bool run_box = false;
+  size_t i, j;
+  size_t input_size = inputs->size();
+  // input_size is unsigned: with an empty vector every "input_size - 1" below would wrap around.
+  if (input_size == 0) {
+    LOGE("Input Size Err!");
+    return RET_ERROR;
+  }
+  if (seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
+    LOGE("seg num err!");
+    return RET_ERROR;
+  }
+
+  nnie_cfg_.run_idx_.seg_idx_ = seg_id;
+
+  if (nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_].enNetType == SVP_NNIE_NET_TYPE_ROI) {
+    run_box = true;
+    for (i = 0; i < (input_size - 1); i++) {
+      if ((*inputs)[i].Name() == "proposal") {
+        // A rejected ROI tensor must stop the run instead of forwarding stale boxes.
+        if (FillRoiPooling(&(*inputs)[i]) != RET_OK) {
+          LOGE("FillData failed!");
+          return RET_ERROR;
+        }
+        break;
+      }
+    }
+    if (i == (input_size - 1)) {
+      LOGE("Can't find proposal out!");
+      return RET_ERROR;
+    }
+  } else if ((input_size < kNumInput2) ||
+             (input_size - 1) != nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_].u16SrcNum) {
+    LOGE("Input Size Err!");
+    return RET_ERROR;
+  }
+
+  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum; i++) {
+    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].src_node_[i]) {
+      continue;
+    }
+    j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
+    if (j == (input_size - 1)) {
+      // Positional fallback: ROI segments do not verify u16SrcNum against input_size, so i may be past the end.
+      if (i >= input_size) {
+        LOGE("Input Size Err!");
+        return RET_ERROR;
+      }
+      if (run_box && (*inputs)[i].Name() == "proposal") {
+        continue;
+      } else {
+        j = i;
+        // LOGW("input tensor name(%s) can't match wk node name(%s).", (*inputs)[i].Name().c_str(),
+        //      nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
+      }
+    }
+
+    auto input_data_type = (*inputs)[j].DataType();
+    if ((input_data_type == DataType::kNumberTypeFloat32) || (input_data_type == DataType::kNumberTypeUInt8) ||
+        (input_data_type == DataType::kNumberTypeInt8)) {
+      auto ptr_shape = (*inputs)[j].Shape();
+      if (NnieCommFillData(&nnie_cfg_, (*inputs)[j].MutableData(), input_data_type, ptr_shape.data(), ptr_shape.size(),
+                           i) != RET_OK) {
+        LOGE("FillData failed!");
+        return RET_ERROR;
+      }
+    } else {
+      LOGE("Unsupported DataType!");
+      return RET_ERROR;
+    }
+  }
+
+  return RET_OK;
+}
+}  // namespace nnie
+}  // namespace mindspore
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.h b/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.h
new file mode 100644
index 00000000000..0c5888751f5
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_manager.h
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
+#include
+#include "include/errorcode.h"
+#include "include/api/types.h"
+#include "src/nnie_common.h"
+
+namespace mindspore {
+namespace nnie {
+// Owns one NnieRunCfg and drives it: CfgInit -> Init -> (FillData -> Run)* -> Release.
+class NNIEManager {
+ public:
+  // Process-wide instance (function-local static).
+  static NNIEManager *GetInstance() {
+    static NNIEManager manager;
+    return &manager;
+  }
+
+  NNIEManager() {}
+
+  ~NNIEManager() {}
+
+  // Create the NNIE resources for model_buf; releases them again on failure.
+  int Init(char *model_buf, int size, const std::vector &inputs);
+
+  // Zero the run configuration, then store max_roi_num, step and the per-segment core ids.
+  int CfgInit(int max_roi_num, int step, const std::vector &core_id);
+
+  void SetInputNum(int max_input_num);
+
+  // Copy the input tensors of segment seg_id into the NNIE source blobs.
+  int FillData(std::vector *inputs, unsigned int seg_id);
+
+  // Run segment seg_id and copy its results into outputs.
+  int Run(std::vector *outputs, unsigned int seg_id,
+          const std::vector> &outputs_shape);
+
+  void Release();
+
+ private:
+  int GetOutputData(std::vector *outputs, const std::vector> &outputs_shape,
+                    bool run_box = false);
+  int FillRoiPooling(mindspore::MSTensor *input);
+  char *wk_model_ = nullptr;
+
+  int model_size_ = 0;
+
+  // Value-initialized so that Release() on an instance that never went through CfgInit()
+  // sees zeroed fields instead of indeterminate ones.
+  NnieRunCfg nnie_cfg_ = {};
+};
+}  // namespace nnie
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.cc b/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.cc
new file mode 100644
index 00000000000..07a9f4cb823
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.cc
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use
this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "src/nnie_memory.h"
+#include "include/hi_common.h"
+#include "include/mpi_sys.h"
+
+namespace mindspore {
+namespace nnie {
+// Forwards to HI_MPI_SYS_MmzAlloc and hands its status back unchanged.
+HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size) {
+  const HI_S32 status = HI_MPI_SYS_MmzAlloc(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
+  return status;
+}
+
+// Forwards to HI_MPI_SYS_MmzAlloc_Cached and hands its status back unchanged.
+HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr,
+                           HI_U32 size) {
+  const HI_S32 status = HI_MPI_SYS_MmzAlloc_Cached(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
+  return status;
+}
+
+// Forwards to HI_MPI_SYS_MmzFlushCache and hands its status back unchanged.
+HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size) {
+  const HI_S32 status = HI_MPI_SYS_MmzFlushCache(phy_addr, pv_vir_addr, size);
+  return status;
+}
+}  // namespace nnie
+}  // namespace mindspore
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.h b/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.h
new file mode 100644
index 00000000000..140d388c2ad
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_memory.h
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
+#include
+#include
+#include
+#include
+#include
+#include "include/hi_common.h"
+#include "include/hi_debug.h"
+#include "include/hi_comm_svp.h"
+#include "include/hi_nnie.h"
+#include "include/mpi_nnie.h"
+#include "include/mpi_sys.h"
+
+namespace mindspore {
+namespace nnie {
+// Free a block via HI_MPI_SYS_MmzFree when both addresses are non-zero, then zero both arguments.
+// phy and vir are assigned to, so they must be modifiable lvalues.
+#define NNIE_MEM_FREE(phy, vir) \
+  do { \
+    if ((0 != (phy)) && (0 != (vir))) { \
+      HI_MPI_SYS_MmzFree((phy), reinterpret_cast(static_cast(vir))); \
+      (phy) = 0; \
+      (vir) = 0; \
+    } \
+  } while (0)
+
+// Wrapper over HI_MPI_SYS_MmzAlloc; returns that call's status.
+HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
+
+// Wrapper over HI_MPI_SYS_MmzAlloc_Cached; returns that call's status.
+HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
+
+// Wrapper over HI_MPI_SYS_MmzFlushCache; returns that call's status.
+HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size);
+}  // namespace nnie
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_print.cc b/mindspore/lite/tools/benchmark/nnie/src/nnie_print.cc
new file mode 100644
index 00000000000..dc1d2c5b718
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_print.cc
@@ -0,0 +1,176 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/nnie_print.h"
+
+namespace mindspore {
+namespace nnie {
+// Dump every destination blob of every segment to "./ms/fseg<seg>(<node>,<id>)_<name>.txt".
+// Values are written as float, i.e. the stored S32 divided by NNIE_QUANT_BASE.
+// Returns HI_FAILURE when a file name cannot be built, a file cannot be opened, or a write fails
+// in the non-sequence branch.
+HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param) {
+  HI_U32 u32seg_num = pst_nnie_param->model_->u32NetSegNum;
+  HI_U32 i, j, k, n;
+  HI_U32 seg_idx_, node_idx_;
+  HI_S32 ret;
+  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};
+  FILE *fp = nullptr;
+  HI_U32 *pu32StepAddr = nullptr;
+  HI_S32 *ps32ResultAddr = nullptr;
+  HI_U32 u32Height, u32Width, u32Chn, u32Stride, u32Dim;
+
+  for (seg_idx_ = 0; seg_idx_ < u32seg_num; seg_idx_++) {
+    for (node_idx_ = 0; node_idx_ < pst_nnie_param->model_->astSeg[seg_idx_].u16DstNum; node_idx_++) {
+      const auto &node = pst_nnie_param->model_->astSeg[seg_idx_].astDstNode[node_idx_];
+      const SVP_DST_BLOB_S &blob = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_];
+      ret = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH, "./ms/fseg%d(%d,%d)_%s.txt", seg_idx_, node_idx_,
+                     node.u32NodeId, node.szName);
+      if (ret < 0) {
+        LOGE("Error,create file name failed!");
+        return HI_FAILURE;
+      }
+
+      fp = fopen(acReportFileName, "w");
+      if (fp == nullptr) {
+        LOGE("Error,open file failed!");
+        return HI_FAILURE;
+      }
+
+      if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
+        u32Dim = blob.unShape.stSeq.u32Dim;
+        u32Stride = blob.u32Stride;
+        pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
+        ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
+
+        for (n = 0; n < blob.u32Num; n++) {
+          for (i = 0; i < *(pu32StepAddr + n); i++) {
+            for (j = 0; j < u32Dim; j++) {
+              fprintf(fp, "%f ", static_cast(*(ps32ResultAddr + j)) / NNIE_QUANT_BASE);
+            }
+            // Rows are u32Stride bytes apart; advance in 4-byte elements.
+            ps32ResultAddr += u32Stride / sizeof(HI_U32);
+          }
+        }
+      } else {
+        u32Height = blob.unShape.stWhc.u32Height;
+        u32Width = blob.unShape.stWhc.u32Width;
+        u32Chn = blob.unShape.stWhc.u32Chn;
+        u32Stride = blob.u32Stride;
+        ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
+        fprintf(fp, "%s 4 1 %d %d %d\n", node.szName, u32Height, u32Width, u32Chn);
+        for (n = 0; n < blob.u32Num; n++) {
+          for (i = 0; i < u32Chn; i++) {
+            for (j = 0; j < u32Height; j++) {
+              for (k = 0; k < u32Width; k++) {
+                ret = fprintf(fp, "%f ", static_cast(*(ps32ResultAddr + k)) / NNIE_QUANT_BASE);
+                if (ret < 0) {
+                  fclose(fp);
+                  return HI_FAILURE;
+                }
+              }
+              ps32ResultAddr += u32Stride / sizeof(HI_U32);
+            }
+          }
+        }
+      }
+      fclose(fp);
+    }
+  }
+  return HI_SUCCESS;
+}
+
+// Dump every source blob of segment segnum to "seg<seg>_layer<id>_input(<name>)_inst.linear.hex".
+// SEQ_S32 and U8 blobs are written as raw integers; every other blob type is written as float
+// (stored S32 divided by NNIE_QUANT_BASE).
+// Returns HI_FAILURE when segnum is out of range, a file name cannot be built, or a file cannot be opened.
+HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum) {
+  HI_U32 i, j, k, n;
+  HI_U32 seg_idx_ = segnum, node_idx_;
+  HI_S32 ret;
+  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};
+  FILE *fp = nullptr;
+  HI_U32 *pu32StepAddr = nullptr;
+  HI_S32 *ps32ResultAddr = nullptr;
+  HI_U8 *pu8ResultAddr = nullptr;
+  HI_U32 u32Height, u32Width, u32Chn, u32Stride, u32Dim;
+
+  // segnum indexes astSeg[] and seg_data_[] below.
+  if (segnum < 0 || seg_idx_ >= pst_nnie_param->model_->u32NetSegNum) {
+    LOGE("seg num err!");
+    return HI_FAILURE;
+  }
+
+  for (node_idx_ = 0; node_idx_ < pst_nnie_param->model_->astSeg[seg_idx_].u16SrcNum; node_idx_++) {
+    const auto &node = pst_nnie_param->model_->astSeg[seg_idx_].astSrcNode[node_idx_];
+    const SVP_SRC_BLOB_S &blob = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_];
+    ret = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH, "seg%d_layer%d_input(%s)_inst.linear.hex", seg_idx_,
+                   node.u32NodeId, node.szName);
+    if (ret < 0) {
+      LOGE("Error,create file name failed!\n");
+      return HI_FAILURE;
+    }
+
+    fp = fopen(acReportFileName, "w");
+    if (fp == nullptr) {
+      LOGE("Error,open file failed!");
+      return HI_FAILURE;
+    }
+
+    if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
+      u32Dim = blob.unShape.stSeq.u32Dim;
+      u32Stride = blob.u32Stride;
+      pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
+      ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
+
+      for (n = 0; n < blob.u32Num; n++) {
+        for (i = 0; i < *(pu32StepAddr + n); i++) {
+          for (j = 0; j < u32Dim; j++) {
+            fprintf(fp, "%d ", *(ps32ResultAddr + j));
+          }
+          ps32ResultAddr += u32Stride / sizeof(HI_U32);
+        }
+      }
+    } else if (blob.enType == SVP_BLOB_TYPE_U8) {
+      u32Height = blob.unShape.stWhc.u32Height;
+      u32Width = blob.unShape.stWhc.u32Width;
+      u32Chn = blob.unShape.stWhc.u32Chn;
+      u32Stride = blob.u32Stride;
+      pu8ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob.u64VirAddr);
+      for (n = 0; n < blob.u32Num; n++) {
+        for (i = 0; i < u32Chn; i++) {
+          for (j = 0; j < u32Height; j++) {
+            for (k = 0; k < u32Width; k++) {
+              fprintf(fp, "%d ", *(pu8ResultAddr + k));
+            }
+            pu8ResultAddr += u32Stride / sizeof(HI_U8);
+          }
+        }
+      }
+    } else {
+      u32Height = blob.unShape.stWhc.u32Height;
+      u32Width = blob.unShape.stWhc.u32Width;
+      u32Chn = blob.unShape.stWhc.u32Chn;
+      u32Stride = blob.u32Stride;
+      ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
+      fprintf(fp, "%s 4 1 %d %d %d\n", node.szName, u32Height, u32Width, u32Chn);
+      for (n = 0; n < blob.u32Num; n++) {
+        for (i = 0; i < u32Chn; i++) {
+          for (j = 0; j < u32Height; j++) {
+            for (k = 0; k < u32Width; k++) {
+              // Convert first, divide second (as NniePrintReportResult does); dividing the integer
+              // first would truncate the fractional part before the conversion.
+              fprintf(fp, "%f ", static_cast(*(ps32ResultAddr + k)) / NNIE_QUANT_BASE);
+            }
+            ps32ResultAddr += u32Stride / sizeof(HI_U32);
+          }
+        }
+      }
+    }
+    fclose(fp);
+  }
+
+  return HI_SUCCESS;
+}
+}  // namespace nnie
+}  // namespace mindspore
diff --git a/mindspore/lite/tools/benchmark/nnie/src/nnie_print.h b/mindspore/lite/tools/benchmark/nnie/src/nnie_print.h
new file mode 100644
index 00000000000..1d6315fe1ad
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie/src/nnie_print.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
+#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
+#include <cstdio>
+#include "include/mpi_nnie.h"
+#include "include/hi_type.h"
+#include "src/nnie_common.h"
+#include "src/nnie_memory.h"
+
+#define LOG_TAG1 "NNIE"
+// Print "[ERROR] NNIE [file:line] function] " followed by the printf-style message to stderr.
+#define LOGE(format, ...) \
+  do { \
+    if (1) { \
+      fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
+      fprintf(stderr, format, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+// Same as LOGE with a "[Warning]" prefix.
+#define LOGW(format, ...) \
+  do { \
+    if (1) { \
+      fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
+      fprintf(stderr, format, ##__VA_ARGS__); \
+    } \
+  } while (0)
+
+constexpr int kMaxSize = 1024;
+constexpr int kDecimal = 10;
+
+namespace mindspore {
+namespace nnie {
+// Dump all destination blobs of all segments to text files under ./ms/.
+HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param);
+
+// Dump all source blobs of segment segnum to text files in the working directory.
+HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum);
+}  // namespace nnie
+}  // namespace mindspore
+#endif  // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/CMakeLists.txt b/mindspore/lite/tools/benchmark/nnie_proposal/CMakeLists.txt
new file mode 100644
index 00000000000..a55e37c5d3c
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie_proposal/CMakeLists.txt
@@ -0,0 +1,22 @@
+cmake_minimum_required(VERSION 3.14)
+project(NNIE_proposal)
+
+aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)
+
+add_library(mslite_proposal SHARED ${COMMON_SRC3})
+# Include paths are attached to the target instead of the directory scope.
+target_include_directories(mslite_proposal PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}
+        ${CMAKE_CURRENT_SOURCE_DIR}/../runtime
+        ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include
+        ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)
+target_link_libraries(mslite_proposal ${LINK_LOCAT_LIB})
+
+# Strip tool: HIMIX_STRIP when the caller defines it, otherwise the himix200 toolchain default.
+if(DEFINED HIMIX_STRIP)
+    set(NDK_STRIP ${HIMIX_STRIP})
+else()
+    set(NDK_STRIP "arm-himix200-linux-strip")
+endif()
+
+# Strip the built library in Release builds; $<TARGET_FILE:...> follows the real output location.
+if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
+    add_custom_command(TARGET mslite_proposal POST_BUILD
+            COMMAND ${NDK_STRIP} $<TARGET_FILE:mslite_proposal>
+            VERBATIM)
+endif()
diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc
b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc
new file mode 100644
index 00000000000..d338595978a
--- /dev/null
+++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc
@@ -0,0 +1,650 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "src/proposal.h"
+#include
+#include
+#include
+#include "include/errorcode.h"
+
+using mindspore::lite::RET_ERROR;
+using mindspore::lite::RET_OK;
+constexpr int kNumInput2 = 2;
+constexpr int kNCHWDims = 4;
+
+namespace mindspore {
+namespace proposal {
+// Size in bytes of the scratch buffer: anchors, bbox deltas, proposals, ratio/scale anchors,
+// scores and the quick-sort stack, summed for the given anchor grid.
+uint32_t RpnTmpBufSize(uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t input_height,
+                       uint32_t input_width) {
+  uint32_t anchors_num = num_ratio_anchors * num_scale_anchors * input_height * input_width;
+  uint32_t anchors_size = sizeof(uint32_t) * COORDI_NUM * anchors_num;
+  uint32_t bbox_delta_size = anchors_size;
+  uint32_t proposal_size = sizeof(uint32_t) * PROPOSAL_WIDTH * anchors_num;
+  uint32_t ratio_anchors_size = sizeof(float) * num_ratio_anchors * COORDI_NUM;
+  uint32_t scale_anchors_size = sizeof(float) * num_ratio_anchors * num_scale_anchors * COORDI_NUM;
+  uint32_t score_size = sizeof(float) * anchors_num * 2;
+  uint32_t stack_size = sizeof(Stack) * anchors_num;
+  uint32_t total_size =
+    anchors_size + bbox_delta_size + proposal_size + ratio_anchors_size + scale_anchors_size + score_size + stack_size;
+  return total_size;
+}
+
+// Lookup table for QuickExp(): rows 0-4 serve non-negative arguments, rows 5-9 negative ones;
+// each row is indexed by one 4-bit digit of the fixed-point argument.
+static float exp_coef[10][16] = {
+  {1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f, 1.0022f, 1.00244f, 1.00269f,
+   1.00293f, 1.00318f, 1.00342f, 1.00367f},
+  {1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f, 1.03578f, 1.03984f, 1.04391f,
+   1.04799f, 1.05209f, 1.05621f, 1.06034f},
+  {1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f, 1.75505f, 1.86825f, 1.98874f,
+   2.117f, 2.25353f, 2.39888f, 2.55359f},
+  {1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f, 8103.08f, 22026.5f, 59874.1f,
+   162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f},
+  {1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
+   5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
+   5.54062e+034f},
+  {1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f, 0.998049f, 0.997805f, 0.997562f,
+   0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f},
+  {1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f, 0.969233f, 0.965455f, 0.961691f,
+   0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f},
+  {1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f, 0.569783f, 0.535261f,
+   0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f},
+  {1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f, 0.000335463f,
+   0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f, 8.31529e-007f, 3.05902e-007f},
+  {1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f, 0.0f, 0.0f, 0.0f,
+   0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}};
+// Table-driven exp(): the product of one table entry per 4-bit digit of |value| (lowest 20 bits only).
+// SoftMax() passes the real argument multiplied by QUANT_BASE.
+static float QuickExp(int32_t value) {
+  if (value & 0x80000000) {
+    value = ~value + 0x00000001;
+    return exp_coef[5][value & 0x0000000F] * exp_coef[6][(value >> 4) & 0x0000000F] *
+           exp_coef[7][(value >> 8) & 0x0000000F] * exp_coef[8][(value >> 12) & 0x0000000F] *
+           exp_coef[9][(value >> 16) & 0x0000000F];
+  } else {
+    return exp_coef[0][value & 0x0000000F] * exp_coef[1][(value >> 4) & 0x0000000F] *
+           exp_coef[2][(value >> 8) & 0x0000000F] * exp_coef[3][(value >> 12) & 0x0000000F] *
+           exp_coef[4][(value >> 16) & 0x0000000F];
+  }
+}
+
+// In-place softmax over src[0..num). Always returns RET_OK.
+static int32_t SoftMax(float *src, uint32_t num) {
+  if (num == 0) {
+    return RET_OK;
+  }
+  // Seed the maximum with the first element rather than 0: with all-negative inputs a zero seed
+  // leaves the values unshifted, every QuickExp() term can underflow to 0 and the division below
+  // becomes 0/0. With the true maximum the largest element always contributes QuickExp(0) == 1.
+  float max = src[0];
+  float sum = 0;
+  uint32_t i = 0;
+
+  for (i = 1; i < num; ++i) {
+    if (max < src[i]) {
+      max = src[i];
+    }
+  }
+
+  for (i = 0; i < num; ++i) {
+    src[i] = QuickExp(static_cast((src[i] - max) * QUANT_BASE));
+    sum += src[i];
+  }
+
+  for (i = 0; i < num; ++i) {
+    src[i] /= sum;
+  }
+  return RET_OK;
+}
+// Swap two proposal records of PROPOSAL_WIDTH int32 fields each.
+static void Argswap(int32_t *src1, int32_t *src2) {
+  for (uint32_t i = 0; i < PROPOSAL_WIDTH; i++) {
+    int32_t tmp = src1[i];
+    src1[i] = src2[i];
+    src2[i] = tmp;
+  }
+}
+
+// Iterative quick sort of proposal records by field 4 (the score), highest first, using the
+// caller-provided stack. Ranges whose lower bound is above max_num are not subdivided further.
+static int32_t NonRecursiveArgQuickSort(int32_t *array, int32_t low, int32_t high, Stack *stack, int32_t max_num) {
+  int32_t top = 0;
+  stack[top].min_ = low;
+  stack[top].max_ = high;
+
+  while (top > -1) {
+    low = stack[top].min_;
+    high = stack[top].max_;
+    int32_t i = low;
+    int32_t j = high;
+
+    int32_t key_confidence = array[PROPOSAL_WIDTH * low + 4];
+    top--;
+    while (i < j) {
+      while ((i < j) && (key_confidence > array[j * PROPOSAL_WIDTH + 4])) {
+        j--;
+      }
+      if (i < j) {
+        Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
+        i++;
+      }
+
+      while ((i < j) && (key_confidence < array[i * PROPOSAL_WIDTH + 4])) {
+        i++;
+      }
+      if (i < j) {
+        Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
+        j--;
+      }
+    }
+
+    if (low <= max_num) {
+      if (low < i - 1) {
+        top++;
+        stack[top].min_ = low;
+        stack[top].max_ = i - 1;
+      }
+
+      if (high > i + 1) {
+        top++;
+        stack[top].min_ = i + 1;
+        stack[top].max_ = high;
+      }
+    }
+  }
+  return RET_OK;
+}
+
+// When filter_thresh > 0: flag (field 5 = 1) every proposal whose score (field 4) is below the
+// threshold, then compact the unflagged records to the front. *num_after_filter receives the
+// number of remaining proposals (anchors_num when filter_thresh is 0).
+static int32_t FilterLowScoreBbox(int32_t *proposals, uint32_t anchors_num, uint32_t filter_thresh,
+                                  uint32_t *num_after_filter) {
+  uint32_t proposal_cnt = anchors_num;
+
+  if (filter_thresh > 0) {
+    uint32_t i;
+    for (i = 0; i < anchors_num; i++) {
+      if (proposals[PROPOSAL_WIDTH * i + 4] < static_cast(filter_thresh)) {
+        proposals[PROPOSAL_WIDTH * i + 5] = 1;
+      }
+    }
+
+    proposal_cnt = 0;
+    for (i = 0; i < anchors_num; i++) {
+      if (proposals[PROPOSAL_WIDTH * i + 5] == 0) {
+        proposals[PROPOSAL_WIDTH * proposal_cnt] = proposals[PROPOSAL_WIDTH * i];
+        proposals[PROPOSAL_WIDTH * proposal_cnt + 1] = proposals[PROPOSAL_WIDTH * i + 1];
+        proposals[PROPOSAL_WIDTH * proposal_cnt + 2] = proposals[PROPOSAL_WIDTH * i + 2];
+        proposals[PROPOSAL_WIDTH * proposal_cnt + 3] = proposals[PROPOSAL_WIDTH * i + 3];
+        proposals[PROPOSAL_WIDTH * proposal_cnt + 4] = proposals[PROPOSAL_WIDTH * i + 4];
+        proposals[PROPOSAL_WIDTH * proposal_cnt + 5] = proposals[PROPOSAL_WIDTH * i + 5];
+        proposal_cnt++;
+      }
+    }
+  }
+  *num_after_filter = proposal_cnt;
+  return RET_OK;
+}
+
+// Intersection area and union area of two boxes given by inclusive min/max corners.
+static int32_t SVP_NNIE_Overlap(int32_t x_min1, int32_t y_min1, int32_t x_max1, int32_t y_max1, int32_t x_min2,
+                                int32_t y_min2, int32_t x_max2, int32_t y_max2, int32_t *area_sum,
+                                int32_t *area_inter) {
+  /*** Check the input, and change the Return value ***/
+  int32_t inter = 0;
+  int32_t total = 0;
+  int32_t x_min = 0;
+  int32_t y_min = 0;
+  int32_t x_max = 0;
+  int32_t y_max = 0;
+  int32_t area1 = 0;
+  int32_t area2 = 0;
+  int32_t inter_width = 0;
+  int32_t inter_height = 0;
+
+  x_min = MAX(x_min1, x_min2);
+  y_min = MAX(y_min1, y_min2);
+  x_max = MIN(x_max1, x_max2);
+  y_max = MIN(y_max1, y_max2);
+
+  inter_width = x_max - x_min + 1;
+  inter_height = y_max - y_min + 1;
+
+  inter_width = (inter_width >= 0) ? inter_width : 0;
+  inter_height = (inter_height >= 0) ? inter_height : 0;
+
+  inter = inter_width * inter_height;
+  area1 = (x_max1 - x_min1 + 1) * (y_max1 - y_min1 + 1);
+  area2 = (x_max2 - x_min2 + 1) * (y_max2 - y_min2 + 1);
+
+  total = area1 + area2 - inter;
+
+  *area_sum = total;
+  *area_inter = inter;
+  return RET_OK;
+}
+
+// Non-maximum suppression over proposal records: for each unsuppressed box (at most max_roi_num
+// of them are counted), suppress (field 5 = 1) the lower-scored box of every pair whose
+// intersection * QUANT_BASE exceeds nms_thresh * union.
+static int32_t SVP_NNIE_NonMaxSuppression(int32_t *proposals, uint32_t anchors_num, uint32_t nms_thresh,
+                                          uint32_t max_roi_num) {
+  /****** define variables *******/
+  int32_t x_min1;
+  int32_t y_min1;
+  int32_t x_max1;
+  int32_t y_max1;
+  int32_t x_min2;
+  int32_t y_min2;
+  int32_t x_max2;
+  int32_t y_max2;
+  int32_t s32AreaTotal = 0;
+  int32_t area_inter = 0;
+  uint32_t i;
+  uint32_t j;
+  uint32_t num = 0;
+  bool bNoOverlap;
+  for (i = 0; i < anchors_num && num < max_roi_num; i++) {
+    if (proposals[PROPOSAL_WIDTH * i + 5] == 0) {
+      num++;
+      x_min1 = proposals[PROPOSAL_WIDTH * i];
+      y_min1 = proposals[PROPOSAL_WIDTH * i + 1];
+      x_max1 = proposals[PROPOSAL_WIDTH * i + 2];
+      y_max1 = proposals[PROPOSAL_WIDTH * i + 3];
+      for (j = i + 1; j < anchors_num; j++) {
+        if (proposals[PROPOSAL_WIDTH * j + 5] == 0) {
+          x_min2 = proposals[PROPOSAL_WIDTH * j];
+          y_min2 = proposals[PROPOSAL_WIDTH * j + 1];
+          x_max2 = proposals[PROPOSAL_WIDTH * j + 2];
+          y_max2 = proposals[PROPOSAL_WIDTH * j + 3];
+          bNoOverlap = (x_min2 > x_max1) || (x_max2 < x_min1) || (y_min2 > y_max1) || (y_max2 < y_min1);
+          if (bNoOverlap) {
+            continue;
+          }
+          (void)SVP_NNIE_Overlap(x_min1, y_min1, x_max1, y_max1, x_min2, y_min2, x_max2, y_max2, &s32AreaTotal,
+                                 &area_inter);
+          if (area_inter * QUANT_BASE > static_cast(nms_thresh * s32AreaTotal)) {
+            if (proposals[PROPOSAL_WIDTH * i + 4] >= proposals[PROPOSAL_WIDTH * j + 4]) {
+              proposals[PROPOSAL_WIDTH * j + 5] = 1;
+            } else {
+              proposals[PROPOSAL_WIDTH * i + 5] = 1;
+            }
+          }
+        }
+      }
+    }
+  }
+  return RET_OK;
+}
+
+static void Rpn(float **inputs, uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t *scales,
+                uint32_t *ratios, uint32_t ori_image_height, uint32_t ori_image_width,
uint32_t *inputs_height, + uint32_t *inputs_width, uint32_t *inputs_channel, uint32_t inputs_stride, uint32_t max_rois, + uint32_t min_size, uint32_t spatial_scale, uint32_t nms_thresh, uint32_t filter_thresh, + uint32_t num_before_nms, char *pu32MemPool, float *proposal_result, uint32_t dst_stride, + uint32_t *num_rois) { +#if 1 + /******************** define parameters ****************/ + uint32_t size; + int32_t *anchors = nullptr; + int32_t *bbox_delta = nullptr; + int32_t *proposals = nullptr; + int32_t *ptr1 = nullptr; + int32_t *ptr2 = nullptr; + int32_t *ptr3 = nullptr; + uint32_t num_after_filter = 0; + uint32_t num_anchors; + float base_w; + float base_h; + float base_x_ctr; + float base_y_ctr; + float *ratio_anchors = nullptr; + float *f32_ptr = nullptr; + float *f32_ptr2 = nullptr; + float *scale_anchors = nullptr; + float *scores = nullptr; + float f32_size; + uint32_t pixel_interval; + uint32_t src_bbox_index; + uint32_t src_fg_prob_index; + uint32_t src_bg_prob_index; + uint32_t src_bbox_bias; + uint32_t src_prob_bias; + uint32_t des_box; + uint32_t bg_blob_size; + uint32_t anchors_per_pixel; + uint32_t map_size; + uint32_t line_size; + int32_t proposal_width; + int32_t proposal_height; + uint32_t roi_count; + Stack *stack = nullptr; + uint32_t c; + uint32_t h; + uint32_t w; + uint32_t i; + uint32_t j; + uint32_t p; + uint32_t q; + uint32_t z; + uint32_t base_anchor[4] = {0, 0, (min_size - 1), (min_size - 1)}; + + /*********************************** Faster RCNN *********************************************/ + /********* calculate the start pointer of each part in MemPool *********/ + anchors = reinterpret_cast(pu32MemPool); + num_anchors = num_ratio_anchors * num_scale_anchors * (inputs_height[0] * inputs_width[0]); + size = COORDI_NUM * num_anchors; + pu32MemPool += size * sizeof(int32_t); + + bbox_delta = reinterpret_cast(pu32MemPool); + pu32MemPool += size * sizeof(int32_t); + + proposals = reinterpret_cast(pu32MemPool); + size = PROPOSAL_WIDTH * 
num_anchors; + pu32MemPool += size * sizeof(int32_t); + + ratio_anchors = reinterpret_cast(static_cast(pu32MemPool)); + f32_ptr = reinterpret_cast(static_cast(pu32MemPool)); + size = num_ratio_anchors * COORDI_NUM; + f32_ptr = f32_ptr + size; + + scale_anchors = f32_ptr; + size = num_scale_anchors * num_ratio_anchors * COORDI_NUM; + f32_ptr = f32_ptr + size; + + scores = f32_ptr; + size = num_anchors * SCORE_NUM; + f32_ptr = f32_ptr + size; + + stack = reinterpret_cast(f32_ptr); + + /********************* Generate the base anchor ***********************/ + base_w = static_cast(base_anchor[2] - base_anchor[0] + 1); + base_h = static_cast(base_anchor[3] - base_anchor[1] + 1); + base_x_ctr = static_cast(base_anchor[0] + ((base_w - 1) * 0.5)); + base_y_ctr = static_cast(base_anchor[1] + ((base_h - 1) * 0.5)); + + /*************** Generate Ratio Anchors for the base anchor ***********/ + f32_ptr = ratio_anchors; + f32_size = base_w * base_h; + for (i = 0; i < num_ratio_anchors; i++) { + float f32_ratios = static_cast(ratios[i]) / QUANT_BASE; + base_w = sqrt(f32_size / f32_ratios); + base_w = static_cast( + 1.0 * ((base_w) >= 0 ? static_cast(base_w + HALF_VAL) : static_cast(base_w - HALF_VAL))); + base_h = base_w * f32_ratios; + base_h = static_cast( + 1.0 * ((base_h) >= 0 ? 
static_cast(base_h + HALF_VAL) : static_cast(base_h - HALF_VAL))); + + *f32_ptr++ = static_cast(base_x_ctr - ((base_w - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_y_ctr - ((base_h - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_x_ctr + ((base_w - 1) * HALF_VAL)); + *(f32_ptr++) = static_cast(base_y_ctr + ((base_h - 1) * HALF_VAL)); + } + + /********* Generate Scale Anchors for each Ratio Anchor **********/ + f32_ptr = ratio_anchors; + f32_ptr2 = scale_anchors; + /* Generate Scale Anchors for one pixel */ + for (i = 0; i < num_ratio_anchors; i++) { + for (j = 0; j < num_scale_anchors; j++) { + base_w = *(f32_ptr + 2) - *(f32_ptr) + 1; + base_h = *(f32_ptr + 3) - *(f32_ptr + 1) + 1; + base_x_ctr = static_cast(*(f32_ptr) + ((base_w - 1) * HALF_VAL)); + base_y_ctr = static_cast(*(f32_ptr + 1) + ((base_h - 1) * HALF_VAL)); + + *(f32_ptr2++) = + static_cast(base_x_ctr - ((base_w * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_y_ctr - ((base_h * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_x_ctr + ((base_w * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + *(f32_ptr2++) = + static_cast(base_y_ctr + ((base_h * (static_cast(scales[j]) / QUANT_BASE) - 1) * HALF_VAL)); + } + f32_ptr += COORDI_NUM; + } + + /******************* Copy the anchors to every pixel in the feature map ******************/ + ptr1 = anchors; + pixel_interval = QUANT_BASE / spatial_scale; + + for (p = 0; p < inputs_height[0]; p++) { + for (q = 0; q < inputs_width[0]; q++) { + f32_ptr2 = scale_anchors; + for (z = 0; z < num_scale_anchors * num_ratio_anchors; z++) { + *(ptr1++) = static_cast(q * pixel_interval + *(f32_ptr2++)); + *(ptr1++) = static_cast(p * pixel_interval + *(f32_ptr2++)); + *(ptr1++) = static_cast(q * pixel_interval + *(f32_ptr2++)); + *(ptr1++) = static_cast(p * pixel_interval + *(f32_ptr2++)); + } + } + } + + /********** do transpose, convert the blob from (M,C,H,W) to 
(M,H,W,C) **********/ + map_size = inputs_height[1] * inputs_stride / sizeof(uint32_t); + anchors_per_pixel = num_ratio_anchors * num_scale_anchors; + bg_blob_size = anchors_per_pixel * map_size; + line_size = inputs_stride / sizeof(uint32_t); + src_prob_bias = 0; + src_bbox_bias = 0; + + for (c = 0; c < inputs_channel[1]; c++) { + for (h = 0; h < inputs_height[1]; h++) { + for (w = 0; w < inputs_width[1]; w++) { + src_bbox_index = src_bbox_bias + c * map_size + h * line_size + w; + src_bg_prob_index = src_prob_bias + (c / COORDI_NUM) * map_size + h * line_size + w; + src_fg_prob_index = bg_blob_size + src_bg_prob_index; + + des_box = (anchors_per_pixel) * (h * inputs_width[1] + w) + c / COORDI_NUM; + + uint32_t des_bbox_delta_index = COORDI_NUM * des_box + c % COORDI_NUM; + bbox_delta[des_bbox_delta_index] = static_cast(inputs[1][src_bbox_index] * QUANT_BASE); + + uint32_t des_score_index = (SCORE_NUM)*des_box; + scores[des_score_index] = inputs[0][src_bg_prob_index]; + scores[des_score_index + 1] = inputs[0][src_fg_prob_index]; + } + } + } + + /************************* do softmax ****************************/ + f32_ptr = scores; + for (i = 0; i < num_anchors; i++) { + SoftMax(f32_ptr, SCORE_NUM); + f32_ptr += SCORE_NUM; + } + + /************************* BBox Transform *****************************/ + for (i = 0; i < num_anchors; i++) { + ptr1 = anchors; + ptr1 = ptr1 + COORDI_NUM * i; + ptr2 = proposals; + ptr2 = ptr2 + PROPOSAL_WIDTH * i; + ptr3 = bbox_delta; + ptr3 = ptr3 + COORDI_NUM * i; + f32_ptr = scores; + f32_ptr = f32_ptr + i * (SCORE_NUM); + + proposal_width = *(ptr1 + 2) - *(ptr1) + 1; + proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1; + int32_t proposal_center_x = *(ptr1) + static_cast(proposal_width * HALF_VAL); + int32_t proposal_center_y = *(ptr1 + 1) + static_cast(proposal_height * HALF_VAL); + int32_t pred_center_x = + static_cast((static_cast(*(ptr3)) / QUANT_BASE) * proposal_width + proposal_center_x); + int32_t pred_center_y = + 
static_cast((static_cast(*(ptr3 + 1)) / QUANT_BASE) * proposal_height + proposal_center_y); + + int32_t pred_w = static_cast(proposal_width * QuickExp(static_cast(*(ptr3 + 2)))); + int32_t pred_h = static_cast(proposal_height * QuickExp(static_cast(*(ptr3 + 3)))); + *(ptr2) = static_cast(pred_center_x - HALF_VAL * pred_w); + *(ptr2 + 1) = static_cast(pred_center_y - HALF_VAL * pred_h); + *(ptr2 + 2) = static_cast(pred_center_x + HALF_VAL * pred_w); + *(ptr2 + 3) = static_cast(pred_center_y + HALF_VAL * pred_h); + *(ptr2 + 4) = static_cast(*(f32_ptr + 1) * QUANT_BASE); + *(ptr2 + 5) = 0; + } + + /************************ clip bbox *****************************/ + for (i = 0; i < num_anchors; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + *ptr1 = MAX(MIN(*ptr1, static_cast(ori_image_width) - 1), 0); + *(ptr1 + 1) = MAX(MIN(*(ptr1 + 1), static_cast(ori_image_height) - 1), 0); + *(ptr1 + 2) = MAX(MIN(*(ptr1 + 2), static_cast(ori_image_width) - 1), 0); + *(ptr1 + 3) = MAX(MIN(*(ptr1 + 3), static_cast(ori_image_height) - 1), 0); + } + + /************ remove the bboxes which are too small *************/ + for (i = 0; i < num_anchors; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + proposal_width = *(ptr1 + 2) - *(ptr1) + 1; + proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1; + if (proposal_width < static_cast(min_size) || proposal_height < static_cast(min_size)) { + *(ptr1 + 5) = 1; + } + } + + /********** remove low score bboxes ************/ + (void)FilterLowScoreBbox(proposals, num_anchors, filter_thresh, &num_after_filter); + + /********** sort ***********/ + (void)NonRecursiveArgQuickSort(proposals, 0, num_after_filter - 1, stack, static_cast(num_before_nms)); + num_after_filter = (num_after_filter < num_before_nms) ? 
num_after_filter : num_before_nms; + + /* do nms to remove highly overlapped bbox */ + (void)SVP_NNIE_NonMaxSuppression(proposals, num_after_filter, nms_thresh, max_rois); /* function NMS */ + + /************** write the final result to output ***************/ + roi_count = 0; + for (i = 0; i < num_after_filter; i++) { + ptr1 = proposals; + ptr1 = ptr1 + PROPOSAL_WIDTH * i; + if (*(ptr1 + 5) == 0) { + proposal_result[dst_stride / sizeof(uint32_t) * roi_count] = *ptr1; + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 1] = *(ptr1 + 1); + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 2] = *(ptr1 + 2); + proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 3] = *(ptr1 + 3); + roi_count++; + } + if (roi_count >= max_rois) { + break; + } + } + + *num_rois = roi_count; +#endif +} + +int32_t ProposalInit(ProposalParam *param, const std::vector &inputs, uint32_t max_roi_num, + uint32_t ori_image_height, uint32_t ori_image_width) { + uint32_t tmp_buf_size = 0; + uint32_t bbox_buf_size = 0; + uint32_t total_size = 0; + param->max_roi_num_ = max_roi_num; + + param->num_ratio_anchors_ = 1; + param->num_scale_anchors_ = NUM_SCALE_ANCHORS; + param->scales_[0] = 1.5 * QUANT_BASE; + param->scales_[1] = 2.1 * QUANT_BASE; + param->scales_[2] = 2.9 * QUANT_BASE; + param->scales_[3] = 4.1 * QUANT_BASE; + param->scales_[4] = 5.8 * QUANT_BASE; + param->scales_[5] = 8.0 * QUANT_BASE; + param->scales_[6] = 11.3 * QUANT_BASE; + param->scales_[7] = 15.8 * QUANT_BASE; + param->scales_[8] = 22.1 * QUANT_BASE; + param->ratios_[0] = 2.44 * QUANT_BASE; + + param->ori_image_height_ = ori_image_height; + param->ori_image_width_ = ori_image_width; + param->min_size_ = MIN_SIZE; + param->spatial_scale_ = (uint32_t)(0.0625 * QUANT_BASE); + param->nms_thresh_ = (uint32_t)(0.7 * QUANT_BASE); + param->filter_thresh_ = 0; + param->num_before_nms_ = NUM_NMS; + + param->rpn_bounding_box_.chn_ = 1; + param->rpn_bounding_box_.height_ = max_roi_num; + 
param->rpn_bounding_box_.width_ = COORDI_NUM; + param->rpn_bounding_box_.stride_ = COORDI_NUM * sizeof(float); + param->rpn_bounding_box_.num_ = 1; + if (inputs.size() < kNumInput2) { + LOGE("inputs tensor size error."); + return RET_ERROR; + } + + for (int i = 0; i < kNumInput2; i++) { + auto input_data_type = inputs[i].DataType(); + if (input_data_type == DataType::kNumberTypeFloat32) { + auto ptr_shape = inputs[i].Shape(); + if ((ptr_shape.size() == kNCHWDims)) { + param->inputs_height_[i] = ptr_shape[2]; + param->inputs_width_[i] = ptr_shape[3]; + param->inputs_channel_[i] = ptr_shape[1]; + if (i == 0) { + param->inputs_stride_ = ptr_shape[3] * sizeof(float); + } + } + } + } + + tmp_buf_size = RpnTmpBufSize(param->num_ratio_anchors_, param->num_scale_anchors_, param->inputs_height_[0], + param->inputs_width_[0]); + + bbox_buf_size = param->rpn_bounding_box_.num_ * param->rpn_bounding_box_.height_ * param->rpn_bounding_box_.stride_; + total_size = tmp_buf_size + bbox_buf_size; + + if (param->rpn_tmp_buf_ != nullptr) { + free(param->rpn_tmp_buf_); + param->rpn_tmp_buf_ = nullptr; + } + param->rpn_tmp_buf_ = malloc(total_size); + if (param->rpn_tmp_buf_ == nullptr) { + LOGE("malloc buf fail."); + return RET_ERROR; + } + param->rpn_bounding_box_.data_ = reinterpret_cast(param->rpn_tmp_buf_) + tmp_buf_size; + + return RET_OK; +} + +int32_t ProposalRun(std::vector *inputs, std::vector *outputs, + ProposalParam *param) { + if (inputs->size() < kNumInput2) { + LOGE("inputs tensor size error."); + return RET_ERROR; + } + if (outputs->size() != 1) { + LOGE("outputs tensor size error."); + return RET_ERROR; + } + for (int i = 0; i < kNumInput2; i++) { + auto input_data_type = inputs->at(i).DataType(); + if (input_data_type == DataType::kNumberTypeFloat32) { + param->inputs_[i] = reinterpret_cast((*inputs)[i].MutableData()); + } + } + auto output_data_type = (*outputs)[0].DataType(); + if (output_data_type != DataType::kNumberTypeFloat32) { + LOGE("outputs tensor data type 
error."); + return RET_ERROR; + } + + Rpn(param->inputs_, param->num_ratio_anchors_, param->num_scale_anchors_, param->scales_, param->ratios_, + param->ori_image_height_, param->ori_image_width_, param->inputs_height_, param->inputs_width_, + param->inputs_channel_, param->inputs_stride_, param->max_roi_num_, param->min_size_, param->spatial_scale_, + param->nms_thresh_, param->filter_thresh_, param->num_before_nms_, reinterpret_cast(param->rpn_tmp_buf_), + reinterpret_cast(param->rpn_bounding_box_.data_), param->rpn_bounding_box_.stride_, + ¶m->rpn_bounding_box_.height_); + + std::vector shape{static_cast(param->rpn_bounding_box_.height_), COORDI_NUM}; + (*outputs)[0].SetShape(shape); + auto output_data = (*outputs)[0].MutableData(); + memcpy(output_data, param->rpn_bounding_box_.data_, param->rpn_bounding_box_.height_ * COORDI_NUM * sizeof(float)); + + return RET_OK; +} + +void ProposalDeInit(ProposalParam *param) { + if (param->rpn_tmp_buf_ != 0) { + free(param->rpn_tmp_buf_); + param->rpn_tmp_buf_ = 0; + } +} +} // namespace proposal +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.h b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.h new file mode 100644 index 00000000000..b156e14dfa6 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.h @@ -0,0 +1,95 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ +#include +#include "include/api/types.h" + +#define LOG_TAG1 "Proposal" +#define LOGE(format, ...) \ + do { \ + if (1) { \ + fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \ + fprintf(stderr, format, ##__VA_ARGS__); \ + } \ + } while (0) + +#define LOGW(format, ...) \ + do { \ + if (1) { \ + fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \ + fprintf(stderr, format, ##__VA_ARGS__); \ + } \ + } while (0) + +namespace mindspore { +namespace proposal { +typedef struct { + uint32_t stride_; + void *data_; + uint32_t num_; + uint32_t width_; + uint32_t height_; + uint32_t chn_; +} RpnBoundingBox; + +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define HALF_VAL 0.5f // the half value +#define COORDI_NUM 4 // coordinate numbers +#define PROPOSAL_WIDTH 6 // the number of proposal values +#define QUANT_BASE 4096 // the base value +#define SCORE_NUM 2 // the num of RPN scores +#define NUM_SCALE_ANCHORS 9 +#define NUM_NMS 6000 +#define MIN_SIZE 16 + +typedef struct { + uint32_t scales_[9]; + uint32_t ratios_[9]; + uint32_t inputs_height_[2]; + uint32_t inputs_width_[2]; + uint32_t inputs_channel_[2]; + uint32_t inputs_stride_; + uint32_t num_ratio_anchors_; + uint32_t num_scale_anchors_; + uint32_t ori_image_height_; + uint32_t ori_image_width_; + uint32_t min_size_; + uint32_t spatial_scale_; + uint32_t nms_thresh_; + uint32_t filter_thresh_; + uint32_t max_roi_num_; + uint32_t num_before_nms_; + float *inputs_[2]; + void *rpn_tmp_buf_; + RpnBoundingBox rpn_bounding_box_; +} ProposalParam; + +typedef struct { + int32_t min_; + int32_t max_; +} Stack; + +int32_t ProposalInit(ProposalParam *param, const std::vector &inputs, uint32_t max_roi_num, + uint32_t ori_image_height, uint32_t ori_image_width); +int32_t 
ProposalRun(std::vector *inputs, std::vector *outputs, + ProposalParam *param); +void ProposalDeInit(ProposalParam *param); +} // namespace proposal +} // namespace mindspore +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_ diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.cc b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.cc new file mode 100644 index 00000000000..cc1261ddf57 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.cc @@ -0,0 +1,200 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "src/proposal_fp32.h" +#include +#include +#include "schema/model_generated.h" +#include "include/registry/register_kernel.h" +#include "include/errorcode.h" + +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; +constexpr int kMaxSize = 1024; +constexpr int kNumInput2 = 2; +constexpr int kDecimal = 10; + +namespace mindspore { +namespace proposal { +int ProposalCPUKernel::Prepare() { + if (inputs_.size() < kNumInput2) { + LOGE("inputs tensor num error."); + return RET_ERROR; + } + if (outputs_.size() != 1) { + LOGE("outputs tensor num error."); + return RET_ERROR; + } + std::vector inputs_name = {"rpn_cls_score", "rpn_bbox_pred"}; + std::vector inputs; + for (size_t i = 0; i < inputs_name.size(); i++) { + bool find_flag = false; + for (auto &input : inputs_) { + if (input.Name() == inputs_name[i]) { + inputs.push_back(input); + find_flag = true; + break; + } + } + if (!find_flag) { + for (auto &input : inputs_) { + if (std::find(inputs.begin(), inputs.end(), input) != inputs.end()) { + continue; + } + inputs.push_back(input); + LOGW("input tensor name diff '%s' vs '%s'.", inputs_name[i].c_str(), input.Name().c_str()); + break; + } + } + } + if (inputs.size() != inputs_name.size()) { + LOGE("inputs size error."); + return RET_ERROR; + } + this->set_inputs(inputs); + if (inputs[0].Shape()[0] != 1) { + LOGE("proposal only support input num == 1."); + return RET_ERROR; + } + + outputs_[0].SetTensorName("proposal"); + + int max_roi_num_int = 300; + auto *max_roi_num = std::getenv("MAX_ROI_NUM"); + if (max_roi_num != nullptr) { + auto iter = + std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; }); + if (iter != max_roi_num) { + *iter = '\0'; + max_roi_num_int = atoi(max_roi_num); + } else { + LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", max_roi_num_int); + } + } else { + LOGW("MAX_ROI_NUM ENV is not set, now set to 
default value %d", max_roi_num_int); + } + + return ProposalInit(&proposal_param_, inputs_, max_roi_num_int, image_height_, image_weight_); +} + +int ProposalCPUKernel::ReSize() { + if (inputs_[0].Shape()[0] != 1) { + LOGE("proposal only support input num == 1."); + return RET_ERROR; + } + return RET_OK; +} + +int ProposalCPUKernel::Execute() { return ProposalRun(&inputs_, &outputs_, &proposal_param_); } + +ProposalCPUKernel::~ProposalCPUKernel() { ProposalDeInit(&proposal_param_); } + +bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) { + int attr_size; + for (size_t i = 0; i < op->attr()->size(); i++) { + if (op->attr()->Get(i)->name()->str() == attr) { + auto output_info = op->attr()->Get(i)->data(); + attr_size = static_cast(output_info->size()); + if (attr_size >= buf_size) { + LOGE("attr size too big"); + return false; + } + for (int j = 0; j < attr_size; j++) { + buf[j] = static_cast(output_info->Get(j)); + } + buf[attr_size] = 0; + return true; + } + } + return false; +} + +std::shared_ptr ProposalCreateKernel(const std::vector &inputs, + const std::vector &outputs, + const mindspore::schema::Primitive *primitive, + const mindspore::Context *ctx) { + if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return nullptr; + } + + auto op = primitive->value_as_Custom(); + if (op->attr()->size() < 1) { + LOGE("There are at least 1 attribute of Custom"); + return nullptr; + } + int64_t ndims; + int64_t image_height; + int64_t image_width; + + char *res = nullptr; + char buf[kMaxSize]; + if (GetCustomAttr(buf, kMaxSize, op, "proposal_id")) { + res = nullptr; + ndims = strtol(buf, &res, kDecimal); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have id"); + return nullptr; + } + + if (GetCustomAttr(buf, kMaxSize, op, "image_height")) { + res = nullptr; + 
image_height = strtol(buf, &res, kDecimal); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have image_height"); + return nullptr; + } + if (GetCustomAttr(buf, kMaxSize, op, "image_width")) { + res = nullptr; + image_width = strtol(buf, &res, kDecimal); + if ((*res) != 0) { + LOGE("Get attr id data fail"); + return nullptr; + } + } else { + LOGE("Proposal Custom op should have image_width"); + return nullptr; + } + + auto kernel = std::make_shared(inputs, outputs, primitive, ctx, ndims, image_height, image_width); + // auto kernel = new (std::nothrow) ProposalCPUKernel(inputs, outputs, primitive, ctx, ndims, image_height, + // image_width); + if (kernel == nullptr) { + LOGE("new custom kernel is nullptr"); + return nullptr; + } + return kernel; +} +} // namespace proposal +} // namespace mindspore + +namespace mindspore { +namespace kernel { +namespace { +const auto kFloat32 = DataType::kNumberTypeFloat32; +} +REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, Proposal, proposal::ProposalCreateKernel) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.h b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.h new file mode 100644 index 00000000000..bcd5e4720e5 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_fp32.h @@ -0,0 +1,51 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ + +#include +#include "schema/model_generated.h" +#include "include/context.h" +#include "include/api/kernel.h" +#include "src/proposal.h" + +using mindspore::kernel::Kernel; +namespace mindspore { +namespace proposal { +class ProposalCPUKernel : public Kernel { + public: + ProposalCPUKernel(const std::vector &inputs, const std::vector &outputs, + const mindspore::schema::Primitive *primitive, const mindspore::Context *ctx, int id, + int image_height, int image_width) + : Kernel(inputs, outputs, primitive, ctx), id_(id), image_height_(image_height), image_weight_(image_width) {} + + ~ProposalCPUKernel() override; + + int Prepare() override; + int ReSize() override; + int Execute() override; + + private: + proposal::ProposalParam proposal_param_ = {0}; + int64_t id_; + int64_t image_height_; + int64_t image_weight_; +}; +} // namespace proposal +} // namespace mindspore + +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_ diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.cc b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.cc new file mode 100644 index 00000000000..d4fad4e2544 --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.cc @@ -0,0 +1,77 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/proposal_infer.h" +#include +#include +#include "include/errorcode.h" +#include "src/proposal.h" +#include "include/api/format.h" +#include "include/registry/register_kernel_interface.h" + +using mindspore::kernel::KernelInterface; +using mindspore::lite::RET_ERROR; +using mindspore::lite::RET_OK; +using mindspore::schema::PrimitiveType_Custom; + +namespace mindspore { +namespace proposal { +std::shared_ptr ProposalInferCreater() { + auto infer = std::make_shared(); + if (infer == nullptr) { + LOGE("new custom infer is nullptr"); + return nullptr; + } + + return infer; +} +Status ProposalInterface::Infer(std::vector *inputs, std::vector *outputs, + const mindspore::schema::Primitive *primitive) { + if (inputs->size() != 2) { + LOGE("Inputs size less 2"); + return kLiteError; + } + if (outputs->size() == 0) { + LOGE("Outputs size 0"); + return kLiteError; + } + if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) { + LOGE("Primitive type is not PrimitiveType_Custom"); + return kLiteError; + } + + size_t id = 0; + while (id < outputs->size()) { + // 待补完 + // outputs[id]->format_ = input->format_; + // outputs[id]->data_type_ = kNumberTypeFloat32; + // 设置type为int + std::vector shape{-1, COORDI_NUM}; + (*outputs)[id].SetShape(shape); + (*outputs)[id].SetDataType(DataType::kNumberTypeFloat32); + (*outputs)[id].SetFormat(Format::NCHW); + id++; + } + return kSuccess; +} +} // namespace proposal +} // namespace mindspore +namespace mindspore { +namespace kernel { +// static KernelInterfaceReg a(aa, 
schema::PrimitiveType_Custom, CustomInferCreater); +REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, Proposal, proposal::ProposalInferCreater); +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.h b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.h new file mode 100644 index 00000000000..9f9f3875bbf --- /dev/null +++ b/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal_infer.h @@ -0,0 +1,35 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ +#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ +#include +#include "include/kernel_interface.h" + +namespace mindspore { +namespace proposal { +class ProposalInterface : public mindspore::kernel::KernelInterface { + public: + ProposalInterface() {} + + ~ProposalInterface() = default; + + Status Infer(std::vector *inputs, std::vector *outputs, + const mindspore::schema::Primitive *primitive) override; +}; +} // namespace proposal +} // namespace mindspore +#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_ diff --git a/mindspore/lite/tools/benchmark/run_benchmark.cc b/mindspore/lite/tools/benchmark/run_benchmark.cc index 50e02858fc1..fa96e55acea 100644 --- a/mindspore/lite/tools/benchmark/run_benchmark.cc +++ b/mindspore/lite/tools/benchmark/run_benchmark.cc @@ -26,6 +26,12 @@ namespace lite { int RunBenchmark(int argc, const char **argv) { BenchmarkFlags flags; Option err = flags.ParseFlags(argc, argv); +#ifdef SUPPORT_NNIE + if (SvpSysInit() != RET_OK) { + std::cerr << "SVP Init failed" << std::endl; + return RET_ERROR; + } +#endif if (err.IsSome()) { std::cerr << err.Get() << std::endl; std::cerr << flags.Usage() << std::endl; @@ -36,7 +42,9 @@ int RunBenchmark(int argc, const char **argv) { std::cerr << flags.Usage() << std::endl; return RET_OK; } - +#ifdef SUPPORT_NNIE + BenchmarkBase *benchmark = new (std::nothrow) Benchmark(&flags); +#else auto api_type = std::getenv("MSLITE_API_TYPE"); if (api_type != nullptr) { MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type; @@ -53,6 +61,7 @@ int RunBenchmark(int argc, const char **argv) { BENCHMARK_LOG_ERROR("Invalid MSLITE_API_TYPE, (OLD/NEW/C, default:OLD)"); return RET_ERROR; } +#endif if (benchmark == nullptr) { BENCHMARK_LOG_ERROR("new benchmark failed "); return RET_ERROR; @@ -61,6 +70,7 @@ int RunBenchmark(int argc, const char **argv) { auto status = benchmark->Init(); if (status != 0) { 
BENCHMARK_LOG_ERROR("Benchmark init Error : " << status); + delete benchmark; return RET_ERROR; } auto model_name = flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1); @@ -68,6 +78,7 @@ int RunBenchmark(int argc, const char **argv) { status = benchmark->RunBenchmark(); if (status != 0) { BENCHMARK_LOG_ERROR("Run Benchmark " << model_name << " Failed : " << status); + delete benchmark; return RET_ERROR; } diff --git a/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh b/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh index 9aa44d806c2..8d283e47605 100644 --- a/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh +++ b/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh @@ -53,58 +53,6 @@ function Run_Build_x86() { fi } -# Build arm32 for nnie -function Run_Build_arm() { - # decompress release_pkg - cd ${open_source_ms_path}/output/ || exit 1 - file_name=$(ls ./*linux-${package_name}.tar.gz) - IFS="-" read -r -a file_name_array <<< "$file_name" - version=${file_name_array[2]} - tar -xf mindspore-lite-${version}-linux-${package_name}.tar.gz - - # cp runtime folder - cd ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name} || exit 1 - rm -rf ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/runtime/ - mkdir -p ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/runtime/ || exit 1 - rm -rf ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/runtime/ - mkdir -p ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/runtime/ || exit 1 - cp -r ./runtime/ ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/ - cp -r ./runtime/ ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/ - - # compile nnie runtime so - export TOOLCHAIN_NAME=${toolchain_name} - export 
TOOLCHAIN_FILE=${open_source_ms_path}/mindspore/lite/cmake/${toolchain_name}.toolchain.cmake - export MSLITE_REGISTRY_DEVICE=${device_name} - - # disable gpu & npu & train - export MSLITE_GPU_BACKEND=off - export MSLITE_ENABLE_NPU=off - export MSLITE_ENABLE_TRAIN=off - export MSLITE_ENABLE_NNIE=on - - bash ${nnie_code_path}/mindspore/build.sh -I ${task} -e cpu -j ${thread_num} - if [ $? = 0 ]; then - echo "build arm for nnie success" - release_path=${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/providers/${device_name}/ - rm -rf ${release_path} - mkdir -p ${release_path} - mkdir -p ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/tools/benchmark/ - cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/benchmark ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/tools/benchmark/ || exit 1 - cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/nnie/libmslite_nnie.so ${release_path}/ || exit 1 - cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/nnie_proposal/libmslite_proposal.so ${release_path}/ || exit 1 - if [ ${device_name} == "Hi3516D" ]; then - cp ${nnie_code_path}/mindspore/mindspore/lite/micro/example/hi3516d/libmicro_nnie.so ${release_path}/ || exit 1 - fi - echo "cp new nnie so to release pkg success" - cd ${open_source_ms_path}/output/ || exit 1 - rm ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}.tar.gz - tar -zcf ./mindspore-lite-${version}-linux-${package_name}.tar.gz ./mindspore-lite-${version}-linux-${package_name}/ || exit 1 - sha256sum ./mindspore-lite-${version}-linux-${package_name}.tar.gz > ./mindspore-lite-${version}-linux-${package_name}.tar.gz.sha256 || exit 1 - else - echo "build arm for nnie failed"; return 1 - fi -} - # bashpath should be /home/jenkins/agent-working-dir/workspace/Compile_Lite_ARM32_3516D/ basepath=$(pwd) echo "basepath is ${basepath}" @@ -123,12 +71,8 @@ while 
getopts "I:b:j:t:d:" opt; do echo "branch name is ${OPTARG}" ;; t) - toolchain_name=${OPTARG} - echo "toolchain_name is ${OPTARG}" ;; d) - device_name=${OPTARG} - echo "device_name is ${OPTARG}" ;; j) thread_num=${OPTARG} @@ -163,14 +107,6 @@ fi if [ ${task} == "x86_64" ]; then echo "start building x86 for nnie..." Run_Build_x86 -elif [ ${task} == "arm32" ]; then - echo "start building arm32 for nnie..." - package_name=aarch32 - Run_Build_arm -elif [ ${task} == "arm64" ]; then - echo "start building arm64 for nnie..." - package_name=aarch64 - Run_Build_arm fi Run_build_PID=$! diff --git a/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh b/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh deleted file mode 100644 index 5c69722d34a..00000000000 --- a/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -prepare_third_party() { - dpico_third_party=${mindspore_lite_top_dir}/tools/benchmark/dpico/third_party - rm -rf ${dpico_third_party} || exit 1 - mkdir -p ${dpico_third_party} || exit 1 - cd ${mindspore_top_dir}/output || exit 1 - file_name=$(ls *tar.gz) - tar_name=${file_name%%.tar.gz} - tar xzvf ${tar_name}.tar.gz || exit 1 - cd .. 
- cp -rf ${mindspore_top_dir}/output/${tar_name}/runtime/ ${dpico_third_party} || exit 1 -} - -# Build arm64 for dpico -make_dpico_benchmark_package() { - cd ${mindspore_top_dir}/output || exit 1 - file_name=$(ls *tar.gz) - tar_name=${file_name%%.tar.gz} - dpico_sd3403_release_path=${mindspore_top_dir}/output/${tar_name}/providers/SD3403/ - mkdir -p ${dpico_sd3403_release_path} - dpico_benchmark_path=${mindspore_top_dir}/mindspore/lite/build/tools/benchmark - cp ${dpico_benchmark_path}/dpico/libdpico_acl_adapter.so ${dpico_sd3403_release_path} || exit 1 - echo "install dpico adapter so success." - rm ${tar_name}.tar.gz || exit 1 - tar -zcf ${tar_name}.tar.gz ${tar_name} || exit 1 - rm -rf ${tar_name} || exit 1 - sha256sum ${tar_name}.tar.gz > ${tar_name}.tar.gz.sha256 || exit 1 - echo "generate dpico package success!" - cd ${basepath} - rm -rf ${dpico_third_party} || exit 1 -} - -basepath=$(pwd) -echo "basepath is ${basepath}" -#set -e -mindspore_top_dir=${basepath} -mindspore_lite_top_dir=${mindspore_top_dir}/mindspore/lite - -while getopts "t:" opt; do - case ${opt} in - t) - task=${OPTARG} - echo "compile task is ${OPTARG}" - ;; - ?) - echo "unknown para" - exit 1;; - esac -done - -if [[ ${task} == "prepare_third_party" ]]; then - prepare_third_party - if [ $? -eq 1 ]; then - echo "prepare third party failed" - return 1 - fi -else - echo "start make package for dpico..." - make_dpico_benchmark_package & - make_dpico_benchmark_package_pid=$! - sleep 1 - - wait ${make_dpico_benchmark_package_pid} - make_dpico_benchmark_package_status=$? - exit ${make_dpico_benchmark_package_status} -fi