forked from mindspore-Ecosystem/mindspore
add 35xx build
This commit is contained in:
parent
74c8a66ab9
commit
bbfd0dbdce
|
@ -6,6 +6,7 @@
|
|||
mindspore/mindspore/lite/src/ops/primitive_c.cc:mindspore::lite::PrimitiveC::Create
|
||||
mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc:mindspore::dataset::CsvOp::CsvParser::InitCsvParser
|
||||
mindspore/mindspore/lite/tools/converter/graphdef_transform.cc:mindspore::lite::GraphDefTransform::Transform
|
||||
mindspore/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc:mindspore::proposal::Rpn
|
||||
mindspore/mindspore/core/abstract/primitive_infer_map.cc:mindspore::abstract::GetPrimitiveToEvalImplMap
|
||||
mindspore/mindspore/ccsrc/frontend/optimizer/irpass.cc:mindspore::opt::irpass::OptimizeIRPassLib::OptimizeIRPassLib
|
||||
mindspore/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc:mindspore::parallel::GatherV2PInfo::CheckStrategy
|
||||
|
|
|
@ -11,6 +11,7 @@ set(TEST_CASE_DIR ${TOP_DIR}/mindspore/lite/test/build)
|
|||
set(RUNTIME_DIR ${RUNTIME_PKG_NAME}/runtime)
|
||||
set(RUNTIME_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include)
|
||||
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/runtime/lib)
|
||||
set(PROVIDERS_LIB_DIR ${RUNTIME_PKG_NAME}/providers)
|
||||
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include/dataset)
|
||||
set(TURBO_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/libjpeg-turbo)
|
||||
set(GLOG_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/glog)
|
||||
|
@ -18,6 +19,10 @@ set(SECUREC_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/securec)
|
|||
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
|
||||
set(MINDSPORE_CORE_LIB_NAME libmindspore_core)
|
||||
set(BENCHMARK_NAME benchmark)
|
||||
set(MSLITE_NNIE_LIB_NAME libmslite_nnie)
|
||||
set(MSLITE_PROPOSAL_LIB_NAME libmslite_proposal)
|
||||
set(MICRO_NNIE_LIB_NAME libmicro_nnie)
|
||||
set(DPICO_ACL_ADAPTER_LIB_NAME libdpico_acl_adapter)
|
||||
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
|
||||
|
||||
set(MINDSPORE_LITE_TRAIN_LIB_NAME libmindspore-lite-train)
|
||||
|
@ -227,11 +232,31 @@ if(PLATFORM_ARM64)
|
|||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ops*" EXCLUDE)
|
||||
install(DIRECTORY ${TOP_DIR}/include/c_api/ DESTINATION ${RUNTIME_INC_DIR}/c_api
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
|
||||
if(NOT TARGET_MIX210)
|
||||
__install_micro_wrapper()
|
||||
endif()
|
||||
if(MSLITE_ENABLE_TOOLS)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(NOT BUILD_FIRST)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(TARGET_HIMIX)
|
||||
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3559A")
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
install(FILES
|
||||
${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
elseif(TARGET_MIX210)
|
||||
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403")
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/dpico/${DPICO_ACL_ADAPTER_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if(SUPPORT_TRAIN)
|
||||
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
|
||||
${RUNTIME_COMPONENT_NAME})
|
||||
|
@ -310,7 +335,27 @@ elseif(PLATFORM_ARM32)
|
|||
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
__install_micro_wrapper()
|
||||
if(MSLITE_ENABLE_TOOLS AND NOT TARGET_OHOS_LITE)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(NOT BUILD_FIRST)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME
|
||||
DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(TARGET_HIMIX)
|
||||
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D" OR ${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3519A")
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
install(FILES
|
||||
${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D")
|
||||
install(FILES
|
||||
${TOP_DIR}/mindspore/lite/tools/benchmark/nnie/third_patry/${MICRO_NNIE_LIB_NAME}.so
|
||||
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if(SUPPORT_TRAIN)
|
||||
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
|
||||
${RUNTIME_COMPONENT_NAME})
|
||||
|
@ -516,7 +561,10 @@ else()
|
|||
__install_micro_codegen()
|
||||
endif()
|
||||
if(MSLITE_ENABLE_TOOLS)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
if(NOT BUILD_FIRST)
|
||||
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
|
||||
COMPONENT ${RUNTIME_COMPONENT_NAME})
|
||||
endif()
|
||||
if(SUPPORT_TRAIN)
|
||||
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
|
||||
${RUNTIME_COMPONENT_NAME})
|
||||
|
|
|
@ -35,13 +35,10 @@ if(NOT PLATFORM_ARM32 AND NOT TARGET_HIMIX AND NOT MACHINE_LINUX_ARM64)
|
|||
list(APPEND SDOT_FILES ${SDOT_SRC})
|
||||
add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
|
||||
add_dependencies(nnacl_optimize_mid fbs_src)
|
||||
if(NOT TARGET_MIX210)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
endif()
|
||||
|
||||
if(TARGET_MIX210)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
|
||||
endif()
|
||||
|
||||
if(MSLITE_ENABLE_FP16)
|
||||
|
|
|
@ -10,7 +10,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_link_option.cmake)
|
|||
set(MSLITE_GPU_BACKEND "" CACHE STRING "enable gpu backend, \
|
||||
opencl only support arm64 and x86_64 , tensorrt only support x86_64, opencl/cuda/tensorrt/off")
|
||||
set(MSLITE_REGISTRY_DEVICE "off" CACHE STRING "Compile Mindspore Lite that supports specific devices, \
|
||||
currently supported devices: Hi3516D/Hi3519A/Hi3559A/sd3403")
|
||||
currently supported devices: Hi3516D/Hi3519A/Hi3559A/SD3403")
|
||||
option(MSLITE_ENABLE_NPU "enable npu, only arm64 or arm32 support" off)
|
||||
option(MSLITE_ENABLE_TRAIN "enable train" on)
|
||||
option(MSLITE_ENABLE_SSE "enable SSE instruction set, only x86_64 support" off)
|
||||
|
@ -53,13 +53,6 @@ if(DEFINED ENV{MSLITE_GPU_BACKEND})
|
|||
endif()
|
||||
if(DEFINED ENV{MSLITE_REGISTRY_DEVICE})
|
||||
set(MSLITE_REGISTRY_DEVICE $ENV{MSLITE_REGISTRY_DEVICE})
|
||||
if(MSLITE_REGISTRY_DEVICE STREQUAL sd3403)
|
||||
if(NOT PLATFORM_ARM64)
|
||||
set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on)
|
||||
else()
|
||||
set(MSLITE_ENABLE_DPICO_ACL_ADAPTER on)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
if(DEFINED ENV{MSLITE_ENABLE_NPU})
|
||||
set(MSLITE_ENABLE_NPU $ENV{MSLITE_ENABLE_NPU})
|
||||
|
@ -190,6 +183,9 @@ elseif(PLATFORM_ARM32)
|
|||
elseif(WIN32)
|
||||
set(MSLITE_GPU_BACKEND "off")
|
||||
else()
|
||||
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403")
|
||||
set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on)
|
||||
endif()
|
||||
if(MSLITE_GPU_BACKEND STREQUAL "")
|
||||
set(MSLITE_GPU_BACKEND "off")
|
||||
endif()
|
||||
|
@ -379,10 +375,6 @@ else()
|
|||
set(RUNTIME_COMPONENT_NAME "linux-x64")
|
||||
endif()
|
||||
|
||||
if(MSLITE_ENABLE_DPICO_ACL_ADAPTER)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark/dpico)
|
||||
endif()
|
||||
|
||||
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(CORE_DIR ${TOP_DIR}/mindspore/core)
|
||||
set(CCSRC_DIR ${TOP_DIR}/mindspore/ccsrc)
|
||||
|
@ -567,16 +559,16 @@ if(BUILD_MINDDATA STREQUAL "lite_cv")
|
|||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
|
||||
endif()
|
||||
|
||||
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/ops)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
|
||||
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
|
||||
add_subdirectory(${CCSRC_DIR}/backend/kernel_compiler/cpu/nnacl build)
|
||||
endif()
|
||||
|
||||
|
||||
if(MSLITE_ENABLE_TOOLS)
|
||||
if(NOT BUILD_FIRST)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
|
||||
endif()
|
||||
if(SUPPORT_TRAIN)
|
||||
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train)
|
||||
endif()
|
||||
|
|
|
@ -25,6 +25,15 @@ checkndk() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Verify that the Hi35XX SDK location is configured and copy its third-party
# directory into the NNIE benchmark source tree.
#
# Globals read:
#   HI35XX_SDK_PATH - root directory of the Hi35XX SDK; must be non-empty.
#   BASEPATH        - root directory of the source checkout.
# Exits with status 1 when HI35XX_SDK_PATH is unset/empty or when the copy fails.
check_Hi35xx() {
  if [[ "X${HI35XX_SDK_PATH}" == "X" ]]; then
    echo "error: to compile the runtime package of Hi35XX, you need to set HI35XX_SDK_PATH to declare the path of Hi35XX sdk."
    exit 1
  fi

  # The directory name is spelled "third_patry" on purpose: the benchmark CMake
  # files reference exactly this path, so it must not be "corrected" here.
  local sdk_third_party="${HI35XX_SDK_PATH}/third_patry"
  local nnie_dir="${BASEPATH}/mindspore/lite/tools/benchmark/nnie/"

  # Quote both paths so directories containing spaces or glob characters are
  # passed to cp as single arguments, and stop the build if the copy fails
  # instead of continuing without the SDK files.
  if ! cp -r "${sdk_third_party}" "${nnie_dir}"; then
    echo "error: failed to copy ${sdk_third_party} to ${nnie_dir}"
    exit 1
  fi
}
|
||||
|
||||
get_version() {
|
||||
VERSION_MAJOR=$(grep "const int ms_version_major =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]")
|
||||
VERSION_MINOR=$(grep "const int ms_version_minor =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]")
|
||||
|
@ -142,16 +151,19 @@ build_lite() {
|
|||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake
|
||||
fi
|
||||
|
||||
BRANCH_NAME=nnie_3516_master_dev
|
||||
BRANCH_NAME=nnie_3516_master
|
||||
if [[ ("${MSLITE_REGISTRY_DEVICE}" == "Hi3516D" || "${TOOLCHAIN_NAME}" == "himix200") && "${local_lite_platform}" == "arm32" ]]; then
|
||||
TOOLCHAIN_NAME="himix200"
|
||||
MSLITE_REGISTRY_DEVICE=Hi3516D
|
||||
check_Hi35xx
|
||||
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3559A" && "${local_lite_platform}" == "arm64" ]]; then
|
||||
TOOLCHAIN_NAME="himix100"
|
||||
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" && "${local_lite_platform}" == "arm64" ]]; then
|
||||
check_Hi35xx
|
||||
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "SD3403" && "${local_lite_platform}" == "arm64" ]]; then
|
||||
TOOLCHAIN_NAME="mix210"
|
||||
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3519A" && "${local_lite_platform}" == "arm32" ]]; then
|
||||
TOOLCHAIN_NAME="himix200"
|
||||
check_Hi35xx
|
||||
elif [[ ("${MSLITE_ENABLE_NNIE}" == "on" || "${MSLITE_REGISTRY_DEVICE}" == "Hi3516D") && "${local_lite_platform}" == "x86_64" ]]; then
|
||||
MSLITE_REGISTRY_DEVICE=Hi3516D
|
||||
fi
|
||||
|
@ -190,13 +202,11 @@ build_lite() {
|
|||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=himix100"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off"
|
||||
elif [[ "${TOOLCHAIN_NAME}" == "mix210" ]]; then
|
||||
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/mindspore/lite/cmake/mix210.toolchain.cmake
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=mix210"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off"
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=on -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
|
||||
else
|
||||
if [[ "${machine}" == "aarch64" ]]; then
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMACHINE_LINUX_ARM64=on"
|
||||
|
@ -228,26 +238,25 @@ build_lite() {
|
|||
if [[ "X$CMAKE_TOOLCHAIN_FILE" != "X" ]]; then
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}"
|
||||
fi
|
||||
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then
|
||||
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]]; then
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}"
|
||||
fi
|
||||
if [[ "${local_lite_platform}" == "arm64" || "${local_lite_platform}" == "arm32" ]]; then
|
||||
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
|
||||
fi
|
||||
echo "cmake ${LITE_CMAKE_ARGS} ${BASEPATH}/mindspore/lite"
|
||||
if [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then
|
||||
export MSLITE_REGISTRY_DEVICE=""
|
||||
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
|
||||
export MSLITE_REGISTRY_DEVICE=sd3403
|
||||
else
|
||||
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
|
||||
fi
|
||||
|
||||
echo "cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON ${BASEPATH}/mindspore/lite"
|
||||
cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON "${BASEPATH}/mindspore/lite"
|
||||
|
||||
if [[ "$(uname)" == "Darwin" && "${local_lite_platform}" != "x86_64" ]]; then
|
||||
xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk iphoneos -quiet
|
||||
elif [[ "$(uname)" == "Darwin" && "${local_lite_platform}" == "x86_64" ]]; then
|
||||
xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk iphonesimulator -quiet
|
||||
else
|
||||
make -j$THREAD_NUM && make install
|
||||
cp -r ${BASEPATH}/output/tmp/mindspore*/runtime ${BASEPATH}/mindspore/lite/tools/benchmark
|
||||
cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=off --target benchmark "${BASEPATH}/mindspore/lite"
|
||||
|
||||
make -j$THREAD_NUM && make install && make package
|
||||
if [[ "${local_lite_platform}" == "x86_64" ]]; then
|
||||
if [ "${JAVA_HOME}" ]; then
|
||||
|
@ -288,37 +297,16 @@ build_lite() {
|
|||
fi
|
||||
|
||||
[ -n "${BASEPATH}" ] && rm -rf ${BASEPATH}/output/tmp/
|
||||
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then
|
||||
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "SD3403" ]]; then
|
||||
local compile_nnie_script=${BASEPATH}/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh
|
||||
cd ${BASEPATH}/../
|
||||
if [[ "${local_lite_platform}" == "x86_64" ]]; then
|
||||
bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -j $THREAD_NUM
|
||||
else
|
||||
bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -t ${TOOLCHAIN_NAME} -d ${MSLITE_REGISTRY_DEVICE} -j $THREAD_NUM
|
||||
fi
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "compile ${local_lite_platform} for nnie failed."
|
||||
exit 1
|
||||
fi
|
||||
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then
|
||||
LITE_CMAKE_ARGS=$(echo ${LITE_CMAKE_ARGS} | sed -e "s/MSLITE_ENABLE_TOOLS=off/MSLITE_ENABLE_TOOLS=on/g")
|
||||
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}"
|
||||
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
|
||||
cd ${BASEPATH}
|
||||
compile_dpico_script=${BASEPATH}/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh
|
||||
bash ${compile_dpico_script} -t prepare_third_party
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "prepare for dpico failed."
|
||||
exit 1
|
||||
fi
|
||||
cd ${BASEPATH}/mindspore/lite/build
|
||||
make -j$THREAD_NUM
|
||||
cd ${BASEPATH}
|
||||
sh ${compile_dpico_script}
|
||||
if [[ $? -ne 0 ]]; then
|
||||
echo "second compile arm64 for dpico failed."
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
echo "---------------- mindspore lite: build success ----------------"
|
||||
fi
|
||||
|
|
|
@ -21,6 +21,8 @@ else()
|
|||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
string(REPLACE "-O2" "-O0" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
string(REPLACE "-O2" "-O0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${SECURE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${SECURE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
|
|
|
@ -3,6 +3,9 @@ function(merge_parser CL_SRC_DIR OUT_FILE_NAME)
|
|||
if(NOT EXISTS ${CL_SRC_DIR})
|
||||
return()
|
||||
endif()
|
||||
if(DEFINED BUILD_FIRST AND NOT BUILD_FIRST)
|
||||
return()
|
||||
endif()
|
||||
file(GLOB_RECURSE CL_LIST ${CL_SRC_DIR}/*.cc)
|
||||
list(SORT CL_LIST)
|
||||
set(out_file ${OUT_FILE_NAME})
|
||||
|
|
|
@ -19,6 +19,9 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
|||
|
||||
#set(CMAKE_CXX_FLAGS "-march= -mfloat-abi=softfp -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
|
||||
|
||||
# cache flags
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis)
|
||||
message("build cmsis kernels")
|
||||
include_directories(${CMSIS_DIR}/CMSIS/Core/Include)
|
||||
include_directories(${CMSIS_DIR}/CMSIS/DSP/Include)
|
||||
include_directories(${CMSIS_DIR}/CMSIS/NN/Include)
|
||||
|
||||
file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c)
|
||||
|
||||
file(GLOB CMSIS_OPS
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c
|
||||
)
|
||||
|
|
@ -16,7 +16,24 @@ if(PLATFORM_ARM64)
|
|||
elseif(PLATFORM_ARM32)
|
||||
add_compile_definitions(ENABLE_ARM32)
|
||||
else()
|
||||
include(${MICRO_DIR}/cmake/package_cmsis.cmake)
|
||||
set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis)
|
||||
message("build cmsis kernels")
|
||||
include_directories(${CMSIS_DIR}/CMSIS/Core/Include)
|
||||
include_directories(${CMSIS_DIR}/CMSIS/DSP/Include)
|
||||
include_directories(${CMSIS_DIR}/CMSIS/NN/Include)
|
||||
|
||||
file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c)
|
||||
file(GLOB CMSIS_OPS
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c
|
||||
${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c
|
||||
)
|
||||
add_library(cmsis_nn STATIC ${CMSIS_OPS})
|
||||
endif()
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
# Build x86 tar.gz file for dpico
|
||||
function Run_Build_x86() {
|
||||
export MSLITE_REGISTRY_DEVICE=sd3403
|
||||
export MSLITE_REGISTRY_DEVICE=SD3403
|
||||
unset JAVA_HOME
|
||||
bash ${mindspore_top_dir}/build.sh -I x86_64 -j 80
|
||||
if [ $? = 0 ]; then
|
||||
|
@ -19,7 +19,7 @@ function Run_Build_x86() {
|
|||
|
||||
# Build arm64 tar.gz file for dpico
|
||||
function Run_Build_arm64() {
|
||||
export MSLITE_REGISTRY_DEVICE=sd3403
|
||||
export MSLITE_REGISTRY_DEVICE=SD3403
|
||||
unset JAVA_HOME
|
||||
bash ${mindspore_top_dir}/build.sh -I arm64 -j 80
|
||||
if [ $? = 0 ]; then
|
||||
|
|
|
@ -1,4 +1,48 @@
|
|||
# add shared link library
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(Lite_benchmark)
|
||||
|
||||
# Libraries linked into the benchmark executable. The list starts with the lite runtime and is
# extended below depending on the target platform / registered device.
set(BENCHMARK_LINK_LIB mindspore-lite)
if(TARGET_HIMIX)
    # NNIE targets: build the NNIE provider and proposal libraries alongside the benchmark.
    add_subdirectory(nnie)
    add_subdirectory(nnie_proposal)
    set(CMAKE_SKIP_BUILD_RPATH on)
    list(APPEND BENCHMARK_LINK_LIB pthread
            mslite_proposal mslite_nnie dl nnie mpi VoiceEngine upvqe dnvqe securec)
    # The device name is expanded inside quotes so that an empty value compares as "" instead of
    # producing a malformed if() expression, and so the value is never re-interpreted as the
    # name of another variable.
    if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3516D")
        include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk)
        link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk/lib)
        list(APPEND BENCHMARK_LINK_LIB mindspore::json)
    elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3519A")
        include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk)
        link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk/lib)
        list(APPEND BENCHMARK_LINK_LIB mindspore::json)
    elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3559A")
        include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk)
        link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk/lib)
        # This device does not link mindspore::json; the define compiles the JSON-dependent
        # benchmark code out.
        add_compile_definitions(BENCHMARK_CLIP_JSON)
    endif()
elseif(TARGET_MIX210)
    set(CMAKE_SKIP_BUILD_RPATH on)
    list(APPEND BENCHMARK_LINK_LIB mindspore::json pthread
            dpico_acl_adapter svp_acl dl securec protobuf-c stdc++)
    if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "SD3403")
        add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/dpico)
        message("34xx_sdk_SOURCE_DIR:${34xx_sdk_SOURCE_DIR}.")
        include_directories(${34xx_sdk_SOURCE_DIR}/include)
        include_directories(${34xx_sdk_SOURCE_DIR})
        link_directories(${34xx_sdk_SOURCE_DIR}/lib)
    endif()
else()
    list(APPEND BENCHMARK_LINK_LIB mindspore::json)
    if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64)
        if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static")
            list(APPEND BENCHMARK_LINK_LIB c++_shared)
        endif()
    elseif(NOT MSVC)
        list(APPEND BENCHMARK_LINK_LIB pthread)
    endif()
endif()
|
||||
|
||||
include_directories(${CCSRC_DIR}/backend/kernel_compiler/cpu)
|
||||
set(COMMON_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
|
||||
|
@ -7,56 +51,23 @@ set(COMMON_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_common.c
|
||||
)
|
||||
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
|
||||
|
||||
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
|
||||
set(COMMON_SRC ${COMMON_SRC} ../common/opengl_util.cc)
|
||||
endif()
|
||||
|
||||
add_executable(benchmark
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc
|
||||
${COMMON_SRC})
|
||||
|
||||
add_dependencies(benchmark fbs_src)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include/third_party)
|
||||
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/lib)
|
||||
|
||||
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
|
||||
list(APPEND opengl_lib EGL GLESv3)
|
||||
target_link_libraries(benchmark ${opengl_lib})
|
||||
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} ${opengl_lib})
|
||||
endif()
|
||||
|
||||
if((PLATFORM_ARM32 OR PLATFORM_ARM64) AND NOT TARGET_HIMIX
|
||||
AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64 AND NOT TARGET_MIX210)
|
||||
if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static")
|
||||
target_link_libraries(benchmark mindspore-lite mindspore::json c++_shared)
|
||||
else()
|
||||
target_link_libraries(benchmark mindspore-lite mindspore::json)
|
||||
endif()
|
||||
elseif(MSVC)
|
||||
target_link_libraries(benchmark mindspore-lite mindspore::json)
|
||||
else()
|
||||
target_link_libraries(benchmark mindspore-lite mindspore::json pthread)
|
||||
endif()
|
||||
else()
|
||||
__download_pkg(34xx_sdk
|
||||
http://mindspore-repo.csi.rnd.huawei.com/mindspore/enterprise/dpico/34xx_sdk.tar.gz
|
||||
f64a9129615b3b41b63debe17c6785af)
|
||||
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include/third_party)
|
||||
|
||||
include_directories(${34xx_sdk_SOURCE_DIR}/include)
|
||||
include_directories(${34xx_sdk_SOURCE_DIR})
|
||||
link_directories(${34xx_sdk_SOURCE_DIR}/lib)
|
||||
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/lib)
|
||||
set(CMAKE_SKIP_BUILD_RPATH on)
|
||||
|
||||
add_executable(benchmark
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
|
||||
|
@ -68,6 +79,4 @@ else()
|
|||
|
||||
add_dependencies(benchmark fbs_src)
|
||||
|
||||
target_link_libraries(benchmark mindspore-lite mindspore::json pthread
|
||||
dpico_acl_adapter dl svp_acl securec protobuf-c stdc++)
|
||||
endif()
|
||||
target_link_libraries(benchmark ${BENCHMARK_LINK_LIB})
|
||||
|
|
|
@ -35,6 +35,12 @@
|
|||
#include <asm/unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef SUPPORT_NNIE
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_comm_vb.h"
|
||||
#include "include/mpi_sys.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
@ -344,18 +350,48 @@ int Benchmark::InitContext(const std::shared_ptr<Context> &context) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// Looks up a session output tensor by shape alone.
//
// node_shape: expected dimensions; each entry is narrowed to int before comparison.
// Returns the output tensor whose shape equals node_shape when exactly one such tensor exists;
// logs an error and returns nullptr when there is no match or the match is ambiguous.
tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
  // Tensor shapes are compared as vectors of int, so convert the requested dimensions first.
  std::vector<int> wanted_shape;
  for (const size_t &dim : node_shape) {
    wanted_shape.push_back(static_cast<int>(dim));
  }

  // Collect every output tensor that has exactly the requested shape.
  std::vector<tensor::MSTensor *> candidates;
  auto outputs = session_->GetOutputs();
  for (auto &name_and_tensor : outputs) {
    auto *out_tensor = name_and_tensor.second;
    if (out_tensor->shape() == wanted_shape) {
      candidates.emplace_back(out_tensor);
    }
  }

  // Only a unique match is usable: zero or several candidates cannot be resolved by shape.
  if (candidates.size() == 1) {
    return candidates.front();
  }
  MS_LOG(ERROR) << "get tensor by node shape failed";
  return nullptr;
}
|
||||
|
||||
// Resolves the session output tensor that corresponds to a calibration entry.
//
// Lookup order:
//   1. node_or_tensor_name is tried as an output tensor name;
//   2. otherwise it is tried as a node name, accepted only if that node has exactly one output;
//   3. otherwise the tensor is searched by shape via GetTensorByNodeShape(dims).
//
// node_or_tensor_name: name taken from the calibration data (tensor name or node name).
// dims: expected shape, used only by the shape-based fallback.
// Returns the resolved tensor, or nullptr when every strategy fails.
tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
                                                    const std::vector<size_t> &dims) {
  tensor::MSTensor *tensor = session_->GetOutputByTensorName(node_or_tensor_name);
  if (tensor != nullptr) {
    return tensor;
  }

  // The tensor-name lookup failed, so fall back to treating the name as a node name.
  MS_LOG(INFO) << "Cannot find output tensor: " << node_or_tensor_name << ", switch to GetOutputsByNodeName";
  auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
  if (tensors.size() == 1) {
    return tensors.front();
  }

  // Zero or several tensors for that node: the name is not enough, try matching by shape.
  MS_LOG(INFO) << "Cannot find output node: " << node_or_tensor_name
               << " or node has more than one output tensor, switch to GetTensorByNodeShape";
  return GetTensorByNodeShape(dims);
}
|
||||
|
||||
int Benchmark::CompareOutput() {
|
||||
std::cout << "================ Comparing Output data ================" << std::endl;
|
||||
float total_bias = 0;
|
||||
int total_size = 0;
|
||||
// check the output tensor name.
|
||||
if (this->benchmark_tensor_names_ != session_->GetOutputTensorNames()) {
|
||||
MS_LOG(ERROR) << "The output tensor name is wrong.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (const auto &calib_tensor : benchmark_data_) {
|
||||
std::string tensor_name = calib_tensor.first;
|
||||
tensor::MSTensor *tensor = session_->GetOutputByTensorName(tensor_name);
|
||||
tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, calib_tensor.second->shape);
|
||||
if (tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
|
||||
return RET_ERROR;
|
||||
|
@ -940,7 +976,7 @@ std::string DumpMSTensor(tensor::MSTensor *tensor) {
|
|||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string &op_name, const std::string &file_type,
|
||||
const size_t &idx) {
|
||||
std::string file_name = op_name;
|
||||
|
@ -962,6 +998,7 @@ std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string &
|
|||
}
|
||||
return file_name;
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
int Benchmark::InitPrintTensorDataCallbackParameter() {
|
||||
|
@ -990,6 +1027,7 @@ int Benchmark::InitPrintTensorDataCallbackParameter() {
|
|||
return RET_OK;
|
||||
}
|
||||
int Benchmark::InitDumpTensorDataCallbackParameter() {
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
// before callback
|
||||
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
|
||||
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
|
||||
|
@ -1035,6 +1073,7 @@ int Benchmark::InitDumpTensorDataCallbackParameter() {
|
|||
}
|
||||
return true;
|
||||
};
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,9 @@
|
|||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
#include <nlohmann/json.hpp>
|
||||
#endif
|
||||
#include "tools/benchmark/benchmark_base.h"
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
|
@ -96,6 +98,8 @@ class MS_API Benchmark : public BenchmarkBase {
|
|||
|
||||
int CompareDataGetTotalCosineDistanceAndSize(const std::string &name, tensor::MSTensor *tensor,
|
||||
float *total_cosine_distance, int *total_size);
|
||||
tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
|
||||
tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
|
||||
|
||||
private:
|
||||
#ifdef ENABLE_OPENGL_TEXTURE
|
||||
|
|
|
@ -34,6 +34,12 @@
|
|||
#include <asm/unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef SUPPORT_NNIE
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_comm_vb.h"
|
||||
#include "include/mpi_sys.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
@ -57,6 +63,10 @@ constexpr int16_t kInputDataInt8Min = -127;
|
|||
constexpr int16_t kInputDataInt8Max = 127;
|
||||
constexpr int16_t kInputDataUint8Min = 0;
|
||||
constexpr int16_t kInputDataUint8Max = 254;
|
||||
#ifdef SUPPORT_NNIE
|
||||
constexpr int kNNIEMaxPoolCnt = 2;
|
||||
constexpr int kNNIEBlkSize = 768 * 576 * 2;
|
||||
#endif
|
||||
|
||||
const std::unordered_map<int, std::string> kTypeIdMap{
|
||||
{kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
|
||||
|
@ -294,6 +304,7 @@ int BenchmarkBase::CheckDeviceTypeValid() {
|
|||
}
|
||||
|
||||
int BenchmarkBase::InitDumpConfigFromJson(char *path) {
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
auto real_path = RealPath(path);
|
||||
std::ifstream ifs(real_path);
|
||||
if (!ifs.good()) {
|
||||
|
@ -354,7 +365,7 @@ int BenchmarkBase::InitDumpConfigFromJson(char *path) {
|
|||
MS_LOG(ERROR) << "create data output directory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -623,6 +634,72 @@ int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef SUPPORT_NNIE
|
||||
int SvpSysInit() {
|
||||
HI_S32 ret = HI_SUCCESS;
|
||||
VB_CONFIG_S struVbConf;
|
||||
ret = HI_MPI_SYS_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
|
||||
ret = HI_MPI_SYS_Init();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
|
||||
struVbConf.u32MaxPoolCnt = kNNIEMaxPoolCnt;
|
||||
struVbConf.astCommPool[1].u64BlkSize = kNNIEBlkSize;
|
||||
struVbConf.astCommPool[1].u32BlkCnt = 1;
|
||||
|
||||
ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Init();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_SYS_Init();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SvpSysExit() {
|
||||
HI_S32 ret = HI_SUCCESS;
|
||||
|
||||
ret = HI_MPI_SYS_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
ret = HI_MPI_VB_Exit();
|
||||
if (HI_SUCCESS != ret) {
|
||||
MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
BenchmarkBase::~BenchmarkBase() {
|
||||
for (auto &iter : this->benchmark_data_) {
|
||||
iter.second->shape.clear();
|
||||
|
@ -631,6 +708,9 @@ BenchmarkBase::~BenchmarkBase() {
|
|||
iter.second = nullptr;
|
||||
}
|
||||
this->benchmark_data_.clear();
|
||||
#ifdef SUPPORT_NNIE
|
||||
SvpSysExit();
|
||||
#endif
|
||||
}
|
||||
} // namespace lite
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -29,7 +29,9 @@
|
|||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
#include <nlohmann/json.hpp>
|
||||
#endif
|
||||
#include "include/model.h"
|
||||
#include "include/api/types.h"
|
||||
#include "include/api/format.h"
|
||||
|
@ -419,9 +421,10 @@ class MS_API BenchmarkBase {
|
|||
float op_cost_total_ = 0.0f;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_type_;
|
||||
std::map<std::string, std::pair<int, float>> op_times_by_name_;
|
||||
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
// dump data
|
||||
nlohmann::json dump_cfg_json_;
|
||||
#endif
|
||||
std::string dump_file_output_dir_;
|
||||
#ifdef ENABLE_ARM64
|
||||
int perf_fd = 0;
|
||||
|
@ -432,6 +435,10 @@ class MS_API BenchmarkBase {
|
|||
#endif
|
||||
std::mt19937 random_engine_;
|
||||
};
|
||||
#ifdef SUPPORT_NNIE
|
||||
int SvpSysInit();
|
||||
int SvpSysExit();
|
||||
#endif
|
||||
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_
|
||||
|
|
|
@ -36,6 +36,12 @@
|
|||
#include <asm/unistd.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef SUPPORT_NNIE
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_comm_vb.h"
|
||||
#include "include/mpi_sys.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore {
|
||||
constexpr size_t kDataToStringMaxNum = 40;
|
||||
|
@ -1081,7 +1087,7 @@ std::string DumpMSTensor(mindspore::MSTensor *tensor) {
|
|||
}
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name,
|
||||
const std::string &file_type, const size_t &idx) {
|
||||
std::string file_name = op_name;
|
||||
|
@ -1105,6 +1111,7 @@ std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::strin
|
|||
file_name += +".bin";
|
||||
return file_name;
|
||||
}
|
||||
#endif
|
||||
} // namespace
|
||||
|
||||
int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
|
||||
|
@ -1132,6 +1139,7 @@ int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
|
|||
return RET_OK;
|
||||
}
|
||||
int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
// before callback
|
||||
ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
|
||||
const std::vector<mindspore::MSTensor> &before_outputs,
|
||||
|
@ -1177,6 +1185,7 @@ int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
|
|||
}
|
||||
return true;
|
||||
};
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,9 @@
|
|||
#include <memory>
|
||||
#include <cfloat>
|
||||
#include <utility>
|
||||
#ifndef BENCHMARK_CLIP_JSON
|
||||
#include <nlohmann/json.hpp>
|
||||
#endif
|
||||
#include "tools/benchmark/benchmark_base.h"
|
||||
#include "include/model.h"
|
||||
#include "tools/common/flag_parser.h"
|
||||
|
|
|
@ -7,9 +7,9 @@ __download_pkg(34xx_sdk
|
|||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
include_directories(${34xx_sdk_SOURCE_DIR})
|
||||
include_directories(${34xx_sdk_SOURCE_DIR}/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include/third_party)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)
|
||||
link_directories(${34xx_sdk_SOURCE_DIR}/lib)
|
||||
|
||||
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)
|
||||
|
|
|
@ -27,7 +27,7 @@ using mindspore::schema::PrimitiveType_Custom;
|
|||
namespace mindspore {
|
||||
namespace dpico {
|
||||
namespace {
|
||||
constexpr int kBaseValue = 10;
|
||||
constexpr int kDecimal = 10;
|
||||
constexpr auto kInputShape = "inputs_shape";
|
||||
constexpr auto kOutputShape = "outputs_shape";
|
||||
constexpr auto kOutputsFormat = "outputs_format";
|
||||
|
@ -66,13 +66,13 @@ Status GetCustomShape(const std::map<std::string, std::string> &attrs, const std
|
|||
char *save_ptr = nullptr;
|
||||
res = strtok_r(attr.data(), delims, &save_ptr);
|
||||
while (res != nullptr) {
|
||||
int64_t ndims = strtol(res, &res, kBaseValue);
|
||||
int64_t ndims = strtol(res, &res, kDecimal);
|
||||
int j = 0;
|
||||
std::vector<int64_t> shape;
|
||||
shape.resize(ndims);
|
||||
for (; j < ndims; j++) {
|
||||
res = strtok_r(NULL, delims, &save_ptr);
|
||||
shape[j] = static_cast<int64_t>(strtol(res, &res, kBaseValue));
|
||||
shape[j] = static_cast<int64_t>(strtol(res, &res, kDecimal));
|
||||
}
|
||||
shapes->push_back(shape);
|
||||
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
cmake_minimum_required(VERSION 3.14)
project(NNIE_Custom)

include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Select the vendor SDK that matches the target board.
# The expansion is quoted so that if() stays well-formed when
# MSLITE_REGISTRY_DEVICE is unset or empty (an unquoted empty expansion is a
# configure-time error).
# NOTE(review): the "third_patry" spelling is kept as-is; presumably it matches
# the on-disk directory name - confirm before renaming.
if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3516D")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/lib)
elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3519A")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/lib)
elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3559A")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/lib)
endif()

# Headers of the MindSpore Lite runtime package located next to this directory.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)

aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)

# Vendor libraries the NNIE provider links against.
set(MSLITE_NNIE_LINK_LIB nnie mpi VoiceEngine upvqe dnvqe)

add_library(mslite_nnie SHARED
        ${COMMON_SRC3})
target_link_libraries(mslite_nnie ${MSLITE_NNIE_LINK_LIB} securec)

# Strip tool: honour a caller-provided HIMIX_STRIP, otherwise use the himix200 default.
if(DEFINED HIMIX_STRIP)
    set(NDK_STRIP ${HIMIX_STRIP})
else()
    set(NDK_STRIP "arm-himix200-linux-strip")
endif()

# Strip the shared library in Release builds. $<TARGET_FILE:...> resolves to the
# real output location even if the library output directory or name is
# overridden, and VERBATIM makes argument escaping platform-independent.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    add_custom_command(TARGET mslite_nnie POST_BUILD
            COMMAND ${NDK_STRIP} $<TARGET_FILE:mslite_nnie>
            VERBATIM)
endif()
|
|
@ -0,0 +1,178 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/custom_fp32.h"
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/registry/register_kernel.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/nnie_manager.h"
|
||||
#include "src/nnie_print.h"
|
||||
#include "src/nnie_cfg_parser.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Custom;
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
bool CustomCPUKernel::load_model_ = false;
|
||||
|
||||
int CustomCPUKernel::run_seg_ = 0;
|
||||
bool CustomCPUKernel::roi_used_ = false;
|
||||
int CustomCPUKernel::Prepare() {
|
||||
if (!load_model_) {
|
||||
Flags flags;
|
||||
flags.Init();
|
||||
if (nnie::NNIEManager::GetInstance()->CfgInit(flags.max_roi_num_, flags.time_step_, flags.core_ids_) != RET_OK) {
|
||||
LOGE("Nnie init cfg fail");
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (nnie::NNIEManager::GetInstance()->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
|
||||
static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
|
||||
// LOGW("Load WK Model Fail");
|
||||
return RET_OK;
|
||||
}
|
||||
load_model_ = true;
|
||||
}
|
||||
outputs_shapes_.resize(outputs_.size());
|
||||
for (size_t i = 0; i < outputs_.size(); i++) {
|
||||
outputs_shapes_[i] = outputs_[i].Shape();
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int CustomCPUKernel::ReSize() {
|
||||
if (load_model_) {
|
||||
nnie::NNIEManager::GetInstance()->Release();
|
||||
load_model_ = false;
|
||||
}
|
||||
|
||||
return Prepare();
|
||||
}
|
||||
|
||||
int CustomCPUKernel::Execute() {
|
||||
if (!load_model_) {
|
||||
LOGE("WK Model is not load.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
run_seg_ = seg_id_;
|
||||
|
||||
if (nnie::NNIEManager::GetInstance()->FillData(&inputs_, run_seg_)) {
|
||||
LOGE("Fail Fill Data");
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (nnie::NNIEManager::GetInstance()->Run(&outputs_, run_seg_, outputs_shapes_)) {
|
||||
LOGE("Fail WK Run");
|
||||
return RET_ERROR;
|
||||
}
|
||||
run_seg_++;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Releases the NNIE manager resources if a model is currently loaded.
CustomCPUKernel::~CustomCPUKernel() {
  if (!load_model_) {
    return;
  }
  nnie::NNIEManager::GetInstance()->Release();
  load_model_ = false;
}
|
||||
|
||||
// Copies the raw data of the Custom attribute named `attr` into `buf` as a NUL-terminated string.
//
// buf      - destination buffer, must hold at least `buf_size` bytes.
// buf_size - capacity of `buf`; the attribute data must be strictly shorter than this.
// op       - Custom primitive whose attribute list is searched.
// attr     - attribute name to look up.
//
// Returns true when the attribute was found and copied; false when an argument is invalid,
// the attribute is missing, has no data, or does not fit into `buf`.
bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) {
  if (buf == nullptr || buf_size <= 0 || op == nullptr) {
    LOGE("Invalid argument");
    return false;
  }
  const auto *attrs = op->attr();
  if (attrs == nullptr) {
    return false;
  }

  for (size_t i = 0; i < attrs->size(); i++) {
    const auto *item = attrs->Get(i);
    if (item == nullptr || item->name() == nullptr || item->name()->str() != attr) {
      continue;
    }

    const auto *output_info = item->data();
    if (output_info == nullptr) {
      LOGE("attr data is nullptr");
      return false;
    }
    // Compare as unsigned so that an oversized attribute cannot wrap to a negative int
    // and slip past the bounds check.
    if (static_cast<size_t>(output_info->size()) >= static_cast<size_t>(buf_size)) {
      LOGE("attr size too big");
      return false;
    }
    const int attr_size = static_cast<int>(output_info->size());
    for (int j = 0; j < attr_size; j++) {
      buf[j] = static_cast<char>(output_info->Get(j));
    }
    buf[attr_size] = 0;
    return true;
  }
  return false;
}
|
||||
|
||||
std::shared_ptr<mindspore::kernel::Kernel> CustomCreateKernel(const std::vector<MSTensor> &inputs,
|
||||
const std::vector<MSTensor> &outputs,
|
||||
const mindspore::schema::Primitive *primitive,
|
||||
const mindspore::Context *ctx) {
|
||||
if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
|
||||
LOGE("Primitive type is not PrimitiveType_Custom");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto op = primitive->value_as_Custom();
|
||||
if (op->attr()->size() < 1) {
|
||||
LOGE("There are at least 1 attribute of Custom");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
int64_t ndims;
|
||||
bool forward_bbox = false;
|
||||
char *res = nullptr;
|
||||
char buf[kMaxSize];
|
||||
if (GetCustomAttr(buf, kMaxSize, op, "id")) {
|
||||
res = nullptr;
|
||||
ndims = strtol(buf, &res, kDecimal);
|
||||
if ((*res) != 0) {
|
||||
LOGE("Get attr id data fail");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
LOGE("Custom op should have id");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (GetCustomAttr(buf, kMaxSize, op, "ForwardWithBbox")) {
|
||||
res = nullptr;
|
||||
int64_t temp_val = strtol(buf, &res, kDecimal);
|
||||
if ((*res) != 0) {
|
||||
LOGE("Get attr ForwardWithBbox data fail");
|
||||
return nullptr;
|
||||
}
|
||||
if (temp_val > 0) {
|
||||
forward_bbox = true;
|
||||
}
|
||||
}
|
||||
auto kernel = std::make_shared<CustomCPUKernel>(ndims, forward_bbox, inputs, outputs, primitive, ctx);
|
||||
if (kernel == nullptr) {
|
||||
LOGE("new custom kernel is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
namespace mindspore {
|
||||
namespace registry {
|
||||
namespace {
|
||||
const auto kFloat32 = DataType::kNumberTypeFloat32;
|
||||
const auto kInt8 = DataType::kNumberTypeInt8;
|
||||
const auto kUint8 = DataType::kNumberTypeUInt8;
|
||||
} // namespace
|
||||
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, NNIE, nnie::CustomCreateKernel)
|
||||
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kInt8, NNIE, nnie::CustomCreateKernel)
|
||||
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kUint8, NNIE, nnie::CustomCreateKernel)
|
||||
} // namespace registry
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,66 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "include/schema/model_generated.h"
|
||||
#include "include/context.h"
|
||||
#include "include/api/kernel.h"
|
||||
#include "src/custom_infer.h"
|
||||
|
||||
using mindspore::kernel::Kernel;
|
||||
using mindspore::tensor::MSTensor;
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
class CustomCPUKernel : public Kernel {
|
||||
public:
|
||||
CustomCPUKernel(int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
|
||||
const std::vector<MSTensor> &outputs, const mindspore::schema::Primitive *primitive,
|
||||
const mindspore::Context *ctx)
|
||||
: Kernel(inputs, outputs, primitive, ctx), seg_id_(seg_id), forward_bbox_(forward_bbox) {
|
||||
if (forward_bbox) {
|
||||
roi_used_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
~CustomCPUKernel() override;
|
||||
|
||||
int Prepare() override;
|
||||
int ReSize() override;
|
||||
int Execute() override;
|
||||
|
||||
int seg_id(void) const { return seg_id_; }
|
||||
|
||||
void set_seg_id(int id) { seg_id_ = id; }
|
||||
|
||||
int forward_bbox(void) const { return forward_bbox_; }
|
||||
|
||||
void set_forward_bbox(bool flag) { forward_bbox_ = flag; }
|
||||
|
||||
private:
|
||||
static bool load_model_;
|
||||
static int run_seg_;
|
||||
static bool roi_used_;
|
||||
int seg_id_ = 0;
|
||||
bool forward_bbox_ = false;
|
||||
std::vector<std::vector<int64_t>> outputs_shapes_;
|
||||
};
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_
|
|
@ -0,0 +1,160 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/custom_infer.h"
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "include/errorcode.h"
|
||||
#include "src/nnie_print.h"
|
||||
#include "include/api/format.h"
|
||||
#include "include/registry/register_kernel_interface.h"
|
||||
|
||||
using mindspore::kernel::KernelInterface;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Custom;
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
std::shared_ptr<KernelInterface> CustomInferCreater() {
|
||||
auto infer = new (std::nothrow) CustomInterface();
|
||||
if (infer == nullptr) {
|
||||
LOGE("new custom infer is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
return std::shared_ptr<KernelInterface>(infer);
|
||||
}
|
||||
|
||||
// Parses the shape list stored in the Custom attribute named `attr` and appends it to `shapes`.
//
// The attribute is a comma-separated list of decimal integers: for every tensor, the number of
// dimensions followed by that many dimension values, e.g. "4,1,3,224,224,2,1,1000".
//
// Returns RET_OK on success. Returns RET_ERROR when an argument is null, the attribute is missing
// or does not fit into the local buffer, or the list is malformed (negative/oversized dimension
// count, or fewer dimension values than announced).
int GetCustomShape(const mindspore::schema::Custom *op, const std::string &attr,
                   std::vector<std::vector<int64_t>> *shapes) {
  if (op == nullptr || op->attr() == nullptr || shapes == nullptr) {
    LOGE("Invalid argument");
    return RET_ERROR;
  }

  char buf[kMaxSize];
  bool has_outputs_shape = false;

  for (size_t i = 0; i < op->attr()->size(); i++) {
    const auto *item = op->attr()->Get(i);
    if (item == nullptr || item->name() == nullptr || item->name()->str() != attr) {
      continue;
    }
    auto output_info = item->data();
    if (output_info == nullptr) {
      LOGE("Custom op don't have %s attr.", attr.c_str());
      return RET_ERROR;
    }
    // Compare as unsigned so an oversized attribute cannot wrap to a negative int.
    if (static_cast<size_t>(output_info->size()) >= static_cast<size_t>(kMaxSize)) {
      LOGE("attr size too big");
      return RET_ERROR;
    }
    int attr_size = static_cast<int>(output_info->size());
    for (int j = 0; j < attr_size; j++) {
      buf[j] = static_cast<char>(output_info->Get(j));
    }
    buf[attr_size] = 0;
    has_outputs_shape = true;
    break;
  }

  if (!has_outputs_shape) {
    LOGE("Custom op don't have %s attr.", attr.c_str());
    return RET_ERROR;
  }

  char delims[] = ",";
  char *save_ptr = nullptr;
  char *res = strtok_r(buf, delims, &save_ptr);
  while (res != nullptr) {
    const int64_t ndims = strtol(res, nullptr, kDecimal);
    // Every dimension needs its own token in `buf`, so a count outside [0, kMaxSize) can never be
    // satisfied; rejecting it also keeps the allocation below bounded.
    if (ndims < 0 || ndims >= kMaxSize) {
      LOGE("%s attr is malformed.", attr.c_str());
      return RET_ERROR;
    }

    std::vector<int64_t> shape;
    shape.reserve(static_cast<size_t>(ndims));
    for (int64_t j = 0; j < ndims; j++) {
      res = strtok_r(NULL, delims, &save_ptr);
      // The list ended before all announced dimensions were read.
      if (res == nullptr) {
        LOGE("%s attr is malformed.", attr.c_str());
        return RET_ERROR;
      }
      shape.push_back(static_cast<int64_t>(strtol(res, nullptr, kDecimal)));
    }
    shapes->push_back(shape);

    res = strtok_r(NULL, delims, &save_ptr);
  }
  return RET_OK;
}
|
||||
|
||||
// Infers the output shapes of the NNIE custom op from its "inputs_shape" / "outputs_shape" attributes.
//
// Only the first dimension of the first input may differ from the recorded input shape; in that
// case the first dimension of every output shape is replaced by the actual value. All outputs are
// set to Float32 / NCHW.
//
// Returns kSuccess on success and kLiteError on any invalid argument, missing/malformed attribute,
// or unsupported shape change.
Status CustomInterface::Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                              const mindspore::schema::Primitive *primitive) {
  if (inputs == nullptr || outputs == nullptr || primitive == nullptr) {
    LOGE("Invalid argument");
    return kLiteError;
  }
  if (inputs->empty()) {
    LOGE("Inputs size 0");
    return kLiteError;
  }
  if (outputs->empty()) {
    LOGE("Outputs size 0");
    return kLiteError;
  }
  if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return kLiteError;
  }

  auto op = primitive->value_as_Custom();
  if (op == nullptr || op->attr() == nullptr || op->attr()->size() < 1) {
    LOGE("There are at least 1 attribute of Custom");
    return kLiteError;
  }

  std::vector<std::vector<int64_t>> inputs_shape;
  if (GetCustomShape(op, "inputs_shape", &inputs_shape) != RET_OK) {
    LOGE("parser inputs_shape attribute err.");
    return kLiteError;
  }
  std::vector<std::vector<int64_t>> outputs_shape;
  if (GetCustomShape(op, "outputs_shape", &outputs_shape) != RET_OK) {
    LOGE("parser outputs_shape attribute err.");
    return kLiteError;
  }

  // The recorded shapes cover every input except the last one, and inputs_shape[0] is read below,
  // so an empty list (only one input tensor) is rejected together with a count mismatch.
  if (inputs_shape.empty() || inputs_shape.size() != (inputs->size() - 1)) {
    LOGE("inputs num diff inputs_shape num.");
    return kLiteError;
  }
  // Every output is assigned outputs_shape[i] below, so there must be at least that many shapes.
  if (outputs_shape.size() < outputs->size()) {
    LOGE("outputs num diff outputs_shape num.");
    return kLiteError;
  }

  const auto input_shape = (*inputs)[0].Shape();
  if (inputs_shape[0].size() != input_shape.size()) {
    LOGE("shape size err.");
    return kLiteError;
  }

  bool resize_flag = false;
  int64_t resize_num = 1;
  for (size_t i = 0; i < inputs_shape[0].size(); i++) {
    if (inputs_shape[0][i] == input_shape[i]) {
      continue;
    }
    if (i != 0) {
      LOGE("Custom of NNIE only support batch_num resize.");
      return kLiteError;
    }
    resize_flag = true;
    resize_num = input_shape[i];
  }

  if (resize_flag) {
    for (auto &output_shape : outputs_shape) {
      // A zero-dimensional shape has no first dimension to patch.
      if (!output_shape.empty()) {
        output_shape[0] = resize_num;
      }
    }
  }

  for (size_t i = 0; i < outputs->size(); i++) {
    (*outputs)[i].SetShape(outputs_shape[i]);
    (*outputs)[i].SetDataType(DataType::kNumberTypeFloat32);
    (*outputs)[i].SetFormat(Format::NCHW);
  }
  return kSuccess;
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, NNIE, nnie::CustomInferCreater);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_
|
||||
#define MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "include/kernel_interface.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
class CustomInterface : public mindspore::kernel::KernelInterface {
|
||||
public:
|
||||
CustomInterface() {}
|
||||
|
||||
~CustomInterface() = default;
|
||||
|
||||
Status Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
|
||||
const mindspore::schema::Primitive *primitive) override;
|
||||
};
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_
|
|
@ -0,0 +1,101 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/nnie_cfg_parser.h"
|
||||
#include <climits>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "include/errorcode.h"
|
||||
#include "src/nnie_manager.h"
|
||||
#include "src/nnie_print.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
namespace {
|
||||
constexpr auto ENV_TIME_STEP = "TIME_STEP";
|
||||
constexpr auto ENV_MAX_ROI_NUM = "MAX_ROI_NUM";
|
||||
constexpr auto ENV_CORE_IDS = "CORE_IDS";
|
||||
constexpr auto DELIM = ",";
|
||||
constexpr int MAX_CORE_ID = 7;
|
||||
} // namespace
|
||||
void Flags::Init() {
|
||||
auto *time_step = std::getenv(ENV_TIME_STEP);
|
||||
if (time_step != nullptr) {
|
||||
auto iter = std::find_if(time_step, time_step + strlen(time_step), [](char val) { return val < '0' || val > '9'; });
|
||||
if (iter != time_step) {
|
||||
*iter = '\0';
|
||||
this->time_step_ = atoi(time_step);
|
||||
} else {
|
||||
LOGE("TIME_STEP ENV is invalid, now set to default value %d", this->time_step_);
|
||||
}
|
||||
} else {
|
||||
LOGW("TIME_STEP ENV is not set, now set to default value %d", this->time_step_);
|
||||
}
|
||||
auto *max_roi_num = std::getenv(ENV_MAX_ROI_NUM);
|
||||
if (max_roi_num != nullptr) {
|
||||
auto iter =
|
||||
std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; });
|
||||
if (iter != max_roi_num) {
|
||||
*iter = '\0';
|
||||
this->max_roi_num_ = atoi(max_roi_num);
|
||||
} else {
|
||||
LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", this->max_roi_num_);
|
||||
}
|
||||
} else {
|
||||
LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", this->max_roi_num_);
|
||||
}
|
||||
auto ids = std::getenv(ENV_CORE_IDS);
|
||||
if (ids != nullptr) {
|
||||
auto iter = std::find_if(ids, ids + strlen(ids), [](char val) { return (val < '0' || val > '9') && val != ','; });
|
||||
std::vector<int> core_ids;
|
||||
if (iter != ids) {
|
||||
*iter = '\0';
|
||||
char *saveptr;
|
||||
char *p = strtok_r(ids, DELIM, &saveptr);
|
||||
while (p != nullptr) {
|
||||
int id = atoi(p);
|
||||
p = strtok_r(NULL, DELIM, &saveptr);
|
||||
if (id > MAX_CORE_ID || id < 0) {
|
||||
LOGE("id is out of range");
|
||||
continue;
|
||||
}
|
||||
if (std::find(core_ids.begin(), core_ids.end(), id) != core_ids.end()) {
|
||||
continue;
|
||||
}
|
||||
core_ids.push_back(id);
|
||||
}
|
||||
}
|
||||
if (!core_ids.empty()) {
|
||||
this->core_ids_ = core_ids;
|
||||
} else {
|
||||
std::string message =
|
||||
"CORE_IDS ENV is invalid, now set to default value {" + std::to_string(this->core_ids_.front()) + "}";
|
||||
LOGW(message.c_str());
|
||||
}
|
||||
} else {
|
||||
std::string message =
|
||||
"CORE_IDS ENV is not set, now set to default value {" + std::to_string(this->core_ids_.front()) + "}";
|
||||
LOGW(message.c_str());
|
||||
}
|
||||
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
|
||||
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
|
||||
#include <vector>
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
/**
|
||||
* Flags is a config container.
|
||||
* Member objects:
|
||||
* 1.time_step_: step num only for rnn or lstm model. Default is 1.
|
||||
* 2.max_roi_num_: maximum number of ROI areas supported for a single picture, must be greater than 0. Default is 300.
|
||||
* 3.core_ids_: running kernels' id, support multi-core, separated by commas when setting, such as {0, 1, 2}.
|
||||
* each element must be an integer which satisfies the inequality 0 <= val < 8.
|
||||
* Default is {0}.
|
||||
*/
|
||||
// Plain configuration holder; Init() fills the members, which otherwise keep their defaults.
class Flags {
 public:
  Flags() = default;
  ~Flags() = default;

  void Init();

  int time_step_ = 1;
  int max_roi_num_ = 300;
  std::vector<int> core_ids_ = std::vector<int>{0};
};
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif
|
|
@ -0,0 +1,943 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/nnie_common.h"
|
||||
#include "include/mpi_nnie.h"
|
||||
#include "include/hi_type.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/nnie_print.h"
|
||||
#include "src/nnie_memory.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
constexpr int kSleepUs = 100;
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
// Releases the task buffer and the step buffer referenced by nnie_param through NNIE_MEM_FREE and
// resets their physical/virtual addresses to 0. A buffer is only released when both of its addresses
// are non-zero. A null nnie_param is ignored.
static void NnieParamRelease(NnieParam *nnie_param) {
  if (nnie_param == nullptr) {
    return;
  }

  auto &task_buf = nnie_param->task_buf_;
  if (!(task_buf.u64PhyAddr == 0 || task_buf.u64VirAddr == 0)) {
    NNIE_MEM_FREE(task_buf.u64PhyAddr, task_buf.u64VirAddr);
    task_buf.u64PhyAddr = 0;
    task_buf.u64VirAddr = 0;
  }

  auto &step_buf = nnie_param->step_buf_;
  if (!(step_buf.u64PhyAddr == 0 || step_buf.u64VirAddr == 0)) {
    NNIE_MEM_FREE(step_buf.u64PhyAddr, step_buf.u64VirAddr);
    step_buf.u64PhyAddr = 0;
    step_buf.u64VirAddr = 0;
  }
}
|
||||
|
||||
bool CheckNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param) {
|
||||
for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
|
||||
for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++)
|
||||
if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) {
|
||||
nnie_param->mem_cfg_.seg_[i].dst_node_[j] = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ConnectNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param, SVP_SRC_BLOB_S *blob) {
|
||||
for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
|
||||
for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++)
|
||||
if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) {
|
||||
blob->u64PhyAddr = nnie_param->seg_data_[i].dst_[j].u64PhyAddr;
|
||||
blob->u64VirAddr = nnie_param->seg_data_[i].dst_[j].u64VirAddr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialises the per-segment forward control structures and the shape/type/number fields of every
// source and destination blob from the loaded model and the run configuration.
//  - mem_cfg_ is cleared first; for segments after the first one, a source node whose name matches
//    a destination node of the model is flagged in mem_cfg_ (see CheckNnieInnerNode).
//  - ROI segments fill forward_with_bbox_ctrl_, CNN/RECURRENT segments fill forward_ctrl_.
//  - Destination blobs of ROI segments hold max_roi_num_ * max_input_num_ items, all other blobs
//    hold max_input_num_ items.
static void FillForwardInfo(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
  memset(&nnie_param->mem_cfg_, false, sizeof(NNIEMemCfg));

  const auto *model = nnie_param->model_;
  for (HI_U32 seg_idx = 0; seg_idx < model->u32NetSegNum; ++seg_idx) {
    const auto &seg = model->astSeg[seg_idx];
    auto &seg_data = nnie_param->seg_data_[seg_idx];
    const bool is_roi = (seg.enNetType == SVP_NNIE_NET_TYPE_ROI);

    if (is_roi) {
      auto &ctrl = nnie_param->forward_with_bbox_ctrl_[seg_idx];
      ctrl.enNnieId = nnie_cfg->nnie_core_id_[seg_idx];
      ctrl.u32SrcNum = seg.u16SrcNum;
      ctrl.u32DstNum = seg.u16DstNum;
      ctrl.u32ProposalNum = 1;
      ctrl.u32NetSegId = seg_idx;
    } else if (seg.enNetType == SVP_NNIE_NET_TYPE_CNN || seg.enNetType == SVP_NNIE_NET_TYPE_RECURRENT) {
      auto &ctrl = nnie_param->forward_ctrl_[seg_idx];
      ctrl.enNnieId = nnie_cfg->nnie_core_id_[seg_idx];
      ctrl.u32SrcNum = seg.u16SrcNum;
      ctrl.u32DstNum = seg.u16DstNum;
      ctrl.u32NetSegId = seg_idx;
    }

    // Source blobs.
    for (HI_U32 node_idx = 0; node_idx < seg.u16SrcNum; ++node_idx) {
      const auto &node = seg.astSrcNode[node_idx];
      auto &blob = seg_data.src_[node_idx];

      if (seg_idx > 0 && CheckNnieInnerNode(node.szName, nnie_param)) {
        nnie_param->mem_cfg_.seg_[seg_idx].src_node_[node_idx] = true;
      }

      blob.enType = node.enType;
      if (node.enType == SVP_BLOB_TYPE_SEQ_S32) {
        blob.unShape.stSeq.u32Dim = node.unShape.u32Dim;
        blob.u32Num = nnie_cfg->max_input_num_;
        blob.unShape.stSeq.u64VirAddrStep = nnie_cfg->step_vir_addr_[seg_idx * NNIE_EACH_SEG_STEP_ADDR_NUM];
      } else {
        blob.unShape.stWhc.u32Chn = node.unShape.stWhc.u32Chn;
        blob.unShape.stWhc.u32Height = node.unShape.stWhc.u32Height;
        blob.unShape.stWhc.u32Width = node.unShape.stWhc.u32Width;
        blob.u32Num = nnie_cfg->max_input_num_;
      }
    }

    // Number of items held by each destination blob of this segment.
    HI_U32 dst_item_num;
    if (is_roi) {
      dst_item_num = nnie_cfg->max_roi_num_ * nnie_cfg->max_input_num_;
    } else {
      dst_item_num = nnie_cfg->max_input_num_;
    }

    // Destination blobs.
    for (HI_U32 node_idx = 0; node_idx < seg.u16DstNum; ++node_idx) {
      const auto &node = seg.astDstNode[node_idx];
      auto &blob = seg_data.dst_[node_idx];

      blob.enType = node.enType;
      if (node.enType == SVP_BLOB_TYPE_SEQ_S32) {
        blob.unShape.stSeq.u32Dim = node.unShape.u32Dim;
        blob.u32Num = dst_item_num;
        blob.unShape.stSeq.u64VirAddrStep = nnie_cfg->step_vir_addr_[seg_idx * NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
      } else {
        blob.unShape.stWhc.u32Chn = node.unShape.stWhc.u32Chn;
        blob.unShape.stWhc.u32Height = node.unShape.stWhc.u32Height;
        blob.unShape.stWhc.u32Width = node.unShape.stWhc.u32Width;
        blob.u32Num = dst_item_num;
      }
    }
  }
}
|
||||
|
||||
// Computes the row stride and byte size of each of the `node_num` blobs described by `nnie_node`.
//  - Element size is sizeof(HI_U32) for S32 / VEC_S32 / SEQ_S32 nodes and sizeof(HI_U8) otherwise.
//  - The stride is the row size (u32Dim for SEQ_S32, width otherwise) aligned with NNIE_ALIGN16
//    when `align32` equals NNIE_ALIGN_16, and with NNIE_ALIGN32 for any other value.
//  - SEQ_S32 blobs take total_step * stride bytes; other blobs take num * stride * height * channel.
//  - When `mem_alloc` is given and mem_alloc[i] is true, the size of blob i is forced to 0.
// Each size is stored in blob_size[i] and added to *total_size; the stride is stored in blob[i].
static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32 total_step, SVP_BLOB_S blob[],
                           HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool *mem_alloc = nullptr) {
  for (HI_U32 idx = 0; idx < node_num; ++idx) {
    const auto &node = nnie_node[idx];
    const bool is_seq = (node.enType == SVP_BLOB_TYPE_SEQ_S32);
    const bool is_32bit = is_seq || node.enType == SVP_BLOB_TYPE_S32 || node.enType == SVP_BLOB_TYPE_VEC_S32;

    HI_U32 elem_size;
    if (is_32bit) {
      elem_size = sizeof(HI_U32);
    } else {
      elem_size = sizeof(HI_U8);
    }

    HI_U32 row_bytes;
    if (is_seq) {
      row_bytes = node.unShape.u32Dim * elem_size;
    } else {
      row_bytes = node.unShape.stWhc.u32Width * elem_size;
    }

    HI_U32 stride;
    if (align32 == NNIE_ALIGN_16) {
      stride = NNIE_ALIGN16(row_bytes);
    } else {
      stride = NNIE_ALIGN32(row_bytes);
    }

    HI_U32 bytes;
    if (is_seq) {
      bytes = total_step * stride;
    } else {
      bytes = blob[idx].u32Num * stride * node.unShape.stWhc.u32Height * node.unShape.stWhc.u32Chn;
    }
    // Blobs flagged in mem_alloc do not contribute any bytes.
    if (mem_alloc != nullptr && mem_alloc[idx]) {
      bytes = 0;
    }

    blob_size[idx] = bytes;
    *total_size += bytes;
    blob[idx].u32Stride = stride;
  }
}
|
||||
|
||||
// Queries the per-segment task buffer sizes from HI_MPI_SVP_NNIE_GetTskBufSize and computes the
// sizes of all source/destination blobs.
// Outputs:
//   *total_task_buf_size - sum of the task buffer sizes of all segments.
//   *tmp_buf_size        - the model's u32TmpBufSize.
//   blob_size[i]         - per-blob sizes of segment i (filled by GetBlobMemSize).
//   *total_size          - incremented by the task, tmp and blob sizes.
// The step total is accumulated over RECURRENT segments and is not reset between segments.
// Returns RET_OK, or RET_ERROR when the task buffer size query fails.
static int GetTaskAndBlobBufSize(NnieCfg *nnie_cfg, NnieParam *nnie_param, HI_U32 *total_task_buf_size,
                                 HI_U32 *tmp_buf_size, NnieBlobSize blob_size[], HI_U32 *total_size) {
  auto *model = nnie_param->model_;

  const HI_S32 query_ret = HI_MPI_SVP_NNIE_GetTskBufSize(nnie_cfg->max_input_num_, nnie_cfg->max_roi_num_, model,
                                                         nnie_param->task_buf_size_, model->u32NetSegNum);
  if (query_ret != HI_SUCCESS) {
    LOGE("HI_MPI_SVP_NNIE_GetTskBufSize");
    return RET_ERROR;
  }

  *total_task_buf_size = 0;
  for (HI_U32 seg_idx = 0; seg_idx < model->u32NetSegNum; ++seg_idx) {
    *total_task_buf_size += nnie_param->task_buf_size_[seg_idx];
  }

  *tmp_buf_size = model->u32TmpBufSize;
  *total_size += *total_task_buf_size + *tmp_buf_size;

  HI_U32 total_step = 0;
  for (HI_U32 seg_idx = 0; seg_idx < model->u32NetSegNum; ++seg_idx) {
    auto &seg = model->astSeg[seg_idx];
    auto &seg_data = nnie_param->seg_data_[seg_idx];

    if (seg.enNetType == SVP_NNIE_NET_TYPE_RECURRENT) {
      // Sum the step counts stored at the first source blob's step address.
      for (HI_U32 item = 0; item < seg_data.src_[0].u32Num; ++item) {
        const HI_S32 *steps =
          reinterpret_cast<HI_S32 *>(static_cast<HI_UL>(seg_data.src_[0].unShape.stSeq.u64VirAddrStep));
        total_step += *(steps + item);
      }
    }

    GetBlobMemSize(&(seg.astSrcNode[0]), seg.u16SrcNum, total_step, &(seg_data.src_[0]), NNIE_ALIGN_16, total_size,
                   &(blob_size[seg_idx].src_size_[0]), &(nnie_param->mem_cfg_.seg_[seg_idx].src_node_[0]));

    GetBlobMemSize(&(seg.astDstNode[0]), seg.u16DstNum, total_step, &(seg_data.dst_[0]), NNIE_ALIGN_16, total_size,
                   &(blob_size[seg_idx].dst_size_[0]));
  }
  return RET_OK;
}
|
||||
|
||||
// Fills the forward/blob descriptors of nnie_param and allocates one cached memory block that backs
// every buffer used for inference. The block is laid out, in order, as:
//   [task buffers of all segments][tmp buffer][per segment: src blobs, then dst blobs][rpn bbox]
// The rpn bbox area only exists when the model contains at least one ROI segment.
// Source blobs flagged in mem_cfg_ are not given their own address; they are pointed at the
// destination blob with the same node name via ConnectNnieInnerNode (their size is 0).
// Returns RET_OK on success and RET_ERROR when size computation, allocation or node connection fails.
// On a connection failure the allocated block is already recorded in task_buf_, so it stays
// reachable for NnieParamRelease.
static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
  HI_U32 i, j;
  HI_U32 total_size = 0;
  HI_U32 total_task_buf_size = 0;
  HI_U32 tmp_buf_size_ = 0;
  HI_S32 ret = HI_SUCCESS;
  HI_U32 off_set = 0;
  HI_U64 phy_addr = 0;
  HI_U8 *vir_addr = nullptr;
  NnieBlobSize blob_size[SVP_NNIE_MAX_NET_SEG_NUM] = {0};

  FillForwardInfo(nnie_cfg, nnie_param);

  ret = GetTaskAndBlobBufSize(nnie_cfg, nnie_param, &total_task_buf_size, &tmp_buf_size_, blob_size, &total_size);
  if (RET_OK != ret) {
    // Nothing has been allocated yet at this point; the size query itself failed.
    LOGE("Error,GetTaskAndBlobBufSize failed! ");
    return RET_ERROR;
  }

  bool has_roi = false;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      has_roi = true;
    }
  }
  if (has_roi) {
    // Reserve room for max_roi_num_ boxes of NNIE_COORDI_NUM 32-bit values per input.
    nnie_param->rpn_bbox_.enType = SVP_BLOB_TYPE_S32;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Chn = 1;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Height = nnie_cfg->max_roi_num_;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Width = NNIE_COORDI_NUM;
    nnie_param->rpn_bbox_.u32Stride = NNIE_ALIGN16(NNIE_COORDI_NUM * sizeof(HI_U32));
    nnie_param->rpn_bbox_.u32Num = nnie_cfg->max_input_num_;
    total_size +=
      nnie_param->rpn_bbox_.u32Num * nnie_param->rpn_bbox_.unShape.stWhc.u32Height * nnie_param->rpn_bbox_.u32Stride;
  }

  ret = NnieMemMallocCached(std::string("NNIE_NNIE_TASK").data(), nullptr, reinterpret_cast<HI_U64 *>(&phy_addr),
                            reinterpret_cast<void **>(&vir_addr), total_size);
  if (HI_SUCCESS != ret) {
    LOGE("Error,Malloc memory failed! ");
    return RET_ERROR;
  }
  memset(vir_addr, 0, total_size);
  NnieMemFlushCache(phy_addr, reinterpret_cast<void *>(vir_addr), total_size);

  nnie_param->task_buf_.u32Size = total_task_buf_size;
  nnie_param->task_buf_.u64PhyAddr = phy_addr;
  nnie_param->task_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr;

  nnie_param->tmp_buf_.u32Size = tmp_buf_size_;
  nnie_param->tmp_buf_.u64PhyAddr = phy_addr + total_task_buf_size;
  nnie_param->tmp_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr + total_task_buf_size;

  // Every segment shares the tmp buffer and owns a slice of the task buffer.
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_with_bbox_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i];
    } else if (SVP_NNIE_NET_TYPE_CNN == nnie_param->model_->astSeg[i].enNetType ||
               SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_;
      nnie_param->forward_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set;
      nnie_param->forward_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set;
      nnie_param->forward_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i];
    }
    off_set += nnie_param->task_buf_size_[i];
  }

  // Blob storage starts right after the task and tmp buffers.
  phy_addr = phy_addr + total_task_buf_size + tmp_buf_size_;
  vir_addr = vir_addr + total_task_buf_size + tmp_buf_size_;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
      if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
        if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
                                  &(nnie_param->seg_data_[i].src_[j]))) {
          LOGE("ConnectNnieInnerNode failed! ");
          return RET_ERROR;
        }
      } else {
        nnie_param->seg_data_[i].src_[j].u64PhyAddr = phy_addr;
        nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
      }
      // Advance past this blob inside the loop so that a segment with no source nodes never
      // indexes src_size_ with an underflowed (j - 1).
      phy_addr += blob_size[i].src_size_[j];
      vir_addr += blob_size[i].src_size_[j];
    }

    for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
      nnie_param->seg_data_[i].dst_[j].u64PhyAddr = phy_addr;
      nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
      // Same reasoning as for the source blobs: no access happens when u16DstNum is 0.
      phy_addr += blob_size[i].dst_size_[j];
      vir_addr += blob_size[i].dst_size_[j];
    }
  }
  if (has_roi) {
    nnie_param->rpn_bbox_.u64PhyAddr = phy_addr;
    nnie_param->rpn_bbox_.u64VirAddr = (HI_U64)((HI_UL)vir_addr);
  }
  return RET_OK;
}
|
||||
|
||||
static int NnieLoadModel(char *model_buf, int size, NnieModel *nnie_model) {
|
||||
HI_S32 ret = HI_INVALID_VALUE;
|
||||
HI_U64 phy_addr = 0;
|
||||
HI_U8 *vir_addr = nullptr;
|
||||
ret = NnieMemMalloc(std::string("NNIE_NNIE_MODEL").data(), nullptr, reinterpret_cast<HI_U64 *>(&phy_addr),
|
||||
reinterpret_cast<void **>(&vir_addr), size);
|
||||
if (HI_SUCCESS != ret) {
|
||||
LOGE("Error,Malloc memory failed! ");
|
||||
return RET_ERROR;
|
||||
}
|
||||
nnie_model->model_buf_.u32Size = (HI_U32)size;
|
||||
nnie_model->model_buf_.u64PhyAddr = phy_addr;
|
||||
nnie_model->model_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr;
|
||||
memcpy(vir_addr, model_buf, size);
|
||||
ret = HI_MPI_SVP_NNIE_LoadModel(&nnie_model->model_buf_, &nnie_model->model_);
|
||||
if (HI_SUCCESS != ret) {
|
||||
NNIE_MEM_FREE(nnie_model->model_buf_.u64PhyAddr, nnie_model->model_buf_.u64VirAddr);
|
||||
nnie_model->model_buf_.u32Size = 0;
|
||||
LOGE("HI_MPI_SVP_NNIE_LoadModel failed!");
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Releases the model buffer referenced by nnie_model through NNIE_MEM_FREE and resets its
// physical/virtual addresses to 0. Nothing happens when nnie_model is null or when either
// address is already 0.
static void NnieUnloadModel(NnieModel *nnie_model) {
  if (nnie_model == nullptr) {
    return;
  }

  auto &model_buf = nnie_model->model_buf_;
  if (model_buf.u64PhyAddr == 0 || model_buf.u64VirAddr == 0) {
    return;
  }
  NNIE_MEM_FREE(model_buf.u64PhyAddr, model_buf.u64VirAddr);
  model_buf.u64PhyAddr = 0;
  model_buf.u64VirAddr = 0;
}
|
||||
|
||||
// Runs HI_MPI_SVP_NNIE_Forward on the segment selected by input_data_idx->seg_idx_.
// The segment's task buffer and all destination blobs are cache-flushed before the forward call,
// and the destination blobs are flushed again afterwards. When `instant` is set, the function polls
// HI_MPI_SVP_NNIE_Query (sleeping kSleepUs between attempts) for as long as it reports a timeout.
// Returns RET_OK on success, RET_ERROR when the forward call fails or when the query ends with an
// error other than a timeout.
static int NnieForward(NnieParam *nnie_param, NnieDataIndex *input_data_idx, HI_BOOL instant) {
  HI_S32 ret = HI_SUCCESS;
  HI_BOOL finish = HI_FALSE;
  SVP_NNIE_HANDLE svp_nnie_handle = 0;
  SVP_NNIE_FORWARD_CTRL_S *forward_handle = &nnie_param->forward_ctrl_[input_data_idx->seg_idx_];
  NnieSegData *seg_data = &nnie_param->seg_data_[input_data_idx->seg_idx_];

  // Flushes the cache of every destination blob of the segment. For SEQ_S32 blobs the flushed size
  // is derived from the step counts; the step total keeps accumulating across the blobs of one pass.
  auto flush_dst_blobs = [forward_handle, seg_data]() {
    HI_U32 total_step_num = 0;
    for (HI_U32 i = 0; i < forward_handle->u32DstNum; i++) {
      if (SVP_BLOB_TYPE_SEQ_S32 == seg_data->dst_[i].enType) {
        for (HI_U32 j = 0; j < seg_data->dst_[i].u32Num; j++) {
          total_step_num += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, seg_data->dst_[i].unShape.stSeq.u64VirAddrStep) + j);
        }
        NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                          total_step_num * seg_data->dst_[i].u32Stride);
      } else {
        NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                          seg_data->dst_[i].u32Num * seg_data->dst_[i].unShape.stWhc.u32Chn *
                            seg_data->dst_[i].unShape.stWhc.u32Height * seg_data->dst_[i].u32Stride);
      }
    }
  };

  NnieMemFlushCache(forward_handle->stTskBuf.u64PhyAddr,
                    NNIE_CONVERT_64BIT_ADDR(HI_VOID, forward_handle->stTskBuf.u64VirAddr),
                    forward_handle->stTskBuf.u32Size);
  flush_dst_blobs();

  ret = HI_MPI_SVP_NNIE_Forward(&svp_nnie_handle, seg_data->src_, nnie_param->model_, seg_data->dst_, forward_handle,
                                instant);
  if (HI_SUCCESS != ret) {
    LOGE("HI_MPI_SVP_NNIE_Forward failed!");
    return RET_ERROR;
  }

  if (instant) {
    while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT ==
           (ret = HI_MPI_SVP_NNIE_Query(forward_handle->enNnieId, svp_nnie_handle, &finish, HI_TRUE))) {
      usleep(kSleepUs);
    }
    // The loop only retries on timeout; any other failure must not be reported as success.
    if (HI_SUCCESS != ret) {
      LOGE("HI_MPI_SVP_NNIE_Query failed!");
      return RET_ERROR;
    }
  }

  flush_dst_blobs();

  return RET_OK;
}
|
||||
|
||||
// Runs HI_MPI_SVP_NNIE_ForwardWithBbox on the segment selected by pstInputDataIdx->seg_idx_, using
// astBbox as the bbox input. The segment's task buffer and destination blobs are cache-flushed
// before the call and the destination blobs are flushed again afterwards. When bInstant is set the
// function polls HI_MPI_SVP_NNIE_Query while it reports a timeout, sleeping kSleepUs and logging on
// every retry.
// Returns RET_ERROR when the forward call fails; otherwise returns the last status code obtained
// (HI_SUCCESS, or the final query result when bInstant is set).
static HI_S32 NNIE_ForwardWithBbox(NnieParam *pstNnieParam, NnieDataIndex *pstInputDataIdx, SVP_SRC_BLOB_S astBbox[],
                                   HI_BOOL bInstant) {
  auto &bbox_ctrl = pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_];
  auto &seg_data = pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_];
  HI_BOOL finish = HI_FALSE;
  SVP_NNIE_HANDLE svp_nnie_handle = 0;

  // Flushes every destination blob of the segment; the step total of SEQ_S32 blobs accumulates
  // across the blobs visited in one pass.
  auto flush_dst_blobs = [&bbox_ctrl, &seg_data]() {
    HI_U32 step_total = 0;
    for (HI_U32 blob_idx = 0; blob_idx < bbox_ctrl.u32DstNum; ++blob_idx) {
      auto &dst = seg_data.dst_[blob_idx];
      if (dst.enType == SVP_BLOB_TYPE_SEQ_S32) {
        for (HI_U32 item = 0; item < dst.u32Num; ++item) {
          step_total += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, dst.unShape.stSeq.u64VirAddrStep) + item);
        }
        NnieMemFlushCache(dst.u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, dst.u64VirAddr),
                          step_total * dst.u32Stride);
      } else {
        NnieMemFlushCache(dst.u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, dst.u64VirAddr),
                          dst.u32Num * dst.unShape.stWhc.u32Chn * dst.unShape.stWhc.u32Height * dst.u32Stride);
      }
    }
  };

  NnieMemFlushCache(bbox_ctrl.stTskBuf.u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, bbox_ctrl.stTskBuf.u64VirAddr),
                    bbox_ctrl.stTskBuf.u32Size);
  flush_dst_blobs();

  HI_S32 ret = HI_MPI_SVP_NNIE_ForwardWithBbox(&svp_nnie_handle, seg_data.src_, astBbox, pstNnieParam->model_,
                                               seg_data.dst_, &bbox_ctrl, bInstant);
  if (ret != HI_SUCCESS) {
    LOGE("HI_MPI_SVP_NNIE_ForwardWithBbox failed!");
    return RET_ERROR;
  }

  if (bInstant) {
    for (;;) {
      ret = HI_MPI_SVP_NNIE_Query(bbox_ctrl.enNnieId, svp_nnie_handle, &finish, HI_TRUE);
      if (ret != HI_ERR_SVP_NNIE_QUERY_TIMEOUT) {
        break;
      }
      usleep(kSleepUs);
      LOGE("HI_MPI_SVP_NNIE_Query Query timeout!");
    }
  }

  flush_dst_blobs();

  return ret;
}
|
||||
|
||||
// Copies `num` rows of `width` bytes from the densely packed `src` into `dst`, where consecutive
// destination rows start `stride` bytes apart.
// Returns RET_ERROR (after logging) when input_size differs from num * width, RET_OK otherwise.
int FillByUnsignedChar(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_U8 *src, HI_U8 *dst) {
  if (input_size != num * width) {
    LOGE("input size error:%d <-> %d.", input_size, num * width);
    return RET_ERROR;
  }

  HI_U8 *dst_row = dst;
  HI_U8 *src_row = src;
  for (HI_U32 row = 0; row < num; ++row) {
    for (HI_U32 col = 0; col < width; ++col) {
      dst_row[col] = src_row[col];
    }
    dst_row += stride;
    src_row += width;
  }
  return RET_OK;
}
|
||||
|
||||
// Converts `num` rows of `width` floats from the densely packed `src` to HI_S32 by multiplying each
// value by NNIE_QUANT_BASE. The first row is written through `dst`; every following row is written
// at `dst_u8` advanced by `stride` bytes per row.
// Returns RET_ERROR (after logging) when input_size differs from num * width, RET_OK otherwise.
int FillByFloat(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_FLOAT *src, HI_S32 *dst, HI_U8 *dst_u8) {
  if (input_size != num * width) {
    LOGE("input size error:%d <-> %d.", input_size, num * width);
    return RET_ERROR;
  }

  HI_S32 *row_dst = dst;
  HI_U8 *row_base = dst_u8;
  const HI_FLOAT *row_src = src;
  for (HI_U32 row = 0; row < num; ++row) {
    for (HI_U32 col = 0; col < width; ++col) {
      row_dst[col] = (row_src[col] * NNIE_QUANT_BASE);
    }
    // Move to the next destination row using the byte stride.
    row_base += stride;
    row_dst = reinterpret_cast<HI_S32 *>(row_base);
    row_src += width;
  }
  return RET_OK;
}
|
||||
|
||||
// Copies the user data at nnie_cfg->data_ptr_ into the source blob selected by input_data_idx
// (segment seg_idx_, node node_idx_) and flushes the blob's cache afterwards.
//  - `shape` / `size` describe the input; the product of the `size` dimensions is the element count.
//  - SEQ_S32 blobs: the data is read as float, scaled by NNIE_QUANT_BASE and written row by row
//    using the blob stride; the element count must equal (sum of step counts) * dim.
//  - YVU420SP / YVU422SP / U8 blobs: the data is copied as bytes (FillByUnsignedChar).
//  - Any other blob type: the data is read as float and quantized (FillByFloat).
// Returns RET_OK on success, RET_ERROR (or the helper's error code) on a size mismatch.
static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                           int size) {
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];

  HI_U32 input_size = 1;
  for (HI_U32 dim_idx = 0; dim_idx < (HI_U32)size; dim_idx++) {
    input_size *= shape[dim_idx];
  }

  HI_U8 *input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  HI_S32 *input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  HI_FLOAT *float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  HI_U8 *u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
  const HI_U32 stride = blob->u32Stride;

  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
    const HI_U32 dim = blob->unShape.stSeq.u32Dim;

    HI_U32 total_step_num = 0;
    for (HI_U32 item = 0; item < blob->u32Num; item++) {
      total_step_num += step_addr_u32[item];
    }

    if (input_size != total_step_num * dim) {
      LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
      return RET_ERROR;
    }

    for (HI_U32 item = 0; item < blob->u32Num; item++) {
      for (HI_U32 step = 0; step < step_addr_u32[item]; step++) {
        for (HI_U32 col = 0; col < dim; col++) {
          input_addr_s32[col] = (float_src_data[col] * NNIE_QUANT_BASE);
        }
        input_addr_u8 += stride;
        input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
        float_src_data += dim;
      }
    }
    NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
    return RET_OK;
  }

  const HI_U32 height = blob->unShape.stWhc.u32Height;
  const HI_U32 width = blob->unShape.stWhc.u32Width;
  const HI_U32 channel = blob->unShape.stWhc.u32Chn;

  HI_U32 ret;
  switch (blob->enType) {
    case SVP_BLOB_TYPE_YVU420SP:
      ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
                               u8_src_data, input_addr_u8);
      break;
    case SVP_BLOB_TYPE_YVU422SP:
      ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
      break;
    case SVP_BLOB_TYPE_U8:
      ret = FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
      break;
    default:
      ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
                        input_addr_u8);
      break;
  }
  if (ret != RET_OK) {
    return ret;
  }
  NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr),
                    blob->u32Num * channel * height * stride);

  return RET_OK;
}
|
||||
|
||||
// Reads the destination blob node_idx_ of segment (seg_idx_ - 1), divides every HI_S32 value by
// NNIE_QUANT_BASE and stores the resulting floats densely at nnie_cfg->data_ptr_.
//  - `shape` / `size` describe the expected output; the product of the dimensions must equal the
//    number of elements held by the blob.
//  - Blob types in the range [SVP_BLOB_TYPE_U8, SVP_BLOB_TYPE_YVU422SP] are rejected.
//  - Rows are read using the blob stride: SEQ_S32 blobs have `dim` values per step, other blobs
//    have `width` values per (channel, height) row.
// Returns RET_OK on success and RET_ERROR on a type or shape mismatch.
static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                          int size) {
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];

  HI_U32 input_num = 1;
  for (HI_U32 dim_idx = 0; dim_idx < (HI_U32)size; dim_idx++) {
    input_num *= shape[dim_idx];
  }

  if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
    LOGE("Nnie output type error");
    return RET_ERROR;
  }

  HI_U8 *output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  HI_S32 *output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  HI_FLOAT *float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  const HI_U32 stride = blob->u32Stride;

  // Converts one row of `len` values and advances the read/write cursors to the next row.
  auto dequantize_row = [&output_addr_u8, &output_addr_s32, &float_dst_data, stride](HI_U32 len) {
    for (HI_U32 col = 0; col < len; col++) {
      float_dst_data[col] = (HI_FLOAT)output_addr_s32[col] / NNIE_QUANT_BASE;
    }
    output_addr_u8 += stride;
    output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
    float_dst_data += len;
  };

  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    const HI_U32 dim = blob->unShape.stSeq.u32Dim;
    HI_U32 *step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);

    HI_U32 total_step_num = 0;
    for (HI_U32 item = 0; item < blob->u32Num; item++) {
      total_step_num += step_addr_u32[item];
    }
    if (input_num != total_step_num * dim) {
      LOGE("input shape");
      return RET_ERROR;
    }
    for (HI_U32 item = 0; item < blob->u32Num; item++) {
      for (HI_U32 step = 0; step < step_addr_u32[item]; step++) {
        dequantize_row(dim);
      }
    }
    return RET_OK;
  }

  const HI_U32 height = blob->unShape.stWhc.u32Height;
  const HI_U32 width = blob->unShape.stWhc.u32Width;
  const HI_U32 channel = blob->unShape.stWhc.u32Chn;
  if (input_num != height * channel * width * blob->u32Num) {
    LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
    return RET_ERROR;
  }
  for (HI_U32 item = 0; item < blob->u32Num; item++) {
    for (HI_U32 row = 0; row < channel * height; row++) {
      dequantize_row(width);
    }
  }
  return RET_OK;
}
|
||||
|
||||
// Validates a MindSpore input shape against the NNIE input node and derives the batch count.
//  - The product of all dimensions except the first one must match the size required by the node:
//    u32Dim for SEQ_S32 nodes, channel * height / 2 * width for YVU420SP, height * 2 * width for
//    YVU422SP and channel * height * width for every other type.
//  - For SEQ_S32 nodes cfg_.step_ must be non-zero and input_shape[0] must be a positive integer
//    multiple of it.
// On success cfg_.max_input_num_ is set to (per-item size * input_shape[0]) / nnie input size, the
// value is printed to stdout and RET_OK is returned. RET_ERROR is returned on any mismatch, for an
// empty shape and when the node size is 0.
int CheckMsShapeN(NnieRunCfg *nnie_run_cfg, const std::vector<int64_t> &input_shape, const SVP_NNIE_NODE_S &nnie_node) {
  // input_shape[0] is read below, so an empty shape cannot be accepted.
  if (input_shape.empty()) {
    LOGE("The input shape is empty.");
    return RET_ERROR;
  }

  size_t ms_input_size = 1, i;
  for (i = 1; i < input_shape.size(); i++) {
    ms_input_size *= input_shape[i];
  }

  size_t nnie_input_size;
  if (SVP_BLOB_TYPE_SEQ_S32 == nnie_node.enType) {
    if (nnie_run_cfg->cfg_.step_ == 0) {
      LOGE("request time_step set! Please export NNIE_RUNTIME_CONFIG_PATH");
      return RET_ERROR;
    }
    if (ms_input_size != nnie_node.unShape.u32Dim) {
      LOGE("The input data does not meet the required size %d <-> %d.", static_cast<int>(ms_input_size),
           nnie_node.unShape.u32Dim);
      return RET_ERROR;
    }
    if ((input_shape[0] < static_cast<int>(nnie_run_cfg->cfg_.step_)) ||
        (input_shape[0] % nnie_run_cfg->cfg_.step_ != 0)) {
      LOGW("The num value(%d) of input must be an integer multiple of time_step(%d)", static_cast<int>(input_shape[0]),
           nnie_run_cfg->cfg_.step_);
      return RET_ERROR;
    }
    nnie_input_size = nnie_node.unShape.u32Dim * nnie_run_cfg->cfg_.step_;
  } else {
    auto height = nnie_node.unShape.stWhc.u32Height;
    auto width = nnie_node.unShape.stWhc.u32Width;
    auto channel = nnie_node.unShape.stWhc.u32Chn;
    if (SVP_BLOB_TYPE_YVU420SP == nnie_node.enType) {
      nnie_input_size = static_cast<HI_U32>(channel * height / 2) * width;
    } else if (SVP_BLOB_TYPE_YVU422SP == nnie_node.enType) {
      nnie_input_size = height * 2 * width;
    } else {
      nnie_input_size = channel * height * width;
    }
    if (ms_input_size != nnie_input_size) {
      LOGE("The input data does not meet the required size %d <-> %d.", static_cast<int>(ms_input_size),
           static_cast<int>(nnie_input_size));
      return RET_ERROR;
    }
  }
  // A zero-sized node passes the equality checks above when the input is also empty; reject it
  // here instead of dividing by zero.
  if (nnie_input_size == 0) {
    LOGE("The required input size of the nnie node is 0.");
    return RET_ERROR;
  }
  nnie_run_cfg->cfg_.max_input_num_ = (ms_input_size * input_shape[0]) / nnie_input_size;
  fprintf(stdout, "The input num is %d.", nnie_run_cfg->cfg_.max_input_num_);
  return RET_OK;
}
|
||||
|
||||
// Finds the index of the tensor among the first `input_size` entries of `inputs` that corresponds
// to the node called `name`. Before comparing, a trailing "_pre" is removed from tensor names longer
// than 4 characters, otherwise a trailing "_post" is removed from names longer than 5 characters.
// A first pass looks for an exact match; if none exists, a second pass accepts the first tensor
// whose (stripped) name is a prefix of `name`. Returns input_size when nothing matches.
size_t GetFillIndex(const std::vector<mindspore::MSTensor> &inputs, size_t input_size, const HI_CHAR *name) {
  // Returns the name of inputs[idx] without its "_pre" / "_post" suffix.
  auto stripped_name = [&inputs](size_t idx) {
    auto tensor_name = inputs[idx].Name();
    const size_t len = tensor_name.length();
    if (len > 4) {
      if (tensor_name.compare(len - 4, 4, "_pre") == 0) {
        tensor_name.resize(len - 4);
      } else if (len > 5 && tensor_name.compare(len - 5, 5, "_post") == 0) {
        tensor_name.resize(len - 5);
      }
    }
    return tensor_name;
  };

  // Pass 1: exact match.
  for (size_t idx = 0; idx < input_size; ++idx) {
    if (strcmp(stripped_name(idx).c_str(), name) == 0) {
      return idx;
    }
  }

  // Pass 2: the tensor name only has to be a prefix of the node name.
  for (size_t idx = 0; idx < input_size; ++idx) {
    const auto candidate = stripped_name(idx);
    if (strncmp(candidate.c_str(), name, candidate.length()) == 0) {
      return idx;
    }
  }
  return input_size;
}
|
||||
|
||||
// Loads the wk model from `model_buf`, validates it against `inputs` and the runtime configuration
// already stored in `nnie_run_cfg->cfg_`, allocates the step buffer for recurrent networks and
// initialises the forward parameters.
//
// `inputs` must hold at least two entries; the last entry is excluded from name matching.
// Returns RET_OK on success and RET_ERROR otherwise. Nothing is released on failure: the caller
// is expected to call NnieCommDelete().
int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size,
                   const std::vector<mindspore::MSTensor> &inputs) {
  if (nnie_run_cfg == nullptr) {
    LOGE("nnie_run_cfg is nullptr!");
    return RET_ERROR;
  }
  NnieModel *model = &nnie_run_cfg->model_;
  NnieParam *param = &nnie_run_cfg->param_;
  NnieCfg *cfg = &nnie_run_cfg->cfg_;
  const HI_U32 step = cfg->step_;  // time step

  HI_S32 ret = NnieLoadModel(model_buf, size, model);
  if (ret != RET_OK) {
    LOGE("NnieLoadModel failed!");
    return RET_ERROR;
  }
  if (inputs.size() <= 1) {
    LOGE("inputs size need greater than 1!");
    return RET_ERROR;
  }
  if (inputs[0].Shape().size() <= 1) {
    LOGE("input shape size need greater than 1!");
    return RET_ERROR;
  }

  // Pick the tensor that feeds the first source node of segment 0; fall back to tensor 0 when
  // no tensor name matches the wk node name.
  const size_t tensor_num = inputs.size() - 1;
  size_t first_input = GetFillIndex(inputs, tensor_num, model->model_.astSeg[0].astSrcNode[0].szName);
  if (first_input == tensor_num) {
    first_input = 0;
  }
  if (CheckMsShapeN(nnie_run_cfg, inputs[first_input].Shape(), model->model_.astSeg[0].astSrcNode[0]) != RET_OK) {
    return RET_ERROR;
  }

  const HI_U32 seg_num = model->model_.u32NetSegNum;
  bool has_roi = false;
  for (HI_U32 i = 0; i < seg_num; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == model->model_.astSeg[i].enNetType) {
      has_roi = true;
    }
  }
  if (has_roi) {
    if (cfg->max_roi_num_ == 0) {
      LOGE("NNIE_RUNTIME_CONFIG_PATH: max_roi_num(0) should greater than 0!");
      return RET_ERROR;
    }
  } else {
    if (cfg->max_roi_num_ != 0) {
      LOGW("NNIE_RUNTIME_CONFIG_PATH: max_roi_num should euqal to 0!");
      cfg->max_roi_num_ = 0;
    }
  }

  if (model->model_.astSeg[0].enNetType == SVP_NNIE_NET_TYPE_RECURRENT) {
    if (step == 0) {
      LOGE("request time_step set! No NNIE_RUNTIME_CONFIG_PATH, please export NNIE_RUNTIME_CONFIG_PATH");
      return RET_ERROR;
    }
    // step_vir_addr_ holds NNIE_EACH_SEG_STEP_ADDR_NUM slots per segment for at most
    // SVP_NNIE_MAX_NET_SEG_NUM segments; refuse models that would overflow it.
    if (seg_num > SVP_NNIE_MAX_NET_SEG_NUM) {
      LOGE("seg num err!");
      return RET_ERROR;
    }
    const HI_U32 total_size = cfg->max_input_num_ * sizeof(HI_S32) * seg_num * NNIE_EACH_SEG_STEP_ADDR_NUM;
    HI_U8 *vir_addr = nullptr;
    ret = NnieMemMalloc("SVP_NNIE_STEP", nullptr, reinterpret_cast<HI_U64 *>(&param->step_buf_.u64PhyAddr),
                        reinterpret_cast<void **>(&vir_addr), total_size);
    if (HI_SUCCESS != ret) {
      LOGE("Malloc memory failed:");
      return RET_ERROR;
    }

    param->step_buf_.u64VirAddr = (HI_U64)((HI_UL)vir_addr);
    // Carve the buffer into one HI_S32[max_input_num_] array per step slot.
    for (HI_U32 i = 0; i < seg_num * NNIE_EACH_SEG_STEP_ADDR_NUM; i++) {
      cfg->step_vir_addr_[i] = param->step_buf_.u64VirAddr + i * cfg->max_input_num_ * sizeof(HI_S32);
    }

    for (HI_U32 i = 0; i < seg_num; i++) {
      const HI_U32 off_set = i * NNIE_EACH_SEG_STEP_ADDR_NUM;
      HI_U32 *input_steps = reinterpret_cast<HI_U32 *>(static_cast<HI_UL>(cfg->step_vir_addr_[off_set]));
      HI_U32 *output_steps = reinterpret_cast<HI_U32 *>(static_cast<HI_UL>(cfg->step_vir_addr_[off_set + 1]));
      for (HI_U32 k = 0; k < cfg->max_input_num_; k++) {
        input_steps[k] = step;   // step of input x_t
        output_steps[k] = step;  // step of output h_t
      }
    }
  }

  param->model_ = &(model->model_);
  ret = NnieParamInit(cfg, param);
  if (ret != RET_OK) {
    LOGE("NnieParamInit failed!");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.seg_idx_ = 0;
  return RET_OK;
}
|
||||
|
||||
// Tears down what NnieCommCreate() built: first the forward parameters, then the loaded model.
void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model) {
  // Parameters are released before the model is unloaded (same order as before).
  NnieParamRelease(pstNnieParamm);
  NnieUnloadModel(nnie_model);
}
|
||||
|
||||
// Copies the result of destination node `tensor_index` into `data`.
//
// `shape`/`size` describe the destination tensor and are forwarded to NnieGetDstData().
// At least one segment must already have been run (run_idx_.seg_idx_ > 0).
// Returns RET_OK on success, RET_ERROR otherwise.
int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index) {
  if (nnie_run_cfg->run_idx_.seg_idx_ <= 0) {
    LOGE("output seg index error.");
    return RET_ERROR;
  }
  // node_idx_ is unsigned: a negative index would silently wrap to a huge value.
  if (tensor_index < 0) {
    LOGE("output node index error.");
    return RET_ERROR;
  }

  nnie_run_cfg->run_idx_.node_idx_ = static_cast<HI_U32>(tensor_index);
  nnie_run_cfg->cfg_.data_ptr_ = data;
  const int ret = NnieGetDstData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size);
  if (ret != RET_OK) {
    LOGE("NnieGetDstData failed!");
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
// Copies `data` into source node `tensor_index` of the current segment (run_idx_.seg_idx_).
//
// The tensor `dtype` must agree with the blob type of the node: blob types in the range
// SVP_BLOB_TYPE_U8..SVP_BLOB_TYPE_YVU422SP accept UInt8/Int8, every other blob type accepts Float32.
// `shape`/`size` are forwarded to NnieFillSrcData().
// Returns RET_OK on success, RET_ERROR otherwise.
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size,
                     int tensor_index) {
  const HI_U32 seg_idx = nnie_run_cfg->run_idx_.seg_idx_;

  // Reject negative indices as well: they would pass a plain ">=" test and index src_[] out of bounds.
  if (tensor_index < 0 || tensor_index >= nnie_run_cfg->param_.model_->astSeg[seg_idx].u16SrcNum) {
    LOGE("Nnie input node index error!");
    return RET_ERROR;
  }

  const SVP_BLOB_TYPE_E src_type = nnie_run_cfg->param_.seg_data_[seg_idx].src_[tensor_index].enType;
  const bool byte_blob = (SVP_BLOB_TYPE_U8 <= src_type && src_type <= SVP_BLOB_TYPE_YVU422SP);
  const bool dtype_ok = byte_blob ? (dtype == DataType::kNumberTypeUInt8 || dtype == DataType::kNumberTypeInt8)
                                  : (dtype == DataType::kNumberTypeFloat32);
  if (!dtype_ok) {
    LOGE("Nnie input node type error!");
    return RET_ERROR;
  }

  nnie_run_cfg->run_idx_.node_idx_ = static_cast<HI_U32>(tensor_index);
  nnie_run_cfg->cfg_.data_ptr_ = data;
  const int ret = NnieFillSrcData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size);
  if (ret != RET_OK) {
    LOGE("NnieFillSrcData failed!");
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
// Runs the segment selected by run_idx_.seg_idx_ and, on success, advances seg_idx_ by one.
//
// `run_box` selects the forward-with-bbox path (using param_.rpn_bbox_) instead of the plain forward.
// Returns RET_OK on success, RET_ERROR otherwise.
int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box) {
  const HI_U32 segidx = nnie_run_cfg->run_idx_.seg_idx_;

  if (segidx >= nnie_run_cfg->param_.model_->u32NetSegNum) {
    LOGE("seg num err!\n");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.node_idx_ = 0;

  HI_S32 ret = HI_SUCCESS;
  const char *api_name = nullptr;  // reported on failure so the log names the call that really failed
  if (run_box) {
    api_name = "NNIE_ForwardWithBbox";
    ret =
      NNIE_ForwardWithBbox(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, &nnie_run_cfg->param_.rpn_bbox_, HI_TRUE);
  } else {
    api_name = "NnieForward";
    ret = NnieForward(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, HI_TRUE);
  }
  if (HI_SUCCESS != ret) {
    LOGE("%s failed!", api_name);
    return RET_ERROR;
  }

  nnie_run_cfg->run_idx_.seg_idx_ = segidx + 1;
  return RET_OK;
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "include/api/types.h"
|
||||
#include "include/mpi_vb.h"
|
||||
#include "include/hi_comm_svp.h"
|
||||
#include "include/hi_nnie.h"
|
||||
#include "include/mpi_nnie.h"
|
||||
#include "include/ir/dtype/type_id.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
// Rounds u32Num up to the next multiple of 16. The argument is parenthesised so that
// compound expressions (e.g. `a ? b : c`, `x | y`) are aligned as a whole.
#define NNIE_ALIGN_16 16
#define NNIE_ALIGN16(u32Num) (((u32Num) + NNIE_ALIGN_16 - 1) / NNIE_ALIGN_16 * NNIE_ALIGN_16)

// Rounds u32Num up to the next multiple of 32.
#define NNIE_ALIGN_32 32
#define NNIE_ALIGN32(u32Num) (((u32Num) + NNIE_ALIGN_32 - 1) / NNIE_ALIGN_32 * NNIE_ALIGN_32)

// Converts an integer address to a `Type *`.
#define NNIE_CONVERT_64BIT_ADDR(Type, Addr) reinterpret_cast<Type *>((HI_UL)(Addr))
// Scale between float values and the fixed-point integers stored in NNIE blobs.
#define NNIE_QUANT_BASE 4096

// Number of coordinates per ROI row.
#define NNIE_COORDI_NUM 4
// Step-address slots per segment (one for the input steps, one for the output steps).
#define NNIE_EACH_SEG_STEP_ADDR_NUM 2
// Size of the buffer used to build report file names.
#define NNIE_REPORT_NAME_LENGTH 64
|
||||
|
||||
typedef struct {
|
||||
SVP_NNIE_MODEL_S model_;
|
||||
SVP_MEM_INFO_S model_buf_; // store Model file
|
||||
} NnieModel;
|
||||
typedef struct {
|
||||
SVP_SRC_BLOB_S src_[SVP_NNIE_MAX_INPUT_NUM];
|
||||
SVP_DST_BLOB_S dst_[SVP_NNIE_MAX_OUTPUT_NUM];
|
||||
} NnieSegData;
|
||||
|
||||
typedef struct {
|
||||
bool src_node_[SVP_NNIE_MAX_INPUT_NUM];
|
||||
bool dst_node_[SVP_NNIE_MAX_OUTPUT_NUM];
|
||||
} NNIEMemSegInfo;
|
||||
|
||||
typedef struct {
|
||||
NNIEMemSegInfo seg_[SVP_NNIE_MAX_NET_SEG_NUM];
|
||||
} NNIEMemCfg;
|
||||
|
||||
typedef struct {
|
||||
SVP_NNIE_MODEL_S *model_;
|
||||
HI_U32 task_buf_size_[SVP_NNIE_MAX_NET_SEG_NUM];
|
||||
SVP_MEM_INFO_S task_buf_;
|
||||
SVP_MEM_INFO_S tmp_buf_;
|
||||
SVP_MEM_INFO_S step_buf_; // store Lstm step info
|
||||
SVP_SRC_BLOB_S rpn_bbox_;
|
||||
NnieSegData seg_data_[SVP_NNIE_MAX_NET_SEG_NUM]; // each seg's input and output blob
|
||||
SVP_NNIE_FORWARD_CTRL_S forward_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
|
||||
SVP_NNIE_FORWARD_WITHBBOX_CTRL_S forward_with_bbox_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
|
||||
NNIEMemCfg mem_cfg_;
|
||||
} NnieParam;
|
||||
|
||||
typedef struct {
|
||||
HI_VOID *data_ptr_;
|
||||
HI_U32 max_input_num_;
|
||||
HI_U32 max_roi_num_;
|
||||
HI_U32 step_;
|
||||
HI_U64 step_vir_addr_[NNIE_EACH_SEG_STEP_ADDR_NUM *
|
||||
SVP_NNIE_MAX_NET_SEG_NUM]; // virtual addr of LSTM's or RNN's step buffer
|
||||
SVP_NNIE_ID_E nnie_core_id_[SVP_NNIE_MAX_NET_SEG_NUM];
|
||||
} NnieCfg;
|
||||
|
||||
typedef struct {
|
||||
HI_U32 seg_idx_;
|
||||
HI_U32 node_idx_;
|
||||
} NnieDataIndex;
|
||||
|
||||
typedef struct {
|
||||
HI_U32 src_size_[SVP_NNIE_MAX_INPUT_NUM];
|
||||
HI_U32 dst_size_[SVP_NNIE_MAX_OUTPUT_NUM];
|
||||
} NnieBlobSize;
|
||||
|
||||
typedef struct {
|
||||
NnieModel model_;
|
||||
NnieParam param_;
|
||||
NnieCfg cfg_;
|
||||
NnieDataIndex run_idx_;
|
||||
} NnieRunCfg;
|
||||
|
||||
int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
|
||||
|
||||
size_t GetFillIndex(const std::vector<mindspore::MSTensor> &inputs, size_t input_size, const HI_CHAR *name);
|
||||
|
||||
void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model);
|
||||
|
||||
int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box);
|
||||
|
||||
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size, int id);
|
||||
|
||||
int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index);
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
|
|
@ -0,0 +1,222 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <cstring>
|
||||
#include "src/nnie_manager.h"
|
||||
#include "src/nnie_common.h"
|
||||
#include "src/nnie_print.h"
|
||||
#include "src/nnie_memory.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
constexpr int kNumInput2 = 2;
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core_id) {
|
||||
memset(&nnie_cfg_, 0, sizeof(NnieRunCfg));
|
||||
|
||||
nnie_cfg_.cfg_.max_roi_num_ = max_roi_num;
|
||||
|
||||
nnie_cfg_.cfg_.step_ = step;
|
||||
for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < core_id.size(); i++) {
|
||||
if (core_id[i] < SVP_NNIE_ID_BUTT) {
|
||||
nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[i];
|
||||
} else {
|
||||
LOGE("nnie core num toobig.\n");
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
// Overrides the number of input instances stored in the run configuration.
void NNIEManager::SetInputNum(int max_input_num) {
  nnie_cfg_.cfg_.max_input_num_ = max_input_num;
}
|
||||
|
||||
int NNIEManager::Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs) {
|
||||
if (NnieCommCreate(&nnie_cfg_, model_buf, size, inputs) != RET_OK) {
|
||||
NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Runs segment `seg_id` and copies its results into `outputs`.
//
// For ROI segments the forward-with-bbox path is used and `outputs_shape` supplies the per-output
// shape to which the ROI count is prepended.
// Returns RET_OK on success, RET_ERROR otherwise.
int NNIEManager::Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
                     const std::vector<std::vector<int64_t>> &outputs_shape) {
  // Validate before touching astSeg[seg_id]; the check inside NnieCommRun() comes too late for
  // the segment-type lookup below.
  if (nnie_cfg_.param_.model_ == nullptr || seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
    LOGE("seg num err!");
    return RET_ERROR;
  }
  nnie_cfg_.run_idx_.seg_idx_ = seg_id;
  const bool run_box = (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI);

  if (NnieCommRun(&nnie_cfg_, run_box)) {
    LOGE("Nnie Run Fail!");
    return RET_ERROR;
  }
  if (GetOutputData(outputs, outputs_shape, run_box)) {
    LOGE("Get Output Data Fail!");
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
void NNIEManager::Release() {
|
||||
// NniePrintReportResult(&nnie_cfg_.param_);
|
||||
NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
|
||||
}
|
||||
|
||||
// Copies the results of the segment that was run last (run_idx_.seg_idx_ - 1) into `outputs`.
//
// `outputs` must contain exactly one Float32 tensor per destination node of that segment.
// When `run_box` is set, every output is first reshaped to `outputs_shape[i]` with the ROI count
// (dst_[0].u32Num) prepended. Destination nodes flagged in mem_cfg_ are skipped.
// Each node is matched to a tensor by name; if no name matches, the tensor at the node's own
// index is used.
// Returns RET_OK on success, RET_ERROR otherwise.
int NNIEManager::GetOutputData(std::vector<mindspore::MSTensor> *outputs,
                               const std::vector<std::vector<int64_t>> &outputs_shape, bool run_box) {
  if (outputs == nullptr) {
    LOGE("outputs is nullptr!");
    return RET_ERROR;
  }
  // seg_idx_ is unsigned: "seg_idx_ - 1" would wrap around if no segment has been run yet.
  if (nnie_cfg_.run_idx_.seg_idx_ == 0) {
    LOGE("output seg index error.");
    return RET_ERROR;
  }
  const HI_U32 seg_idx = nnie_cfg_.run_idx_.seg_idx_ - 1;
  const int output_size = static_cast<int>(outputs->size());
  const int dst_num = nnie_cfg_.param_.model_->astSeg[seg_idx].u16DstNum;
  if (output_size != dst_num) {
    LOGE("seg%d: %d output tensors are required, but there are %d outputs.", static_cast<int>(seg_idx), dst_num,
         output_size);
    return RET_ERROR;
  }

  if (run_box) {
    if (outputs_shape.size() < outputs->size()) {
      LOGE("outputs shape size err!");
      return RET_ERROR;
    }
    const int max_roi_num = nnie_cfg_.param_.seg_data_[seg_idx].dst_[0].u32Num;
    for (int i = 0; i < output_size; i++) {
      if ((*outputs)[i].DataType() != DataType::kNumberTypeFloat32) {
        LOGE("Unsupported DataType!");
        return RET_ERROR;
      }
      auto ptr_shape = outputs_shape[i];
      ptr_shape.insert(ptr_shape.begin(), max_roi_num);
      (*outputs)[i].SetShape(ptr_shape);
    }
  }

  for (int i = 0; i < dst_num; i++) {
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_idx].dst_node_[i]) {
      continue;
    }

    size_t j = GetFillIndex(*outputs, outputs->size(), nnie_cfg_.param_.model_->astSeg[seg_idx].astDstNode[i].szName);
    if (j == outputs->size()) {
      // No tensor name matches the wk node name: fall back to positional pairing.
      j = static_cast<size_t>(i);
    }

    if ((*outputs)[j].DataType() != DataType::kNumberTypeFloat32) {
      LOGE("Unsupported DataType!");
      return RET_ERROR;
    }
    auto ptr_shape = (*outputs)[j].Shape();
    auto ptr = reinterpret_cast<float *>((*outputs)[j].MutableData());
    if (ptr == nullptr) {
      LOGE("output data is nullptr!");
      return RET_ERROR;
    }
    if (NnieCommGetOutputData(&nnie_cfg_, ptr, ptr_shape.data(), ptr_shape.size(), i) != RET_OK) {
      return RET_ERROR;
    }
  }

  return RET_OK;
}
|
||||
|
||||
// Converts the float ROI tensor `input` to fixed point and writes it into param_.rpn_bbox_.
//
// `input` must have shape [roi_num, NNIE_COORDI_NUM] with roi_num <= cfg_.max_roi_num_.
// Each value is multiplied by NNIE_QUANT_BASE and truncated to HI_S32; rows are written
// u32Stride bytes apart, and the written range is flushed from the cache afterwards.
// Returns RET_OK on success, RET_ERROR on a shape or data error.
int NNIEManager::FillRoiPooling(mindspore::MSTensor *input) {
  if (input == nullptr) {
    LOGE("Roi shape err!");
    return RET_ERROR;
  }
  auto roi_shape = input->Shape();
  // Check the rank before indexing, and reject a negative row count that would wrap in u32Height.
  if (roi_shape.size() < 2 || roi_shape[0] < 0 || roi_shape[1] != NNIE_COORDI_NUM) {
    LOGE("Roi shape err!");
    return RET_ERROR;
  }

  if (roi_shape[0] > static_cast<int64_t>(nnie_cfg_.cfg_.max_roi_num_)) {
    LOGE("NNIE_RUNTIME_CONFIG_PATH: The maximum [max_roi_num] value set is less than the actual value: %d < %d.",
         nnie_cfg_.cfg_.max_roi_num_, static_cast<int>(roi_shape[0]));
    return RET_ERROR;
  }

  auto float_src_data = reinterpret_cast<float *>(input->MutableData());
  if (float_src_data == nullptr) {
    LOGE("Roi data is nullptr!");
    return RET_ERROR;
  }

  SVP_SRC_BLOB_S *bbox = &nnie_cfg_.param_.rpn_bbox_;
  bbox->unShape.stWhc.u32Height = roi_shape[0];
  const HI_U32 dst_stride = bbox->u32Stride;
  const size_t row_elems = dst_stride / sizeof(HI_U32);
  auto proposal_result = NNIE_CONVERT_64BIT_ADDR(HI_S32, bbox->u64VirAddr);

  for (size_t j = 0; j < bbox->unShape.stWhc.u32Height; j++) {
    HI_S32 *row = proposal_result + row_elems * j;
    for (size_t c = 0; c < NNIE_COORDI_NUM; c++) {
      row[c] = static_cast<HI_S32>(*(float_src_data++) * NNIE_QUANT_BASE);
    }
  }
  NnieMemFlushCache(bbox->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, bbox->u64VirAddr),
                    dst_stride * bbox->unShape.stWhc.u32Height);

  return RET_OK;
}
|
||||
|
||||
// Copies the tensors in `inputs` into the source nodes of segment `seg_id`.
//
// The last element of `inputs` is never treated as a data tensor (it is excluded from every
// lookup — presumably it carries the model buffer; confirm with the caller).
// For ROI segments the tensor named "proposal" is written to the bbox blob via FillRoiPooling().
// Source nodes flagged in mem_cfg_ are skipped. Each remaining node is matched to a tensor by
// name; if no name matches, the tensor at the node's own index is used.
// Returns RET_OK on success, RET_ERROR otherwise.
int NNIEManager::FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id) {
  // An empty vector would make "size() - 1" wrap around below.
  if (inputs == nullptr || inputs->empty()) {
    LOGE("Input Size Err!");
    return RET_ERROR;
  }
  if (nnie_cfg_.param_.model_ == nullptr || seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
    LOGE("seg num err!");
    return RET_ERROR;
  }

  const size_t input_size = inputs->size();
  const size_t tensor_num = input_size - 1;
  const auto &seg = nnie_cfg_.param_.model_->astSeg[seg_id];
  nnie_cfg_.run_idx_.seg_idx_ = seg_id;

  bool run_box = false;
  if (seg.enNetType == SVP_NNIE_NET_TYPE_ROI) {
    run_box = true;
    size_t proposal_idx = 0;
    for (; proposal_idx < tensor_num; proposal_idx++) {
      if ((*inputs)[proposal_idx].Name() == "proposal") {
        break;
      }
    }
    if (proposal_idx == tensor_num) {
      LOGE("Can't find proposal out!");
      return RET_ERROR;
    }
    // Propagate ROI conversion failures instead of running with a stale bbox blob.
    if (FillRoiPooling(&(*inputs)[proposal_idx]) != RET_OK) {
      LOGE("FillRoiPooling failed!");
      return RET_ERROR;
    }
  } else if ((input_size < static_cast<size_t>(kNumInput2)) || tensor_num != seg.u16SrcNum) {
    LOGE("Input Size Err!");
    return RET_ERROR;
  }

  for (size_t i = 0; i < seg.u16SrcNum; i++) {
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].src_node_[i]) {
      continue;
    }
    size_t j = GetFillIndex(*inputs, tensor_num, seg.astSrcNode[i].szName);
    if (j == tensor_num) {
      // No name match: pair node i with tensor i, provided such a tensor exists.
      if (i >= input_size) {
        LOGE("Input Size Err!");
        return RET_ERROR;
      }
      if (run_box && (*inputs)[i].Name() == "proposal") {
        continue;
      }
      j = i;
    }

    const auto input_data_type = (*inputs)[j].DataType();
    const bool supported = (input_data_type == DataType::kNumberTypeFloat32) ||
                           (input_data_type == DataType::kNumberTypeUInt8) ||
                           (input_data_type == DataType::kNumberTypeInt8);
    if (!supported) {
      LOGE("Unsupported DataType!");
      return RET_ERROR;
    }
    auto ptr_shape = (*inputs)[j].Shape();
    if (NnieCommFillData(&nnie_cfg_, (*inputs)[j].MutableData(), input_data_type, ptr_shape.data(), ptr_shape.size(),
                         i) != RET_OK) {
      LOGE("FillData failed!");
      return RET_ERROR;
    }
  }

  return RET_OK;
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
|
||||
#include <vector>
|
||||
#include "include/errorcode.h"
|
||||
#include "include/api/types.h"
|
||||
#include "src/nnie_common.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
class NNIEManager {
|
||||
public:
|
||||
static NNIEManager *GetInstance() {
|
||||
static NNIEManager manager;
|
||||
return &manager;
|
||||
}
|
||||
|
||||
NNIEManager() {}
|
||||
|
||||
~NNIEManager() {}
|
||||
|
||||
int Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
|
||||
|
||||
int CfgInit(int max_roi_num, int step, const std::vector<int> &core_id);
|
||||
|
||||
void SetInputNum(int max_input_num);
|
||||
|
||||
int FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id);
|
||||
|
||||
int Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
|
||||
const std::vector<std::vector<int64_t>> &outputs_shape);
|
||||
|
||||
void Release();
|
||||
|
||||
private:
|
||||
int GetOutputData(std::vector<mindspore::MSTensor> *outputs, const std::vector<std::vector<int64_t>> &outputs_shape,
|
||||
bool run_box = false);
|
||||
int FillRoiPooling(mindspore::MSTensor *input);
|
||||
char *wk_model_ = nullptr;
|
||||
|
||||
int model_size_ = 0;
|
||||
|
||||
NnieRunCfg nnie_cfg_;
|
||||
};
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/nnie_memory.h"
|
||||
#include "include/hi_common.h"
|
||||
#include "include/mpi_sys.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
// Allocates `size` bytes through HI_MPI_SYS_MmzAlloc and reports the physical and virtual
// addresses through the two out-parameters. Returns the status of the underlying call.
HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size) {
  const HI_S32 status = HI_MPI_SYS_MmzAlloc(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
  return status;
}
|
||||
|
||||
// Same as NnieMemMalloc(), but allocates through HI_MPI_SYS_MmzAlloc_Cached.
// Returns the status of the underlying call.
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr,
                           HI_U32 size) {
  const HI_S32 status = HI_MPI_SYS_MmzAlloc_Cached(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
  return status;
}
|
||||
|
||||
// Flushes `size` bytes starting at the given physical/virtual address pair via
// HI_MPI_SYS_MmzFlushCache. Returns the status of the underlying call.
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size) {
  const HI_S32 status = HI_MPI_SYS_MmzFlushCache(phy_addr, pv_vir_addr, size);
  return status;
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include "include/hi_common.h"
|
||||
#include "include/hi_debug.h"
|
||||
#include "include/hi_comm_svp.h"
|
||||
#include "include/hi_nnie.h"
|
||||
#include "include/mpi_nnie.h"
|
||||
#include "include/mpi_sys.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
#define NNIE_MEM_FREE(phy, vir) \
|
||||
do { \
|
||||
if ((0 != (phy)) && (0 != (vir))) { \
|
||||
HI_MPI_SYS_MmzFree((phy), reinterpret_cast<void *>(static_cast<HI_UL>(vir))); \
|
||||
(phy) = 0; \
|
||||
(vir) = 0; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
|
||||
|
||||
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
|
||||
|
||||
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size);
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
|
|
@ -0,0 +1,176 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/nnie_print.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
// Debug helper: dumps every destination blob of every segment to a text file named
// "./ms/fseg<seg>(<node>,<node id>)_<node name>.txt".
//
// Values are stored as fixed point and printed as float after dividing by NNIE_QUANT_BASE.
// Sequence blobs print `u32Dim` values for every step of every instance; all other blobs print a
// header line followed by chn x height x width values per instance.
// Returns HI_SUCCESS, or HI_FAILURE if a file cannot be created or written.
HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param) {
  if (pst_nnie_param == nullptr || pst_nnie_param->model_ == nullptr) {
    LOGE("Error,nnie param is null!");
    return HI_FAILURE;
  }
  const HI_U32 u32seg_num = pst_nnie_param->model_->u32NetSegNum;
  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};

  for (HI_U32 seg_idx = 0; seg_idx < u32seg_num; seg_idx++) {
    const auto &seg = pst_nnie_param->model_->astSeg[seg_idx];
    for (HI_U32 node_idx = 0; node_idx < seg.u16DstNum; node_idx++) {
      const auto &node = seg.astDstNode[node_idx];
      const auto &blob = pst_nnie_param->seg_data_[seg_idx].dst_[node_idx];

      const HI_S32 name_len = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH, "./ms/fseg%d(%d,%d)_%s.txt", seg_idx,
                                       node_idx, node.u32NodeId, node.szName);
      if (name_len < 0) {
        LOGE("Error,create file name failed!");
        return HI_FAILURE;
      }
      if (name_len >= NNIE_REPORT_NAME_LENGTH) {
        // snprintf cut the name; keep going, but make the truncation visible.
        LOGW("report file name is truncated: %s", acReportFileName);
      }

      FILE *fp = fopen(acReportFileName, "w");
      if (fp == nullptr) {
        LOGE("Error,open file failed!");
        return HI_FAILURE;
      }

      const size_t row_elems = blob.u32Stride / sizeof(HI_U32);
      const HI_S32 *ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);

      if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
        const HI_U32 u32Dim = blob.unShape.stSeq.u32Dim;
        const HI_U32 *pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
        for (HI_U32 n = 0; n < blob.u32Num; n++) {
          for (HI_U32 i = 0; i < pu32StepAddr[n]; i++) {
            for (HI_U32 j = 0; j < u32Dim; j++) {
              if (fprintf(fp, "%f ", static_cast<float>(ps32ResultAddr[j]) / NNIE_QUANT_BASE) < 0) {
                fclose(fp);
                return HI_FAILURE;
              }
            }
            ps32ResultAddr += row_elems;
          }
        }
      } else {
        const HI_U32 u32Height = blob.unShape.stWhc.u32Height;
        const HI_U32 u32Width = blob.unShape.stWhc.u32Width;
        const HI_U32 u32Chn = blob.unShape.stWhc.u32Chn;
        fprintf(fp, "%s 4 1 %d %d %d\n", node.szName, u32Height, u32Width, u32Chn);
        for (HI_U32 n = 0; n < blob.u32Num; n++) {
          for (HI_U32 i = 0; i < u32Chn; i++) {
            for (HI_U32 j = 0; j < u32Height; j++) {
              for (HI_U32 k = 0; k < u32Width; k++) {
                if (fprintf(fp, "%f ", static_cast<float>(ps32ResultAddr[k]) / NNIE_QUANT_BASE) < 0) {
                  fclose(fp);
                  return HI_FAILURE;
                }
              }
              ps32ResultAddr += row_elems;
            }
          }
        }
      }
      fclose(fp);
    }
  }
  return HI_SUCCESS;
}
|
||||
|
||||
// Debug helper: dumps every source blob of segment `segnum` to a text file named
// "seg<seg>_layer<node id>_input(<node name>)_inst.linear.hex".
//
// Sequence blobs are printed as raw integers, U8 blobs as raw bytes, and all other blobs as
// float values (fixed point divided by NNIE_QUANT_BASE) preceded by a header line.
// Returns HI_SUCCESS, or HI_FAILURE if `segnum` is invalid or a file cannot be created.
HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum) {
  if (pst_nnie_param == nullptr || pst_nnie_param->model_ == nullptr) {
    LOGE("Error,nnie param is null!");
    return HI_FAILURE;
  }
  if (segnum < 0 || static_cast<HI_U32>(segnum) >= pst_nnie_param->model_->u32NetSegNum) {
    LOGE("seg num err!");
    return HI_FAILURE;
  }
  const HI_U32 seg_idx = static_cast<HI_U32>(segnum);
  const auto &seg = pst_nnie_param->model_->astSeg[seg_idx];
  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};

  for (HI_U32 node_idx = 0; node_idx < seg.u16SrcNum; node_idx++) {
    const auto &node = seg.astSrcNode[node_idx];
    const auto &blob = pst_nnie_param->seg_data_[seg_idx].src_[node_idx];

    const HI_S32 name_len = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH,
                                     "seg%d_layer%d_input(%s)_inst.linear.hex", seg_idx, node.u32NodeId, node.szName);
    if (name_len < 0) {
      LOGE("Error,create file name failed!\n");
      return HI_FAILURE;
    }
    if (name_len >= NNIE_REPORT_NAME_LENGTH) {
      // snprintf cut the name; keep going, but make the truncation visible.
      LOGW("report file name is truncated: %s", acReportFileName);
    }

    FILE *fp = fopen(acReportFileName, "w");
    if (fp == nullptr) {
      LOGE("Error,open file failed!");
      return HI_FAILURE;
    }

    const HI_U32 u32Stride = blob.u32Stride;
    if (SVP_BLOB_TYPE_SEQ_S32 == blob.enType) {
      const HI_U32 u32Dim = blob.unShape.stSeq.u32Dim;
      const HI_U32 *pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob.unShape.stSeq.u64VirAddrStep);
      const HI_S32 *ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
      for (HI_U32 n = 0; n < blob.u32Num; n++) {
        for (HI_U32 i = 0; i < pu32StepAddr[n]; i++) {
          for (HI_U32 j = 0; j < u32Dim; j++) {
            fprintf(fp, "%d ", ps32ResultAddr[j]);
          }
          ps32ResultAddr += u32Stride / sizeof(HI_U32);
        }
      }
    } else {
      const HI_U32 u32Height = blob.unShape.stWhc.u32Height;
      const HI_U32 u32Width = blob.unShape.stWhc.u32Width;
      const HI_U32 u32Chn = blob.unShape.stWhc.u32Chn;
      if (blob.enType == SVP_BLOB_TYPE_U8) {
        const HI_U8 *pu8ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob.u64VirAddr);
        for (HI_U32 n = 0; n < blob.u32Num; n++) {
          for (HI_U32 i = 0; i < u32Chn; i++) {
            for (HI_U32 j = 0; j < u32Height; j++) {
              for (HI_U32 k = 0; k < u32Width; k++) {
                fprintf(fp, "%d ", pu8ResultAddr[k]);
              }
              pu8ResultAddr += u32Stride / sizeof(HI_U8);
            }
          }
        }
      } else {
        const HI_S32 *ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob.u64VirAddr);
        fprintf(fp, "%s 4 1 %d %d %d\n", node.szName, u32Height, u32Width, u32Chn);
        for (HI_U32 n = 0; n < blob.u32Num; n++) {
          for (HI_U32 i = 0; i < u32Chn; i++) {
            for (HI_U32 j = 0; j < u32Height; j++) {
              for (HI_U32 k = 0; k < u32Width; k++) {
                // Convert to float BEFORE dividing; an integer division would drop the fraction.
                fprintf(fp, "%f ", static_cast<float>(ps32ResultAddr[k]) / NNIE_QUANT_BASE);
              }
              ps32ResultAddr += u32Stride / sizeof(HI_U32);
            }
          }
        }
      }
    }
    fclose(fp);
  }

  return HI_SUCCESS;
}
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
|
||||
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
|
||||
#include "include/mpi_nnie.h"
|
||||
#include "include/hi_type.h"
|
||||
#include "src/nnie_common.h"
|
||||
#include "src/nnie_memory.h"
|
||||
|
||||
// Component tag embedded in every record printed by LOGE / LOGW.
#define LOG_TAG1 "NNIE"

// Writes an error record to stderr: a "[ERROR] <tag> [<file>:<line>] <function>]" prefix
// followed by the caller's printf-style message.
#define LOGE(format, ...)                                                                     \
  do {                                                                                        \
    fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
    fprintf(stderr, format, ##__VA_ARGS__);                                                   \
  } while (0)

// Same as LOGE, but the record is labelled "[Warning]".
#define LOGW(format, ...)                                                                       \
  do {                                                                                          \
    fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
    fprintf(stderr, format, ##__VA_ARGS__);                                                     \
  } while (0)
|
||||
|
||||
constexpr int kMaxSize = 1024;
|
||||
constexpr int kDecimal = 10;
|
||||
|
||||
namespace mindspore {
|
||||
namespace nnie {
|
||||
HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param);
|
||||
|
||||
HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum);
|
||||
} // namespace nnie
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
|
|
@ -0,0 +1,22 @@
|
|||
# Builds the NNIE proposal plugin (libmslite_proposal.so) and, for Release
# builds, strips it with the HiMix cross toolchain.
cmake_minimum_required(VERSION 3.14)
project(NNIE_proposal)

# Every source file found under src/ is compiled into the plugin.
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)

add_library(mslite_proposal SHARED ${COMMON_SRC3})

# Header search paths are attached to the target instead of the directory so
# they cannot leak into targets added to this directory later.
target_include_directories(mslite_proposal PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)

# LINK_LOCAT_LIB is supplied by the including build script. The keyword-less
# signature is kept on purpose so the link interface seen by existing
# consumers of mslite_proposal does not change.
target_link_libraries(mslite_proposal ${LINK_LOCAT_LIB})

# Strip tool: HIMIX_STRIP overrides the default himix200 toolchain binary.
if(DEFINED HIMIX_STRIP)
    set(NDK_STRIP ${HIMIX_STRIP})
else()
    set(NDK_STRIP "arm-himix200-linux-strip")
endif()

# Strip the shared object after linking in Release builds. $<TARGET_FILE:...>
# resolves to the real artifact path, so the step keeps working when the
# library output directory or file name is customised; VERBATIM makes argument
# escaping platform independent.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    add_custom_command(TARGET mslite_proposal POST_BUILD
        COMMAND ${NDK_STRIP} $<TARGET_FILE:mslite_proposal>
        COMMENT "Stripping libmslite_proposal.so"
        VERBATIM)
endif()
|
|
@ -0,0 +1,650 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/proposal.h"
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include "include/errorcode.h"
|
||||
|
||||
// Status codes returned by the proposal entry points.
using mindspore::lite::RET_OK;
using mindspore::lite::RET_ERROR;

// Number of input tensors consumed by ProposalInit / ProposalRun.
constexpr int kNumInput2 = 2;
// Rank required of each input tensor; dims 1, 2 and 3 are read as channel, height and width.
constexpr int kNCHWDims = 4;
|
||||
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
uint32_t RpnTmpBufSize(uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t input_height,
|
||||
uint32_t input_width) {
|
||||
uint32_t anchors_num = num_ratio_anchors * num_scale_anchors * input_height * input_width;
|
||||
uint32_t anchors_size = sizeof(uint32_t) * COORDI_NUM * anchors_num;
|
||||
uint32_t bbox_delta_size = anchors_size;
|
||||
uint32_t proposal_size = sizeof(uint32_t) * PROPOSAL_WIDTH * anchors_num;
|
||||
uint32_t ratio_anchors_size = sizeof(float) * num_ratio_anchors * COORDI_NUM;
|
||||
uint32_t scale_anchors_size = sizeof(float) * num_ratio_anchors * num_scale_anchors * COORDI_NUM;
|
||||
uint32_t score_size = sizeof(float) * anchors_num * 2;
|
||||
uint32_t stack_size = sizeof(Stack) * anchors_num;
|
||||
uint32_t total_size =
|
||||
anchors_size + bbox_delta_size + proposal_size + ratio_anchors_size + scale_anchors_size + score_size + stack_size;
|
||||
return total_size;
|
||||
}
|
||||
|
||||
// Lookup table for QuickExp(). The argument is a fixed-point number with 12 fractional bits
// and is split into five 4-bit digits; row r (0..4) holds exp(+digit * 16^r / 4096) and row
// r + 5 holds exp(-digit * 16^r / 4096). The table is read-only.
static const float exp_coef[10][16] = {
  {1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f, 1.0022f, 1.00244f, 1.00269f,
   1.00293f, 1.00318f, 1.00342f, 1.00367f},
  {1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f, 1.03578f, 1.03984f, 1.04391f,
   1.04799f, 1.05209f, 1.05621f, 1.06034f},
  {1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f, 1.75505f, 1.86825f, 1.98874f,
   2.117f, 2.25353f, 2.39888f, 2.55359f},
  {1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f, 8103.08f, 22026.5f, 59874.1f,
   162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f},
  {1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
   5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
   5.54062e+034f},
  {1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f, 0.998049f, 0.997805f, 0.997562f,
   0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f},
  {1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f, 0.969233f, 0.965455f, 0.961691f,
   0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f},
  {1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f, 0.569783f, 0.535261f,
   0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f},
  {1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f, 0.000335463f,
   0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f, 8.31529e-007f, 3.05902e-007f},
  {1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f, 0.0f, 0.0f, 0.0f,
   0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}};

// Table-driven approximation of exp(value / 4096).
//
// `value` is a signed fixed-point number with 12 fractional bits. Only the low 20 bits of its
// magnitude are covered by the table. Larger magnitudes used to be masked silently, so for
// example exp(-256) evaluated to 1.0; they now saturate: 0 for large negative arguments and
// the table's largest entry for large positive ones. The magnitude is computed in 64 bits so
// that INT32_MIN is handled without signed overflow.
static float QuickExp(int32_t value) {
  constexpr int64_t kTableRange = 1LL << 20;  // first magnitude the five 4-bit digits cannot express
  const bool negative = value < 0;
  const int64_t magnitude = negative ? -static_cast<int64_t>(value) : static_cast<int64_t>(value);
  if (magnitude >= kTableRange) {
    return negative ? 0.0f : exp_coef[4][15];
  }

  const uint32_t bits = static_cast<uint32_t>(magnitude);
  // Rows 0..4 serve non-negative arguments, rows 5..9 negative ones.
  const float(*rows)[16] = negative ? (exp_coef + 5) : exp_coef;
  // Multiply the per-digit factors, least significant digit first.
  return rows[0][bits & 0x0000000F] * rows[1][(bits >> 4) & 0x0000000F] * rows[2][(bits >> 8) & 0x0000000F] *
         rows[3][(bits >> 12) & 0x0000000F] * rows[4][(bits >> 16) & 0x0000000F];
}
|
||||
|
||||
static int32_t SoftMax(float *src, uint32_t num) {
|
||||
float max = 0;
|
||||
float sum = 0;
|
||||
uint32_t i = 0;
|
||||
|
||||
for (i = 0; i < num; ++i) {
|
||||
if (max < src[i]) {
|
||||
max = src[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < num; ++i) {
|
||||
src[i] = QuickExp(static_cast<int32_t>((src[i] - max) * QUANT_BASE));
|
||||
sum += src[i];
|
||||
}
|
||||
|
||||
for (i = 0; i < num; ++i) {
|
||||
src[i] /= sum;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
// Exchanges two proposal records, each PROPOSAL_WIDTH consecutive int32_t values, element by element.
static void Argswap(int32_t *src1, int32_t *src2) {
  int32_t *lhs = src1;
  int32_t *rhs = src2;
  const int32_t *const lhs_end = src1 + PROPOSAL_WIDTH;
  while (lhs != lhs_end) {
    const int32_t saved = *lhs;
    *lhs = *rhs;
    *rhs = saved;
    ++lhs;
    ++rhs;
  }
}
|
||||
|
||||
static int32_t NonRecursiveArgQuickSort(int32_t *array, int32_t low, int32_t high, Stack *stack, int32_t max_num) {
|
||||
int32_t top = 0;
|
||||
stack[top].min_ = low;
|
||||
stack[top].max_ = high;
|
||||
|
||||
while (top > -1) {
|
||||
low = stack[top].min_;
|
||||
high = stack[top].max_;
|
||||
int32_t i = low;
|
||||
int32_t j = high;
|
||||
|
||||
int32_t key_confidence = array[PROPOSAL_WIDTH * low + 4];
|
||||
top--;
|
||||
while (i < j) {
|
||||
while ((i < j) && (key_confidence > array[j * PROPOSAL_WIDTH + 4])) {
|
||||
j--;
|
||||
}
|
||||
if (i < j) {
|
||||
Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
|
||||
i++;
|
||||
}
|
||||
|
||||
while ((i < j) && (key_confidence < array[i * PROPOSAL_WIDTH + 4])) {
|
||||
i++;
|
||||
}
|
||||
if (i < j) {
|
||||
Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
if (low <= max_num) {
|
||||
if (low < i - 1) {
|
||||
top++;
|
||||
stack[top].min_ = low;
|
||||
stack[top].max_ = i - 1;
|
||||
}
|
||||
|
||||
if (high > i + 1) {
|
||||
top++;
|
||||
stack[top].min_ = i + 1;
|
||||
stack[top].max_ = high;
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
static int32_t FilterLowScoreBbox(int32_t *proposals, uint32_t anchors_num, uint32_t filter_thresh,
|
||||
uint32_t *num_after_filter) {
|
||||
uint32_t proposal_cnt = anchors_num;
|
||||
|
||||
if (filter_thresh > 0) {
|
||||
uint32_t i;
|
||||
for (i = 0; i < anchors_num; i++) {
|
||||
if (proposals[PROPOSAL_WIDTH * i + 4] < static_cast<int32_t>(filter_thresh)) {
|
||||
proposals[PROPOSAL_WIDTH * i + 5] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
proposal_cnt = 0;
|
||||
for (i = 0; i < anchors_num; i++) {
|
||||
if (proposals[PROPOSAL_WIDTH * i + 5] == 0) {
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt] = proposals[PROPOSAL_WIDTH * i];
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt + 1] = proposals[PROPOSAL_WIDTH * i + 1];
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt + 2] = proposals[PROPOSAL_WIDTH * i + 2];
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt + 3] = proposals[PROPOSAL_WIDTH * i + 3];
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt + 4] = proposals[PROPOSAL_WIDTH * i + 4];
|
||||
proposals[PROPOSAL_WIDTH * proposal_cnt + 5] = proposals[PROPOSAL_WIDTH * i + 5];
|
||||
proposal_cnt++;
|
||||
}
|
||||
}
|
||||
}
|
||||
*num_after_filter = proposal_cnt;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
static int32_t SVP_NNIE_Overlap(int32_t x_min1, int32_t y_min1, int32_t x_max1, int32_t y_max1, int32_t x_min2,
|
||||
int32_t y_min2, int32_t x_max2, int32_t y_max2, int32_t *area_sum,
|
||||
int32_t *area_inter) {
|
||||
/*** Check the input, and change the Return value ***/
|
||||
int32_t inter = 0;
|
||||
int32_t total = 0;
|
||||
int32_t x_min = 0;
|
||||
int32_t y_min = 0;
|
||||
int32_t x_max = 0;
|
||||
int32_t y_max = 0;
|
||||
int32_t area1 = 0;
|
||||
int32_t area2 = 0;
|
||||
int32_t inter_width = 0;
|
||||
int32_t inter_height = 0;
|
||||
|
||||
x_min = MAX(x_min1, x_min2);
|
||||
y_min = MAX(y_min1, y_min2);
|
||||
x_max = MIN(x_max1, x_max2);
|
||||
y_max = MIN(y_max1, y_max2);
|
||||
|
||||
inter_width = x_max - x_min + 1;
|
||||
inter_height = y_max - y_min + 1;
|
||||
|
||||
inter_width = (inter_width >= 0) ? inter_width : 0;
|
||||
inter_height = (inter_height >= 0) ? inter_height : 0;
|
||||
|
||||
inter = inter_width * inter_height;
|
||||
area1 = (x_max1 - x_min1 + 1) * (y_max1 - y_min1 + 1);
|
||||
area2 = (x_max2 - x_min2 + 1) * (y_max2 - y_min2 + 1);
|
||||
|
||||
total = area1 + area2 - inter;
|
||||
|
||||
*area_sum = total;
|
||||
*area_inter = inter;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
static int32_t SVP_NNIE_NonMaxSuppression(int32_t *proposals, uint32_t anchors_num, uint32_t nms_thresh,
|
||||
uint32_t max_roi_num) {
|
||||
/****** define variables *******/
|
||||
int32_t x_min1;
|
||||
int32_t y_min1;
|
||||
int32_t x_max1;
|
||||
int32_t y_max1;
|
||||
int32_t x_min2;
|
||||
int32_t y_min2;
|
||||
int32_t x_max2;
|
||||
int32_t y_max2;
|
||||
int32_t s32AreaTotal = 0;
|
||||
int32_t area_inter = 0;
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t num = 0;
|
||||
bool bNoOverlap;
|
||||
for (i = 0; i < anchors_num && num < max_roi_num; i++) {
|
||||
if (proposals[PROPOSAL_WIDTH * i + 5] == 0) {
|
||||
num++;
|
||||
x_min1 = proposals[PROPOSAL_WIDTH * i];
|
||||
y_min1 = proposals[PROPOSAL_WIDTH * i + 1];
|
||||
x_max1 = proposals[PROPOSAL_WIDTH * i + 2];
|
||||
y_max1 = proposals[PROPOSAL_WIDTH * i + 3];
|
||||
for (j = i + 1; j < anchors_num; j++) {
|
||||
if (proposals[PROPOSAL_WIDTH * j + 5] == 0) {
|
||||
x_min2 = proposals[PROPOSAL_WIDTH * j];
|
||||
y_min2 = proposals[PROPOSAL_WIDTH * j + 1];
|
||||
x_max2 = proposals[PROPOSAL_WIDTH * j + 2];
|
||||
y_max2 = proposals[PROPOSAL_WIDTH * j + 3];
|
||||
bNoOverlap = (x_min2 > x_max1) || (x_max2 < x_min1) || (y_min2 > y_max1) || (y_max2 < y_min1);
|
||||
if (bNoOverlap) {
|
||||
continue;
|
||||
}
|
||||
(void)SVP_NNIE_Overlap(x_min1, y_min1, x_max1, y_max1, x_min2, y_min2, x_max2, y_max2, &s32AreaTotal,
|
||||
&area_inter);
|
||||
if (area_inter * QUANT_BASE > static_cast<int32_t>(nms_thresh * s32AreaTotal)) {
|
||||
if (proposals[PROPOSAL_WIDTH * i + 4] >= proposals[PROPOSAL_WIDTH * j + 4]) {
|
||||
proposals[PROPOSAL_WIDTH * j + 5] = 1;
|
||||
} else {
|
||||
proposals[PROPOSAL_WIDTH * i + 5] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
static void Rpn(float **inputs, uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t *scales,
|
||||
uint32_t *ratios, uint32_t ori_image_height, uint32_t ori_image_width, uint32_t *inputs_height,
|
||||
uint32_t *inputs_width, uint32_t *inputs_channel, uint32_t inputs_stride, uint32_t max_rois,
|
||||
uint32_t min_size, uint32_t spatial_scale, uint32_t nms_thresh, uint32_t filter_thresh,
|
||||
uint32_t num_before_nms, char *pu32MemPool, float *proposal_result, uint32_t dst_stride,
|
||||
uint32_t *num_rois) {
|
||||
#if 1
|
||||
/******************** define parameters ****************/
|
||||
uint32_t size;
|
||||
int32_t *anchors = nullptr;
|
||||
int32_t *bbox_delta = nullptr;
|
||||
int32_t *proposals = nullptr;
|
||||
int32_t *ptr1 = nullptr;
|
||||
int32_t *ptr2 = nullptr;
|
||||
int32_t *ptr3 = nullptr;
|
||||
uint32_t num_after_filter = 0;
|
||||
uint32_t num_anchors;
|
||||
float base_w;
|
||||
float base_h;
|
||||
float base_x_ctr;
|
||||
float base_y_ctr;
|
||||
float *ratio_anchors = nullptr;
|
||||
float *f32_ptr = nullptr;
|
||||
float *f32_ptr2 = nullptr;
|
||||
float *scale_anchors = nullptr;
|
||||
float *scores = nullptr;
|
||||
float f32_size;
|
||||
uint32_t pixel_interval;
|
||||
uint32_t src_bbox_index;
|
||||
uint32_t src_fg_prob_index;
|
||||
uint32_t src_bg_prob_index;
|
||||
uint32_t src_bbox_bias;
|
||||
uint32_t src_prob_bias;
|
||||
uint32_t des_box;
|
||||
uint32_t bg_blob_size;
|
||||
uint32_t anchors_per_pixel;
|
||||
uint32_t map_size;
|
||||
uint32_t line_size;
|
||||
int32_t proposal_width;
|
||||
int32_t proposal_height;
|
||||
uint32_t roi_count;
|
||||
Stack *stack = nullptr;
|
||||
uint32_t c;
|
||||
uint32_t h;
|
||||
uint32_t w;
|
||||
uint32_t i;
|
||||
uint32_t j;
|
||||
uint32_t p;
|
||||
uint32_t q;
|
||||
uint32_t z;
|
||||
uint32_t base_anchor[4] = {0, 0, (min_size - 1), (min_size - 1)};
|
||||
|
||||
/*********************************** Faster RCNN *********************************************/
|
||||
/********* calculate the start pointer of each part in MemPool *********/
|
||||
anchors = reinterpret_cast<int32_t *>(pu32MemPool);
|
||||
num_anchors = num_ratio_anchors * num_scale_anchors * (inputs_height[0] * inputs_width[0]);
|
||||
size = COORDI_NUM * num_anchors;
|
||||
pu32MemPool += size * sizeof(int32_t);
|
||||
|
||||
bbox_delta = reinterpret_cast<int32_t *>(pu32MemPool);
|
||||
pu32MemPool += size * sizeof(int32_t);
|
||||
|
||||
proposals = reinterpret_cast<int32_t *>(pu32MemPool);
|
||||
size = PROPOSAL_WIDTH * num_anchors;
|
||||
pu32MemPool += size * sizeof(int32_t);
|
||||
|
||||
ratio_anchors = reinterpret_cast<float *>(static_cast<void *>(pu32MemPool));
|
||||
f32_ptr = reinterpret_cast<float *>(static_cast<void *>(pu32MemPool));
|
||||
size = num_ratio_anchors * COORDI_NUM;
|
||||
f32_ptr = f32_ptr + size;
|
||||
|
||||
scale_anchors = f32_ptr;
|
||||
size = num_scale_anchors * num_ratio_anchors * COORDI_NUM;
|
||||
f32_ptr = f32_ptr + size;
|
||||
|
||||
scores = f32_ptr;
|
||||
size = num_anchors * SCORE_NUM;
|
||||
f32_ptr = f32_ptr + size;
|
||||
|
||||
stack = reinterpret_cast<Stack *>(f32_ptr);
|
||||
|
||||
/********************* Generate the base anchor ***********************/
|
||||
base_w = static_cast<float>(base_anchor[2] - base_anchor[0] + 1);
|
||||
base_h = static_cast<float>(base_anchor[3] - base_anchor[1] + 1);
|
||||
base_x_ctr = static_cast<float>(base_anchor[0] + ((base_w - 1) * 0.5));
|
||||
base_y_ctr = static_cast<float>(base_anchor[1] + ((base_h - 1) * 0.5));
|
||||
|
||||
/*************** Generate Ratio Anchors for the base anchor ***********/
|
||||
f32_ptr = ratio_anchors;
|
||||
f32_size = base_w * base_h;
|
||||
for (i = 0; i < num_ratio_anchors; i++) {
|
||||
float f32_ratios = static_cast<float>(ratios[i]) / QUANT_BASE;
|
||||
base_w = sqrt(f32_size / f32_ratios);
|
||||
base_w = static_cast<float>(
|
||||
1.0 * ((base_w) >= 0 ? static_cast<int32_t>(base_w + HALF_VAL) : static_cast<int32_t>(base_w - HALF_VAL)));
|
||||
base_h = base_w * f32_ratios;
|
||||
base_h = static_cast<float>(
|
||||
1.0 * ((base_h) >= 0 ? static_cast<int32_t>(base_h + HALF_VAL) : static_cast<int32_t>(base_h - HALF_VAL)));
|
||||
|
||||
*f32_ptr++ = static_cast<float>(base_x_ctr - ((base_w - 1) * HALF_VAL));
|
||||
*(f32_ptr++) = static_cast<float>(base_y_ctr - ((base_h - 1) * HALF_VAL));
|
||||
*(f32_ptr++) = static_cast<float>(base_x_ctr + ((base_w - 1) * HALF_VAL));
|
||||
*(f32_ptr++) = static_cast<float>(base_y_ctr + ((base_h - 1) * HALF_VAL));
|
||||
}
|
||||
|
||||
/********* Generate Scale Anchors for each Ratio Anchor **********/
|
||||
f32_ptr = ratio_anchors;
|
||||
f32_ptr2 = scale_anchors;
|
||||
/* Generate Scale Anchors for one pixel */
|
||||
for (i = 0; i < num_ratio_anchors; i++) {
|
||||
for (j = 0; j < num_scale_anchors; j++) {
|
||||
base_w = *(f32_ptr + 2) - *(f32_ptr) + 1;
|
||||
base_h = *(f32_ptr + 3) - *(f32_ptr + 1) + 1;
|
||||
base_x_ctr = static_cast<float>(*(f32_ptr) + ((base_w - 1) * HALF_VAL));
|
||||
base_y_ctr = static_cast<float>(*(f32_ptr + 1) + ((base_h - 1) * HALF_VAL));
|
||||
|
||||
*(f32_ptr2++) =
|
||||
static_cast<float>(base_x_ctr - ((base_w * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
|
||||
*(f32_ptr2++) =
|
||||
static_cast<float>(base_y_ctr - ((base_h * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
|
||||
*(f32_ptr2++) =
|
||||
static_cast<float>(base_x_ctr + ((base_w * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
|
||||
*(f32_ptr2++) =
|
||||
static_cast<float>(base_y_ctr + ((base_h * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
|
||||
}
|
||||
f32_ptr += COORDI_NUM;
|
||||
}
|
||||
|
||||
/******************* Copy the anchors to every pixel in the feature map ******************/
|
||||
ptr1 = anchors;
|
||||
pixel_interval = QUANT_BASE / spatial_scale;
|
||||
|
||||
for (p = 0; p < inputs_height[0]; p++) {
|
||||
for (q = 0; q < inputs_width[0]; q++) {
|
||||
f32_ptr2 = scale_anchors;
|
||||
for (z = 0; z < num_scale_anchors * num_ratio_anchors; z++) {
|
||||
*(ptr1++) = static_cast<int32_t>(q * pixel_interval + *(f32_ptr2++));
|
||||
*(ptr1++) = static_cast<int32_t>(p * pixel_interval + *(f32_ptr2++));
|
||||
*(ptr1++) = static_cast<int32_t>(q * pixel_interval + *(f32_ptr2++));
|
||||
*(ptr1++) = static_cast<int32_t>(p * pixel_interval + *(f32_ptr2++));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/********** do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) **********/
|
||||
map_size = inputs_height[1] * inputs_stride / sizeof(uint32_t);
|
||||
anchors_per_pixel = num_ratio_anchors * num_scale_anchors;
|
||||
bg_blob_size = anchors_per_pixel * map_size;
|
||||
line_size = inputs_stride / sizeof(uint32_t);
|
||||
src_prob_bias = 0;
|
||||
src_bbox_bias = 0;
|
||||
|
||||
for (c = 0; c < inputs_channel[1]; c++) {
|
||||
for (h = 0; h < inputs_height[1]; h++) {
|
||||
for (w = 0; w < inputs_width[1]; w++) {
|
||||
src_bbox_index = src_bbox_bias + c * map_size + h * line_size + w;
|
||||
src_bg_prob_index = src_prob_bias + (c / COORDI_NUM) * map_size + h * line_size + w;
|
||||
src_fg_prob_index = bg_blob_size + src_bg_prob_index;
|
||||
|
||||
des_box = (anchors_per_pixel) * (h * inputs_width[1] + w) + c / COORDI_NUM;
|
||||
|
||||
uint32_t des_bbox_delta_index = COORDI_NUM * des_box + c % COORDI_NUM;
|
||||
bbox_delta[des_bbox_delta_index] = static_cast<int32_t>(inputs[1][src_bbox_index] * QUANT_BASE);
|
||||
|
||||
uint32_t des_score_index = (SCORE_NUM)*des_box;
|
||||
scores[des_score_index] = inputs[0][src_bg_prob_index];
|
||||
scores[des_score_index + 1] = inputs[0][src_fg_prob_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/************************* do softmax ****************************/
|
||||
f32_ptr = scores;
|
||||
for (i = 0; i < num_anchors; i++) {
|
||||
SoftMax(f32_ptr, SCORE_NUM);
|
||||
f32_ptr += SCORE_NUM;
|
||||
}
|
||||
|
||||
/************************* BBox Transform *****************************/
|
||||
for (i = 0; i < num_anchors; i++) {
|
||||
ptr1 = anchors;
|
||||
ptr1 = ptr1 + COORDI_NUM * i;
|
||||
ptr2 = proposals;
|
||||
ptr2 = ptr2 + PROPOSAL_WIDTH * i;
|
||||
ptr3 = bbox_delta;
|
||||
ptr3 = ptr3 + COORDI_NUM * i;
|
||||
f32_ptr = scores;
|
||||
f32_ptr = f32_ptr + i * (SCORE_NUM);
|
||||
|
||||
proposal_width = *(ptr1 + 2) - *(ptr1) + 1;
|
||||
proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1;
|
||||
int32_t proposal_center_x = *(ptr1) + static_cast<int32_t>(proposal_width * HALF_VAL);
|
||||
int32_t proposal_center_y = *(ptr1 + 1) + static_cast<int32_t>(proposal_height * HALF_VAL);
|
||||
int32_t pred_center_x =
|
||||
static_cast<int32_t>((static_cast<float>(*(ptr3)) / QUANT_BASE) * proposal_width + proposal_center_x);
|
||||
int32_t pred_center_y =
|
||||
static_cast<int32_t>((static_cast<float>(*(ptr3 + 1)) / QUANT_BASE) * proposal_height + proposal_center_y);
|
||||
|
||||
int32_t pred_w = static_cast<int32_t>(proposal_width * QuickExp(static_cast<int32_t>(*(ptr3 + 2))));
|
||||
int32_t pred_h = static_cast<int32_t>(proposal_height * QuickExp(static_cast<int32_t>(*(ptr3 + 3))));
|
||||
*(ptr2) = static_cast<int32_t>(pred_center_x - HALF_VAL * pred_w);
|
||||
*(ptr2 + 1) = static_cast<int32_t>(pred_center_y - HALF_VAL * pred_h);
|
||||
*(ptr2 + 2) = static_cast<int32_t>(pred_center_x + HALF_VAL * pred_w);
|
||||
*(ptr2 + 3) = static_cast<int32_t>(pred_center_y + HALF_VAL * pred_h);
|
||||
*(ptr2 + 4) = static_cast<int32_t>(*(f32_ptr + 1) * QUANT_BASE);
|
||||
*(ptr2 + 5) = 0;
|
||||
}
|
||||
|
||||
/************************ clip bbox *****************************/
|
||||
for (i = 0; i < num_anchors; i++) {
|
||||
ptr1 = proposals;
|
||||
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
|
||||
*ptr1 = MAX(MIN(*ptr1, static_cast<int32_t>(ori_image_width) - 1), 0);
|
||||
*(ptr1 + 1) = MAX(MIN(*(ptr1 + 1), static_cast<int32_t>(ori_image_height) - 1), 0);
|
||||
*(ptr1 + 2) = MAX(MIN(*(ptr1 + 2), static_cast<int32_t>(ori_image_width) - 1), 0);
|
||||
*(ptr1 + 3) = MAX(MIN(*(ptr1 + 3), static_cast<int32_t>(ori_image_height) - 1), 0);
|
||||
}
|
||||
|
||||
/************ remove the bboxes which are too small *************/
|
||||
for (i = 0; i < num_anchors; i++) {
|
||||
ptr1 = proposals;
|
||||
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
|
||||
proposal_width = *(ptr1 + 2) - *(ptr1) + 1;
|
||||
proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1;
|
||||
if (proposal_width < static_cast<int32_t>(min_size) || proposal_height < static_cast<int32_t>(min_size)) {
|
||||
*(ptr1 + 5) = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/********** remove low score bboxes ************/
|
||||
(void)FilterLowScoreBbox(proposals, num_anchors, filter_thresh, &num_after_filter);
|
||||
|
||||
/********** sort ***********/
|
||||
(void)NonRecursiveArgQuickSort(proposals, 0, num_after_filter - 1, stack, static_cast<int32_t>(num_before_nms));
|
||||
num_after_filter = (num_after_filter < num_before_nms) ? num_after_filter : num_before_nms;
|
||||
|
||||
/* do nms to remove highly overlapped bbox */
|
||||
(void)SVP_NNIE_NonMaxSuppression(proposals, num_after_filter, nms_thresh, max_rois); /* function NMS */
|
||||
|
||||
/************** write the final result to output ***************/
|
||||
roi_count = 0;
|
||||
for (i = 0; i < num_after_filter; i++) {
|
||||
ptr1 = proposals;
|
||||
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
|
||||
if (*(ptr1 + 5) == 0) {
|
||||
proposal_result[dst_stride / sizeof(uint32_t) * roi_count] = *ptr1;
|
||||
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 1] = *(ptr1 + 1);
|
||||
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 2] = *(ptr1 + 2);
|
||||
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 3] = *(ptr1 + 3);
|
||||
roi_count++;
|
||||
}
|
||||
if (roi_count >= max_rois) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*num_rois = roi_count;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Fills `param` with the fixed RPN configuration, records the geometry of the two input tensors
// and allocates the scratch/result buffer used by ProposalRun().
//
// param            : state to initialise. A non-null rpn_tmp_buf_ is treated as a buffer from an
//                    earlier initialisation and is freed, so the caller must have zeroed the
//                    struct before the first call.
// inputs           : at least two float32 tensors of rank 4; dims 1, 2, 3 are stored as channel,
//                    height and width.
// max_roi_num      : capacity of the result buffer, in boxes.
// ori_image_height / ori_image_width : image size the boxes are clipped to.
//
// Returns RET_OK on success, RET_ERROR otherwise. Inputs with an unsupported data type or shape
// are rejected here; previously they were skipped silently, leaving the recorded dimensions and
// stride uninitialised before the buffer size was computed from them.
int32_t ProposalInit(ProposalParam *param, const std::vector<mindspore::MSTensor> &inputs, uint32_t max_roi_num,
                     uint32_t ori_image_height, uint32_t ori_image_width) {
  if (param == nullptr) {
    LOGE("param is nullptr.");
    return RET_ERROR;
  }
  param->max_roi_num_ = max_roi_num;

  // Anchor configuration: one aspect ratio, nine scales, all in QUANT_BASE fixed point.
  static const double kScaleFactors[NUM_SCALE_ANCHORS] = {1.5, 2.1, 2.9, 4.1, 5.8, 8.0, 11.3, 15.8, 22.1};
  param->num_ratio_anchors_ = 1;
  param->num_scale_anchors_ = NUM_SCALE_ANCHORS;
  for (uint32_t i = 0; i < NUM_SCALE_ANCHORS; ++i) {
    param->scales_[i] = static_cast<uint32_t>(kScaleFactors[i] * QUANT_BASE);
  }
  param->ratios_[0] = static_cast<uint32_t>(2.44 * QUANT_BASE);

  param->ori_image_height_ = ori_image_height;
  param->ori_image_width_ = ori_image_width;
  param->min_size_ = MIN_SIZE;
  param->spatial_scale_ = (uint32_t)(0.0625 * QUANT_BASE);
  param->nms_thresh_ = (uint32_t)(0.7 * QUANT_BASE);
  param->filter_thresh_ = 0;
  param->num_before_nms_ = NUM_NMS;

  // Result blob: max_roi_num rows of COORDI_NUM floats.
  param->rpn_bounding_box_.chn_ = 1;
  param->rpn_bounding_box_.height_ = max_roi_num;
  param->rpn_bounding_box_.width_ = COORDI_NUM;
  param->rpn_bounding_box_.stride_ = COORDI_NUM * sizeof(float);
  param->rpn_bounding_box_.num_ = 1;

  if (inputs.size() < static_cast<size_t>(kNumInput2)) {
    LOGE("inputs tensor size error.");
    return RET_ERROR;
  }

  for (int i = 0; i < kNumInput2; i++) {
    if (inputs[i].DataType() != DataType::kNumberTypeFloat32) {
      LOGE("inputs tensor data type error.");
      return RET_ERROR;
    }
    const auto ptr_shape = inputs[i].Shape();
    if (ptr_shape.size() != static_cast<size_t>(kNCHWDims)) {
      LOGE("inputs tensor shape error.");
      return RET_ERROR;
    }
    // Dimensions are stored as uint32_t, so negative or oversized values are rejected.
    for (int dim = 1; dim < kNCHWDims; dim++) {
      if (ptr_shape[dim] <= 0 || ptr_shape[dim] > static_cast<int64_t>(UINT32_MAX)) {
        LOGE("inputs tensor shape error.");
        return RET_ERROR;
      }
    }
    param->inputs_height_[i] = static_cast<uint32_t>(ptr_shape[2]);
    param->inputs_width_[i] = static_cast<uint32_t>(ptr_shape[3]);
    param->inputs_channel_[i] = static_cast<uint32_t>(ptr_shape[1]);
    if (i == 0) {
      // Rows are densely packed floats; the stride is taken from the first input only.
      param->inputs_stride_ = static_cast<uint32_t>(ptr_shape[3] * sizeof(float));
    }
  }

  const uint32_t tmp_buf_size = RpnTmpBufSize(param->num_ratio_anchors_, param->num_scale_anchors_,
                                              param->inputs_height_[0], param->inputs_width_[0]);
  const uint64_t bbox_buf_size = static_cast<uint64_t>(param->rpn_bounding_box_.num_) *
                                 param->rpn_bounding_box_.height_ * param->rpn_bounding_box_.stride_;
  const uint64_t total_size = static_cast<uint64_t>(tmp_buf_size) + bbox_buf_size;
  if (total_size > UINT32_MAX) {
    LOGE("buf size overflow.");
    return RET_ERROR;
  }

  if (param->rpn_tmp_buf_ != nullptr) {
    free(param->rpn_tmp_buf_);
    param->rpn_tmp_buf_ = nullptr;
  }
  param->rpn_tmp_buf_ = malloc(static_cast<size_t>(total_size));
  if (param->rpn_tmp_buf_ == nullptr) {
    LOGE("malloc buf fail.");
    return RET_ERROR;
  }
  // The result rows live directly behind the scratch area inside the same allocation.
  param->rpn_bounding_box_.data_ = reinterpret_cast<char *>(param->rpn_tmp_buf_) + tmp_buf_size;

  return RET_OK;
}
|
||||
|
||||
// Runs the proposal computation on the first two tensors of `inputs` and stores the resulting
// boxes in the single tensor of `outputs`, which is reshaped to {number of boxes, COORDI_NUM}.
//
// `param` must have been prepared by ProposalInit(). Returns RET_OK on success and RET_ERROR
// when an argument is null, the tensor counts or data types are wrong, a tensor has no data, or
// `param` has no buffer.
//
// Changes from the previous version: the last Rpn() argument was a mis-encoded character
// sequence instead of `&param->...` and did not compile; a non-float32 input used to leave a
// stale data pointer in `param` and is now an error; tensor data pointers are checked before use.
int32_t ProposalRun(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                    ProposalParam *param) {
  if (inputs == nullptr || outputs == nullptr || param == nullptr) {
    LOGE("input parameter is nullptr.");
    return RET_ERROR;
  }
  if (inputs->size() < static_cast<size_t>(kNumInput2)) {
    LOGE("inputs tensor size error.");
    return RET_ERROR;
  }
  if (outputs->size() != 1) {
    LOGE("outputs tensor size error.");
    return RET_ERROR;
  }
  for (int i = 0; i < kNumInput2; i++) {
    if (inputs->at(i).DataType() != DataType::kNumberTypeFloat32) {
      LOGE("inputs tensor data type error.");
      return RET_ERROR;
    }
    param->inputs_[i] = reinterpret_cast<float *>((*inputs)[i].MutableData());
    if (param->inputs_[i] == nullptr) {
      LOGE("inputs tensor data is nullptr.");
      return RET_ERROR;
    }
  }
  if ((*outputs)[0].DataType() != DataType::kNumberTypeFloat32) {
    LOGE("outputs tensor data type error.");
    return RET_ERROR;
  }
  if (param->rpn_tmp_buf_ == nullptr || param->rpn_bounding_box_.data_ == nullptr) {
    LOGE("proposal param is not initialized.");
    return RET_ERROR;
  }

  // Rpn() stores the number of boxes it produced in rpn_bounding_box_.height_.
  Rpn(param->inputs_, param->num_ratio_anchors_, param->num_scale_anchors_, param->scales_, param->ratios_,
      param->ori_image_height_, param->ori_image_width_, param->inputs_height_, param->inputs_width_,
      param->inputs_channel_, param->inputs_stride_, param->max_roi_num_, param->min_size_, param->spatial_scale_,
      param->nms_thresh_, param->filter_thresh_, param->num_before_nms_, reinterpret_cast<char *>(param->rpn_tmp_buf_),
      reinterpret_cast<float *>(param->rpn_bounding_box_.data_), param->rpn_bounding_box_.stride_,
      &param->rpn_bounding_box_.height_);

  std::vector<int64_t> shape{static_cast<int64_t>(param->rpn_bounding_box_.height_), COORDI_NUM};
  (*outputs)[0].SetShape(shape);

  const size_t result_bytes = static_cast<size_t>(param->rpn_bounding_box_.height_) * COORDI_NUM * sizeof(float);
  if (result_bytes == 0) {
    // No boxes were produced; there is nothing to copy.
    return RET_OK;
  }
  auto output_data = (*outputs)[0].MutableData();
  if (output_data == nullptr) {
    LOGE("outputs tensor data is nullptr.");
    return RET_ERROR;
  }
  memcpy(output_data, param->rpn_bounding_box_.data_, result_bytes);

  return RET_OK;
}
|
||||
|
||||
// Releases the buffer allocated by ProposalInit() and clears the pointer, so calling this again
// on the same `param` does nothing.
void ProposalDeInit(ProposalParam *param) {
  if (param->rpn_tmp_buf_ == nullptr) {
    return;
  }
  free(param->rpn_tmp_buf_);
  param->rpn_tmp_buf_ = nullptr;
}
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,95 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_
|
||||
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_
|
||||
#include <vector>
|
||||
#include "include/api/types.h"
|
||||
|
||||
// Component tag embedded in every record printed by LOGE / LOGW.
#define LOG_TAG1 "Proposal"

// Writes an error record to stderr: a "[ERROR] <tag> [<file>:<line>] <function>]" prefix
// followed by the caller's printf-style message.
#define LOGE(format, ...)                                                                     \
  do {                                                                                        \
    fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
    fprintf(stderr, format, ##__VA_ARGS__);                                                   \
  } while (0)

// Same as LOGE, but the record is labelled "[Warning]".
#define LOGW(format, ...)                                                                       \
  do {                                                                                          \
    fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
    fprintf(stderr, format, ##__VA_ARGS__);                                                     \
  } while (0)
|
||||
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
// Descriptor of the buffer that holds the bounding boxes produced by the RPN stage.
struct RpnBoundingBox {
  uint32_t stride_;  // NOTE(review): presumably the row stride of data_ in bytes - confirm.
  void *data_;       // Box data; ProposalRun copies height_ * COORDI_NUM floats out of it.
  uint32_t num_;     // NOTE(review): meaning not visible here - confirm against ProposalInit.
  uint32_t width_;   // NOTE(review): presumably the number of values per row - confirm.
  uint32_t height_;  // Number of rows; used by ProposalRun as the first output dimension.
  uint32_t chn_;     // NOTE(review): presumably a channel count - confirm.
};
|
||||
|
||||
// Larger of two values. Both arguments appear twice in the expansion, so do not pass
// expressions with side effects.
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
|
||||
// Smaller of two values. Same double-evaluation caveat as MAX.
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
|
||||
#define HALF_VAL 0.5f // the half value
|
||||
// Number of values stored per output box; ProposalRun uses it as the second output dimension.
#define COORDI_NUM 4 // coordinate numbers
|
||||
#define PROPOSAL_WIDTH 6 // the number of proposal values
|
||||
// NOTE(review): presumably the fixed-point quantization base (4096 == 2^12) - confirm.
#define QUANT_BASE 4096 // the base value
|
||||
#define SCORE_NUM 2 // the num of RPN scores
|
||||
// NOTE(review): presumably the default anchor count - confirm.
#define NUM_SCALE_ANCHORS 9
|
||||
// NOTE(review): presumably the cap on candidates considered by NMS - confirm.
#define NUM_NMS 6000
|
||||
// NOTE(review): presumably the default minimum box size - confirm.
#define MIN_SIZE 16
|
||||
|
||||
typedef struct {
|
||||
uint32_t scales_[9];
|
||||
uint32_t ratios_[9];
|
||||
uint32_t inputs_height_[2];
|
||||
uint32_t inputs_width_[2];
|
||||
uint32_t inputs_channel_[2];
|
||||
uint32_t inputs_stride_;
|
||||
uint32_t num_ratio_anchors_;
|
||||
uint32_t num_scale_anchors_;
|
||||
uint32_t ori_image_height_;
|
||||
uint32_t ori_image_width_;
|
||||
uint32_t min_size_;
|
||||
uint32_t spatial_scale_;
|
||||
uint32_t nms_thresh_;
|
||||
uint32_t filter_thresh_;
|
||||
uint32_t max_roi_num_;
|
||||
uint32_t num_before_nms_;
|
||||
float *inputs_[2];
|
||||
void *rpn_tmp_buf_;
|
||||
RpnBoundingBox rpn_bounding_box_;
|
||||
} ProposalParam;
|
||||
|
||||
// Pair of signed bounds.
// NOTE(review): presumably a [min_, max_] index range used by the sort in proposal.cc - confirm.
struct Stack {
  int32_t min_;
  int32_t max_;
};
|
||||
|
||||
int32_t ProposalInit(ProposalParam *param, const std::vector<mindspore::MSTensor> &inputs, uint32_t max_roi_num,
|
||||
uint32_t ori_image_height, uint32_t ori_image_width);
|
||||
int32_t ProposalRun(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
|
||||
ProposalParam *param);
|
||||
void ProposalDeInit(ProposalParam *param);
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_
|
|
@ -0,0 +1,200 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/proposal_fp32.h"
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/registry/register_kernel.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Custom;
|
||||
// Size of the stack buffer ProposalCreateKernel uses to read one Custom attribute value.
constexpr int kMaxSize = 1024;
|
||||
// Minimum number of input tensors required by ProposalCPUKernel::Prepare.
constexpr int kNumInput2 = 2;
|
||||
// Radix passed to strtol when parsing attribute values.
constexpr int kDecimal = 10;
|
||||
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
int ProposalCPUKernel::Prepare() {
|
||||
if (inputs_.size() < kNumInput2) {
|
||||
LOGE("inputs tensor num error.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (outputs_.size() != 1) {
|
||||
LOGE("outputs tensor num error.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
std::vector<std::string> inputs_name = {"rpn_cls_score", "rpn_bbox_pred"};
|
||||
std::vector<mindspore::MSTensor> inputs;
|
||||
for (size_t i = 0; i < inputs_name.size(); i++) {
|
||||
bool find_flag = false;
|
||||
for (auto &input : inputs_) {
|
||||
if (input.Name() == inputs_name[i]) {
|
||||
inputs.push_back(input);
|
||||
find_flag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!find_flag) {
|
||||
for (auto &input : inputs_) {
|
||||
if (std::find(inputs.begin(), inputs.end(), input) != inputs.end()) {
|
||||
continue;
|
||||
}
|
||||
inputs.push_back(input);
|
||||
LOGW("input tensor name diff '%s' vs '%s'.", inputs_name[i].c_str(), input.Name().c_str());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (inputs.size() != inputs_name.size()) {
|
||||
LOGE("inputs size error.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->set_inputs(inputs);
|
||||
if (inputs[0].Shape()[0] != 1) {
|
||||
LOGE("proposal only support input num == 1.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
outputs_[0].SetTensorName("proposal");
|
||||
|
||||
int max_roi_num_int = 300;
|
||||
auto *max_roi_num = std::getenv("MAX_ROI_NUM");
|
||||
if (max_roi_num != nullptr) {
|
||||
auto iter =
|
||||
std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; });
|
||||
if (iter != max_roi_num) {
|
||||
*iter = '\0';
|
||||
max_roi_num_int = atoi(max_roi_num);
|
||||
} else {
|
||||
LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", max_roi_num_int);
|
||||
}
|
||||
} else {
|
||||
LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", max_roi_num_int);
|
||||
}
|
||||
|
||||
return ProposalInit(&proposal_param_, inputs_, max_roi_num_int, image_height_, image_weight_);
|
||||
}
|
||||
|
||||
int ProposalCPUKernel::ReSize() {
|
||||
if (inputs_[0].Shape()[0] != 1) {
|
||||
LOGE("proposal only support input num == 1.");
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Runs the proposal computation on the kernel's tensors and returns ProposalRun's status.
int ProposalCPUKernel::Execute() {
  const int status = ProposalRun(&inputs_, &outputs_, &proposal_param_);
  return status;
}
|
||||
|
||||
// Hands the parameter block to ProposalDeInit so that its temporary buffer is released.
ProposalCPUKernel::~ProposalCPUKernel() {
  ProposalDeInit(&proposal_param_);
}
|
||||
|
||||
// Copies the raw bytes of the Custom attribute named `attr` into `buf` as a NUL-terminated
// string.
//
// buf:      destination buffer.
// buf_size: capacity of `buf` in bytes, including room for the terminating NUL.
// op:       Custom primitive whose attribute list is searched.
// attr:     name of the attribute to look up.
//
// Returns true when the attribute was found and fits into `buf`; false when an argument is
// invalid, the attribute is missing, it carries no data, or it does not fit.
bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) {
  // FlatBuffers accessors return nullptr for absent fields, so check before dereferencing.
  if (buf == nullptr || buf_size <= 0 || op == nullptr || op->attr() == nullptr) {
    return false;
  }
  for (size_t i = 0; i < op->attr()->size(); i++) {
    auto attribute = op->attr()->Get(i);
    if (attribute == nullptr || attribute->name() == nullptr || attribute->name()->str() != attr) {
      continue;
    }
    auto output_info = attribute->data();
    if (output_info == nullptr) {
      LOGE("attr data is nullptr");
      return false;
    }
    // Compare as unsigned sizes: casting a huge size to int could turn negative and slip past
    // the capacity check.
    const size_t attr_size = output_info->size();
    if (attr_size >= static_cast<size_t>(buf_size)) {
      LOGE("attr size too big");
      return false;
    }
    for (size_t j = 0; j < attr_size; j++) {
      buf[j] = static_cast<char>(output_info->Get(j));
    }
    buf[attr_size] = 0;
    return true;
  }
  return false;
}
|
||||
|
||||
std::shared_ptr<mindspore::kernel::Kernel> ProposalCreateKernel(const std::vector<mindspore::MSTensor> &inputs,
|
||||
const std::vector<mindspore::MSTensor> &outputs,
|
||||
const mindspore::schema::Primitive *primitive,
|
||||
const mindspore::Context *ctx) {
|
||||
if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
|
||||
LOGE("Primitive type is not PrimitiveType_Custom");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto op = primitive->value_as_Custom();
|
||||
if (op->attr()->size() < 1) {
|
||||
LOGE("There are at least 1 attribute of Custom");
|
||||
return nullptr;
|
||||
}
|
||||
int64_t ndims;
|
||||
int64_t image_height;
|
||||
int64_t image_width;
|
||||
|
||||
char *res = nullptr;
|
||||
char buf[kMaxSize];
|
||||
if (GetCustomAttr(buf, kMaxSize, op, "proposal_id")) {
|
||||
res = nullptr;
|
||||
ndims = strtol(buf, &res, kDecimal);
|
||||
if ((*res) != 0) {
|
||||
LOGE("Get attr id data fail");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
LOGE("Proposal Custom op should have id");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (GetCustomAttr(buf, kMaxSize, op, "image_height")) {
|
||||
res = nullptr;
|
||||
image_height = strtol(buf, &res, kDecimal);
|
||||
if ((*res) != 0) {
|
||||
LOGE("Get attr id data fail");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
LOGE("Proposal Custom op should have image_height");
|
||||
return nullptr;
|
||||
}
|
||||
if (GetCustomAttr(buf, kMaxSize, op, "image_width")) {
|
||||
res = nullptr;
|
||||
image_width = strtol(buf, &res, kDecimal);
|
||||
if ((*res) != 0) {
|
||||
LOGE("Get attr id data fail");
|
||||
return nullptr;
|
||||
}
|
||||
} else {
|
||||
LOGE("Proposal Custom op should have image_width");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto kernel = std::make_shared<ProposalCPUKernel>(inputs, outputs, primitive, ctx, ndims, image_height, image_width);
|
||||
// auto kernel = new (std::nothrow) ProposalCPUKernel(inputs, outputs, primitive, ctx, ndims, image_height,
|
||||
// image_width);
|
||||
if (kernel == nullptr) {
|
||||
LOGE("new custom kernel is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
return kernel;
|
||||
}
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
const auto kFloat32 = DataType::kNumberTypeFloat32;
|
||||
}
|
||||
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, Proposal, proposal::ProposalCreateKernel)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,51 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_
|
||||
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_
|
||||
|
||||
#include <vector>
|
||||
#include "schema/model_generated.h"
|
||||
#include "include/context.h"
|
||||
#include "include/api/kernel.h"
|
||||
#include "src/proposal.h"
|
||||
|
||||
using mindspore::kernel::Kernel;
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
class ProposalCPUKernel : public Kernel {
|
||||
public:
|
||||
ProposalCPUKernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
|
||||
const mindspore::schema::Primitive *primitive, const mindspore::Context *ctx, int id,
|
||||
int image_height, int image_width)
|
||||
: Kernel(inputs, outputs, primitive, ctx), id_(id), image_height_(image_height), image_weight_(image_width) {}
|
||||
|
||||
~ProposalCPUKernel() override;
|
||||
|
||||
int Prepare() override;
|
||||
int ReSize() override;
|
||||
int Execute() override;
|
||||
|
||||
private:
|
||||
proposal::ProposalParam proposal_param_ = {0};
|
||||
int64_t id_;
|
||||
int64_t image_height_;
|
||||
int64_t image_weight_;
|
||||
};
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/proposal_infer.h"
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "include/errorcode.h"
|
||||
#include "src/proposal.h"
|
||||
#include "include/api/format.h"
|
||||
#include "include/registry/register_kernel_interface.h"
|
||||
|
||||
using mindspore::kernel::KernelInterface;
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_OK;
|
||||
using mindspore::schema::PrimitiveType_Custom;
|
||||
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
std::shared_ptr<KernelInterface> ProposalInferCreater() {
|
||||
auto infer = std::make_shared<ProposalInterface>();
|
||||
if (infer == nullptr) {
|
||||
LOGE("new custom infer is nullptr");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return infer;
|
||||
}
|
||||
// Infers the output tensor descriptions of the Proposal custom op.
//
// inputs:    must hold exactly two tensors.
// outputs:   must hold at least one tensor; every output is described as a float32 NCHW
//            tensor of shape {-1, COORDI_NUM}.
// primitive: must be a Custom primitive.
//
// Returns kSuccess on success and kLiteError when an argument is null or fails validation.
Status ProposalInterface::Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                                const mindspore::schema::Primitive *primitive) {
  if (inputs == nullptr || outputs == nullptr || primitive == nullptr) {
    LOGE("Infer got nullptr argument");
    return kLiteError;
  }
  if (inputs->size() != 2) {
    LOGE("Inputs size should be 2");
    return kLiteError;
  }
  if (outputs->empty()) {
    LOGE("Outputs size 0");
    return kLiteError;
  }
  if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return kLiteError;
  }

  // TODO: deriving the output format and data type from the input is still to be completed;
  // for now every output uses a fixed description.
  const std::vector<int64_t> shape{-1, COORDI_NUM};
  for (auto &output : *outputs) {
    output.SetShape(shape);
    output.SetDataType(DataType::kNumberTypeFloat32);
    output.SetFormat(Format::NCHW);
  }
  return kSuccess;
}
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// static KernelInterfaceReg a(aa, schema::PrimitiveType_Custom, CustomInferCreater);
|
||||
REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, Proposal, proposal::ProposalInferCreater);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,35 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_
|
||||
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_
|
||||
#include <vector>
|
||||
#include "include/kernel_interface.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace proposal {
|
||||
class ProposalInterface : public mindspore::kernel::KernelInterface {
|
||||
public:
|
||||
ProposalInterface() {}
|
||||
|
||||
~ProposalInterface() = default;
|
||||
|
||||
Status Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
|
||||
const mindspore::schema::Primitive *primitive) override;
|
||||
};
|
||||
} // namespace proposal
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_
|
|
@ -26,6 +26,12 @@ namespace lite {
|
|||
int RunBenchmark(int argc, const char **argv) {
|
||||
BenchmarkFlags flags;
|
||||
Option<std::string> err = flags.ParseFlags(argc, argv);
|
||||
#ifdef SUPPORT_NNIE
|
||||
if (SvpSysInit() != RET_OK) {
|
||||
std::cerr << "SVP Init failed" << std::endl;
|
||||
return RET_ERROR;
|
||||
}
|
||||
#endif
|
||||
if (err.IsSome()) {
|
||||
std::cerr << err.Get() << std::endl;
|
||||
std::cerr << flags.Usage() << std::endl;
|
||||
|
@ -36,7 +42,9 @@ int RunBenchmark(int argc, const char **argv) {
|
|||
std::cerr << flags.Usage() << std::endl;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
#ifdef SUPPORT_NNIE
|
||||
BenchmarkBase *benchmark = new (std::nothrow) Benchmark(&flags);
|
||||
#else
|
||||
auto api_type = std::getenv("MSLITE_API_TYPE");
|
||||
if (api_type != nullptr) {
|
||||
MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type;
|
||||
|
@ -53,6 +61,7 @@ int RunBenchmark(int argc, const char **argv) {
|
|||
BENCHMARK_LOG_ERROR("Invalid MSLITE_API_TYPE, (OLD/NEW/C, default:OLD)");
|
||||
return RET_ERROR;
|
||||
}
|
||||
#endif
|
||||
if (benchmark == nullptr) {
|
||||
BENCHMARK_LOG_ERROR("new benchmark failed ");
|
||||
return RET_ERROR;
|
||||
|
@ -61,6 +70,7 @@ int RunBenchmark(int argc, const char **argv) {
|
|||
auto status = benchmark->Init();
|
||||
if (status != 0) {
|
||||
BENCHMARK_LOG_ERROR("Benchmark init Error : " << status);
|
||||
delete benchmark;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto model_name = flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1);
|
||||
|
@ -68,6 +78,7 @@ int RunBenchmark(int argc, const char **argv) {
|
|||
status = benchmark->RunBenchmark();
|
||||
if (status != 0) {
|
||||
BENCHMARK_LOG_ERROR("Run Benchmark " << model_name << " Failed : " << status);
|
||||
delete benchmark;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
|
|
|
@ -53,58 +53,6 @@ function Run_Build_x86() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Build arm32 for nnie
|
||||
function Run_Build_arm() {
  # Rebuilds the NNIE provider libraries against the already built release package and
  # repacks the package (plus its sha256 file) with the new libraries and benchmark binary.
  #
  # Reads the globals: open_source_ms_path, nnie_code_path, package_name, toolchain_name,
  # device_name, task, thread_num.
  # Exits with status 1 when a packaging step fails; returns 1 when the build itself fails.
  local output_dir="${open_source_ms_path}/output"
  local benchmark_src_dir="${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark"
  local benchmark_build_dir="${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark"

  # decompress release_pkg
  cd "${output_dir}" || exit 1
  file_name=$(ls ./*linux-"${package_name}".tar.gz)
  # The package is named ./mindspore-lite-<version>-linux-<package_name>.tar.gz, so the
  # version is the third "-"-separated field.
  IFS="-" read -r -a file_name_array <<< "$file_name"
  version=${file_name_array[2]}
  local pkg_name="mindspore-lite-${version}-linux-${package_name}"
  tar -xf "${pkg_name}.tar.gz" || exit 1

  # cp runtime folder
  cd "${output_dir}/${pkg_name}" || exit 1
  rm -rf "${benchmark_src_dir}/nnie/third_patry/runtime/"
  mkdir -p "${benchmark_src_dir}/nnie/third_patry/runtime/" || exit 1
  rm -rf "${benchmark_src_dir}/nnie_proposal/third_patry/runtime/"
  mkdir -p "${benchmark_src_dir}/nnie_proposal/third_patry/runtime/" || exit 1
  cp -r ./runtime/ "${benchmark_src_dir}/nnie/third_patry/" || exit 1
  cp -r ./runtime/ "${benchmark_src_dir}/nnie_proposal/third_patry/" || exit 1

  # compile nnie runtime so
  export TOOLCHAIN_NAME=${toolchain_name}
  export TOOLCHAIN_FILE=${open_source_ms_path}/mindspore/lite/cmake/${toolchain_name}.toolchain.cmake
  export MSLITE_REGISTRY_DEVICE=${device_name}

  # disable gpu & npu & train
  export MSLITE_GPU_BACKEND=off
  export MSLITE_ENABLE_NPU=off
  export MSLITE_ENABLE_TRAIN=off
  export MSLITE_ENABLE_NNIE=on

  if bash "${nnie_code_path}/mindspore/build.sh" -I "${task}" -e cpu -j "${thread_num}"; then
    echo "build arm for nnie success"
    release_path=${output_dir}/${pkg_name}/providers/${device_name}/
    rm -rf "${release_path}"
    mkdir -p "${release_path}" || exit 1
    mkdir -p "${output_dir}/${pkg_name}/tools/benchmark/" || exit 1
    cp "${benchmark_build_dir}/benchmark" "${output_dir}/${pkg_name}/tools/benchmark/" || exit 1
    cp "${benchmark_build_dir}/nnie/libmslite_nnie.so" "${release_path}/" || exit 1
    cp "${benchmark_build_dir}/nnie_proposal/libmslite_proposal.so" "${release_path}/" || exit 1
    # Quoted so that an unset device name compares as an empty string instead of producing a
    # malformed test expression.
    if [ "${device_name}" == "Hi3516D" ]; then
      cp "${nnie_code_path}/mindspore/mindspore/lite/micro/example/hi3516d/libmicro_nnie.so" "${release_path}/" || exit 1
    fi
    echo "cp new nnie so to release pkg success"
    cd "${output_dir}" || exit 1
    rm "${output_dir}/${pkg_name}.tar.gz"
    tar -zcf "./${pkg_name}.tar.gz" "./${pkg_name}/" || exit 1
    sha256sum "./${pkg_name}.tar.gz" > "./${pkg_name}.tar.gz.sha256" || exit 1
  else
    echo "build arm for nnie failed"; return 1
  fi
}
|
||||
|
||||
# bashpath should be /home/jenkins/agent-working-dir/workspace/Compile_Lite_ARM32_3516D/
|
||||
basepath=$(pwd)
|
||||
echo "basepath is ${basepath}"
|
||||
|
@ -123,12 +71,8 @@ while getopts "I:b:j:t:d:" opt; do
|
|||
echo "branch name is ${OPTARG}"
|
||||
;;
|
||||
t)
|
||||
toolchain_name=${OPTARG}
|
||||
echo "toolchain_name is ${OPTARG}"
|
||||
;;
|
||||
d)
|
||||
device_name=${OPTARG}
|
||||
echo "device_name is ${OPTARG}"
|
||||
;;
|
||||
j)
|
||||
thread_num=${OPTARG}
|
||||
|
@ -163,14 +107,6 @@ fi
|
|||
if [ ${task} == "x86_64" ]; then
|
||||
echo "start building x86 for nnie..."
|
||||
Run_Build_x86
|
||||
elif [ ${task} == "arm32" ]; then
|
||||
echo "start building arm32 for nnie..."
|
||||
package_name=aarch32
|
||||
Run_Build_arm
|
||||
elif [ ${task} == "arm64" ]; then
|
||||
echo "start building arm64 for nnie..."
|
||||
package_name=aarch64
|
||||
Run_Build_arm
|
||||
fi
|
||||
|
||||
Run_build_PID=$!
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
#!/bin/bash
|
||||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
prepare_third_party() {
  # Recreates the dpico third_party directory and fills it with the runtime folder taken from
  # the release package found in ${mindspore_top_dir}/output.
  # Exits with status 1 when a step fails or when there is not exactly one release package.
  dpico_third_party=${mindspore_lite_top_dir}/tools/benchmark/dpico/third_party
  rm -rf "${dpico_third_party}" || exit 1
  mkdir -p "${dpico_third_party}" || exit 1
  cd "${mindspore_top_dir}/output" || exit 1

  # Expand the glob into an array so that zero or several packages are detected up front
  # instead of producing a garbled archive name.
  local tarballs=( *tar.gz )
  if [ "${#tarballs[@]}" -ne 1 ] || [ ! -f "${tarballs[0]}" ]; then
    echo "expect exactly one release package in ${mindspore_top_dir}/output"
    exit 1
  fi
  file_name=${tarballs[0]}
  tar_name=${file_name%%.tar.gz}
  tar xzvf "${tar_name}.tar.gz" || exit 1
  cd .. || exit 1
  cp -rf "${mindspore_top_dir}/output/${tar_name}/runtime/" "${dpico_third_party}" || exit 1
}
|
||||
|
||||
# Build arm64 for dpico
|
||||
make_dpico_benchmark_package() {
  # Adds the dpico adapter library to the unpacked release package, repacks it, regenerates
  # the sha256 file and removes the temporary third_party directory.
  # Exits with status 1 when a step fails or when there is not exactly one release package.
  cd "${mindspore_top_dir}/output" || exit 1

  local tarballs=( *tar.gz )
  if [ "${#tarballs[@]}" -ne 1 ] || [ ! -f "${tarballs[0]}" ]; then
    echo "expect exactly one release package in ${mindspore_top_dir}/output"
    exit 1
  fi
  file_name=${tarballs[0]}
  tar_name=${file_name%%.tar.gz}

  dpico_sd3403_release_path=${mindspore_top_dir}/output/${tar_name}/providers/SD3403/
  mkdir -p "${dpico_sd3403_release_path}" || exit 1
  dpico_benchmark_path=${mindspore_top_dir}/mindspore/lite/build/tools/benchmark
  cp "${dpico_benchmark_path}/dpico/libdpico_acl_adapter.so" "${dpico_sd3403_release_path}" || exit 1
  echo "install dpico adapter so success."

  rm "${tar_name}.tar.gz" || exit 1
  tar -zcf "${tar_name}.tar.gz" "${tar_name}" || exit 1
  rm -rf "${tar_name}" || exit 1
  sha256sum "${tar_name}.tar.gz" > "${tar_name}.tar.gz.sha256" || exit 1
  echo "generate dpico package success!"

  cd "${basepath}" || exit 1
  # This function runs in an invocation where prepare_third_party was not called, so the
  # directory to clean up has to be defined here; otherwise rm would receive no operand and
  # silently do nothing.
  dpico_third_party=${mindspore_lite_top_dir}/tools/benchmark/dpico/third_party
  rm -rf "${dpico_third_party}" || exit 1
}
|
||||
|
||||
# Entry point: "-t prepare_third_party" prepares the dpico third_party directory; any other
# task value builds the dpico benchmark package.
basepath=$(pwd)
echo "basepath is ${basepath}"
#set -e
mindspore_top_dir=${basepath}
mindspore_lite_top_dir=${mindspore_top_dir}/mindspore/lite

while getopts "t:" opt; do
  case ${opt} in
    t)
      task=${OPTARG}
      echo "compile task is ${OPTARG}"
      ;;
    ?)
      echo "unknown para"
      exit 1;;
  esac
done

if [[ ${task} == "prepare_third_party" ]]; then
  # Any non-zero status counts as a failure. "exit" is used because "return" is only valid
  # inside a function or a sourced script.
  if ! prepare_third_party; then
    echo "prepare third party failed"
    exit 1
  fi
else
  echo "start make package for dpico..."
  make_dpico_benchmark_package &
  make_dpico_benchmark_package_pid=$!
  sleep 1

  # Propagate the status of the background packaging job as the script's exit status.
  wait ${make_dpico_benchmark_package_pid}
  make_dpico_benchmark_package_status=$?
  exit ${make_dpico_benchmark_package_status}
fi
|
Loading…
Reference in New Issue