add 35xx build

This commit is contained in:
gongdaguo 2021-12-11 14:36:21 +08:00
parent 74c8a66ab9
commit bbfd0dbdce
45 changed files with 3759 additions and 310 deletions

View File

@ -6,6 +6,7 @@
mindspore/mindspore/lite/src/ops/primitive_c.cc:mindspore::lite::PrimitiveC::Create
mindspore/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/csv_op.cc:mindspore::dataset::CsvOp::CsvParser::InitCsvParser
mindspore/mindspore/lite/tools/converter/graphdef_transform.cc:mindspore::lite::GraphDefTransform::Transform
mindspore/mindspore/lite/tools/benchmark/nnie_proposal/src/proposal.cc:mindspore::proposal::Rpn
mindspore/mindspore/core/abstract/primitive_infer_map.cc:mindspore::abstract::GetPrimitiveToEvalImplMap
mindspore/mindspore/ccsrc/frontend/optimizer/irpass.cc:mindspore::opt::irpass::OptimizeIRPassLib::OptimizeIRPassLib
mindspore/mindspore/ccsrc/frontend/parallel/ops_info/gather_v2_p_info.cc:mindspore::parallel::GatherV2PInfo::CheckStrategy

View File

@ -11,6 +11,7 @@ set(TEST_CASE_DIR ${TOP_DIR}/mindspore/lite/test/build)
set(RUNTIME_DIR ${RUNTIME_PKG_NAME}/runtime)
set(RUNTIME_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include)
set(RUNTIME_LIB_DIR ${RUNTIME_PKG_NAME}/runtime/lib)
set(PROVIDERS_LIB_DIR ${RUNTIME_PKG_NAME}/providers)
set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/runtime/include/dataset)
set(TURBO_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/libjpeg-turbo)
set(GLOG_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/glog)
@ -18,6 +19,10 @@ set(SECUREC_DIR ${RUNTIME_PKG_NAME}/runtime/third_party/securec)
set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
set(MINDSPORE_CORE_LIB_NAME libmindspore_core)
set(BENCHMARK_NAME benchmark)
set(MSLITE_NNIE_LIB_NAME libmslite_nnie)
set(MSLITE_PROPOSAL_LIB_NAME libmslite_proposal)
set(MICRO_NNIE_LIB_NAME libmicro_nnie)
set(DPICO_ACL_ADAPTER_LIB_NAME libdpico_acl_adapter)
set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
set(MINDSPORE_LITE_TRAIN_LIB_NAME libmindspore-lite-train)
@ -227,11 +232,31 @@ if(PLATFORM_ARM64)
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "ops*" EXCLUDE)
install(DIRECTORY ${TOP_DIR}/include/c_api/ DESTINATION ${RUNTIME_INC_DIR}/c_api
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
if(NOT TARGET_MIX210)
__install_micro_wrapper()
endif()
if(MSLITE_ENABLE_TOOLS)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
if(NOT BUILD_FIRST)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
COMPONENT ${RUNTIME_COMPONENT_NAME})
if(TARGET_HIMIX)
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3559A")
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES
${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
elseif(TARGET_MIX210)
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403")
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/dpico/${DPICO_ACL_ADAPTER_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
endif()
if(SUPPORT_TRAIN)
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
${RUNTIME_COMPONENT_NAME})
@ -310,7 +335,27 @@ elseif(PLATFORM_ARM32)
COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
__install_micro_wrapper()
if(MSLITE_ENABLE_TOOLS AND NOT TARGET_OHOS_LITE)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
if(NOT BUILD_FIRST)
install(TARGETS ${BENCHMARK_NAME} RUNTIME
DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
if(TARGET_HIMIX)
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D" OR ${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3519A")
install(FILES ${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie/${MSLITE_NNIE_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
install(FILES
${TOP_DIR}/mindspore/lite/build/tools/benchmark/nnie_proposal/${MSLITE_PROPOSAL_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D")
install(FILES
${TOP_DIR}/mindspore/lite/tools/benchmark/nnie/third_patry/${MICRO_NNIE_LIB_NAME}.so
DESTINATION ${PROVIDERS_LIB_DIR}/${MSLITE_REGISTRY_DEVICE}
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
endif()
endif()
endif()
if(SUPPORT_TRAIN)
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
${RUNTIME_COMPONENT_NAME})
@ -516,7 +561,10 @@ else()
__install_micro_codegen()
endif()
if(MSLITE_ENABLE_TOOLS)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
if(NOT BUILD_FIRST)
install(TARGETS ${BENCHMARK_NAME} RUNTIME DESTINATION ${BENCHMARK_ROOT_DIR}
COMPONENT ${RUNTIME_COMPONENT_NAME})
endif()
if(SUPPORT_TRAIN)
install(TARGETS ${BENCHMARK_TRAIN_NAME} RUNTIME DESTINATION ${BENCHMARK_TRAIN_ROOT_DIR} COMPONENT
${RUNTIME_COMPONENT_NAME})

View File

@ -35,13 +35,10 @@ if(NOT PLATFORM_ARM32 AND NOT TARGET_HIMIX AND NOT MACHINE_LINUX_ARM64)
list(APPEND SDOT_FILES ${SDOT_SRC})
add_library(nnacl_optimize_mid OBJECT ${SDOT_FILES})
add_dependencies(nnacl_optimize_mid fbs_src)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
endif()
if(TARGET_MIX210)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
if(NOT TARGET_MIX210)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
endif()
endif()
if(MSLITE_ENABLE_FP16)

View File

@ -10,7 +10,7 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/compile_link_option.cmake)
set(MSLITE_GPU_BACKEND "" CACHE STRING "enable gpu backend, \
opencl only support arm64 and x86_64 , tensorrt only support x86_64, opencl/cuda/tensorrt/off")
set(MSLITE_REGISTRY_DEVICE "off" CACHE STRING "Compile Mindspore Lite that supports specific devices, \
currently supported devices: Hi3516D/Hi3519A/Hi3559A/sd3403")
currently supported devices: Hi3516D/Hi3519A/Hi3559A/SD3403")
option(MSLITE_ENABLE_NPU "enable npu, only arm64 or arm32 support" off)
option(MSLITE_ENABLE_TRAIN "enable train" on)
option(MSLITE_ENABLE_SSE "enable SSE instruction set, only x86_64 support" off)
@ -53,13 +53,6 @@ if(DEFINED ENV{MSLITE_GPU_BACKEND})
endif()
if(DEFINED ENV{MSLITE_REGISTRY_DEVICE})
set(MSLITE_REGISTRY_DEVICE $ENV{MSLITE_REGISTRY_DEVICE})
if(MSLITE_REGISTRY_DEVICE STREQUAL sd3403)
if(NOT PLATFORM_ARM64)
set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on)
else()
set(MSLITE_ENABLE_DPICO_ACL_ADAPTER on)
endif()
endif()
endif()
if(DEFINED ENV{MSLITE_ENABLE_NPU})
set(MSLITE_ENABLE_NPU $ENV{MSLITE_ENABLE_NPU})
@ -190,6 +183,9 @@ elseif(PLATFORM_ARM32)
elseif(WIN32)
set(MSLITE_GPU_BACKEND "off")
else()
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403")
set(MSLITE_ENABLE_DPICO_ATC_ADAPTER on)
endif()
if(MSLITE_GPU_BACKEND STREQUAL "")
set(MSLITE_GPU_BACKEND "off")
endif()
@ -379,10 +375,6 @@ else()
set(RUNTIME_COMPONENT_NAME "linux-x64")
endif()
if(MSLITE_ENABLE_DPICO_ACL_ADAPTER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark/dpico)
endif()
string(REPLACE "/mindspore/lite" "" TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(CORE_DIR ${TOP_DIR}/mindspore/core)
set(CCSRC_DIR ${TOP_DIR}/mindspore/ccsrc)
@ -567,16 +559,16 @@ if(BUILD_MINDDATA STREQUAL "lite_cv")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/minddata)
endif()
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/ops)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CCSRC_DIR}/backend/kernel_compiler/cpu/nnacl build)
endif()
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src/ops)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/micro/coder)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/src)
add_subdirectory(${CCSRC_DIR}/backend/kernel_compiler/cpu/nnacl build)
if(MSLITE_ENABLE_TOOLS)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
if(NOT BUILD_FIRST)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark)
endif()
if(SUPPORT_TRAIN)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tools/benchmark_train)
endif()

View File

@ -25,6 +25,15 @@ checkndk() {
fi
}
# Verify that the Hi35xx SDK location is configured before a Hi35xx build.
# Requires HI35XX_SDK_PATH to be set; aborts the build otherwise. On success,
# copies the SDK's third_patry directory (repo's spelling) into the nnie
# benchmark tool so the provider libraries can be compiled against it.
check_Hi35xx() {
    if [[ -z "${HI35XX_SDK_PATH}" ]]; then
        echo "error: to compile the runtime package of Hi35XX, you need to set HI35XX_SDK_PATH to declare the path of Hi35XX sdk."
        exit 1
    fi
    cp -r ${HI35XX_SDK_PATH}/third_patry ${BASEPATH}/mindspore/lite/tools/benchmark/nnie/
}
get_version() {
VERSION_MAJOR=$(grep "const int ms_version_major =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]")
VERSION_MINOR=$(grep "const int ms_version_minor =" ${BASEPATH}/mindspore/lite/include/version.h | tr -dc "[0-9]")
@ -142,16 +151,19 @@ build_lite() {
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake
fi
BRANCH_NAME=nnie_3516_master_dev
BRANCH_NAME=nnie_3516_master
if [[ ("${MSLITE_REGISTRY_DEVICE}" == "Hi3516D" || "${TOOLCHAIN_NAME}" == "himix200") && "${local_lite_platform}" == "arm32" ]]; then
TOOLCHAIN_NAME="himix200"
MSLITE_REGISTRY_DEVICE=Hi3516D
check_Hi35xx
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3559A" && "${local_lite_platform}" == "arm64" ]]; then
TOOLCHAIN_NAME="himix100"
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" && "${local_lite_platform}" == "arm64" ]]; then
check_Hi35xx
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "SD3403" && "${local_lite_platform}" == "arm64" ]]; then
TOOLCHAIN_NAME="mix210"
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "Hi3519A" && "${local_lite_platform}" == "arm32" ]]; then
TOOLCHAIN_NAME="himix200"
check_Hi35xx
elif [[ ("${MSLITE_ENABLE_NNIE}" == "on" || "${MSLITE_REGISTRY_DEVICE}" == "Hi3516D") && "${local_lite_platform}" == "x86_64" ]]; then
MSLITE_REGISTRY_DEVICE=Hi3516D
fi
@ -190,13 +202,11 @@ build_lite() {
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=himix100"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off"
elif [[ "${TOOLCHAIN_NAME}" == "mix210" ]]; then
CMAKE_TOOLCHAIN_FILE=${BASEPATH}/mindspore/lite/cmake/mix210.toolchain.cmake
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DTOOLCHAIN_NAME=mix210"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DBUILD_MINDDATA=off"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=off -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_TOOLS=off"
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_ENABLE_FP16=on -DMSLITE_ENABLE_TRAIN=off -DMSLITE_GPU_BACKEND=off"
else
if [[ "${machine}" == "aarch64" ]]; then
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMACHINE_LINUX_ARM64=on"
@ -228,26 +238,25 @@ build_lite() {
if [[ "X$CMAKE_TOOLCHAIN_FILE" != "X" ]]; then
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}"
fi
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]]; then
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}"
fi
if [[ "${local_lite_platform}" == "arm64" || "${local_lite_platform}" == "arm32" ]]; then
echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
fi
echo "cmake ${LITE_CMAKE_ARGS} ${BASEPATH}/mindspore/lite"
if [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then
export MSLITE_REGISTRY_DEVICE=""
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
export MSLITE_REGISTRY_DEVICE=sd3403
else
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
fi
echo "cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON ${BASEPATH}/mindspore/lite"
cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=ON "${BASEPATH}/mindspore/lite"
if [[ "$(uname)" == "Darwin" && "${local_lite_platform}" != "x86_64" ]]; then
xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk iphoneos -quiet
elif [[ "$(uname)" == "Darwin" && "${local_lite_platform}" == "x86_64" ]]; then
xcodebuild ONLY_ACTIVE_ARCH=NO -configuration Release -scheme mindspore-lite_static -target mindspore-lite_static -sdk iphonesimulator -quiet
else
make -j$THREAD_NUM && make install
cp -r ${BASEPATH}/output/tmp/mindspore*/runtime ${BASEPATH}/mindspore/lite/tools/benchmark
cmake ${LITE_CMAKE_ARGS} -DBUILD_FIRST=off --target benchmark "${BASEPATH}/mindspore/lite"
make -j$THREAD_NUM && make install && make package
if [[ "${local_lite_platform}" == "x86_64" ]]; then
if [ "${JAVA_HOME}" ]; then
@ -288,37 +297,16 @@ build_lite() {
fi
[ -n "${BASEPATH}" ] && rm -rf ${BASEPATH}/output/tmp/
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "sd3403" ]]; then
if [[ "X$MSLITE_REGISTRY_DEVICE" != "X" ]] && [[ "${MSLITE_REGISTRY_DEVICE}" != "SD3403" ]]; then
local compile_nnie_script=${BASEPATH}/mindspore/lite/tools/providers/NNIE/Hi3516D/compile_nnie.sh
cd ${BASEPATH}/../
if [[ "${local_lite_platform}" == "x86_64" ]]; then
bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -j $THREAD_NUM
else
bash ${compile_nnie_script} -I ${local_lite_platform} -b ${BRANCH_NAME} -t ${TOOLCHAIN_NAME} -d ${MSLITE_REGISTRY_DEVICE} -j $THREAD_NUM
fi
if [[ $? -ne 0 ]]; then
echo "compile ${local_lite_platform} for nnie failed."
exit 1
fi
elif [[ "${MSLITE_REGISTRY_DEVICE}" == "sd3403" ]] && [[ "${local_lite_platform}" == "arm64" ]]; then
LITE_CMAKE_ARGS=$(echo ${LITE_CMAKE_ARGS} | sed -e "s/MSLITE_ENABLE_TOOLS=off/MSLITE_ENABLE_TOOLS=on/g")
LITE_CMAKE_ARGS="${LITE_CMAKE_ARGS} -DMSLITE_REGISTRY_DEVICE=${MSLITE_REGISTRY_DEVICE}"
cmake ${LITE_CMAKE_ARGS} "${BASEPATH}/mindspore/lite"
cd ${BASEPATH}
compile_dpico_script=${BASEPATH}/mindspore/lite/tools/providers/dpico/sd3403/compile_3403.sh
bash ${compile_dpico_script} -t prepare_third_party
if [[ $? -ne 0 ]]; then
echo "prepare for dpico failed."
exit 1
fi
cd ${BASEPATH}/mindspore/lite/build
make -j$THREAD_NUM
cd ${BASEPATH}
sh ${compile_dpico_script}
if [[ $? -ne 0 ]]; then
echo "second compile arm64 for dpico failed."
exit 1
fi
fi
echo "---------------- mindspore lite: build success ----------------"
fi

View File

@ -21,6 +21,8 @@ else()
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
string(REPLACE "-O2" "-O0" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "-O2" "-O0" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
string(REPLACE "-D_FORTIFY_SOURCE=2" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()
set(CMAKE_SHARED_LINKER_FLAGS "${SECURE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${SECURE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}")

View File

@ -3,6 +3,9 @@ function(merge_parser CL_SRC_DIR OUT_FILE_NAME)
if(NOT EXISTS ${CL_SRC_DIR})
return()
endif()
if(DEFINED BUILD_FIRST AND NOT BUILD_FIRST)
return()
endif()
file(GLOB_RECURSE CL_LIST ${CL_SRC_DIR}/*.cc)
list(SORT CL_LIST)
set(out_file ${OUT_FILE_NAME})

View File

@ -19,6 +19,9 @@ set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
#set(CMAKE_CXX_FLAGS "-march= -mfloat-abi=softfp -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+fp16")
# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

View File

@ -1,20 +0,0 @@
set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis)
message("build cmsis kernels")
include_directories(${CMSIS_DIR}/CMSIS/Core/Include)
include_directories(${CMSIS_DIR}/CMSIS/DSP/Include)
include_directories(${CMSIS_DIR}/CMSIS/NN/Include)
file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c)
file(GLOB CMSIS_OPS
${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c
)

View File

@ -16,7 +16,24 @@ if(PLATFORM_ARM64)
elseif(PLATFORM_ARM32)
add_compile_definitions(ENABLE_ARM32)
else()
include(${MICRO_DIR}/cmake/package_cmsis.cmake)
set(CMSIS_DIR ${CMAKE_BINARY_DIR}/cmsis)
message("build cmsis kernels")
include_directories(${CMSIS_DIR}/CMSIS/Core/Include)
include_directories(${CMSIS_DIR}/CMSIS/DSP/Include)
include_directories(${CMSIS_DIR}/CMSIS/NN/Include)
file(REMOVE ${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c)
file(GLOB CMSIS_OPS
${CMSIS_DIR}/CMSIS/NN/Source/BasicMathFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ActivationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConcatenationFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ConvolutionFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/FullyConnectedFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/NNSupportFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/PoolingFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/ReshapeFunctions/*.c
${CMSIS_DIR}/CMSIS/NN/Source/SoftmaxFunctions/*.c
)
add_library(cmsis_nn STATIC ${CMSIS_OPS})
endif()

View File

@ -2,7 +2,7 @@
# Build x86 tar.gz file for dpico
function Run_Build_x86() {
export MSLITE_REGISTRY_DEVICE=sd3403
export MSLITE_REGISTRY_DEVICE=SD3403
unset JAVA_HOME
bash ${mindspore_top_dir}/build.sh -I x86_64 -j 80
if [ $? = 0 ]; then
@ -19,7 +19,7 @@ function Run_Build_x86() {
# Build arm32 tar.gz file for dpico
function Run_Build_arm64() {
export MSLITE_REGISTRY_DEVICE=sd3403
export MSLITE_REGISTRY_DEVICE=SD3403
unset JAVA_HOME
bash ${mindspore_top_dir}/build.sh -I arm64 -j 80
if [ $? = 0 ]; then

View File

@ -1,4 +1,48 @@
# add shared link library
cmake_minimum_required(VERSION 3.14)
project(Lite_benchmark)
set(BENCHMARK_LINK_LIB mindspore-lite)
if(TARGET_HIMIX)
add_subdirectory(nnie)
add_subdirectory(nnie_proposal)
set(CMAKE_SKIP_BUILD_RPATH on)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} pthread
mslite_proposal mslite_nnie dl nnie mpi VoiceEngine upvqe dnvqe securec)
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3516D")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3516_sdk/lib)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json)
elseif(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3519A")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3519_sdk/lib)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json)
elseif(${MSLITE_REGISTRY_DEVICE} STREQUAL "Hi3559A")
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/nnie/third_patry/hi3559_sdk/lib)
add_compile_definitions(BENCHMARK_CLIP_JSON)
endif()
elseif(TARGET_MIX210)
set(CMAKE_SKIP_BUILD_RPATH on)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json pthread
dpico_acl_adapter svp_acl dl securec protobuf-c stdc++)
if(${MSLITE_REGISTRY_DEVICE} STREQUAL "SD3403")
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/dpico)
message("34xx_sdk_SOURCE_DIR:${34xx_sdk_SOURCE_DIR}.")
include_directories(${34xx_sdk_SOURCE_DIR}/include)
include_directories(${34xx_sdk_SOURCE_DIR})
link_directories(${34xx_sdk_SOURCE_DIR}/lib)
endif()
else()
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} mindspore::json)
if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64)
if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static")
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} c++_shared)
endif()
elseif(NOT MSVC)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} pthread)
endif()
endif()
include_directories(${CCSRC_DIR}/backend/kernel_compiler/cpu)
set(COMMON_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../common/flag_parser.cc
@ -7,67 +51,32 @@ set(COMMON_SRC
${CMAKE_CURRENT_SOURCE_DIR}/../../src/common/utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../ccsrc/backend/kernel_compiler/cpu/nnacl/nnacl_common.c
)
if(NOT MSLITE_ENABLE_DPICO_ACL_ADAPTER)
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
set(COMMON_SRC ${COMMON_SRC} ../common/opengl_util.cc)
endif()
add_executable(benchmark
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc
${COMMON_SRC})
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
set(COMMON_SRC ${COMMON_SRC} ../common/opengl_util.cc)
endif()
add_dependencies(benchmark fbs_src)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/include/third_party)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/runtime/lib)
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
list(APPEND opengl_lib EGL GLESv3)
target_link_libraries(benchmark ${opengl_lib})
endif()
if(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL)
list(APPEND opengl_lib EGL GLESv3)
set(BENCHMARK_LINK_LIB ${BENCHMARK_LINK_LIB} ${opengl_lib})
endif()
if((PLATFORM_ARM32 OR PLATFORM_ARM64) AND NOT TARGET_HIMIX
AND NOT TARGET_OHOS_LITE AND NOT MACHINE_LINUX_ARM64 AND NOT TARGET_MIX210)
if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static")
target_link_libraries(benchmark mindspore-lite mindspore::json c++_shared)
else()
target_link_libraries(benchmark mindspore-lite mindspore::json)
endif()
elseif(MSVC)
target_link_libraries(benchmark mindspore-lite mindspore::json)
else()
target_link_libraries(benchmark mindspore-lite mindspore::json pthread)
endif()
else()
__download_pkg(34xx_sdk
http://mindspore-repo.csi.rnd.huawei.com/mindspore/enterprise/dpico/34xx_sdk.tar.gz
f64a9129615b3b41b63debe17c6785af)
add_executable(benchmark
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc
${COMMON_SRC})
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../lite)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../core)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/include/third_party)
add_dependencies(benchmark fbs_src)
include_directories(${34xx_sdk_SOURCE_DIR}/include)
include_directories(${34xx_sdk_SOURCE_DIR})
link_directories(${34xx_sdk_SOURCE_DIR}/lib)
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/dpico/third_party/runtime/lib)
set(CMAKE_SKIP_BUILD_RPATH on)
add_executable(benchmark
${CMAKE_CURRENT_SOURCE_DIR}/main.cc
${CMAKE_CURRENT_SOURCE_DIR}/run_benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_unified_api.cc
${CMAKE_CURRENT_SOURCE_DIR}/benchmark_c_api.cc
${COMMON_SRC})
add_dependencies(benchmark fbs_src)
target_link_libraries(benchmark mindspore-lite mindspore::json pthread
dpico_acl_adapter dl svp_acl securec protobuf-c stdc++)
endif()
target_link_libraries(benchmark ${BENCHMARK_LINK_LIB})

View File

@ -35,6 +35,12 @@
#include <asm/unistd.h>
#include <unistd.h>
#endif
#ifdef SUPPORT_NNIE
#include "include/hi_common.h"
#include "include/hi_comm_vb.h"
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif
namespace mindspore {
namespace lite {
@ -344,18 +350,48 @@ int Benchmark::InitContext(const std::shared_ptr<Context> &context) {
return RET_OK;
}
// Find the single session output tensor whose shape matches node_shape.
// Used as a fallback when an output cannot be resolved by name.
// Returns nullptr when zero or more than one output matches (ambiguous).
tensor::MSTensor *Benchmark::GetTensorByNodeShape(const std::vector<size_t> &node_shape) {
  std::vector<tensor::MSTensor *> match_tensors;
  std::vector<int> shape_vector;
  // Tensor shapes are stored as int; calib shapes arrive as size_t — convert
  // so the element-wise comparison below is well-typed.
  (void)std::transform(node_shape.begin(), node_shape.end(), std::back_inserter(shape_vector),
                       [](const size_t &value) { return static_cast<int>(value); });
  auto tensors = session_->GetOutputs();
  for (auto &out_tensor_pair : tensors) {
    if (out_tensor_pair.second->shape() == shape_vector) {
      match_tensors.emplace_back(out_tensor_pair.second);
    }
  }
  // Exactly one match is required; size() != 1 already covers the empty case,
  // so the former `empty() || size() != 1` test was redundant.
  if (match_tensors.size() != 1) {
    MS_LOG(ERROR) << "get tensor by node shape failed";
    return nullptr;
  }
  return match_tensors.front();
}
// Resolve an output tensor: first by tensor name, then by node name, and
// finally — when the node lookup yields zero or multiple tensors — by
// matching `dims` against the session outputs (GetTensorByNodeShape).
// Returns nullptr only if every strategy fails.
tensor::MSTensor *Benchmark::GetTensorByNameOrShape(const std::string &node_or_tensor_name,
                                                    const std::vector<size_t> &dims) {
  tensor::MSTensor *tensor = session_->GetOutputByTensorName(node_or_tensor_name);
  if (tensor == nullptr) {
    // The tensor-name lookup failed, so the name may be a node name instead.
    // (The previous message claimed to "switch to GetOutputByTensorName",
    // i.e. the call that had just failed — corrected to name the actual fallback.)
    MS_LOG(INFO) << "Cannot find output tensor: " << node_or_tensor_name
                 << ", switch to GetOutputsByNodeName";
    auto tensors = session_->GetOutputsByNodeName(node_or_tensor_name);
    if (tensors.size() == 1) {
      tensor = tensors.front();
    } else {
      // Zero or ambiguous node outputs: fall back to shape matching.
      return GetTensorByNodeShape(dims);
    }
  }
  return tensor;
}
int Benchmark::CompareOutput() {
std::cout << "================ Comparing Output data ================" << std::endl;
float total_bias = 0;
int total_size = 0;
// check the output tensor name.
if (this->benchmark_tensor_names_ != session_->GetOutputTensorNames()) {
MS_LOG(ERROR) << "The output tensor name is wrong.";
return RET_ERROR;
}
for (const auto &calib_tensor : benchmark_data_) {
std::string tensor_name = calib_tensor.first;
tensor::MSTensor *tensor = session_->GetOutputByTensorName(tensor_name);
tensor::MSTensor *tensor = GetTensorByNameOrShape(tensor_name, calib_tensor.second->shape);
if (tensor == nullptr) {
MS_LOG(ERROR) << "Get tensor failed, tensor name: " << tensor_name;
return RET_ERROR;
@ -940,7 +976,7 @@ std::string DumpMSTensor(tensor::MSTensor *tensor) {
}
return oss.str();
}
#ifndef BENCHMARK_CLIP_JSON
std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string &op_name, const std::string &file_type,
const size_t &idx) {
std::string file_name = op_name;
@ -962,6 +998,7 @@ std::string GenerateOutputFileName(tensor::MSTensor *tensor, const std::string &
}
return file_name;
}
#endif
} // namespace
int Benchmark::InitPrintTensorDataCallbackParameter() {
@ -990,6 +1027,7 @@ int Benchmark::InitPrintTensorDataCallbackParameter() {
return RET_OK;
}
int Benchmark::InitDumpTensorDataCallbackParameter() {
#ifndef BENCHMARK_CLIP_JSON
// before callback
before_call_back_ = [&](const std::vector<mindspore::tensor::MSTensor *> &before_inputs,
const std::vector<mindspore::tensor::MSTensor *> &before_outputs,
@ -1035,6 +1073,7 @@ int Benchmark::InitDumpTensorDataCallbackParameter() {
}
return true;
};
#endif
return RET_OK;
}

View File

@ -29,7 +29,9 @@
#include <memory>
#include <cfloat>
#include <utility>
#ifndef BENCHMARK_CLIP_JSON
#include <nlohmann/json.hpp>
#endif
#include "tools/benchmark/benchmark_base.h"
#include "include/model.h"
#include "tools/common/flag_parser.h"
@ -96,6 +98,8 @@ class MS_API Benchmark : public BenchmarkBase {
int CompareDataGetTotalCosineDistanceAndSize(const std::string &name, tensor::MSTensor *tensor,
float *total_cosine_distance, int *total_size);
tensor::MSTensor *GetTensorByNodeShape(const std::vector<size_t> &node_shape);
tensor::MSTensor *GetTensorByNameOrShape(const std::string &node_or_tensor_name, const std::vector<size_t> &dims);
private:
#ifdef ENABLE_OPENGL_TEXTURE

View File

@ -34,6 +34,12 @@
#include <asm/unistd.h>
#include <unistd.h>
#endif
#ifdef SUPPORT_NNIE
#include "include/hi_common.h"
#include "include/hi_comm_vb.h"
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif
namespace mindspore {
namespace lite {
@ -57,6 +63,10 @@ constexpr int16_t kInputDataInt8Min = -127;
constexpr int16_t kInputDataInt8Max = 127;
constexpr int16_t kInputDataUint8Min = 0;
constexpr int16_t kInputDataUint8Max = 254;
#ifdef SUPPORT_NNIE
constexpr int kNNIEMaxPoolCnt = 2;
constexpr int kNNIEBlkSize = 768 * 576 * 2;
#endif
const std::unordered_map<int, std::string> kTypeIdMap{
{kNumberTypeFloat16, "Float16"}, {kNumberTypeFloat, "Float32"}, {kNumberTypeFloat32, "Float32"},
@ -294,6 +304,7 @@ int BenchmarkBase::CheckDeviceTypeValid() {
}
int BenchmarkBase::InitDumpConfigFromJson(char *path) {
#ifndef BENCHMARK_CLIP_JSON
auto real_path = RealPath(path);
std::ifstream ifs(real_path);
if (!ifs.good()) {
@ -354,7 +365,7 @@ int BenchmarkBase::InitDumpConfigFromJson(char *path) {
MS_LOG(ERROR) << "create data output directory failed.";
return RET_ERROR;
}
#endif
return RET_OK;
}
@ -623,6 +634,72 @@ int BenchmarkBase::PrintPerfResult(const std::vector<std::string> &title,
}
#endif
#ifdef SUPPORT_NNIE
// Initialize the HiSilicon SVP/NNIE runtime for the benchmark process.
// Tears down any prior MPI system / video-buffer (VB) state, then configures
// a VB pool and brings the MPI system back up. Call order matters: VB must be
// configured and initialized before HI_MPI_SYS_Init.
// Returns RET_OK on success, RET_ERROR on any unrecoverable SDK failure.
int SvpSysInit() {
HI_S32 ret = HI_SUCCESS;
VB_CONFIG_S struVbConf;
// Reset any MPI system state left over from a previous run.
ret = HI_MPI_SYS_Exit();
if (HI_SUCCESS != ret) {
MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
return RET_ERROR;
}
ret = HI_MPI_VB_Exit();
if (HI_SUCCESS != ret) {
// VB was not initialized (or cannot be torn down): skip VB configuration
// and just restart the MPI system. This path is treated as best-effort.
MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
ret = HI_MPI_SYS_Init();
if (HI_SUCCESS != ret) {
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
return RET_ERROR;
}
return RET_OK;
}
// Configure the common VB pool: kNNIEMaxPoolCnt pools, one block of
// kNNIEBlkSize bytes in pool index 1.
// NOTE(review): only astCommPool[1] is populated while u32MaxPoolCnt is 2;
// pool index 0 is left zeroed by the memset — confirm this is intended.
memset(&struVbConf, 0, sizeof(VB_CONFIG_S));
struVbConf.u32MaxPoolCnt = kNNIEMaxPoolCnt;
struVbConf.astCommPool[1].u64BlkSize = kNNIEBlkSize;
struVbConf.astCommPool[1].u32BlkCnt = 1;
ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);
if (HI_SUCCESS != ret) {
MS_LOG(ERROR) << "Error:HI_MPI_VB_SetConf failed!";
return RET_ERROR;
}
ret = HI_MPI_VB_Init();
if (HI_SUCCESS != ret) {
MS_LOG(ERROR) << "Error:HI_MPI_VB_Init failed!";
return RET_ERROR;
}
// Bring the MPI system up last, after the VB pool is in place.
ret = HI_MPI_SYS_Init();
if (HI_SUCCESS != ret) {
MS_LOG(ERROR) << "Error:HI_MPI_SYS_Init failed!";
return RET_ERROR;
}
return RET_OK;
}
// Tear down the HiSilicon SVP/NNIE runtime: release the MPI system first,
// then the video-buffer pool. A VB exit failure is tolerated (logged as a
// WARNING, still returns RET_OK); a system exit failure returns RET_ERROR.
int SvpSysExit() {
  if (HI_MPI_SYS_Exit() != HI_SUCCESS) {
    MS_LOG(ERROR) << "HI_MPI_SYS_Exit failed!";
    return RET_ERROR;
  }
  if (HI_MPI_VB_Exit() != HI_SUCCESS) {
    MS_LOG(WARNING) << "HI_MPI_VB_Exit failed!";
  }
  return RET_OK;
}
#endif
BenchmarkBase::~BenchmarkBase() {
for (auto &iter : this->benchmark_data_) {
iter.second->shape.clear();
@ -631,6 +708,9 @@ BenchmarkBase::~BenchmarkBase() {
iter.second = nullptr;
}
this->benchmark_data_.clear();
#ifdef SUPPORT_NNIE
SvpSysExit();
#endif
}
} // namespace lite
} // namespace mindspore

View File

@ -29,7 +29,9 @@
#include <memory>
#include <cfloat>
#include <utility>
#ifndef BENCHMARK_CLIP_JSON
#include <nlohmann/json.hpp>
#endif
#include "include/model.h"
#include "include/api/types.h"
#include "include/api/format.h"
@ -419,9 +421,10 @@ class MS_API BenchmarkBase {
float op_cost_total_ = 0.0f;
std::map<std::string, std::pair<int, float>> op_times_by_type_;
std::map<std::string, std::pair<int, float>> op_times_by_name_;
#ifndef BENCHMARK_CLIP_JSON
// dump data
nlohmann::json dump_cfg_json_;
#endif
std::string dump_file_output_dir_;
#ifdef ENABLE_ARM64
int perf_fd = 0;
@ -432,6 +435,10 @@ class MS_API BenchmarkBase {
#endif
std::mt19937 random_engine_;
};
#ifdef SUPPORT_NNIE
int SvpSysInit();
int SvpSysExit();
#endif
} // namespace mindspore::lite
#endif // MINNIE_BENCHMARK_BENCHMARK_BASE_H_

View File

@ -36,6 +36,12 @@
#include <asm/unistd.h>
#include <unistd.h>
#endif
#ifdef SUPPORT_NNIE
#include "include/hi_common.h"
#include "include/hi_comm_vb.h"
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif
namespace mindspore {
constexpr size_t kDataToStringMaxNum = 40;
@ -1081,7 +1087,7 @@ std::string DumpMSTensor(mindspore::MSTensor *tensor) {
}
return oss.str();
}
#ifndef BENCHMARK_CLIP_JSON
std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::string &op_name,
const std::string &file_type, const size_t &idx) {
std::string file_name = op_name;
@ -1105,6 +1111,7 @@ std::string GenerateOutputFileName(mindspore::MSTensor *tensor, const std::strin
file_name += +".bin";
return file_name;
}
#endif
} // namespace
int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
@ -1132,6 +1139,7 @@ int BenchmarkUnifiedApi::InitPrintTensorDataCallbackParameter() {
return RET_OK;
}
int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
#ifndef BENCHMARK_CLIP_JSON
// before callback
ms_before_call_back_ = [&](const std::vector<mindspore::MSTensor> &before_inputs,
const std::vector<mindspore::MSTensor> &before_outputs,
@ -1177,6 +1185,7 @@ int BenchmarkUnifiedApi::InitDumpTensorDataCallbackParameter() {
}
return true;
};
#endif
return RET_OK;
}

View File

@ -29,7 +29,9 @@
#include <memory>
#include <cfloat>
#include <utility>
#ifndef BENCHMARK_CLIP_JSON
#include <nlohmann/json.hpp>
#endif
#include "tools/benchmark/benchmark_base.h"
#include "include/model.h"
#include "tools/common/flag_parser.h"

View File

@ -7,9 +7,9 @@ __download_pkg(34xx_sdk
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
include_directories(${34xx_sdk_SOURCE_DIR})
include_directories(${34xx_sdk_SOURCE_DIR}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_party/runtime/include/third_party)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)
link_directories(${34xx_sdk_SOURCE_DIR}/lib)
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)

View File

@ -27,7 +27,7 @@ using mindspore::schema::PrimitiveType_Custom;
namespace mindspore {
namespace dpico {
namespace {
constexpr int kBaseValue = 10;
constexpr int kDecimal = 10;
constexpr auto kInputShape = "inputs_shape";
constexpr auto kOutputShape = "outputs_shape";
constexpr auto kOutputsFormat = "outputs_format";
@ -66,13 +66,13 @@ Status GetCustomShape(const std::map<std::string, std::string> &attrs, const std
char *save_ptr = nullptr;
res = strtok_r(attr.data(), delims, &save_ptr);
while (res != nullptr) {
int64_t ndims = strtol(res, &res, kBaseValue);
int64_t ndims = strtol(res, &res, kDecimal);
int j = 0;
std::vector<int64_t> shape;
shape.resize(ndims);
for (; j < ndims; j++) {
res = strtok_r(NULL, delims, &save_ptr);
shape[j] = static_cast<int64_t>(strtol(res, &res, kBaseValue));
shape[j] = static_cast<int64_t>(strtol(res, &res, kDecimal));
}
shapes->push_back(shape);

View File

@ -0,0 +1,37 @@
# Builds libmslite_nnie.so, the NNIE custom-kernel provider for MindSpore Lite.
cmake_minimum_required(VERSION 3.14)
project(NNIE_Custom)

include_directories(${CMAKE_CURRENT_SOURCE_DIR})

# Select the vendor SDK matching the target SoC. The comparisons are quoted
# so an unset or empty MSLITE_REGISTRY_DEVICE does not make if() fail with
# "Unknown arguments" or double-dereference a variable (CMP0054).
# NOTE(review): "third_patry" matches the on-disk directory name used by the
# download step - do not "fix" the spelling here alone.
if("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3516D")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3516_sdk/lib)
elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3519A")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3519_sdk/lib)
elseif("${MSLITE_REGISTRY_DEVICE}" STREQUAL "Hi3559A")
    include_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/)
    link_directories(${CMAKE_CURRENT_SOURCE_DIR}/third_patry/hi3559_sdk/lib)
endif()

# MindSpore Lite runtime headers shipped next to this provider in the package.
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party)

aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)

set(MSLITE_NNIE_LINK_LIB nnie mpi VoiceEngine upvqe dnvqe)
add_library(mslite_nnie SHARED
        ${COMMON_SRC3})
target_link_libraries(mslite_nnie ${MSLITE_NNIE_LINK_LIB} securec)

# The cross-toolchain strip binary can be overridden via -DHIMIX_STRIP=...
if(DEFINED HIMIX_STRIP)
    set(NDK_STRIP ${HIMIX_STRIP})
else()
    set(NDK_STRIP "arm-himix200-linux-strip")
endif()

# Strip release binaries after linking to shrink the deployed .so.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    add_custom_command(TARGET mslite_nnie POST_BUILD COMMAND ${NDK_STRIP}
            ${CMAKE_CURRENT_BINARY_DIR}/libmslite_nnie.so)
endif()

View File

@ -0,0 +1,178 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/custom_fp32.h"
#include <map>
#include <memory>
#include "schema/model_generated.h"
#include "include/registry/register_kernel.h"
#include "include/errorcode.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
#include "src/nnie_cfg_parser.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Custom;
namespace mindspore {
namespace nnie {
bool CustomCPUKernel::load_model_ = false;
int CustomCPUKernel::run_seg_ = 0;
bool CustomCPUKernel::roi_used_ = false;
// Loads the NNIE WK model on first use and caches the output tensor shapes.
// The serialized model blob is carried in the LAST input tensor; the earlier
// inputs are the network's real inputs.
// NOTE(review): when NNIEManager::Init() fails this returns RET_OK without
// setting load_model_, so Execute() later fails with "WK Model is not load."
// Confirm this lenient return is intended.
int CustomCPUKernel::Prepare() {
  if (!load_model_) {
    // Reads TIME_STEP / MAX_ROI_NUM / CORE_IDS from environment variables.
    Flags flags;
    flags.Init();
    if (nnie::NNIEManager::GetInstance()->CfgInit(flags.max_roi_num_, flags.time_step_, flags.core_ids_) != RET_OK) {
      LOGE("Nnie init cfg fail");
      return RET_ERROR;
    }
    // The last input tensor holds the compiled WK model buffer.
    if (nnie::NNIEManager::GetInstance()->Init(reinterpret_cast<char *>(inputs_[inputs_.size() - 1].MutableData()),
                                               static_cast<int>(inputs_[inputs_.size() - 1].ElementNum()), inputs_)) {
      // LOGW("Load WK Model Fail");
      return RET_OK;
    }
    load_model_ = true;
  }
  // Cache the current output shapes; Run() consumes them later.
  outputs_shapes_.resize(outputs_.size());
  for (size_t i = 0; i < outputs_.size(); i++) {
    outputs_shapes_[i] = outputs_[i].Shape();
  }
  return RET_OK;
}
// Drops any previously loaded WK model so that Prepare() reloads it with
// the new tensor shapes, then runs the normal preparation path.
int CustomCPUKernel::ReSize() {
  if (!load_model_) {
    return Prepare();
  }
  nnie::NNIEManager::GetInstance()->Release();
  load_model_ = false;
  return Prepare();
}
// Runs this kernel's network segment on the NNIE engine: fills the segment's
// input blobs from inputs_, launches the segment, then advances the shared
// segment cursor.
int CustomCPUKernel::Execute() {
  if (!load_model_) {
    LOGE("WK Model is not load.");
    return RET_ERROR;
  }
  // run_seg_ is static: consecutive kernels step through the model's
  // segments in order, each starting from its own seg_id_.
  run_seg_ = seg_id_;
  if (nnie::NNIEManager::GetInstance()->FillData(&inputs_, run_seg_)) {
    LOGE("Fail Fill Data");
    return RET_ERROR;
  }
  if (nnie::NNIEManager::GetInstance()->Run(&outputs_, run_seg_, outputs_shapes_)) {
    LOGE("Fail WK Run");
    return RET_ERROR;
  }
  run_seg_++;
  return RET_OK;
}
// Releases the shared WK model when the last kernel holding it is destroyed.
CustomCPUKernel::~CustomCPUKernel() {
  if (!load_model_) {
    return;
  }
  nnie::NNIEManager::GetInstance()->Release();
  load_model_ = false;
}
// Copies the value of the named Custom-op attribute into `buf` as a
// NUL-terminated string.
// Returns false when the attribute table is missing, the attribute is
// absent, or its value plus terminator does not fit into buf_size bytes.
bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) {
  // fix: op->attr() is an optional flatbuffers table and may be null.
  if (buf == nullptr || op == nullptr || op->attr() == nullptr) {
    return false;
  }
  for (size_t i = 0; i < op->attr()->size(); i++) {
    if (op->attr()->Get(i)->name()->str() != attr) {
      continue;
    }
    auto output_info = op->attr()->Get(i)->data();
    int attr_size = static_cast<int>(output_info->size());
    if (attr_size >= buf_size) {  // need one spare byte for the terminator
      LOGE("attr size too big");
      return false;
    }
    for (int j = 0; j < attr_size; j++) {
      buf[j] = static_cast<char>(output_info->Get(j));
    }
    buf[attr_size] = 0;
    return true;
  }
  return false;
}
// Factory registered with the custom-kernel registry. Parses the mandatory
// "id" attribute (network segment index) and the optional "ForwardWithBbox"
// flag from the Custom primitive, then builds a CustomCPUKernel.
// Returns nullptr on any malformed input.
std::shared_ptr<mindspore::kernel::Kernel> CustomCreateKernel(const std::vector<MSTensor> &inputs,
                                                              const std::vector<MSTensor> &outputs,
                                                              const mindspore::schema::Primitive *primitive,
                                                              const mindspore::Context *ctx) {
  // fix: guard against a null primitive before dereferencing it.
  if (primitive == nullptr || primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return nullptr;
  }
  auto op = primitive->value_as_Custom();
  // fix: op->attr() is optional in the schema and may be null.
  if (op == nullptr || op->attr() == nullptr || op->attr()->size() < 1) {
    LOGE("There are at least 1 attribute of Custom");
    return nullptr;
  }
  int64_t ndims;
  bool forward_bbox = false;
  char *res = nullptr;
  char buf[kMaxSize];
  // "id" is mandatory: the segment of the WK model this kernel executes.
  if (GetCustomAttr(buf, kMaxSize, op, "id")) {
    res = nullptr;
    ndims = strtol(buf, &res, kDecimal);
    if ((*res) != 0) {
      LOGE("Get attr id data fail");
      return nullptr;
    }
  } else {
    LOGE("Custom op should have id");
    return nullptr;
  }
  // "ForwardWithBbox" is optional: a positive value enables ROI forwarding.
  if (GetCustomAttr(buf, kMaxSize, op, "ForwardWithBbox")) {
    res = nullptr;
    int64_t temp_val = strtol(buf, &res, kDecimal);
    if ((*res) != 0) {
      LOGE("Get attr ForwardWithBbox data fail");
      return nullptr;
    }
    if (temp_val > 0) {
      forward_bbox = true;
    }
  }
  auto kernel = std::make_shared<CustomCPUKernel>(ndims, forward_bbox, inputs, outputs, primitive, ctx);
  if (kernel == nullptr) {
    LOGE("new custom kernel is nullptr");
    return nullptr;
  }
  return kernel;
}
} // namespace nnie
} // namespace mindspore
namespace mindspore {
namespace registry {
namespace {
// Input data types accepted by the NNIE custom kernel.
const auto kFloat32 = DataType::kNumberTypeFloat32;
const auto kInt8 = DataType::kNumberTypeInt8;
const auto kUint8 = DataType::kNumberTypeUInt8;
}  // namespace
// Register CustomCreateKernel for CPU-dispatched NNIE custom ops, once per
// supported data type. NOTE(review): argument order follows
// REGISTER_CUSTOM_KERNEL in include/registry/register_kernel.h - verify
// against that header if the macro signature changes.
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, NNIE, nnie::CustomCreateKernel)
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kInt8, NNIE, nnie::CustomCreateKernel)
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kUint8, NNIE, nnie::CustomCreateKernel)
}  // namespace registry
}  // namespace mindspore

View File

@ -0,0 +1,66 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_
// NOTE(review): this guard macro looks copied from another file; consider
// renaming it to match this header's path under tools/benchmark/nnie.
#include <vector>
#include <string>
#include "include/schema/model_generated.h"
#include "include/context.h"
#include "include/api/kernel.h"
#include "src/custom_infer.h"
using mindspore::kernel::Kernel;
using mindspore::tensor::MSTensor;
namespace mindspore {
namespace nnie {
// Lite custom kernel that runs one segment of a HiSilicon NNIE WK model.
// The WK model is loaded lazily in Prepare() and shared between kernel
// instances through the static load_model_/run_seg_/roi_used_ state.
class CustomCPUKernel : public Kernel {
 public:
  // seg_id: index of the network segment this kernel executes.
  // forward_bbox: true when the segment consumes ROI/bbox input; setting it
  // marks the shared roi_used_ flag for the whole model.
  CustomCPUKernel(int seg_id, bool forward_bbox, const std::vector<MSTensor> &inputs,
                  const std::vector<MSTensor> &outputs, const mindspore::schema::Primitive *primitive,
                  const mindspore::Context *ctx)
      : Kernel(inputs, outputs, primitive, ctx), seg_id_(seg_id), forward_bbox_(forward_bbox) {
    if (forward_bbox) {
      roi_used_ = true;
    }
  }
  ~CustomCPUKernel() override;
  int Prepare() override;
  int ReSize() override;
  int Execute() override;
  // Accessors for the segment id and the ROI-forwarding flag.
  int seg_id(void) const { return seg_id_; }
  void set_seg_id(int id) { seg_id_ = id; }
  int forward_bbox(void) const { return forward_bbox_; }
  void set_forward_bbox(bool flag) { forward_bbox_ = flag; }

 private:
  static bool load_model_;  // true once the WK model has been loaded
  static int run_seg_;      // segment currently being executed
  static bool roi_used_;    // set when any instance forwards bbox/ROI data
  int seg_id_ = 0;
  bool forward_bbox_ = false;
  std::vector<std::vector<int64_t>> outputs_shapes_;  // cached output shapes
};
}  // namespace nnie
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ARM_FP32_CUSTOM_H_

View File

@ -0,0 +1,160 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/custom_infer.h"
#include <string>
#include <iostream>
#include "include/errorcode.h"
#include "src/nnie_print.h"
#include "include/api/format.h"
#include "include/registry/register_kernel_interface.h"
using mindspore::kernel::KernelInterface;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Custom;
namespace mindspore {
namespace nnie {
// Creates the shape-inference hook for the NNIE custom op.  Allocation uses
// std::nothrow so failure surfaces as a null pointer, not an exception.
std::shared_ptr<KernelInterface> CustomInferCreater() {
  auto *raw_infer = new (std::nothrow) CustomInterface();
  if (raw_infer == nullptr) {
    LOGE("new custom infer is nullptr");
    return nullptr;
  }
  return std::shared_ptr<KernelInterface>(raw_infer);
}
// Parses a serialized shape attribute of a Custom op into `shapes`.
// The attribute text is a comma-separated integer list laid out as repeated
// groups of: ndims, dim_0, ..., dim_{ndims-1}.
// Returns RET_ERROR when the attribute is missing, too large for the local
// buffer, or truncated/malformed.
int GetCustomShape(const mindspore::schema::Custom *op, const std::string &attr,
                   std::vector<std::vector<int64_t>> *shapes) {
  char buf[kMaxSize];
  bool has_outputs_shape = false;
  // Copy the attribute bytes into a NUL-terminated local buffer.
  for (size_t i = 0; i < op->attr()->size(); i++) {
    if (op->attr()->Get(i)->name()->str() == attr) {
      auto output_info = op->attr()->Get(i)->data();
      int attr_size = static_cast<int>(output_info->size());
      if (attr_size >= kMaxSize) {
        LOGE("attr size too big");
        return RET_ERROR;
      }
      for (int j = 0; j < attr_size; j++) {
        buf[j] = static_cast<char>(output_info->Get(j));
      }
      buf[attr_size] = 0;
      has_outputs_shape = true;
      break;
    }
  }
  if (!has_outputs_shape) {
    LOGE("Custom op don't have %s attr.", attr.c_str());
    return RET_ERROR;
  }
  char delims[] = ",";
  char *res = nullptr;
  char *save_ptr = nullptr;
  res = strtok_r(buf, delims, &save_ptr);
  while (res != nullptr) {
    int64_t ndims = strtol(res, &res, kDecimal);
    if (ndims < 0) {  // fix: a negative ndims would crash shape.resize()
      LOGE("attr %s has invalid ndims.", attr.c_str());
      return RET_ERROR;
    }
    std::vector<int64_t> shape;
    shape.resize(ndims);
    for (int64_t j = 0; j < ndims; j++) {
      res = strtok_r(NULL, delims, &save_ptr);
      if (res == nullptr) {  // fix: truncated attribute used to feed nullptr into strtol
        LOGE("attr %s is truncated.", attr.c_str());
        return RET_ERROR;
      }
      shape[j] = static_cast<int64_t>(strtol(res, &res, kDecimal));
    }
    shapes->push_back(shape);
    res = strtok_r(NULL, delims, &save_ptr);
  }
  return RET_OK;
}
// Shape inference for the NNIE Custom op. Reads the "inputs_shape" /
// "outputs_shape" attributes recorded at conversion time, validates them
// against the actual input tensors, supports resizing ONLY along the batch
// dimension (axis 0), and writes shape/dtype/format onto every output.
// NOTE(review): outputs_shape[i] is indexed for every output tensor without
// checking outputs_shape.size() == outputs->size() - confirm the converter
// guarantees a one-to-one match.
Status CustomInterface::Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                              const mindspore::schema::Primitive *primitive) {
  if (inputs->empty()) {
    LOGE("Inputs size 0");
    return kLiteError;
  }
  if (outputs->empty()) {
    LOGE("Outputs size 0");
    return kLiteError;
  }
  if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return kLiteError;
  }
  auto op = primitive->value_as_Custom();
  if (op->attr()->size() < 1) {
    LOGE("There are at least 1 attribute of Custom");
    return kLiteError;
  }
  std::vector<std::vector<int64_t>> inputs_shape;
  if (GetCustomShape(op, "inputs_shape", &inputs_shape) != RET_OK) {
    LOGE("parser inputs_shape attribute err.");
    return kLiteError;
  }
  std::vector<std::vector<int64_t>> outputs_shape;
  if (GetCustomShape(op, "outputs_shape", &outputs_shape) != RET_OK) {
    LOGE("parser outputs_shape attribute err.");
    return kLiteError;
  }
  // The last input is the serialized WK model, hence size() - 1 real inputs.
  if (inputs_shape.size() != (inputs->size() - 1)) {
    LOGE("inputs num diff inputs_shape num.");
    return kLiteError;
  }
  if (inputs_shape[0].size() != (*inputs)[0].Shape().size()) {
    LOGE("shape size err.");
    return kLiteError;
  }
  // Detect a batch-size change on input 0; any other dimension mismatch is
  // rejected because NNIE only supports batch resizing.
  bool resize_flag = false;
  int resize_num = 1;
  for (size_t i = 0; i < inputs_shape[0].size(); i++) {
    if (inputs_shape[0][i] != (*inputs)[0].Shape()[i]) {
      if (i == 0) {
        resize_flag = true;
        resize_num = (*inputs)[0].Shape()[i];
      } else {
        LOGE("Custom of NNIE only support batch_num resize.");
        return kLiteError;
      }
    }
  }
  // Propagate the new batch size to every recorded output shape.
  if (resize_flag) {
    for (auto &output_shape : outputs_shape) {
      output_shape[0] = resize_num;
    }
  }
  for (size_t i = 0; i < outputs->size(); i++) {
    (*outputs)[i].SetShape(outputs_shape[i]);
    (*outputs)[i].SetDataType(DataType::kNumberTypeFloat32);
    (*outputs)[i].SetFormat(Format::NCHW);
  }
  return kSuccess;
}
} // namespace nnie
} // namespace mindspore
namespace mindspore {
namespace kernel {
REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, NNIE, nnie::CustomInferCreater);
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// fix: the previous guard macro (MINDSPORE_LITE_NNACL_CUSTOM_PARAMETER_H_)
// was copied from nnacl/custom_parameter.h; if both headers were included in
// one translation unit, the second would be silently skipped.
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_SRC_CUSTOM_INFER_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_SRC_CUSTOM_INFER_H_
#include <vector>
#include <memory>
#include "include/kernel_interface.h"
namespace mindspore {
namespace nnie {
// Shape-inference hook for the NNIE Custom op: parses the "inputs_shape" /
// "outputs_shape" attributes and propagates shapes to the output tensors.
class CustomInterface : public mindspore::kernel::KernelInterface {
 public:
  CustomInterface() {}
  ~CustomInterface() = default;
  Status Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
               const mindspore::schema::Primitive *primitive) override;
};
}  // namespace nnie
}  // namespace mindspore
#endif  // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_SRC_CUSTOM_INFER_H_

View File

@ -0,0 +1,101 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/nnie_cfg_parser.h"
#include <climits>
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/errorcode.h"
#include "src/nnie_manager.h"
#include "src/nnie_print.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
namespace mindspore {
namespace nnie {
namespace {
constexpr auto ENV_TIME_STEP = "TIME_STEP";
constexpr auto ENV_MAX_ROI_NUM = "MAX_ROI_NUM";
constexpr auto ENV_CORE_IDS = "CORE_IDS";
constexpr auto DELIM = ",";
constexpr int MAX_CORE_ID = 7;
} // namespace
// Populates the flags from environment variables, keeping the compiled-in
// defaults when a variable is unset or malformed:
//   TIME_STEP   - int, step num for rnn/lstm models.
//   MAX_ROI_NUM - int, per-picture ROI capacity.
//   CORE_IDS    - comma-separated NNIE core ids, each in [0, MAX_CORE_ID].
// A numeric value is accepted when it begins with a digit; scanning stops at
// the first non-digit, which is overwritten with '\0' in the env buffer.
void Flags::Init() {
  auto *time_step = std::getenv(ENV_TIME_STEP);
  if (time_step != nullptr) {
    auto iter = std::find_if(time_step, time_step + strlen(time_step), [](char val) { return val < '0' || val > '9'; });
    if (iter != time_step) {
      *iter = '\0';
      this->time_step_ = atoi(time_step);
    } else {
      // fix: was LOGE - an invalid value falls back to the default exactly
      // like MAX_ROI_NUM below, so it is a warning, not an error.
      LOGW("TIME_STEP ENV is invalid, now set to default value %d", this->time_step_);
    }
  } else {
    LOGW("TIME_STEP ENV is not set, now set to default value %d", this->time_step_);
  }
  auto *max_roi_num = std::getenv(ENV_MAX_ROI_NUM);
  if (max_roi_num != nullptr) {
    auto iter =
      std::find_if(max_roi_num, max_roi_num + strlen(max_roi_num), [](char val) { return val < '0' || val > '9'; });
    if (iter != max_roi_num) {
      *iter = '\0';
      this->max_roi_num_ = atoi(max_roi_num);
    } else {
      LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", this->max_roi_num_);
    }
  } else {
    LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", this->max_roi_num_);
  }
  auto ids = std::getenv(ENV_CORE_IDS);
  if (ids != nullptr) {
    auto iter = std::find_if(ids, ids + strlen(ids), [](char val) { return (val < '0' || val > '9') && val != ','; });
    std::vector<int> core_ids;
    if (iter != ids) {
      *iter = '\0';
      char *saveptr;
      char *p = strtok_r(ids, DELIM, &saveptr);
      while (p != nullptr) {
        int id = atoi(p);
        p = strtok_r(NULL, DELIM, &saveptr);
        // Out-of-range ids are reported and skipped, not fatal.
        if (id > MAX_CORE_ID || id < 0) {
          LOGE("id is out of range");
          continue;
        }
        // Deduplicate so every core id appears at most once.
        if (std::find(core_ids.begin(), core_ids.end(), id) != core_ids.end()) {
          continue;
        }
        core_ids.push_back(id);
      }
    }
    if (!core_ids.empty()) {
      this->core_ids_ = core_ids;
    } else {
      std::string message =
        "CORE_IDS ENV is invalid, now set to default value {" + std::to_string(this->core_ids_.front()) + "}";
      LOGW(message.c_str());
    }
  } else {
    std::string message =
      "CORE_IDS ENV is not set, now set to default value {" + std::to_string(this->core_ids_.front()) + "}";
    LOGW(message.c_str());
  }
}
} // namespace nnie
} // namespace mindspore

View File

@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_
#include <vector>
namespace mindspore {
namespace nnie {
/**
 * Flags is a config container populated from environment variables.
 * Member objects:
 * 1.time_step_: step num only for rnn or lstm model. Default is 1.
 * 2.max_roi_num_: maximum number of ROI areas that a single picture
 *   supports; must be greater than 0. Default is 300.
 * 3.core_ids_: running kernels' ids; supports multi-core, separated by
 *   commas when setting, such as {0, 1, 2}. Each element must be an
 *   integer which meets the inequality 0 <= val < 8. Default is {0}.
 */
class Flags {
 public:
  Flags() = default;
  ~Flags() = default;
  // Reads TIME_STEP, MAX_ROI_NUM and CORE_IDS from the environment,
  // keeping the defaults above when a variable is unset or malformed.
  void Init();

 public:
  int time_step_{1};
  int max_roi_num_{300};
  std::vector<int> core_ids_{0};
};
}  // namespace nnie
}  // namespace mindspore
#endif  // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_NNIE_CFG_PARSER_H_

View File

@ -0,0 +1,943 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/nnie_common.h"
#include "include/mpi_nnie.h"
#include "include/hi_type.h"
#include "include/errorcode.h"
#include "src/nnie_print.h"
#include "src/nnie_memory.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
constexpr int kSleepUs = 100;
namespace mindspore {
namespace nnie {
// Frees the NNIE task and step auxiliary buffers owned by nnie_param, then
// zeroes the recorded addresses so a repeated release is a no-op.
static void NnieParamRelease(NnieParam *nnie_param) {
  if (nnie_param == nullptr) {
    return;
  }
  bool task_allocated = nnie_param->task_buf_.u64PhyAddr != 0 && nnie_param->task_buf_.u64VirAddr != 0;
  if (task_allocated) {
    NNIE_MEM_FREE(nnie_param->task_buf_.u64PhyAddr, nnie_param->task_buf_.u64VirAddr);
    nnie_param->task_buf_.u64PhyAddr = 0;
    nnie_param->task_buf_.u64VirAddr = 0;
  }
  bool step_allocated = nnie_param->step_buf_.u64PhyAddr != 0 && nnie_param->step_buf_.u64VirAddr != 0;
  if (step_allocated) {
    NNIE_MEM_FREE(nnie_param->step_buf_.u64PhyAddr, nnie_param->step_buf_.u64VirAddr);
    nnie_param->step_buf_.u64PhyAddr = 0;
    nnie_param->step_buf_.u64VirAddr = 0;
  }
}
// Returns true when `name` matches a destination (output) node of any net
// segment. Side effect: marks the matched node in mem_cfg_.dst_node_, which
// GetBlobMemSize later uses to skip re-allocating shared inner-connection
// memory.
bool CheckNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param) {
  for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++)
      if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) {
        nnie_param->mem_cfg_.seg_[i].dst_node_[j] = true;
        return true;
      }
  }
  return false;
}
bool ConnectNnieInnerNode(const HI_CHAR *name, NnieParam *nnie_param, SVP_SRC_BLOB_S *blob) {
for (HI_U32 i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
for (HI_U32 j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++)
if (strncmp(name, nnie_param->model_->astSeg[i].astDstNode[j].szName, SVP_NNIE_NODE_NAME_LEN) == 0) {
blob->u64PhyAddr = nnie_param->seg_data_[i].dst_[j].u64PhyAddr;
blob->u64VirAddr = nnie_param->seg_data_[i].dst_[j].u64VirAddr;
return true;
}
}
return false;
}
// Fills the per-segment forward control structures and blob descriptors from
// the loaded WK model: core assignment, src/dst node counts, and the shape /
// count of every input and output blob. Also records which src nodes are
// inner connections (outputs of an earlier segment) in mem_cfg_.
static void FillForwardInfo(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
  HI_U32 i, j;
  HI_U32 num;
  // Clear all inner-connection flags before re-deriving them below.
  memset(&nnie_param->mem_cfg_, false, sizeof(NNIEMemCfg));
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    // ROI segments use the bbox-forward control block; CNN/recurrent
    // segments use the plain forward control block.
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_with_bbox_ctrl_[i].enNnieId = nnie_cfg->nnie_core_id_[i];
      nnie_param->forward_with_bbox_ctrl_[i].u32SrcNum = nnie_param->model_->astSeg[i].u16SrcNum;
      nnie_param->forward_with_bbox_ctrl_[i].u32DstNum = nnie_param->model_->astSeg[i].u16DstNum;
      nnie_param->forward_with_bbox_ctrl_[i].u32ProposalNum = 1;
      nnie_param->forward_with_bbox_ctrl_[i].u32NetSegId = i;
    } else if (SVP_NNIE_NET_TYPE_CNN == nnie_param->model_->astSeg[i].enNetType ||
               SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_ctrl_[i].enNnieId = nnie_cfg->nnie_core_id_[i];
      nnie_param->forward_ctrl_[i].u32SrcNum = nnie_param->model_->astSeg[i].u16SrcNum;
      nnie_param->forward_ctrl_[i].u32DstNum = nnie_param->model_->astSeg[i].u16DstNum;
      nnie_param->forward_ctrl_[i].u32NetSegId = i;
    }
    // Describe each input blob of segment i.
    for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
      if (i > 0) {
        // A src node fed by an earlier segment's output shares that memory.
        if (CheckNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param)) {
          nnie_param->mem_cfg_.seg_[i].src_node_[j] = true;
        }
      }
      if (SVP_BLOB_TYPE_SEQ_S32 == nnie_param->model_->astSeg[i].astSrcNode[j].enType) {
        // Sequence (recurrent) blobs carry a dim + per-step address table.
        nnie_param->seg_data_[i].src_[j].enType = nnie_param->model_->astSeg[i].astSrcNode[j].enType;
        nnie_param->seg_data_[i].src_[j].unShape.stSeq.u32Dim =
          nnie_param->model_->astSeg[i].astSrcNode[j].unShape.u32Dim;
        nnie_param->seg_data_[i].src_[j].u32Num = nnie_cfg->max_input_num_;
        nnie_param->seg_data_[i].src_[j].unShape.stSeq.u64VirAddrStep =
          nnie_cfg->step_vir_addr_[i * NNIE_EACH_SEG_STEP_ADDR_NUM];
      } else {
        // Regular blobs carry a width/height/channel shape.
        nnie_param->seg_data_[i].src_[j].enType = nnie_param->model_->astSeg[i].astSrcNode[j].enType;
        nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Chn =
          nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Chn;
        nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Height =
          nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Height;
        nnie_param->seg_data_[i].src_[j].unShape.stWhc.u32Width =
          nnie_param->model_->astSeg[i].astSrcNode[j].unShape.stWhc.u32Width;
        nnie_param->seg_data_[i].src_[j].u32Num = nnie_cfg->max_input_num_;
      }
    }
    // ROI segments produce one result set per ROI per input picture.
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      num = nnie_cfg->max_roi_num_ * nnie_cfg->max_input_num_;
    } else {
      num = nnie_cfg->max_input_num_;
    }
    // Describe each output blob of segment i.
    for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
      if (SVP_BLOB_TYPE_SEQ_S32 == nnie_param->model_->astSeg[i].astDstNode[j].enType) {
        nnie_param->seg_data_[i].dst_[j].enType = nnie_param->model_->astSeg[i].astDstNode[j].enType;
        nnie_param->seg_data_[i].dst_[j].unShape.stSeq.u32Dim =
          nnie_param->model_->astSeg[i].astDstNode[j].unShape.u32Dim;
        nnie_param->seg_data_[i].dst_[j].u32Num = num;
        nnie_param->seg_data_[i].dst_[j].unShape.stSeq.u64VirAddrStep =
          nnie_cfg->step_vir_addr_[i * NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
      } else {
        nnie_param->seg_data_[i].dst_[j].enType = nnie_param->model_->astSeg[i].astDstNode[j].enType;
        nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Chn =
          nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Chn;
        nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Height =
          nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Height;
        nnie_param->seg_data_[i].dst_[j].unShape.stWhc.u32Width =
          nnie_param->model_->astSeg[i].astDstNode[j].unShape.stWhc.u32Width;
        nnie_param->seg_data_[i].dst_[j].u32Num = num;
      }
    }
  }
}
// Computes the buffer size of each blob and accumulates it into *total_size.
// nnie_node/blob/blob_size are parallel arrays of node_num entries.
// total_step is the summed time-step count, used only for SEQ_S32
// (recurrent) blobs. `align32` selects 16- vs 32-byte row alignment
// (NOTE(review): despite its name it is compared against NNIE_ALIGN_16).
// When mem_alloc[i] is true the node is an inner connection whose memory is
// reused from the producing segment, so its size contribution is forced to 0.
// Side effect: writes the computed row stride into blob[i].u32Stride.
static void GetBlobMemSize(SVP_NNIE_NODE_S nnie_node[], HI_U32 node_num, HI_U32 total_step, SVP_BLOB_S blob[],
                           HI_U32 align32, HI_U32 *total_size, HI_U32 blob_size[], bool *mem_alloc = nullptr) {
  HI_U32 i = 0;
  HI_U32 size;
  HI_U32 stride;
  for (i = 0; i < node_num; i++) {
    // Element width: 4 bytes for the S32 blob families, 1 byte otherwise.
    if (SVP_BLOB_TYPE_S32 == nnie_node[i].enType || SVP_BLOB_TYPE_VEC_S32 == nnie_node[i].enType ||
        SVP_BLOB_TYPE_SEQ_S32 == nnie_node[i].enType) {
      size = sizeof(HI_U32);
    } else {
      size = sizeof(HI_U8);
    }
    if (SVP_BLOB_TYPE_SEQ_S32 == nnie_node[i].enType) {
      // Sequence blob: one aligned row of u32Dim elements per time step.
      if (NNIE_ALIGN_16 == align32) {
        stride = NNIE_ALIGN16(nnie_node[i].unShape.u32Dim * size);
      } else {
        stride = NNIE_ALIGN32(nnie_node[i].unShape.u32Dim * size);
      }
      blob_size[i] = total_step * stride;
    } else {
      // WHC blob: aligned row width x height x channels x batch count.
      if (NNIE_ALIGN_16 == align32) {
        stride = NNIE_ALIGN16(nnie_node[i].unShape.stWhc.u32Width * size);
      } else {
        stride = NNIE_ALIGN32(nnie_node[i].unShape.stWhc.u32Width * size);
      }
      blob_size[i] = blob[i].u32Num * stride * nnie_node[i].unShape.stWhc.u32Height * nnie_node[i].unShape.stWhc.u32Chn;
    }
    if (mem_alloc != nullptr) {
      if (mem_alloc[i]) {
        blob_size[i] = 0;  // shared inner-connection memory, nothing to allocate
      }
    }
    *total_size += blob_size[i];
    blob[i].u32Stride = stride;
  }
}
// Queries the per-segment NNIE task buffer sizes from the SDK, then adds the
// temp buffer and every segment's src/dst blob sizes to *total_size, which
// the caller uses for one consolidated allocation. Per-blob sizes come back
// in blob_size[]. Returns RET_ERROR if the SDK size query fails.
static int GetTaskAndBlobBufSize(NnieCfg *nnie_cfg, NnieParam *nnie_param, HI_U32 *total_task_buf_size,
                                 HI_U32 *tmp_buf_size, NnieBlobSize blob_size[], HI_U32 *total_size) {
  HI_S32 ret = HI_SUCCESS;
  HI_U32 i, j;
  HI_U32 total_step = 0;
  ret = HI_MPI_SVP_NNIE_GetTskBufSize(nnie_cfg->max_input_num_, nnie_cfg->max_roi_num_, nnie_param->model_,
                                      nnie_param->task_buf_size_, nnie_param->model_->u32NetSegNum);
  if (HI_SUCCESS != ret) {
    LOGE("HI_MPI_SVP_NNIE_GetTskBufSize");
    return RET_ERROR;
  }
  *total_task_buf_size = 0;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    *total_task_buf_size += nnie_param->task_buf_size_[i];
  }
  *tmp_buf_size = nnie_param->model_->u32TmpBufSize;
  *total_size += *total_task_buf_size + *tmp_buf_size;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    // For recurrent segments, sum the per-sequence step counts stored behind
    // the blob's step address table; SEQ_S32 blob sizes scale with it.
    if (SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) {
      for (j = 0; j < nnie_param->seg_data_[i].src_[0].u32Num; j++) {
        total_step += *(reinterpret_cast<HI_S32 *>(
                          static_cast<HI_UL>(nnie_param->seg_data_[i].src_[0].unShape.stSeq.u64VirAddrStep)) +
                        j);
      }
    }
    // Src blobs may be shared inner connections (mem_cfg_ flags); dst blobs
    // are always counted.
    GetBlobMemSize(&(nnie_param->model_->astSeg[i].astSrcNode[0]), nnie_param->model_->astSeg[i].u16SrcNum, total_step,
                   &(nnie_param->seg_data_[i].src_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].src_size_[0]),
                   &(nnie_param->mem_cfg_.seg_[i].src_node_[0]));
    GetBlobMemSize(&(nnie_param->model_->astSeg[i].astDstNode[0]), nnie_param->model_->astSeg[i].u16DstNum, total_step,
                   &(nnie_param->seg_data_[i].dst_[0]), NNIE_ALIGN_16, total_size, &(blob_size[i].dst_size_[0]));
  }
  return RET_OK;
}
// Allocates and wires all runtime memory for a loaded model:
//   1) queries task/tmp/blob sizes (GetTaskAndBlobBufSize),
//   2) adds space for a shared RPN bbox blob when any segment is ROI-typed,
//   3) makes ONE cached MMZ allocation covering everything, and
//   4) carves it into [per-segment task buffers][tmp buffer][per-node blobs][rpn bbox],
//      filling forward-control structs and blob addresses accordingly.
// Returns RET_ERROR when sizing, allocation, or inner-node wiring fails.
static int NnieParamInit(NnieCfg *nnie_cfg, NnieParam *nnie_param) {
  HI_U32 i, j;
  HI_U32 total_size = 0;
  HI_U32 total_task_buf_size = 0;
  HI_U32 tmp_buf_size_ = 0;
  HI_S32 ret = HI_SUCCESS;
  HI_U32 off_set = 0;
  HI_U64 phy_addr = 0;
  HI_U8 *vir_addr = nullptr;
  NnieBlobSize blob_size[SVP_NNIE_MAX_NET_SEG_NUM] = {0};
  FillForwardInfo(nnie_cfg, nnie_param);
  ret = GetTaskAndBlobBufSize(nnie_cfg, nnie_param, &total_task_buf_size, &tmp_buf_size_, blob_size, &total_size);
  if (HI_SUCCESS != ret) {
    LOGE("Error,Malloc memory failed! ");
    return RET_ERROR;
  }
  // Any ROI-type segment needs an extra shared blob for the proposal (RPN) boxes.
  bool has_roi = false;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      has_roi = true;
    }
  }
  if (has_roi) {
    nnie_param->rpn_bbox_.enType = SVP_BLOB_TYPE_S32;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Chn = 1;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Height = nnie_cfg->max_roi_num_;
    nnie_param->rpn_bbox_.unShape.stWhc.u32Width = NNIE_COORDI_NUM;
    nnie_param->rpn_bbox_.u32Stride = NNIE_ALIGN16(NNIE_COORDI_NUM * sizeof(HI_U32));
    nnie_param->rpn_bbox_.u32Num = nnie_cfg->max_input_num_;
    total_size +=
      nnie_param->rpn_bbox_.u32Num * nnie_param->rpn_bbox_.unShape.stWhc.u32Height * nnie_param->rpn_bbox_.u32Stride;
  }
  // One cached allocation for task buffers + tmp buffer + all blobs (+ rpn bbox tail).
  ret = NnieMemMallocCached(std::string("NNIE_NNIE_TASK").data(), nullptr, reinterpret_cast<HI_U64 *>(&phy_addr),
                            reinterpret_cast<void **>(&vir_addr), total_size);
  if (HI_SUCCESS != ret) {
    LOGE("Error,Malloc memory failed! ");
    return RET_ERROR;
  }
  memset(vir_addr, 0, total_size);
  NnieMemFlushCache(phy_addr, reinterpret_cast<void *>(vir_addr), total_size);
  nnie_param->task_buf_.u32Size = total_task_buf_size;
  nnie_param->task_buf_.u64PhyAddr = phy_addr;
  nnie_param->task_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr;
  nnie_param->tmp_buf_.u32Size = tmp_buf_size_;
  nnie_param->tmp_buf_.u64PhyAddr = phy_addr + total_task_buf_size;
  nnie_param->tmp_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr + total_task_buf_size;
  // Point each segment's forward-control struct at its slice of the task buffer.
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_with_bbox_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set;
      nnie_param->forward_with_bbox_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i];
    } else if (SVP_NNIE_NET_TYPE_CNN == nnie_param->model_->astSeg[i].enNetType ||
               SVP_NNIE_NET_TYPE_RECURRENT == nnie_param->model_->astSeg[i].enNetType) {
      nnie_param->forward_ctrl_[i].stTmpBuf = nnie_param->tmp_buf_;
      nnie_param->forward_ctrl_[i].stTskBuf.u64PhyAddr = nnie_param->task_buf_.u64PhyAddr + off_set;
      nnie_param->forward_ctrl_[i].stTskBuf.u64VirAddr = nnie_param->task_buf_.u64VirAddr + off_set;
      nnie_param->forward_ctrl_[i].stTskBuf.u32Size = nnie_param->task_buf_size_[i];
    }
    off_set += nnie_param->task_buf_size_[i];
  }
  // Assign per-node blob addresses in the region behind the task/tmp buffers.
  phy_addr = phy_addr + total_task_buf_size + tmp_buf_size_;
  vir_addr = vir_addr + total_task_buf_size + tmp_buf_size_;
  for (i = 0; i < nnie_param->model_->u32NetSegNum; i++) {
    for (j = 0; j < nnie_param->model_->astSeg[i].u16SrcNum; j++) {
      if (j != 0) {
        phy_addr += blob_size[i].src_size_[j - 1];
        vir_addr += blob_size[i].src_size_[j - 1];
      }
      if (nnie_param->mem_cfg_.seg_[i].src_node_[j]) {
        // This input is fed by another segment's output: share that blob instead
        // of assigning fresh memory.
        if (!ConnectNnieInnerNode(nnie_param->model_->astSeg[i].astSrcNode[j].szName, nnie_param,
                                  &(nnie_param->seg_data_[i].src_[j]))) {
          LOGE("ConnectNnieInnerNode failed! ");
          return RET_ERROR;
        }
      } else {
        nnie_param->seg_data_[i].src_[j].u64PhyAddr = phy_addr;
        nnie_param->seg_data_[i].src_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
      }
    }
    // NOTE(review): uses j - 1 after the loop, so this relies on u16SrcNum >= 1
    // (and u16DstNum >= 1 below); j - 1 would underflow for an empty segment.
    phy_addr += blob_size[i].src_size_[j - 1];
    vir_addr += blob_size[i].src_size_[j - 1];
    for (j = 0; j < nnie_param->model_->astSeg[i].u16DstNum; j++) {
      if (j != 0) {
        phy_addr += blob_size[i].dst_size_[j - 1];
        vir_addr += blob_size[i].dst_size_[j - 1];
      }
      nnie_param->seg_data_[i].dst_[j].u64PhyAddr = phy_addr;
      nnie_param->seg_data_[i].dst_[j].u64VirAddr = (HI_U64)(HI_UL)vir_addr;
    }
    phy_addr += blob_size[i].dst_size_[j - 1];
    vir_addr += blob_size[i].dst_size_[j - 1];
  }
  // The RPN bbox blob occupies the tail of the allocation.
  if (has_roi) {
    nnie_param->rpn_bbox_.u64PhyAddr = phy_addr;
    nnie_param->rpn_bbox_.u64VirAddr = (HI_U64)((HI_UL)vir_addr);
  }
  return RET_OK;
}
static int NnieLoadModel(char *model_buf, int size, NnieModel *nnie_model) {
HI_S32 ret = HI_INVALID_VALUE;
HI_U64 phy_addr = 0;
HI_U8 *vir_addr = nullptr;
ret = NnieMemMalloc(std::string("NNIE_NNIE_MODEL").data(), nullptr, reinterpret_cast<HI_U64 *>(&phy_addr),
reinterpret_cast<void **>(&vir_addr), size);
if (HI_SUCCESS != ret) {
LOGE("Error,Malloc memory failed! ");
return RET_ERROR;
}
nnie_model->model_buf_.u32Size = (HI_U32)size;
nnie_model->model_buf_.u64PhyAddr = phy_addr;
nnie_model->model_buf_.u64VirAddr = (HI_U64)(HI_UL)vir_addr;
memcpy(vir_addr, model_buf, size);
ret = HI_MPI_SVP_NNIE_LoadModel(&nnie_model->model_buf_, &nnie_model->model_);
if (HI_SUCCESS != ret) {
NNIE_MEM_FREE(nnie_model->model_buf_.u64PhyAddr, nnie_model->model_buf_.u64VirAddr);
nnie_model->model_buf_.u32Size = 0;
LOGE("HI_MPI_SVP_NNIE_LoadModel failed!");
return RET_ERROR;
}
return RET_OK;
}
// Releases the MMZ buffer that backs a loaded WK model. Safe to call with nullptr and
// idempotent: both addresses are zeroed after the free, so a second call is a no-op.
static void NnieUnloadModel(NnieModel *nnie_model) {
  if (nnie_model == nullptr) {
    return;
  }
  const bool owns_buffer = (nnie_model->model_buf_.u64PhyAddr != 0) && (nnie_model->model_buf_.u64VirAddr != 0);
  if (owns_buffer) {
    NNIE_MEM_FREE(nnie_model->model_buf_.u64PhyAddr, nnie_model->model_buf_.u64VirAddr);
    nnie_model->model_buf_.u64PhyAddr = 0;
    nnie_model->model_buf_.u64VirAddr = 0;
  }
}
// Runs one CNN/RECURRENT segment. Flushes the task buffer and all destination blobs so
// the hardware sees coherent memory, issues HI_MPI_SVP_NNIE_Forward, polls until the job
// finishes (when `instant`), then flushes the destinations again before the CPU reads
// the results.
static int NnieForward(NnieParam *nnie_param, NnieDataIndex *input_data_idx, HI_BOOL instant) {
  HI_S32 ret = HI_SUCCESS;
  HI_U32 i, j;
  HI_BOOL finish = HI_FALSE;
  SVP_NNIE_HANDLE svp_nnie_handle = 0;
  HI_U32 total_step_num = 0;
  SVP_NNIE_FORWARD_CTRL_S *forward_handle = &nnie_param->forward_ctrl_[input_data_idx->seg_idx_];
  NnieSegData *seg_data = &nnie_param->seg_data_[input_data_idx->seg_idx_];
  NnieMemFlushCache(forward_handle->stTskBuf.u64PhyAddr,
                    NNIE_CONVERT_64BIT_ADDR(HI_VOID, forward_handle->stTskBuf.u64VirAddr),
                    forward_handle->stTskBuf.u32Size);
  for (i = 0; i < forward_handle->u32DstNum; i++) {
    if (SVP_BLOB_TYPE_SEQ_S32 == seg_data->dst_[i].enType) {
      // Sequence blobs: flushed size = sum of per-sample step counts * stride.
      // NOTE(review): total_step_num is not reset between blobs in this loop --
      // confirm intent for models with more than one SEQ output.
      for (j = 0; j < seg_data->dst_[i].u32Num; j++) {
        total_step_num += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, seg_data->dst_[i].unShape.stSeq.u64VirAddrStep) + j);
      }
      NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                        total_step_num * seg_data->dst_[i].u32Stride);
    } else {
      NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                        seg_data->dst_[i].u32Num * seg_data->dst_[i].unShape.stWhc.u32Chn *
                          seg_data->dst_[i].unShape.stWhc.u32Height * seg_data->dst_[i].u32Stride);
    }
  }
  ret = HI_MPI_SVP_NNIE_Forward(&svp_nnie_handle, seg_data->src_, nnie_param->model_, seg_data->dst_, forward_handle,
                                instant);
  if (HI_SUCCESS != ret) {
    LOGE("HI_MPI_SVP_NNIE_Forward failed!");
    return RET_ERROR;
  }
  if (instant) {
    // Poll (with a short sleep) until the hardware reports completion.
    while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT ==
           (ret = HI_MPI_SVP_NNIE_Query(forward_handle->enNnieId, svp_nnie_handle, &finish, HI_TRUE))) {
      usleep(kSleepUs);
    }
  }
  total_step_num = 0;
  // Flush destination blobs once more so the CPU reads the hardware-written results.
  for (i = 0; i < forward_handle->u32DstNum; i++) {
    if (SVP_BLOB_TYPE_SEQ_S32 == seg_data->dst_[i].enType) {
      for (j = 0; j < seg_data->dst_[i].u32Num; j++) {
        total_step_num += *(NNIE_CONVERT_64BIT_ADDR(HI_U32, seg_data->dst_[i].unShape.stSeq.u64VirAddrStep) + j);
      }
      NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                        total_step_num * seg_data->dst_[i].u32Stride);
    } else {
      NnieMemFlushCache(seg_data->dst_[i].u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, seg_data->dst_[i].u64VirAddr),
                        seg_data->dst_[i].u32Num * seg_data->dst_[i].unShape.stWhc.u32Chn *
                          seg_data->dst_[i].unShape.stWhc.u32Height * seg_data->dst_[i].u32Stride);
    }
  }
  return RET_OK;
}
// Runs one ROI-type segment, feeding the RPN proposal boxes in astBbox. Mirrors
// NnieForward: flush the task buffer and destination blobs, launch the job, poll for
// completion (when bInstant), then flush the destinations again for CPU visibility.
// Naming note: keeps the vendor-sample Hungarian style (pstNnieParam etc.).
static HI_S32 NNIE_ForwardWithBbox(NnieParam *pstNnieParam, NnieDataIndex *pstInputDataIdx, SVP_SRC_BLOB_S astBbox[],
                                   HI_BOOL bInstant) {
  HI_S32 ret = HI_SUCCESS;
  HI_BOOL finish = HI_FALSE;
  SVP_NNIE_HANDLE svp_nnie_handle = 0;
  HI_U32 total_step_num = 0;
  HI_U32 i, j;
  NnieMemFlushCache(pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u64PhyAddr,
                    NNIE_CONVERT_64BIT_ADDR(
                      HI_VOID, pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u64VirAddr),
                    pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].stTskBuf.u32Size);
  for (i = 0; i < pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].u32DstNum; i++) {
    if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].enType) {
      // Sequence blobs: flushed size = sum of per-sample step counts * stride.
      // NOTE(review): total_step_num accumulates across blobs here (see NnieForward).
      for (j = 0; j < pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num; j++) {
        total_step_num +=
          *(NNIE_CONVERT_64BIT_ADDR(
              HI_U32, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stSeq.u64VirAddrStep) +
            j);
      }
      NnieMemFlushCache(
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr,
        NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr),
        total_step_num * pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride);
    } else {
      NnieMemFlushCache(
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr,
        NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr),
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Chn *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Height *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride);
    }
  }
  ret =
    HI_MPI_SVP_NNIE_ForwardWithBbox(&svp_nnie_handle, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].src_, astBbox,
                                    pstNnieParam->model_, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_,
                                    &pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_], bInstant);
  if (HI_SUCCESS != ret) {
    LOGE("HI_MPI_SVP_NNIE_ForwardWithBbox failed!");
    return RET_ERROR;
  }
  if (bInstant) {
    // Poll until the hardware reports completion.
    while (HI_ERR_SVP_NNIE_QUERY_TIMEOUT ==
           (ret = HI_MPI_SVP_NNIE_Query(pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].enNnieId,
                                        svp_nnie_handle, &finish, HI_TRUE))) {
      usleep(kSleepUs);
      LOGE("HI_MPI_SVP_NNIE_Query Query timeout!");
    }
  }
  total_step_num = 0;
  // Flush destination blobs once more so the CPU reads the hardware-written results.
  for (i = 0; i < pstNnieParam->forward_with_bbox_ctrl_[pstInputDataIdx->seg_idx_].u32DstNum; i++) {
    if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].enType) {
      for (j = 0; j < pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num; j++) {
        total_step_num +=
          *(NNIE_CONVERT_64BIT_ADDR(
              HI_U32, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stSeq.u64VirAddrStep) +
            j);
      }
      NnieMemFlushCache(
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr,
        NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr),
        total_step_num * pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride);
    } else {
      NnieMemFlushCache(
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64PhyAddr,
        NNIE_CONVERT_64BIT_ADDR(HI_VOID, pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u64VirAddr),
        pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Num *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Chn *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].unShape.stWhc.u32Height *
          pstNnieParam->seg_data_[pstInputDataIdx->seg_idx_].dst_[i].u32Stride);
    }
  }
  return ret;
}
// Copies `num` rows of `width` bytes from the tightly-packed source buffer into the
// stride-aligned destination blob (stride >= width; the padding bytes are untouched).
// input_size must equal num * width (total payload bytes), otherwise RET_ERROR.
// src advances by width per row, dst by the blob stride.
int FillByUnsignedChar(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_U8 *src, HI_U8 *dst) {
  HI_U32 i;
  if (input_size != num * width) {
    LOGE("input size error:%d <-> %d.", input_size, num * width);
    return RET_ERROR;
  }
  for (i = 0; i < num; i++) {
    // Rows are contiguous in src, so each row is a single block copy instead of a
    // byte-by-byte loop.
    memcpy(dst, src, width);
    dst += stride;
    src += width;
  }
  return RET_OK;
}
// Quantizes a packed float buffer into the S32 destination blob (value * NNIE_QUANT_BASE),
// honoring the blob's byte stride. dst and dst_u8 must alias the same destination
// address; dst_u8 exists for byte-granular stride stepping.
// Returns RET_ERROR when input_size differs from num * width.
int FillByFloat(HI_U32 input_size, HI_U32 num, HI_U32 width, HI_U32 stride, HI_FLOAT *src, HI_S32 *dst, HI_U8 *dst_u8) {
  if (input_size != num * width) {
    LOGE("input size error:%d <-> %d.", input_size, num * width);
    return RET_ERROR;
  }
  for (HI_U32 row = 0; row < num; ++row) {
    for (HI_U32 col = 0; col < width; ++col) {
      dst[col] = (src[col] * NNIE_QUANT_BASE);
    }
    // Step the destination by its byte stride, then rebind the S32 view onto it.
    dst_u8 += stride;
    dst = reinterpret_cast<HI_S32 *>(dst_u8);
    src += width;
  }
  return RET_OK;
}
// Copies/quantizes one framework input tensor (nnie_cfg->data_ptr_) into the source blob
// selected by input_data_idx. `shape`/`size` describe the incoming tensor; its element
// count must match the blob geometry. SEQ blobs take float data quantized per time step;
// image blobs (U8/YVU420SP/YVU422SP) take raw bytes; every other blob takes float data
// quantized to S32. Flushes the blob cache afterwards so the hardware sees the data.
static int NnieFillSrcData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                           int size) {
  HI_U32 i, j, n, ret;
  HI_U32 height, width, channel, stride, dim;
  HI_U8 *input_addr_u8 = nullptr;
  HI_S32 *input_addr_s32 = nullptr;
  HI_U32 *step_addr_u32 = nullptr;
  HI_FLOAT *float_src_data = nullptr;
  HI_U8 *u8_src_data = nullptr;
  HI_U32 total_step_num = 0;
  HI_U32 input_size = 1;
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_].src_[input_data_idx->node_idx_];
  // Total element count of the incoming tensor.
  for (n = 0; n < (HI_U32)size; n++) {
    input_size *= shape[n];
  }
  input_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  input_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  float_src_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  u8_src_data = reinterpret_cast<unsigned char *>(nnie_cfg->data_ptr_);
  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    // Recurrent input: quantize dim floats per time step, stepping by the blob stride.
    step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
    dim = blob->unShape.stSeq.u32Dim;
    stride = blob->u32Stride;
    for (n = 0; n < blob->u32Num; n++) {
      total_step_num += *(step_addr_u32 + n);
    }
    if (input_size != total_step_num * dim) {
      LOGE("input size error:%d <-> %d.", input_size, total_step_num * dim);
      return RET_ERROR;
    }
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < *(step_addr_u32 + n); i++) {
        for (j = 0; j < dim; j++) {
          input_addr_s32[j] = (float_src_data[j] * NNIE_QUANT_BASE);
        }
        input_addr_u8 += stride;
        input_addr_s32 = reinterpret_cast<HI_S32 *>(input_addr_u8);
        float_src_data += dim;
      }
    }
    NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr), total_step_num * stride);
  } else {
    height = blob->unShape.stWhc.u32Height;
    width = blob->unShape.stWhc.u32Width;
    channel = blob->unShape.stWhc.u32Chn;
    stride = blob->u32Stride;
    if (SVP_BLOB_TYPE_YVU420SP == blob->enType) {
      // YVU420SP: chroma plane has half the vertical resolution.
      ret = FillByUnsignedChar(input_size, blob->u32Num * static_cast<HI_U32>(channel * height / 2), width, stride,
                               u8_src_data, input_addr_u8);
    } else if (SVP_BLOB_TYPE_YVU422SP == blob->enType) {
      ret = FillByUnsignedChar(input_size, blob->u32Num * height * 2, width, stride, u8_src_data, input_addr_u8);
    } else {
      if (SVP_BLOB_TYPE_U8 == blob->enType) {
        ret =
          FillByUnsignedChar(input_size, blob->u32Num * channel * height, width, stride, u8_src_data, input_addr_u8);
      } else {
        // Default: float input quantized to Q12 fixed point.
        ret = FillByFloat(input_size, blob->u32Num * channel * height, width, stride, float_src_data, input_addr_s32,
                          input_addr_u8);
      }
    }
    if (ret != RET_OK) {
      return ret;
    }
    NnieMemFlushCache(blob->u64PhyAddr, NNIE_CONVERT_64BIT_ADDR(HI_VOID, blob->u64VirAddr),
                      blob->u32Num * channel * height * stride);
  }
  return RET_OK;
}
// Dequantizes one destination blob of the segment that just ran (seg_idx_ - 1) into the
// caller's float buffer (nnie_cfg->data_ptr_), dividing each S32 value by
// NNIE_QUANT_BASE. Rejects image-typed outputs and validates that the requested tensor
// shape (`shape`/`size`) matches the blob geometry.
static int NnieGetDstData(NnieCfg *nnie_cfg, NnieParam *nnie_param, NnieDataIndex *input_data_idx, int64_t *shape,
                          int size) {
  HI_U32 i, j, n;
  HI_U32 height, width, channel, stride, dim;
  HI_U8 *output_addr_u8 = nullptr;
  HI_S32 *output_addr_s32 = nullptr;
  HI_U32 *step_addr_u32 = nullptr;
  HI_FLOAT *float_dst_data = nullptr;
  HI_U32 total_step_num = 0;
  HI_U32 input_num = 1;
  // seg_idx_ points one past the executed segment, hence the - 1.
  SVP_SRC_BLOB_S *blob = &nnie_param->seg_data_[input_data_idx->seg_idx_ - 1].dst_[input_data_idx->node_idx_];
  for (n = 0; n < (HI_U32)size; n++) {
    input_num *= shape[n];
  }
  // Image-typed blobs (U8 .. YVU422SP) are not valid network outputs here.
  if (SVP_BLOB_TYPE_U8 <= blob->enType && SVP_BLOB_TYPE_YVU422SP >= blob->enType) {
    LOGE("Nnie output type error");
    return RET_ERROR;
  }
  output_addr_u8 = NNIE_CONVERT_64BIT_ADDR(HI_U8, blob->u64VirAddr);
  output_addr_s32 = NNIE_CONVERT_64BIT_ADDR(HI_S32, blob->u64VirAddr);
  float_dst_data = reinterpret_cast<float *>(nnie_cfg->data_ptr_);
  if (SVP_BLOB_TYPE_SEQ_S32 == blob->enType) {
    // Recurrent output: dim values per time step, stepping by the blob stride.
    dim = blob->unShape.stSeq.u32Dim;
    stride = blob->u32Stride;
    step_addr_u32 = NNIE_CONVERT_64BIT_ADDR(HI_U32, blob->unShape.stSeq.u64VirAddrStep);
    for (n = 0; n < blob->u32Num; n++) {
      total_step_num += *(step_addr_u32 + n);
    }
    if (input_num != total_step_num * dim) {
      LOGE("input shape");
      return RET_ERROR;
    }
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < *(step_addr_u32 + n); i++) {
        for (j = 0; j < dim; j++) {
          float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
        }
        output_addr_u8 += stride;
        output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
        float_dst_data += dim;
      }
    }
  } else {
    height = blob->unShape.stWhc.u32Height;
    width = blob->unShape.stWhc.u32Width;
    channel = blob->unShape.stWhc.u32Chn;
    stride = blob->u32Stride;
    if (input_num != height * channel * width * blob->u32Num) {
      LOGE("output shape diff:%d<->%d.", input_num, height * channel * width * blob->u32Num);
      return RET_ERROR;
    }
    // Dequantize row by row; only width values per row are payload, the rest is stride
    // padding.
    for (n = 0; n < blob->u32Num; n++) {
      for (i = 0; i < channel * height; i++) {
        for (j = 0; j < width; j++) {
          float_dst_data[j] = (HI_FLOAT)output_addr_s32[j] / NNIE_QUANT_BASE;
        }
        output_addr_u8 += stride;
        output_addr_s32 = reinterpret_cast<HI_S32 *>(output_addr_u8);
        float_dst_data += width;
      }
    }
  }
  return RET_OK;
}
// Validates that the framework input tensor shape matches the WK model's input node and
// derives cfg_.max_input_num_ (batch) from the tensor's outer dimension.
// input_shape: shape from the framework; dim 0 is treated as N (batch / total steps).
// Returns RET_ERROR when the per-sample element count does not match the node, or when
// the time-step configuration is missing/inconsistent for recurrent (SEQ) inputs.
int CheckMsShapeN(NnieRunCfg *nnie_run_cfg, const std::vector<int64_t> &input_shape, const SVP_NNIE_NODE_S &nnie_node) {
  size_t ms_input_size = 1, i;
  // Element count of a single sample (all dims except N).
  for (i = 1; i < input_shape.size(); i++) {
    ms_input_size *= input_shape[i];
  }
  size_t nnie_input_size;
  if (SVP_BLOB_TYPE_SEQ_S32 == nnie_node.enType) {
    if (nnie_run_cfg->cfg_.step_ == 0) {
      LOGE("request time_step set! Please export NNIE_RUNTIME_CONFIG_PATH");
      return RET_ERROR;
    }
    if (ms_input_size != nnie_node.unShape.u32Dim) {
      LOGE("The input data does not meet the required size %d <-> %d.", static_cast<int>(ms_input_size),
           nnie_node.unShape.u32Dim);
      return RET_ERROR;
    }
    if ((input_shape[0] < static_cast<int>(nnie_run_cfg->cfg_.step_)) ||
        (input_shape[0] % nnie_run_cfg->cfg_.step_ != 0)) {
      // Hard failure (RET_ERROR follows), so log as an error like the other paths.
      LOGE("The num value(%d) of input must be an integer multiple of time_step(%d)", static_cast<int>(input_shape[0]),
           nnie_run_cfg->cfg_.step_);
      return RET_ERROR;
    }
    nnie_input_size = nnie_node.unShape.u32Dim * nnie_run_cfg->cfg_.step_;
  } else {
    auto height = nnie_node.unShape.stWhc.u32Height;
    auto width = nnie_node.unShape.stWhc.u32Width;
    auto channel = nnie_node.unShape.stWhc.u32Chn;
    if (SVP_BLOB_TYPE_YVU420SP == nnie_node.enType) {
      // YVU420SP: chroma plane has half the vertical resolution.
      nnie_input_size = static_cast<HI_U32>(channel * height / 2) * width;
    } else if (SVP_BLOB_TYPE_YVU422SP == nnie_node.enType) {
      nnie_input_size = height * 2 * width;
    } else {
      nnie_input_size = channel * height * width;
    }
    if (ms_input_size != nnie_input_size) {
      LOGE("The input data does not meet the required size %d <-> %d.", static_cast<int>(ms_input_size),
           static_cast<int>(nnie_input_size));
      return RET_ERROR;
    }
  }
  nnie_run_cfg->cfg_.max_input_num_ = (ms_input_size * input_shape[0]) / nnie_input_size;
  fprintf(stdout, "The input num is %d.\n", nnie_run_cfg->cfg_.max_input_num_);
  return RET_OK;
}
// Finds the index of the tensor in inputs[0, input_size) whose name matches the WK node
// name `name`. Converter-added "_pre"/"_post" suffixes on tensor names are stripped
// before comparison. Pass 1 requires an exact match; if none is found, pass 2 accepts a
// prefix match (the stripped tensor name is a prefix of the node name).
// Returns input_size when no tensor matches.
size_t GetFillIndex(const std::vector<mindspore::MSTensor> &inputs, size_t input_size, const HI_CHAR *name) {
  // Strips a trailing "_pre" or "_post" suffix, if present (shared by both passes).
  auto strip_suffix = [](std::string input_str) {
    if (input_str.length() > 4) {
      if (input_str.substr(input_str.length() - 4) == "_pre") {
        input_str = input_str.substr(0, input_str.length() - 4);
      } else if (input_str.length() > 5) {
        if (input_str.substr(input_str.length() - 5) == "_post") {
          input_str = input_str.substr(0, input_str.length() - 5);
        }
      }
    }
    return input_str;
  };
  size_t j;
  // Pass 1: exact name match.
  for (j = 0; j < input_size; j++) {
    auto input_str = strip_suffix(inputs[j].Name());
    if (strcmp(input_str.c_str(), name) == 0) {
      break;
    }
  }
  if (j == input_size) {
    // Pass 2: fall back to prefix match (tensor names may be truncated).
    for (j = 0; j < input_size; j++) {
      auto input_str = strip_suffix(inputs[j].Name());
      if (strncmp(input_str.c_str(), name, input_str.length()) == 0) {
        break;
      }
    }
  }
  return j;
}
// Builds a complete NNIE session from a serialized WK model:
//   - loads the model into MMZ memory,
//   - matches the framework input tensor against the model's first input node and
//     derives the batch size,
//   - validates the max_roi_num configuration against ROI-typed segments,
//   - for recurrent models, allocates and fills the per-segment time-step buffers,
//   - allocates all runtime buffers (NnieParamInit).
// On failure the caller is expected to run NnieCommDelete for cleanup.
int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size,
                   const std::vector<mindspore::MSTensor> &inputs) {
  HI_U8 *vir_addr = nullptr;
  HI_U32 seg_num;
  HI_U32 off_set;
  HI_U32 total_size;
  HI_U32 i, j;
  HI_S32 ret = HI_SUCCESS;
  NnieModel *model = &nnie_run_cfg->model_;
  NnieParam *param = &nnie_run_cfg->param_;
  NnieCfg *cfg = &nnie_run_cfg->cfg_;
  HI_U32 step = cfg->step_;  // time step
  ret = NnieLoadModel(model_buf, size, model);
  if (ret != RET_OK) {
    LOGE("NnieLoadModel failed!");
    return RET_ERROR;
  }
  // The last "input" is expected to be non-data (hence size - 1 below).
  if (inputs.size() <= 1) {
    LOGE("inputs size need greater than 1!");
    return RET_ERROR;
  }
  if (inputs[0].Shape().size() <= 1) {
    LOGE("input shape size need greater than 1!");
    return RET_ERROR;
  }
  // Locate the tensor that feeds the model's first input node; fall back to index 0
  // when no name matches.
  j = GetFillIndex(inputs, inputs.size() - 1, model->model_.astSeg[0].astSrcNode[0].szName);
  if (j == (inputs.size() - 1)) {
    j = 0;
    // LOGW("input tensor name(%s) can't match wk node name(%s).", inputs[0].Name().c_str(),
    //     model->model_.astSeg[0].astSrcNode[0].szName);
  }
  if (CheckMsShapeN(nnie_run_cfg, inputs[j].Shape(), model->model_.astSeg[0].astSrcNode[0]) != RET_OK) {
    return RET_ERROR;
  }
  // max_roi_num must be set iff the model contains a ROI segment.
  bool has_roi = false;
  for (i = 0; i < model->model_.u32NetSegNum; i++) {
    if (SVP_NNIE_NET_TYPE_ROI == model->model_.astSeg[i].enNetType) {
      has_roi = true;
    }
  }
  if (has_roi) {
    if (cfg->max_roi_num_ == 0) {
      LOGE("NNIE_RUNTIME_CONFIG_PATH: max_roi_num(0) should greater than 0!");
      return RET_ERROR;
    }
  } else {
    if (cfg->max_roi_num_ != 0) {
      LOGW("NNIE_RUNTIME_CONFIG_PATH: max_roi_num should euqal to 0!");
      cfg->max_roi_num_ = 0;
    }
  }
  if (model->model_.astSeg[0].enNetType == SVP_NNIE_NET_TYPE_RECURRENT) {
    if (step == 0) {
      LOGE("request time_step set! No NNIE_RUNTIME_CONFIG_PATH, please export NNIE_RUNTIME_CONFIG_PATH");
      return RET_ERROR;
    }
    // Allocate two step arrays (input x_t, output h_t) per segment and fill every
    // entry with the configured time step.
    seg_num = model->model_.u32NetSegNum;
    total_size = cfg->max_input_num_ * sizeof(HI_S32) * seg_num * 2;
    ret = NnieMemMalloc(std::string("SVP_NNIE_STEP").data(), nullptr,
                        reinterpret_cast<HI_U64 *>(&param->step_buf_.u64PhyAddr), reinterpret_cast<void **>(&vir_addr),
                        total_size);
    if (HI_SUCCESS != ret) {
      LOGE("Malloc memory failed:");
      return RET_ERROR;
    }
    param->step_buf_.u64VirAddr = (HI_U64)((HI_UL)vir_addr);
    for (i = 0; i < seg_num * NNIE_EACH_SEG_STEP_ADDR_NUM; i++) {
      cfg->step_vir_addr_[i] = param->step_buf_.u64VirAddr + i * cfg->max_input_num_ * sizeof(HI_S32);
    }
    for (i = 0; i < seg_num; i++) {
      off_set = i * NNIE_EACH_SEG_STEP_ADDR_NUM;
      for (j = 0; j < cfg->max_input_num_; j++) {
        *(reinterpret_cast<HI_U32 *>(static_cast<HI_UL>(cfg->step_vir_addr_[off_set])) + j) =
          step;  // step of input x_t
        *(reinterpret_cast<HI_U32 *>(static_cast<HI_UL>(cfg->step_vir_addr_[off_set + 1])) + j) =
          step;  // step of output h_t
      }
    }
  }
  param->model_ = &(model->model_);
  ret = NnieParamInit(cfg, param);
  if (ret != RET_OK) {
    LOGE("NnieParamInit failed!");
    return RET_ERROR;
  }
  // Start execution at the first segment.
  nnie_run_cfg->run_idx_.seg_idx_ = 0;
  return RET_OK;
}
// Tears down a NNIE session: releases the runtime buffers first, then unloads the model
// those buffers referenced (order matters).
void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model) {
  NnieParamRelease(pstNnieParamm);
  NnieUnloadModel(nnie_model);
}
// Copies one output blob of the most recently executed segment into `data`,
// dequantized to float. `shape`/`size` describe the expected tensor geometry and
// `tensor_index` selects the destination node. run_idx_.seg_idx_ must already point one
// past the executed segment.
int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index) {
  if (nnie_run_cfg->run_idx_.seg_idx_ <= 0) {
    LOGE("output seg index error.");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.node_idx_ = tensor_index;
  nnie_run_cfg->cfg_.data_ptr_ = data;
  if (NnieGetDstData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size) != RET_OK) {
    LOGE("NnieGetDstData failed!");
    return RET_ERROR;
  }
  return RET_OK;
}
// Validates the framework dtype against the NNIE blob type of the selected source node
// of the current segment, then quantizes/copies the data into the node's blob.
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size,
                     int tensor_index) {
  HI_U32 seg_idx = nnie_run_cfg->run_idx_.seg_idx_;
  if (tensor_index >= nnie_run_cfg->param_.model_->astSeg[seg_idx].u16SrcNum) {
    LOGE("Nnie input node index error!");
    return RET_ERROR;
  }
  SVP_BLOB_TYPE_E src_type = nnie_run_cfg->param_.seg_data_[seg_idx].src_[tensor_index].enType;
  // Image-style blobs (U8 .. YVU422SP) take 8-bit data; everything else expects float32.
  const bool image_blob = (SVP_BLOB_TYPE_U8 <= src_type && src_type <= SVP_BLOB_TYPE_YVU422SP);
  const bool dtype_ok = image_blob
                          ? (dtype == DataType::kNumberTypeUInt8 || dtype == DataType::kNumberTypeInt8)
                          : (dtype == DataType::kNumberTypeFloat32);
  if (!dtype_ok) {
    LOGE("Nnie input node type error!");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.node_idx_ = tensor_index;
  nnie_run_cfg->cfg_.data_ptr_ = data;
  if (NnieFillSrcData(&nnie_run_cfg->cfg_, &nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, shape, size) != RET_OK) {
    LOGE("NnieFillSrcData failed!");
    return RET_ERROR;
  }
  return RET_OK;
}
// Runs the current segment (run_idx_.seg_idx_). run_box selects the ROI
// (forward-with-bbox) path, which feeds the rpn_bbox_ proposal blob. On success the
// segment cursor advances to the next segment.
int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box) {
  HI_U32 seg = nnie_run_cfg->run_idx_.seg_idx_;
  if (seg >= nnie_run_cfg->param_.model_->u32NetSegNum) {
    LOGE("seg num err!\n");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.node_idx_ = 0;
  HI_U32 status;
  if (run_box) {
    status =
      NNIE_ForwardWithBbox(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, &nnie_run_cfg->param_.rpn_bbox_, HI_TRUE);
  } else {
    status = NnieForward(&nnie_run_cfg->param_, &nnie_run_cfg->run_idx_, HI_TRUE);
  }
  if (status != HI_SUCCESS) {
    LOGE("NnieForward failed!");
    return RET_ERROR;
  }
  nnie_run_cfg->run_idx_.seg_idx_ = seg + 1;
  return RET_OK;
}
} // namespace nnie
} // namespace mindspore

View File

@ -0,0 +1,115 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_
#include <iostream>
#include <string>
#include <vector>
#include "include/api/types.h"
#include "include/mpi_vb.h"
#include "include/hi_comm_svp.h"
#include "include/hi_nnie.h"
#include "include/mpi_nnie.h"
#include "include/ir/dtype/type_id.h"
namespace mindspore {
namespace nnie {
// Alignment helpers: round u32Num up to the next multiple of 16/32 bytes.
// The macro argument is parenthesized so expressions such as NNIE_ALIGN16(a + b) or
// NNIE_ALIGN16(x << 2) expand with the intended precedence.
#define NNIE_ALIGN_16 16
#define NNIE_ALIGN16(u32Num) (((u32Num) + NNIE_ALIGN_16 - 1) / NNIE_ALIGN_16 * NNIE_ALIGN_16)
#define NNIE_ALIGN_32 32
#define NNIE_ALIGN32(u32Num) (((u32Num) + NNIE_ALIGN_32 - 1) / NNIE_ALIGN_32 * NNIE_ALIGN_32)
// Reinterprets a 64-bit integer address (as stored in SVP blobs) as a typed pointer.
#define NNIE_CONVERT_64BIT_ADDR(Type, Addr) reinterpret_cast<Type *>((HI_UL)(Addr))
// Fixed-point scale used by NNIE S32 blobs (Q12: fixed = float * 4096).
#define NNIE_QUANT_BASE 4096
// x_min, y_min, x_max, y_max per ROI box.
#define NNIE_COORDI_NUM 4
// Each recurrent segment keeps two step arrays: input x_t and output h_t.
#define NNIE_EACH_SEG_STEP_ADDR_NUM 2
#define NNIE_REPORT_NAME_LENGTH 64
// A loaded WK model plus the MMZ buffer holding its serialized form.
typedef struct {
  SVP_NNIE_MODEL_S model_;
  SVP_MEM_INFO_S model_buf_;  // store Model file
} NnieModel;
// Input/output blobs of one network segment.
typedef struct {
  SVP_SRC_BLOB_S src_[SVP_NNIE_MAX_INPUT_NUM];
  SVP_DST_BLOB_S dst_[SVP_NNIE_MAX_OUTPUT_NUM];
} NnieSegData;
// Per-segment flags marking nodes whose blobs are shared with another segment
// (inner connections) instead of owning fresh memory.
typedef struct {
  bool src_node_[SVP_NNIE_MAX_INPUT_NUM];
  bool dst_node_[SVP_NNIE_MAX_OUTPUT_NUM];
} NNIEMemSegInfo;
typedef struct {
  NNIEMemSegInfo seg_[SVP_NNIE_MAX_NET_SEG_NUM];
} NNIEMemCfg;
// All runtime state for one loaded model: task/tmp buffers, per-segment blobs and
// forward-control structs, plus the shared RPN bbox blob for ROI segments.
typedef struct {
  SVP_NNIE_MODEL_S *model_;
  HI_U32 task_buf_size_[SVP_NNIE_MAX_NET_SEG_NUM];
  SVP_MEM_INFO_S task_buf_;
  SVP_MEM_INFO_S tmp_buf_;
  SVP_MEM_INFO_S step_buf_;  // store Lstm step info
  SVP_SRC_BLOB_S rpn_bbox_;
  NnieSegData seg_data_[SVP_NNIE_MAX_NET_SEG_NUM];  // each seg's input and output blob
  SVP_NNIE_FORWARD_CTRL_S forward_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
  SVP_NNIE_FORWARD_WITHBBOX_CTRL_S forward_with_bbox_ctrl_[SVP_NNIE_MAX_NET_SEG_NUM];
  NNIEMemCfg mem_cfg_;
} NnieParam;
// User-supplied runtime limits plus the pointer to the tensor data currently being
// filled or read back.
typedef struct {
  HI_VOID *data_ptr_;
  HI_U32 max_input_num_;
  HI_U32 max_roi_num_;
  HI_U32 step_;
  HI_U64 step_vir_addr_[NNIE_EACH_SEG_STEP_ADDR_NUM *
                        SVP_NNIE_MAX_NET_SEG_NUM];  // virtual addr of LSTM's or RNN's step buffer
  SVP_NNIE_ID_E nnie_core_id_[SVP_NNIE_MAX_NET_SEG_NUM];
} NnieCfg;
// Cursor identifying which segment/node an operation applies to.
typedef struct {
  HI_U32 seg_idx_;
  HI_U32 node_idx_;
} NnieDataIndex;
// Per-segment byte sizes of every input/output blob (filled during buffer sizing).
typedef struct {
  HI_U32 src_size_[SVP_NNIE_MAX_INPUT_NUM];
  HI_U32 dst_size_[SVP_NNIE_MAX_OUTPUT_NUM];
} NnieBlobSize;
// Aggregated session state passed between the manager and the common helpers.
typedef struct {
  NnieModel model_;
  NnieParam param_;
  NnieCfg cfg_;
  NnieDataIndex run_idx_;
} NnieRunCfg;
// Loads a WK model, validates the framework inputs and allocates all runtime buffers.
int NnieCommCreate(NnieRunCfg *nnie_run_cfg, char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
// Returns the index of the tensor matching the given WK node name (input_size if absent).
size_t GetFillIndex(const std::vector<mindspore::MSTensor> &inputs, size_t input_size, const HI_CHAR *name);
// Releases runtime buffers, then unloads the model.
void NnieCommDelete(NnieParam *pstNnieParamm, NnieModel *nnie_model);
// Executes the current segment; run_box selects the ROI (forward-with-bbox) path.
int NnieCommRun(NnieRunCfg *nnie_run_cfg, bool run_box);
// Fills one input blob from framework data after dtype validation.
int NnieCommFillData(NnieRunCfg *nnie_run_cfg, void *data, mindspore::DataType dtype, int64_t *shape, int size, int id);
// Reads one output blob of the previously run segment back as float data.
int NnieCommGetOutputData(NnieRunCfg *nnie_run_cfg, float *data, int64_t *shape, int size, int tensor_index);
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_COMMON_H_

View File

@ -0,0 +1,222 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <cstring>
#include "src/nnie_manager.h"
#include "src/nnie_common.h"
#include "src/nnie_print.h"
#include "src/nnie_memory.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
constexpr int kNumInput2 = 2;
namespace mindspore {
namespace nnie {
// Initializes the runtime configuration: max ROI count, LSTM time step and the NNIE
// core each segment runs on. core_id entries must be valid SVP_NNIE_ID_E values.
// Returns RET_ERROR when a core id is out of range.
int NNIEManager::CfgInit(int max_roi_num, int step, const std::vector<int> &core_id) {
  memset(&nnie_cfg_, 0, sizeof(NnieRunCfg));
  nnie_cfg_.cfg_.max_roi_num_ = max_roi_num;
  nnie_cfg_.cfg_.step_ = step;
  for (size_t i = 0; i < SVP_NNIE_MAX_NET_SEG_NUM && i < core_id.size(); i++) {
    if (core_id[i] < SVP_NNIE_ID_BUTT) {
      nnie_cfg_.cfg_.nnie_core_id_[i] = (SVP_NNIE_ID_E)core_id[i];
    } else {
      LOGE("nnie core id too big.\n");
      return RET_ERROR;
    }
  }
  return RET_OK;
}
// Overrides the maximum batch size the runtime buffers will be sized for.
void NNIEManager::SetInputNum(int max_input_num) {
  nnie_cfg_.cfg_.max_input_num_ = max_input_num;
}
// Creates the NNIE session from a serialized model buffer. On failure every
// partially-created resource is torn down before returning RET_ERROR.
int NNIEManager::Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs) {
  if (NnieCommCreate(&nnie_cfg_, model_buf, size, inputs) == RET_OK) {
    return RET_OK;
  }
  NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
  return RET_ERROR;
}
// Executes network segment seg_id and copies its outputs into `outputs`.
// ROI-typed segments take the forward-with-bbox path.
int NNIEManager::Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
                     const std::vector<std::vector<int64_t>> &outputs_shape) {
  nnie_cfg_.run_idx_.seg_idx_ = seg_id;
  const bool run_box = (nnie_cfg_.param_.model_->astSeg[seg_id].enNetType == SVP_NNIE_NET_TYPE_ROI);
  if (NnieCommRun(&nnie_cfg_, run_box)) {
    LOGE("Nnie Run Fail!");
    return RET_ERROR;
  }
  if (GetOutputData(outputs, outputs_shape, run_box)) {
    LOGE("Get Output Data Fail!");
    return RET_ERROR;
  }
  return RET_OK;
}
// Frees all NNIE runtime resources owned by this manager (buffers + loaded model).
void NNIEManager::Release() {
  // NniePrintReportResult(&nnie_cfg_.param_);
  NnieCommDelete(&nnie_cfg_.param_, &nnie_cfg_.model_);
}
// Copies every output blob of the segment that just ran into the framework tensors.
// For ROI runs the output shapes are first prepended with the actual ROI count.
// Tensors are matched to WK destination nodes by name (GetFillIndex); when no name
// matches, the node's own index is used as a positional fallback.
int NNIEManager::GetOutputData(std::vector<mindspore::MSTensor> *outputs,
                               const std::vector<std::vector<int64_t>> &outputs_shape, bool run_box) {
  // NOTE(review): int is used for indices/sizes while the tensor count and
  // GetFillIndex are size_t -- fine for realistic output counts, but worth tidying.
  int i, j, output_size = outputs->size();
  if (output_size != nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum) {
    LOGE("seg%d: %d output tensors are required, but there are %d outputs.", nnie_cfg_.run_idx_.seg_idx_ - 1,
         nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_ - 1].u16DstNum, output_size);
    return RET_ERROR;
  }
  if (run_box) {
    // ROI run: the leading dimension of every output is the ROI count of the run.
    for (i = 0; i < output_size; i++) {
      auto input_data_type = (*outputs)[i].DataType();
      if (input_data_type == DataType::kNumberTypeFloat32) {
        auto ptr_shape = outputs_shape[i];
        int max_roi_num = nnie_cfg_.param_.seg_data_[nnie_cfg_.run_idx_.seg_idx_ - 1].dst_[0].u32Num;
        ptr_shape.insert(ptr_shape.begin(), max_roi_num);
        (*outputs)[i].SetShape(ptr_shape);
      } else {
        LOGE("Unsupported DataType!");
        return RET_ERROR;
      }
    }
  }
  HI_U32 seg_idx = nnie_cfg_.run_idx_.seg_idx_ - 1;
  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_idx].u16DstNum; i++) {
    // Nodes wired to another segment are consumed internally; nothing to copy out.
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_idx].dst_node_[i]) {
      continue;
    }
    j = GetFillIndex(*outputs, output_size, nnie_cfg_.param_.model_->astSeg[seg_idx].astDstNode[i].szName);
    if (j == output_size) {
      // Positional fallback when the tensor name does not match the WK node name.
      j = i;
      // LOGW("output tensor name(%s) can't match wk node name(%s).", (*outputs)[j].Name().c_str(),
      //     nnie_cfg_.param_.model_->astSeg[seg_idx].astDstNode[i].szName);
    }
    auto input_data_type = (*outputs)[j].DataType();
    if (input_data_type == DataType::kNumberTypeFloat32) {
      auto ptr_shape = (*outputs)[j].Shape();
      auto ptr = reinterpret_cast<float *>((*outputs)[j].MutableData());
      if (NnieCommGetOutputData(&nnie_cfg_, ptr, ptr_shape.data(), ptr_shape.size(), i) != RET_OK) {
        return RET_ERROR;
      }
    } else {
      LOGE("Unsupported DataType!");
      return RET_ERROR;
    }
  }
  return RET_OK;
}
// Quantizes the float "proposal" tensor (`input`, expected shape
// [roi_num, NNIE_COORDI_NUM]) into the fixed-point rpn_bbox blob and flushes
// the cache so the NNIE hardware sees the data.
// Returns RET_OK on success, RET_ERROR on shape/ROI-count/data errors.
int NNIEManager::FillRoiPooling(mindspore::MSTensor *input) {
  auto roi_shape = input->Shape();
  // Fix: the original indexed roi_shape[1] without checking the rank first,
  // reading out of bounds for a 0-D/1-D tensor.
  if (roi_shape.size() < 2 || roi_shape[1] != NNIE_COORDI_NUM) {
    LOGE("Roi shape err!");
    return RET_ERROR;
  }
  if (roi_shape[0] > static_cast<int64_t>(nnie_cfg_.cfg_.max_roi_num_)) {
    LOGE("NNIE_RUNTIME_CONFIG_PATH: The maximum [max_roi_num] value set is less than the actual value: %d < %d.",
         nnie_cfg_.cfg_.max_roi_num_, static_cast<int>(roi_shape[0]));
    return RET_ERROR;
  }
  auto float_src_data = reinterpret_cast<float *>(input->MutableData());
  // Fix: guard against a tensor with no backing buffer before writing.
  if (float_src_data == nullptr) {
    LOGE("Roi data is nullptr!");
    return RET_ERROR;
  }
  nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height = roi_shape[0];
  HI_U32 dst_stride = nnie_cfg_.param_.rpn_bbox_.u32Stride;
  auto proposal_result = NNIE_CONVERT_64BIT_ADDR(HI_S32, nnie_cfg_.param_.rpn_bbox_.u64VirAddr);
  // Quantize each (x0, y0, x1, y1) ROI row into the strided destination blob.
  for (size_t j = 0; j < nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height; j++) {
    proposal_result[dst_stride / sizeof(HI_U32) * j] = *(float_src_data++) * NNIE_QUANT_BASE;
    proposal_result[dst_stride / sizeof(HI_U32) * j + 1] = *(float_src_data++) * NNIE_QUANT_BASE;
    proposal_result[dst_stride / sizeof(HI_U32) * j + 2] = *(float_src_data++) * NNIE_QUANT_BASE;
    proposal_result[dst_stride / sizeof(HI_U32) * j + 3] = *(float_src_data++) * NNIE_QUANT_BASE;
  }
  NnieMemFlushCache(nnie_cfg_.param_.rpn_bbox_.u64PhyAddr,
                    NNIE_CONVERT_64BIT_ADDR(HI_VOID, nnie_cfg_.param_.rpn_bbox_.u64VirAddr),
                    dst_stride * nnie_cfg_.param_.rpn_bbox_.unShape.stWhc.u32Height);
  return RET_OK;
}
// Copies the caller's input tensors into the WK src blobs of segment
// `seg_id`. For ROI segments the special "proposal" tensor is quantized into
// the rpn_bbox blob instead of a src node. The last entry of `inputs` is not
// treated as a WK src node (matching the "input_size - 1" bounds below).
// Returns RET_OK on success, RET_ERROR on any validation/copy failure.
int NNIEManager::FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id) {
  bool run_box = false;
  size_t i, j;
  size_t input_size = inputs->size();
  // Fix: with an empty input vector, "input_size - 1" below would wrap to
  // SIZE_MAX and over-run the vector; reject it up front.
  if (input_size == 0) {
    LOGE("Input Size Err!");
    return RET_ERROR;
  }
  if (seg_id >= nnie_cfg_.param_.model_->u32NetSegNum) {
    LOGE("seg num err!");
    return RET_ERROR;
  }
  nnie_cfg_.run_idx_.seg_idx_ = seg_id;
  if (nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_].enNetType == SVP_NNIE_NET_TYPE_ROI) {
    run_box = true;
    for (i = 0; i < (input_size - 1); i++) {
      if ((*inputs)[i].Name() == "proposal") {
        // Fix: the original discarded FillRoiPooling's status and would run
        // the ROI segment on stale/garbage proposal data after a failure.
        if (FillRoiPooling(&(*inputs)[i]) != RET_OK) {
          return RET_ERROR;
        }
        break;
      }
    }
    // Loop fell through without `break`: no "proposal" tensor was supplied.
    if (i == (input_size - 1)) {
      LOGE("Can't find proposal out!");
      return RET_ERROR;
    }
  } else if ((input_size < kNumInput2) ||
             (input_size - 1) != nnie_cfg_.param_.model_->astSeg[nnie_cfg_.run_idx_.seg_idx_].u16SrcNum) {
    LOGE("Input Size Err!");
    return RET_ERROR;
  }
  for (i = 0; i < nnie_cfg_.param_.model_->astSeg[seg_id].u16SrcNum; i++) {
    // Nodes already wired through the shared-memory config need no copy.
    if (nnie_cfg_.param_.mem_cfg_.seg_[seg_id].src_node_[i]) {
      continue;
    }
    j = GetFillIndex(*inputs, input_size - 1, nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
    if (j == (input_size - 1)) {
      if (run_box && (*inputs)[i].Name() == "proposal") {
        continue;
      } else {
        // No tensor name matched this WK node: fall back to position i.
        j = i;
        // LOGW("input tensor name(%s) can't match wk node name(%s).", (*inputs)[i].Name().c_str(),
        //      nnie_cfg_.param_.model_->astSeg[seg_id].astSrcNode[i].szName);
      }
    }
    auto input_data_type = (*inputs)[j].DataType();
    if ((input_data_type == DataType::kNumberTypeFloat32) || (input_data_type == DataType::kNumberTypeUInt8) ||
        (input_data_type == DataType::kNumberTypeInt8)) {
      auto ptr_shape = (*inputs)[j].Shape();
      if (NnieCommFillData(&nnie_cfg_, (*inputs)[j].MutableData(), input_data_type, ptr_shape.data(), ptr_shape.size(),
                           i) != RET_OK) {
        LOGE("FillData failed!");
        return RET_ERROR;
      }
    } else {
      LOGE("Unsupported DataType!");
      return RET_ERROR;
    }
  }
  return RET_OK;
}
} // namespace nnie
} // namespace mindspore

View File

@ -0,0 +1,62 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_
#include <vector>
#include "include/errorcode.h"
#include "include/api/types.h"
#include "src/nnie_common.h"
namespace mindspore {
namespace nnie {
// Singleton facade over the HiSilicon NNIE runtime: loads a WK model,
// feeds input tensors, runs network segments and reads back their outputs.
class NNIEManager {
 public:
  // Process-wide instance (function-local static; thread-safe init in C++11).
  static NNIEManager *GetInstance() {
    static NNIEManager manager;
    return &manager;
  }
  NNIEManager() {}
  ~NNIEManager() {}
  // Loads the WK model from `model_buf` (`size` bytes) and binds `inputs`.
  int Init(char *model_buf, int size, const std::vector<mindspore::MSTensor> &inputs);
  // Applies runtime limits: max ROI count, time step and NNIE core binding.
  int CfgInit(int max_roi_num, int step, const std::vector<int> &core_id);
  void SetInputNum(int max_input_num);
  // Copies `inputs` into the WK src blobs of segment `seg_id`.
  int FillData(std::vector<mindspore::MSTensor> *inputs, unsigned int seg_id);
  // Runs segment `seg_id` and copies its dst blobs into `outputs`.
  int Run(std::vector<mindspore::MSTensor> *outputs, unsigned int seg_id,
          const std::vector<std::vector<int64_t>> &outputs_shape);
  // Releases all NNIE resources owned by the manager.
  void Release();

 private:
  int GetOutputData(std::vector<mindspore::MSTensor> *outputs, const std::vector<std::vector<int64_t>> &outputs_shape,
                    bool run_box = false);
  // Quantizes the float "proposal" tensor into the rpn_bbox blob.
  int FillRoiPooling(mindspore::MSTensor *input);
  char *wk_model_ = nullptr;  // raw WK model buffer (ownership unclear from here — confirm)
  int model_size_ = 0;        // size of wk_model_ in bytes
  NnieRunCfg nnie_cfg_;       // aggregated NNIE runtime state
};
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MANAGER_H_

View File

@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/nnie_memory.h"
#include "include/hi_common.h"
#include "include/mpi_sys.h"
namespace mindspore {
namespace nnie {
// Allocates `size` bytes of uncached MMZ memory; returns both the physical
// address (*pu_phy_addr) and the CPU-visible mapping (*ppv_vir_addr).
// Thin wrapper over HI_MPI_SYS_MmzAlloc; returns its status code.
HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size) {
  return HI_MPI_SYS_MmzAlloc(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
}
// Same as NnieMemMalloc but returns CPU-cached memory; callers must flush
// with NnieMemFlushCache before handing the buffer to the NNIE hardware.
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr,
                           HI_U32 size) {
  return HI_MPI_SYS_MmzAlloc_Cached(pu_phy_addr, ppv_vir_addr, mmb, zone, size);
}
// Flushes the CPU cache for a cached MMZ buffer so device reads see the
// latest CPU writes. Wrapper over HI_MPI_SYS_MmzFlushCache.
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size) {
  return HI_MPI_SYS_MmzFlushCache(phy_addr, pv_vir_addr, size);
}
} // namespace nnie
} // namespace mindspore

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "include/hi_common.h"
#include "include/hi_debug.h"
#include "include/hi_comm_svp.h"
#include "include/hi_nnie.h"
#include "include/mpi_nnie.h"
#include "include/mpi_sys.h"
namespace mindspore {
namespace nnie {
// Frees an MMZ buffer allocated with NnieMemMalloc/NnieMemMallocCached, then
// zeroes both handles so a repeated invocation is a no-op. The free only
// happens when both the physical and the virtual handle are non-zero.
#define NNIE_MEM_FREE(phy, vir)                                                     \
  do {                                                                              \
    if ((0 != (phy)) && (0 != (vir))) {                                             \
      HI_MPI_SYS_MmzFree((phy), reinterpret_cast<void *>(static_cast<HI_UL>(vir))); \
      (phy) = 0;                                                                    \
      (vir) = 0;                                                                    \
    }                                                                               \
  } while (0)
// Thin wrappers over the HiSilicon MMZ allocation API (see nnie_memory.cc).
HI_S32 NnieMemMalloc(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
HI_S32 NnieMemMallocCached(const HI_CHAR *mmb, HI_CHAR *zone, HI_U64 *pu_phy_addr, HI_VOID **ppv_vir_addr, HI_U32 size);
HI_S32 NnieMemFlushCache(HI_U64 phy_addr, HI_VOID *pv_vir_addr, HI_U32 size);
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_MEMORY_H_

View File

@ -0,0 +1,176 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/nnie_print.h"
namespace mindspore {
namespace nnie {
// Debug helper: dumps every dst node of every segment to a text file
// ("./ms/fseg<seg>(<node>,<id>)_<name>.txt"; the ./ms directory must already
// exist — TODO confirm). S32 blob values are de-quantized by NNIE_QUANT_BASE.
// Returns HI_SUCCESS, or HI_FAILURE on filename/open/write errors.
HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param) {
  HI_U32 u32seg_num = pst_nnie_param->model_->u32NetSegNum;
  HI_U32 i, j, k, n;
  HI_U32 seg_idx_, node_idx_;
  HI_S32 ret;
  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};
  FILE *fp = nullptr;
  HI_U32 *pu32StepAddr = nullptr;
  HI_S32 *ps32ResultAddr = nullptr;
  HI_U32 u32Height, u32Width, u32Chn, u32Stride, u32Dim;
  for (seg_idx_ = 0; seg_idx_ < u32seg_num; seg_idx_++) {
    for (node_idx_ = 0; node_idx_ < pst_nnie_param->model_->astSeg[seg_idx_].u16DstNum; node_idx_++) {
      ret = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH, "./ms/fseg%d(%d,%d)_%s.txt", seg_idx_, node_idx_,
                     pst_nnie_param->model_->astSeg[seg_idx_].astDstNode[node_idx_].u32NodeId,
                     pst_nnie_param->model_->astSeg[seg_idx_].astDstNode[node_idx_].szName);
      if (ret < 0) {
        LOGE("Error,create file name failed!");
        return HI_FAILURE;
      }
      fp = fopen(acReportFileName, "w");
      if (fp == nullptr) {
        LOGE("Error,open file failed!");
        return HI_FAILURE;
      }
      if (SVP_BLOB_TYPE_SEQ_S32 == pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].enType) {
        // Sequence blob: variable step count per sample, u32Dim values each.
        // NOTE(review): fprintf results are unchecked here, unlike the WHC
        // branch below — inconsistent, but left as-is in this doc pass.
        u32Dim = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].unShape.stSeq.u32Dim;
        u32Stride = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u32Stride;
        pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(
          HI_U32, pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].unShape.stSeq.u64VirAddrStep);
        ps32ResultAddr =
          NNIE_CONVERT_64BIT_ADDR(HI_S32, pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u64VirAddr);
        for (n = 0; n < pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u32Num; n++) {
          for (i = 0; i < *(pu32StepAddr + n); i++) {
            for (j = 0; j < u32Dim; j++) {
              fprintf(fp, "%f ", static_cast<float>(*(ps32ResultAddr + j)) / NNIE_QUANT_BASE);
            }
            ps32ResultAddr += u32Stride / sizeof(HI_U32);
          }
        }
      } else {
        // WHC blob: num x chn x height x width values, row stride u32Stride.
        u32Height = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].unShape.stWhc.u32Height;
        u32Width = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].unShape.stWhc.u32Width;
        u32Chn = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].unShape.stWhc.u32Chn;
        u32Stride = pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u32Stride;
        ps32ResultAddr =
          NNIE_CONVERT_64BIT_ADDR(HI_S32, pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u64VirAddr);
        fprintf(fp, "%s 4 1 %d %d %d\n", pst_nnie_param->model_->astSeg[seg_idx_].astDstNode[node_idx_].szName,
                u32Height, u32Width, u32Chn);
        for (n = 0; n < pst_nnie_param->seg_data_[seg_idx_].dst_[node_idx_].u32Num; n++) {
          for (i = 0; i < u32Chn; i++) {
            for (j = 0; j < u32Height; j++) {
              for (k = 0; k < u32Width; k++) {
                ret = fprintf(fp, "%f ", static_cast<float>(*(ps32ResultAddr + k)) / NNIE_QUANT_BASE);
                if (ret < 0) {
                  fclose(fp);  // avoid leaking the FILE on a write error
                  return HI_FAILURE;
                }
              }
              ps32ResultAddr += u32Stride / sizeof(HI_U32);
            }
          }
        }
      }
      fclose(fp);
    }
  }
  return HI_SUCCESS;
}
// Debug helper: dumps every src (input) node of segment `segnum` to
// "seg<seg>_layer<id>_input(<name>)_inst.linear.hex". SEQ_S32 blobs are
// written as raw ints, U8 blobs as bytes, anything else as de-quantized
// floats. Returns HI_SUCCESS, or HI_FAILURE on filename/open errors.
HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum) {
  HI_U32 i, j, k, n;
  HI_U32 seg_idx_ = segnum, node_idx_;
  HI_S32 ret;
  HI_CHAR acReportFileName[NNIE_REPORT_NAME_LENGTH] = {'\0'};
  FILE *fp = nullptr;
  HI_U32 *pu32StepAddr = nullptr;
  HI_S32 *ps32ResultAddr = nullptr;
  HI_U8 *pu8ResultAddr = nullptr;
  HI_U32 u32Height, u32Width, u32Chn, u32Stride, u32Dim;
  for (node_idx_ = 0; node_idx_ < pst_nnie_param->model_->astSeg[seg_idx_].u16SrcNum; node_idx_++) {
    ret = snprintf(acReportFileName, NNIE_REPORT_NAME_LENGTH, "seg%d_layer%d_input(%s)_inst.linear.hex", seg_idx_,
                   pst_nnie_param->model_->astSeg[seg_idx_].astSrcNode[node_idx_].u32NodeId,
                   pst_nnie_param->model_->astSeg[seg_idx_].astSrcNode[node_idx_].szName);
    if (ret < 0) {
      LOGE("Error,create file name failed!\n");
      return HI_FAILURE;
    }
    fp = fopen(acReportFileName, "w");
    if (fp == nullptr) {
      LOGE("Error,open file failed!");
      return HI_FAILURE;
    }
    if (SVP_BLOB_TYPE_SEQ_S32 == pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].enType) {
      // Sequence blob: variable step count per sample, u32Dim ints each.
      u32Dim = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stSeq.u32Dim;
      u32Stride = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Stride;
      pu32StepAddr = NNIE_CONVERT_64BIT_ADDR(
        HI_U32, pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stSeq.u64VirAddrStep);
      ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u64VirAddr);
      for (n = 0; n < pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Num; n++) {
        for (i = 0; i < *(pu32StepAddr + n); i++) {
          for (j = 0; j < u32Dim; j++) {
            fprintf(fp, "%d ", *(ps32ResultAddr + j));
          }
          ps32ResultAddr += u32Stride / sizeof(HI_U32);
        }
      }
    } else if (pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].enType == SVP_BLOB_TYPE_U8) {
      // U8 image blob: bytes dumped as decimal, one stride-row at a time.
      u32Height = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Height;
      u32Width = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Width;
      u32Chn = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Chn;
      u32Stride = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Stride;
      pu8ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_U8, pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u64VirAddr);
      for (n = 0; n < pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Num; n++) {
        for (i = 0; i < u32Chn; i++) {
          for (j = 0; j < u32Height; j++) {
            for (k = 0; k < u32Width; k++) {
              fprintf(fp, "%d ", *(pu8ResultAddr + k));
            }
            pu8ResultAddr += u32Stride / sizeof(HI_U8);
          }
        }
      }
    } else {
      // Any other blob type: treat values as quantized S32 and de-quantize.
      // NOTE(review): here the division happens before the float cast, unlike
      // NniePrintReportResult — integer division; left as-is in this doc pass.
      u32Height = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Height;
      u32Width = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Width;
      u32Chn = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].unShape.stWhc.u32Chn;
      u32Stride = pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Stride;
      ps32ResultAddr = NNIE_CONVERT_64BIT_ADDR(HI_S32, pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u64VirAddr);
      fprintf(fp, "%s 4 1 %d %d %d\n", pst_nnie_param->model_->astSeg[seg_idx_].astSrcNode[node_idx_].szName, u32Height,
              u32Width, u32Chn);
      for (n = 0; n < pst_nnie_param->seg_data_[seg_idx_].src_[node_idx_].u32Num; n++) {
        for (i = 0; i < u32Chn; i++) {
          for (j = 0; j < u32Height; j++) {
            for (k = 0; k < u32Width; k++) {
              fprintf(fp, "%f ", static_cast<float>(*(ps32ResultAddr + k) / NNIE_QUANT_BASE));
            }
            ps32ResultAddr += u32Stride / sizeof(HI_U32);
          }
        }
      }
    }
    fclose(fp);
  }
  return HI_SUCCESS;
}
} // namespace nnie
} // namespace mindspore

View File

@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
#define MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_
#include "include/mpi_nnie.h"
#include "include/hi_type.h"
#include "src/nnie_common.h"
#include "src/nnie_memory.h"
// Tag prepended to every NNIE log line.
#define LOG_TAG1 "NNIE"
// Error log: prints "[ERROR] NNIE [file:line] function]" context plus the
// formatted message to stderr. The `if (1)` keeps the do/while statement
// shape while leaving room for a runtime verbosity switch.
#define LOGE(format, ...)                                                                       \
  do {                                                                                          \
    if (1) {                                                                                    \
      fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
      fprintf(stderr, format, ##__VA_ARGS__);                                                   \
    }                                                                                           \
  } while (0)
// Warning log: same shape as LOGE with a "[Warning]" prefix.
#define LOGW(format, ...)                                                                         \
  do {                                                                                            \
    if (1) {                                                                                      \
      fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
      fprintf(stderr, format, ##__VA_ARGS__);                                                     \
    }                                                                                             \
  } while (0)
constexpr int kMaxSize = 1024;  // generic buffer/name length cap
constexpr int kDecimal = 10;    // base for strtol-style conversions
namespace mindspore {
namespace nnie {
HI_S32 NniePrintReportResult(NnieParam *pst_nnie_param);
HI_S32 NniePrintReportResultInputSeg(NnieParam *pst_nnie_param, int segnum);
} // namespace nnie
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_NNIE_NNIE_PRINT_H_

View File

@ -0,0 +1,22 @@
cmake_minimum_required(VERSION 3.14)
project(NNIE_proposal)

# NOTE: aux_source_directory behaves like a glob — newly added sources are
# only picked up on re-configure.
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/src COMMON_SRC3)
add_library(mslite_proposal SHARED ${COMMON_SRC3})

# Target-scoped include paths instead of directory-scoped
# include_directories(), which would leak into every target defined below.
target_include_directories(mslite_proposal PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include
    ${CMAKE_CURRENT_SOURCE_DIR}/../runtime/include/third_party
)
# Explicit visibility keyword; the keyword-less form has legacy semantics.
target_link_libraries(mslite_proposal PRIVATE ${LINK_LOCAT_LIB})

# Strip tool for Release builds; HIMIX_STRIP overrides the himix200 default.
if(DEFINED HIMIX_STRIP)
    set(NDK_STRIP ${HIMIX_STRIP})
else()
    set(NDK_STRIP "arm-himix200-linux-strip")
endif()

if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
    # $<TARGET_FILE:...> resolves the real output path (robust against
    # OUTPUT_NAME/prefix changes); VERBATIM gives portable argument escaping.
    add_custom_command(TARGET mslite_proposal POST_BUILD
                       COMMAND ${NDK_STRIP} $<TARGET_FILE:mslite_proposal>
                       VERBATIM)
endif()

View File

@ -0,0 +1,650 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/proposal.h"
#include <cmath>
#include <cstring>
#include <memory>
#include "include/errorcode.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
constexpr int kNumInput2 = 2;
constexpr int kNCHWDims = 4;
namespace mindspore {
namespace proposal {
// Returns the size in bytes of the scratch pool Rpn() carves up: per-anchor
// coordinates, bbox deltas, proposal rows, scores and quicksort stack, plus
// the ratio/scale anchor templates. Layout must match the pointer arithmetic
// at the top of Rpn().
uint32_t RpnTmpBufSize(uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t input_height,
                       uint32_t input_width) {
  uint32_t anchors_num = num_ratio_anchors * num_scale_anchors * input_height * input_width;
  uint32_t anchors_size = sizeof(uint32_t) * COORDI_NUM * anchors_num;
  uint32_t bbox_delta_size = anchors_size;
  uint32_t proposal_size = sizeof(uint32_t) * PROPOSAL_WIDTH * anchors_num;
  uint32_t ratio_anchors_size = sizeof(float) * num_ratio_anchors * COORDI_NUM;
  uint32_t scale_anchors_size = sizeof(float) * num_ratio_anchors * num_scale_anchors * COORDI_NUM;
  // 2 = fg/bg score pair per anchor — presumably equals SCORE_NUM used in
  // Rpn(); TODO confirm and unify the constant.
  uint32_t score_size = sizeof(float) * anchors_num * 2;
  uint32_t stack_size = sizeof(Stack) * anchors_num;
  uint32_t total_size =
    anchors_size + bbox_delta_size + proposal_size + ratio_anchors_size + scale_anchors_size + score_size + stack_size;
  return total_size;
}
// Precomputed factor tables for QuickExp(): the fixed-point exponent is split
// into 4-bit nibbles and exp() is reconstructed as a product of one factor
// per nibble. Rows 0-4 serve positive inputs (nibble positions 0..4, least
// significant first); rows 5-9 serve negated (negative-input) magnitudes.
static float exp_coef[10][16] = {
  {1.0f, 1.00024f, 1.00049f, 1.00073f, 1.00098f, 1.00122f, 1.00147f, 1.00171f, 1.00196f, 1.0022f, 1.00244f, 1.00269f,
   1.00293f, 1.00318f, 1.00342f, 1.00367f},
  {1.0f, 1.00391f, 1.00784f, 1.01179f, 1.01575f, 1.01972f, 1.02371f, 1.02772f, 1.03174f, 1.03578f, 1.03984f, 1.04391f,
   1.04799f, 1.05209f, 1.05621f, 1.06034f},
  {1.0f, 1.06449f, 1.13315f, 1.20623f, 1.28403f, 1.36684f, 1.45499f, 1.54883f, 1.64872f, 1.75505f, 1.86825f, 1.98874f,
   2.117f, 2.25353f, 2.39888f, 2.55359f},
  {1.0f, 2.71828f, 7.38906f, 20.0855f, 54.5981f, 148.413f, 403.429f, 1096.63f, 2980.96f, 8103.08f, 22026.5f, 59874.1f,
   162755.0f, 442413.0f, 1.2026e+006f, 3.26902e+006f},
  {1.0f, 8.88611e+006f, 7.8963e+013f, 7.01674e+020f, 6.23515e+027f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
   5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f, 5.54062e+034f,
   5.54062e+034f},
  {1.0f, 0.999756f, 0.999512f, 0.999268f, 0.999024f, 0.99878f, 0.998536f, 0.998292f, 0.998049f, 0.997805f, 0.997562f,
   0.997318f, 0.997075f, 0.996831f, 0.996588f, 0.996345f},
  {1.0f, 0.996101f, 0.992218f, 0.98835f, 0.984496f, 0.980658f, 0.976835f, 0.973027f, 0.969233f, 0.965455f, 0.961691f,
   0.957941f, 0.954207f, 0.950487f, 0.946781f, 0.94309f},
  {1.0f, 0.939413f, 0.882497f, 0.829029f, 0.778801f, 0.731616f, 0.687289f, 0.645649f, 0.606531f, 0.569783f, 0.535261f,
   0.502832f, 0.472367f, 0.443747f, 0.416862f, 0.391606f},
  {1.0f, 0.367879f, 0.135335f, 0.0497871f, 0.0183156f, 0.00673795f, 0.00247875f, 0.000911882f, 0.000335463f,
   0.00012341f, 4.53999e-005f, 1.67017e-005f, 6.14421e-006f, 2.26033e-006f, 8.31529e-007f, 3.05902e-007f},
  {1.0f, 1.12535e-007f, 1.26642e-014f, 1.42516e-021f, 1.60381e-028f, 1.80485e-035f, 2.03048e-042f, 0.0f, 0.0f, 0.0f,
   0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}};
// Table-driven approximation of exp(value / QUANT_BASE): the input's
// magnitude is split into five 4-bit nibbles and the matching per-nibble
// factors from exp_coef are multiplied together. Rows 0-4 serve positive
// inputs, rows 5-9 negative ones.
static float QuickExp(int32_t value) {
  const bool is_negative = (value & 0x80000000) != 0;
  if (is_negative) {
    value = ~value + 0x00000001;  // two's-complement negate -> magnitude
  }
  const int row_base = is_negative ? 5 : 0;
  float product = 1.0f;
  for (int nibble = 0; nibble < 5; ++nibble) {
    product *= exp_coef[row_base + nibble][(value >> (4 * nibble)) & 0x0000000F];
  }
  return product;
}
// In-place softmax over src[0..num) using the fixed-point QuickExp table.
// Always returns RET_OK.
// NOTE(review): `max` starts at 0, so when every input is negative the
// stabilizing shift degenerates to subtracting 0; mathematically softmax is
// shift-invariant, but precision then rests on QuickExp's clamped table.
static int32_t SoftMax(float *src, uint32_t num) {
  float max = 0;
  float sum = 0;
  uint32_t i = 0;
  // Find the maximum for numerical stabilization.
  for (i = 0; i < num; ++i) {
    if (max < src[i]) {
      max = src[i];
    }
  }
  // exp((x - max) * QUANT_BASE) via the quantized table, accumulating the sum.
  for (i = 0; i < num; ++i) {
    src[i] = QuickExp(static_cast<int32_t>((src[i] - max) * QUANT_BASE));
    sum += src[i];
  }
  // Normalize so the outputs sum to 1.
  for (i = 0; i < num; ++i) {
    src[i] /= sum;
  }
  return RET_OK;
}
// Exchanges two PROPOSAL_WIDTH-wide proposal rows element by element.
static void Argswap(int32_t *src1, int32_t *src2) {
  for (uint32_t idx = 0; idx < PROPOSAL_WIDTH; ++idx) {
    const int32_t held = src1[idx];
    src1[idx] = src2[idx];
    src2[idx] = held;
  }
}
// Descending quicksort of proposal rows (PROPOSAL_WIDTH ints each) keyed on
// the confidence field (index 4), using an explicit `stack` instead of
// recursion. Partitions whose lower bound exceeds `max_num` are not refined
// further, so only the leading `max_num + 1` rows are guaranteed ordered.
// Always returns RET_OK.
// NOTE(review): `stack` must be sized for the worst-case partition depth —
// the caller allocates one Stack per anchor, which suffices; confirm.
static int32_t NonRecursiveArgQuickSort(int32_t *array, int32_t low, int32_t high, Stack *stack, int32_t max_num) {
  int32_t top = 0;
  stack[top].min_ = low;
  stack[top].max_ = high;
  while (top > -1) {
    // Pop the next sub-range to partition.
    low = stack[top].min_;
    high = stack[top].max_;
    int32_t i = low;
    int32_t j = high;
    // Pivot is the confidence of the first row in the range.
    int32_t key_confidence = array[PROPOSAL_WIDTH * low + 4];
    top--;
    while (i < j) {
      // Shrink from the right past rows with lower confidence than the pivot.
      while ((i < j) && (key_confidence > array[j * PROPOSAL_WIDTH + 4])) {
        j--;
      }
      if (i < j) {
        Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
        i++;
      }
      // Shrink from the left past rows with higher confidence than the pivot.
      while ((i < j) && (key_confidence < array[i * PROPOSAL_WIDTH + 4])) {
        i++;
      }
      if (i < j) {
        Argswap(&array[i * PROPOSAL_WIDTH], &array[j * PROPOSAL_WIDTH]);
        j--;
      }
    }
    // Only refine partitions that can still affect the first max_num rows.
    if (low <= max_num) {
      if (low < i - 1) {
        top++;
        stack[top].min_ = low;
        stack[top].max_ = i - 1;
      }
      if (high > i + 1) {
        top++;
        stack[top].min_ = i + 1;
        stack[top].max_ = high;
      }
    }
  }
  return RET_OK;
}
// Marks proposals whose score (field 4) falls below `filter_thresh` as
// suppressed (field 5 = 1) and compacts the survivors to the front of the
// array. The surviving count is written to `num_after_filter`; with a zero
// threshold everything is kept untouched. Always returns RET_OK.
static int32_t FilterLowScoreBbox(int32_t *proposals, uint32_t anchors_num, uint32_t filter_thresh,
                                  uint32_t *num_after_filter) {
  uint32_t kept = anchors_num;
  if (filter_thresh > 0) {
    // Pass 1: flag every row below the threshold.
    for (uint32_t idx = 0; idx < anchors_num; idx++) {
      if (proposals[PROPOSAL_WIDTH * idx + 4] < static_cast<int32_t>(filter_thresh)) {
        proposals[PROPOSAL_WIDTH * idx + 5] = 1;
      }
    }
    // Pass 2: compact unsuppressed rows (6 fields each) to the front.
    kept = 0;
    for (uint32_t idx = 0; idx < anchors_num; idx++) {
      if (proposals[PROPOSAL_WIDTH * idx + 5] != 0) {
        continue;
      }
      for (uint32_t field = 0; field < 6; field++) {
        proposals[PROPOSAL_WIDTH * kept + field] = proposals[PROPOSAL_WIDTH * idx + field];
      }
      kept++;
    }
  }
  *num_after_filter = kept;
  return RET_OK;
}
// Computes the intersection area (*area_inter) and union area (*area_sum) of
// two axis-aligned boxes given in inclusive integer coordinates.
// Always returns RET_OK.
static int32_t SVP_NNIE_Overlap(int32_t x_min1, int32_t y_min1, int32_t x_max1, int32_t y_max1, int32_t x_min2,
                                int32_t y_min2, int32_t x_max2, int32_t y_max2, int32_t *area_sum,
                                int32_t *area_inter) {
  // Intersection rectangle bounds.
  const int32_t ix_min = MAX(x_min1, x_min2);
  const int32_t iy_min = MAX(y_min1, y_min2);
  const int32_t ix_max = MIN(x_max1, x_max2);
  const int32_t iy_max = MIN(y_max1, y_max2);
  // Inclusive-coordinate extents; clamp negatives (disjoint boxes) to zero.
  int32_t inter_width = ix_max - ix_min + 1;
  int32_t inter_height = iy_max - iy_min + 1;
  if (inter_width < 0) {
    inter_width = 0;
  }
  if (inter_height < 0) {
    inter_height = 0;
  }
  const int32_t inter = inter_width * inter_height;
  const int32_t area1 = (x_max1 - x_min1 + 1) * (y_max1 - y_min1 + 1);
  const int32_t area2 = (x_max2 - x_min2 + 1) * (y_max2 - y_min2 + 1);
  *area_sum = area1 + area2 - inter;
  *area_inter = inter;
  return RET_OK;
}
// Greedy non-maximum suppression over proposal rows sorted by confidence:
// for each unsuppressed box (field 5 == 0), suppresses any later overlapping
// box whose IoU (scaled by QUANT_BASE) exceeds `nms_thresh`; the lower-scored
// of the pair is the one marked. Stops once `max_roi_num` boxes survive.
// Always returns RET_OK.
static int32_t SVP_NNIE_NonMaxSuppression(int32_t *proposals, uint32_t anchors_num, uint32_t nms_thresh,
                                          uint32_t max_roi_num) {
  /****** define variables *******/
  int32_t x_min1;
  int32_t y_min1;
  int32_t x_max1;
  int32_t y_max1;
  int32_t x_min2;
  int32_t y_min2;
  int32_t x_max2;
  int32_t y_max2;
  int32_t s32AreaTotal = 0;
  int32_t area_inter = 0;
  uint32_t i;
  uint32_t j;
  uint32_t num = 0;
  bool bNoOverlap;
  for (i = 0; i < anchors_num && num < max_roi_num; i++) {
    if (proposals[PROPOSAL_WIDTH * i + 5] == 0) {
      num++;
      x_min1 = proposals[PROPOSAL_WIDTH * i];
      y_min1 = proposals[PROPOSAL_WIDTH * i + 1];
      x_max1 = proposals[PROPOSAL_WIDTH * i + 2];
      y_max1 = proposals[PROPOSAL_WIDTH * i + 3];
      for (j = i + 1; j < anchors_num; j++) {
        if (proposals[PROPOSAL_WIDTH * j + 5] == 0) {
          x_min2 = proposals[PROPOSAL_WIDTH * j];
          y_min2 = proposals[PROPOSAL_WIDTH * j + 1];
          x_max2 = proposals[PROPOSAL_WIDTH * j + 2];
          y_max2 = proposals[PROPOSAL_WIDTH * j + 3];
          // Cheap rejection: disjoint bounding intervals cannot overlap.
          bNoOverlap = (x_min2 > x_max1) || (x_max2 < x_min1) || (y_min2 > y_max1) || (y_max2 < y_min1);
          if (bNoOverlap) {
            continue;
          }
          (void)SVP_NNIE_Overlap(x_min1, y_min1, x_max1, y_max1, x_min2, y_min2, x_max2, y_max2, &s32AreaTotal,
                                 &area_inter);
          // IoU test in fixed point: inter/union > nms_thresh/QUANT_BASE.
          if (area_inter * QUANT_BASE > static_cast<int32_t>(nms_thresh * s32AreaTotal)) {
            if (proposals[PROPOSAL_WIDTH * i + 4] >= proposals[PROPOSAL_WIDTH * j + 4]) {
              proposals[PROPOSAL_WIDTH * j + 5] = 1;
            } else {
              proposals[PROPOSAL_WIDTH * i + 5] = 1;
            }
          }
        }
      }
    }
  }
  return RET_OK;
}
static void Rpn(float **inputs, uint32_t num_ratio_anchors, uint32_t num_scale_anchors, uint32_t *scales,
uint32_t *ratios, uint32_t ori_image_height, uint32_t ori_image_width, uint32_t *inputs_height,
uint32_t *inputs_width, uint32_t *inputs_channel, uint32_t inputs_stride, uint32_t max_rois,
uint32_t min_size, uint32_t spatial_scale, uint32_t nms_thresh, uint32_t filter_thresh,
uint32_t num_before_nms, char *pu32MemPool, float *proposal_result, uint32_t dst_stride,
uint32_t *num_rois) {
#if 1
/******************** define parameters ****************/
uint32_t size;
int32_t *anchors = nullptr;
int32_t *bbox_delta = nullptr;
int32_t *proposals = nullptr;
int32_t *ptr1 = nullptr;
int32_t *ptr2 = nullptr;
int32_t *ptr3 = nullptr;
uint32_t num_after_filter = 0;
uint32_t num_anchors;
float base_w;
float base_h;
float base_x_ctr;
float base_y_ctr;
float *ratio_anchors = nullptr;
float *f32_ptr = nullptr;
float *f32_ptr2 = nullptr;
float *scale_anchors = nullptr;
float *scores = nullptr;
float f32_size;
uint32_t pixel_interval;
uint32_t src_bbox_index;
uint32_t src_fg_prob_index;
uint32_t src_bg_prob_index;
uint32_t src_bbox_bias;
uint32_t src_prob_bias;
uint32_t des_box;
uint32_t bg_blob_size;
uint32_t anchors_per_pixel;
uint32_t map_size;
uint32_t line_size;
int32_t proposal_width;
int32_t proposal_height;
uint32_t roi_count;
Stack *stack = nullptr;
uint32_t c;
uint32_t h;
uint32_t w;
uint32_t i;
uint32_t j;
uint32_t p;
uint32_t q;
uint32_t z;
uint32_t base_anchor[4] = {0, 0, (min_size - 1), (min_size - 1)};
/*********************************** Faster RCNN *********************************************/
/********* calculate the start pointer of each part in MemPool *********/
anchors = reinterpret_cast<int32_t *>(pu32MemPool);
num_anchors = num_ratio_anchors * num_scale_anchors * (inputs_height[0] * inputs_width[0]);
size = COORDI_NUM * num_anchors;
pu32MemPool += size * sizeof(int32_t);
bbox_delta = reinterpret_cast<int32_t *>(pu32MemPool);
pu32MemPool += size * sizeof(int32_t);
proposals = reinterpret_cast<int32_t *>(pu32MemPool);
size = PROPOSAL_WIDTH * num_anchors;
pu32MemPool += size * sizeof(int32_t);
ratio_anchors = reinterpret_cast<float *>(static_cast<void *>(pu32MemPool));
f32_ptr = reinterpret_cast<float *>(static_cast<void *>(pu32MemPool));
size = num_ratio_anchors * COORDI_NUM;
f32_ptr = f32_ptr + size;
scale_anchors = f32_ptr;
size = num_scale_anchors * num_ratio_anchors * COORDI_NUM;
f32_ptr = f32_ptr + size;
scores = f32_ptr;
size = num_anchors * SCORE_NUM;
f32_ptr = f32_ptr + size;
stack = reinterpret_cast<Stack *>(f32_ptr);
/********************* Generate the base anchor ***********************/
base_w = static_cast<float>(base_anchor[2] - base_anchor[0] + 1);
base_h = static_cast<float>(base_anchor[3] - base_anchor[1] + 1);
base_x_ctr = static_cast<float>(base_anchor[0] + ((base_w - 1) * 0.5));
base_y_ctr = static_cast<float>(base_anchor[1] + ((base_h - 1) * 0.5));
/*************** Generate Ratio Anchors for the base anchor ***********/
f32_ptr = ratio_anchors;
f32_size = base_w * base_h;
for (i = 0; i < num_ratio_anchors; i++) {
float f32_ratios = static_cast<float>(ratios[i]) / QUANT_BASE;
base_w = sqrt(f32_size / f32_ratios);
base_w = static_cast<float>(
1.0 * ((base_w) >= 0 ? static_cast<int32_t>(base_w + HALF_VAL) : static_cast<int32_t>(base_w - HALF_VAL)));
base_h = base_w * f32_ratios;
base_h = static_cast<float>(
1.0 * ((base_h) >= 0 ? static_cast<int32_t>(base_h + HALF_VAL) : static_cast<int32_t>(base_h - HALF_VAL)));
*f32_ptr++ = static_cast<float>(base_x_ctr - ((base_w - 1) * HALF_VAL));
*(f32_ptr++) = static_cast<float>(base_y_ctr - ((base_h - 1) * HALF_VAL));
*(f32_ptr++) = static_cast<float>(base_x_ctr + ((base_w - 1) * HALF_VAL));
*(f32_ptr++) = static_cast<float>(base_y_ctr + ((base_h - 1) * HALF_VAL));
}
/********* Generate Scale Anchors for each Ratio Anchor **********/
f32_ptr = ratio_anchors;
f32_ptr2 = scale_anchors;
/* Generate Scale Anchors for one pixel */
for (i = 0; i < num_ratio_anchors; i++) {
for (j = 0; j < num_scale_anchors; j++) {
base_w = *(f32_ptr + 2) - *(f32_ptr) + 1;
base_h = *(f32_ptr + 3) - *(f32_ptr + 1) + 1;
base_x_ctr = static_cast<float>(*(f32_ptr) + ((base_w - 1) * HALF_VAL));
base_y_ctr = static_cast<float>(*(f32_ptr + 1) + ((base_h - 1) * HALF_VAL));
*(f32_ptr2++) =
static_cast<float>(base_x_ctr - ((base_w * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
*(f32_ptr2++) =
static_cast<float>(base_y_ctr - ((base_h * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
*(f32_ptr2++) =
static_cast<float>(base_x_ctr + ((base_w * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
*(f32_ptr2++) =
static_cast<float>(base_y_ctr + ((base_h * (static_cast<float>(scales[j]) / QUANT_BASE) - 1) * HALF_VAL));
}
f32_ptr += COORDI_NUM;
}
/******************* Copy the anchors to every pixel in the feature map ******************/
ptr1 = anchors;
pixel_interval = QUANT_BASE / spatial_scale;
for (p = 0; p < inputs_height[0]; p++) {
for (q = 0; q < inputs_width[0]; q++) {
f32_ptr2 = scale_anchors;
for (z = 0; z < num_scale_anchors * num_ratio_anchors; z++) {
*(ptr1++) = static_cast<int32_t>(q * pixel_interval + *(f32_ptr2++));
*(ptr1++) = static_cast<int32_t>(p * pixel_interval + *(f32_ptr2++));
*(ptr1++) = static_cast<int32_t>(q * pixel_interval + *(f32_ptr2++));
*(ptr1++) = static_cast<int32_t>(p * pixel_interval + *(f32_ptr2++));
}
}
}
/********** do transpose, convert the blob from (M,C,H,W) to (M,H,W,C) **********/
map_size = inputs_height[1] * inputs_stride / sizeof(uint32_t);
anchors_per_pixel = num_ratio_anchors * num_scale_anchors;
bg_blob_size = anchors_per_pixel * map_size;
line_size = inputs_stride / sizeof(uint32_t);
src_prob_bias = 0;
src_bbox_bias = 0;
for (c = 0; c < inputs_channel[1]; c++) {
for (h = 0; h < inputs_height[1]; h++) {
for (w = 0; w < inputs_width[1]; w++) {
src_bbox_index = src_bbox_bias + c * map_size + h * line_size + w;
src_bg_prob_index = src_prob_bias + (c / COORDI_NUM) * map_size + h * line_size + w;
src_fg_prob_index = bg_blob_size + src_bg_prob_index;
des_box = (anchors_per_pixel) * (h * inputs_width[1] + w) + c / COORDI_NUM;
uint32_t des_bbox_delta_index = COORDI_NUM * des_box + c % COORDI_NUM;
bbox_delta[des_bbox_delta_index] = static_cast<int32_t>(inputs[1][src_bbox_index] * QUANT_BASE);
uint32_t des_score_index = (SCORE_NUM)*des_box;
scores[des_score_index] = inputs[0][src_bg_prob_index];
scores[des_score_index + 1] = inputs[0][src_fg_prob_index];
}
}
}
/************************* do softmax ****************************/
f32_ptr = scores;
for (i = 0; i < num_anchors; i++) {
SoftMax(f32_ptr, SCORE_NUM);
f32_ptr += SCORE_NUM;
}
/************************* BBox Transform *****************************/
for (i = 0; i < num_anchors; i++) {
ptr1 = anchors;
ptr1 = ptr1 + COORDI_NUM * i;
ptr2 = proposals;
ptr2 = ptr2 + PROPOSAL_WIDTH * i;
ptr3 = bbox_delta;
ptr3 = ptr3 + COORDI_NUM * i;
f32_ptr = scores;
f32_ptr = f32_ptr + i * (SCORE_NUM);
proposal_width = *(ptr1 + 2) - *(ptr1) + 1;
proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1;
int32_t proposal_center_x = *(ptr1) + static_cast<int32_t>(proposal_width * HALF_VAL);
int32_t proposal_center_y = *(ptr1 + 1) + static_cast<int32_t>(proposal_height * HALF_VAL);
int32_t pred_center_x =
static_cast<int32_t>((static_cast<float>(*(ptr3)) / QUANT_BASE) * proposal_width + proposal_center_x);
int32_t pred_center_y =
static_cast<int32_t>((static_cast<float>(*(ptr3 + 1)) / QUANT_BASE) * proposal_height + proposal_center_y);
int32_t pred_w = static_cast<int32_t>(proposal_width * QuickExp(static_cast<int32_t>(*(ptr3 + 2))));
int32_t pred_h = static_cast<int32_t>(proposal_height * QuickExp(static_cast<int32_t>(*(ptr3 + 3))));
*(ptr2) = static_cast<int32_t>(pred_center_x - HALF_VAL * pred_w);
*(ptr2 + 1) = static_cast<int32_t>(pred_center_y - HALF_VAL * pred_h);
*(ptr2 + 2) = static_cast<int32_t>(pred_center_x + HALF_VAL * pred_w);
*(ptr2 + 3) = static_cast<int32_t>(pred_center_y + HALF_VAL * pred_h);
*(ptr2 + 4) = static_cast<int32_t>(*(f32_ptr + 1) * QUANT_BASE);
*(ptr2 + 5) = 0;
}
/************************ clip bbox *****************************/
for (i = 0; i < num_anchors; i++) {
ptr1 = proposals;
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
*ptr1 = MAX(MIN(*ptr1, static_cast<int32_t>(ori_image_width) - 1), 0);
*(ptr1 + 1) = MAX(MIN(*(ptr1 + 1), static_cast<int32_t>(ori_image_height) - 1), 0);
*(ptr1 + 2) = MAX(MIN(*(ptr1 + 2), static_cast<int32_t>(ori_image_width) - 1), 0);
*(ptr1 + 3) = MAX(MIN(*(ptr1 + 3), static_cast<int32_t>(ori_image_height) - 1), 0);
}
/************ remove the bboxes which are too small *************/
for (i = 0; i < num_anchors; i++) {
ptr1 = proposals;
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
proposal_width = *(ptr1 + 2) - *(ptr1) + 1;
proposal_height = *(ptr1 + 3) - *(ptr1 + 1) + 1;
if (proposal_width < static_cast<int32_t>(min_size) || proposal_height < static_cast<int32_t>(min_size)) {
*(ptr1 + 5) = 1;
}
}
/********** remove low score bboxes ************/
(void)FilterLowScoreBbox(proposals, num_anchors, filter_thresh, &num_after_filter);
/********** sort ***********/
(void)NonRecursiveArgQuickSort(proposals, 0, num_after_filter - 1, stack, static_cast<int32_t>(num_before_nms));
num_after_filter = (num_after_filter < num_before_nms) ? num_after_filter : num_before_nms;
/* do nms to remove highly overlapped bbox */
(void)SVP_NNIE_NonMaxSuppression(proposals, num_after_filter, nms_thresh, max_rois); /* function NMS */
/************** write the final result to output ***************/
roi_count = 0;
for (i = 0; i < num_after_filter; i++) {
ptr1 = proposals;
ptr1 = ptr1 + PROPOSAL_WIDTH * i;
if (*(ptr1 + 5) == 0) {
proposal_result[dst_stride / sizeof(uint32_t) * roi_count] = *ptr1;
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 1] = *(ptr1 + 1);
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 2] = *(ptr1 + 2);
proposal_result[dst_stride / sizeof(uint32_t) * roi_count + 3] = *(ptr1 + 3);
roi_count++;
}
if (roi_count >= max_rois) {
break;
}
}
*num_rois = roi_count;
#endif
}
// Initializes the proposal parameters and scratch buffers used by Rpn().
// Fills fixed anchor scales/ratios (quantized by QUANT_BASE), image geometry
// and NMS thresholds, reads the (N, C, H, W) geometry of the two input
// tensors, then allocates a single buffer holding both the RPN temporary
// workspace and the output bounding boxes.
// Returns RET_OK on success, RET_ERROR on bad inputs or allocation failure.
int32_t ProposalInit(ProposalParam *param, const std::vector<mindspore::MSTensor> &inputs, uint32_t max_roi_num,
                     uint32_t ori_image_height, uint32_t ori_image_width) {
  uint32_t tmp_buf_size = 0;
  uint32_t bbox_buf_size = 0;
  uint32_t total_size = 0;
  param->max_roi_num_ = max_roi_num;
  // One aspect ratio, NUM_SCALE_ANCHORS scales -> 9 anchors per feature-map
  // pixel. All scale/ratio values are fixed-point with QUANT_BASE as 1.0.
  param->num_ratio_anchors_ = 1;
  param->num_scale_anchors_ = NUM_SCALE_ANCHORS;
  param->scales_[0] = 1.5 * QUANT_BASE;
  param->scales_[1] = 2.1 * QUANT_BASE;
  param->scales_[2] = 2.9 * QUANT_BASE;
  param->scales_[3] = 4.1 * QUANT_BASE;
  param->scales_[4] = 5.8 * QUANT_BASE;
  param->scales_[5] = 8.0 * QUANT_BASE;
  param->scales_[6] = 11.3 * QUANT_BASE;
  param->scales_[7] = 15.8 * QUANT_BASE;
  param->scales_[8] = 22.1 * QUANT_BASE;
  param->ratios_[0] = 2.44 * QUANT_BASE;
  param->ori_image_height_ = ori_image_height;
  param->ori_image_width_ = ori_image_width;
  param->min_size_ = MIN_SIZE;
  // 0.0625 = 1/16: feature-map-to-image scale, quantized by QUANT_BASE.
  param->spatial_scale_ = (uint32_t)(0.0625 * QUANT_BASE);
  param->nms_thresh_ = (uint32_t)(0.7 * QUANT_BASE);
  param->filter_thresh_ = 0;
  param->num_before_nms_ = NUM_NMS;
  // Output box container: max_roi_num rows of COORDI_NUM floats.
  param->rpn_bounding_box_.chn_ = 1;
  param->rpn_bounding_box_.height_ = max_roi_num;
  param->rpn_bounding_box_.width_ = COORDI_NUM;
  param->rpn_bounding_box_.stride_ = COORDI_NUM * sizeof(float);
  param->rpn_bounding_box_.num_ = 1;
  if (inputs.size() < kNumInput2) {
    LOGE("inputs tensor size error.");
    return RET_ERROR;
  }
  for (int i = 0; i < kNumInput2; i++) {
    auto input_data_type = inputs[i].DataType();
    if (input_data_type == DataType::kNumberTypeFloat32) {
      auto ptr_shape = inputs[i].Shape();
      if ((ptr_shape.size() == kNCHWDims)) {
        // NCHW layout: [0]=N, [1]=C, [2]=H, [3]=W.
        param->inputs_height_[i] = ptr_shape[2];
        param->inputs_width_[i] = ptr_shape[3];
        param->inputs_channel_[i] = ptr_shape[1];
        if (i == 0) {
          param->inputs_stride_ = ptr_shape[3] * sizeof(float);
        }
      }
      // NOTE(review): a non-4D float input leaves inputs_height_/width_/
      // channel_ for that index untouched with no error — confirm whether
      // this silent fallthrough is intentional.
    }
  }
  tmp_buf_size = RpnTmpBufSize(param->num_ratio_anchors_, param->num_scale_anchors_, param->inputs_height_[0],
                               param->inputs_width_[0]);
  bbox_buf_size = param->rpn_bounding_box_.num_ * param->rpn_bounding_box_.height_ * param->rpn_bounding_box_.stride_;
  total_size = tmp_buf_size + bbox_buf_size;
  // Re-initialization support: drop any previously allocated workspace.
  if (param->rpn_tmp_buf_ != nullptr) {
    free(param->rpn_tmp_buf_);
    param->rpn_tmp_buf_ = nullptr;
  }
  param->rpn_tmp_buf_ = malloc(total_size);
  if (param->rpn_tmp_buf_ == nullptr) {
    LOGE("malloc buf fail.");
    return RET_ERROR;
  }
  // The output boxes live in the tail of the same allocation.
  param->rpn_bounding_box_.data_ = reinterpret_cast<char *>(param->rpn_tmp_buf_) + tmp_buf_size;
  return RET_OK;
}
// Runs the RPN proposal computation: validates the tensors, gathers the two
// float32 input buffers, invokes Rpn() and copies the resulting ROIs
// (rpn_bounding_box_.height_ rows of COORDI_NUM floats) into the single
// output tensor.
// Returns RET_OK on success, RET_ERROR on malformed inputs/outputs.
int32_t ProposalRun(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                    ProposalParam *param) {
  if (inputs->size() < kNumInput2) {
    LOGE("inputs tensor size error.");
    return RET_ERROR;
  }
  if (outputs->size() != 1) {
    LOGE("outputs tensor size error.");
    return RET_ERROR;
  }
  for (int i = 0; i < kNumInput2; i++) {
    auto input_data_type = inputs->at(i).DataType();
    if (input_data_type == DataType::kNumberTypeFloat32) {
      param->inputs_[i] = reinterpret_cast<float *>((*inputs)[i].MutableData());
    }
  }
  auto output_data_type = (*outputs)[0].DataType();
  if (output_data_type != DataType::kNumberTypeFloat32) {
    LOGE("outputs tensor data type error.");
    return RET_ERROR;
  }
  Rpn(param->inputs_, param->num_ratio_anchors_, param->num_scale_anchors_, param->scales_, param->ratios_,
      param->ori_image_height_, param->ori_image_width_, param->inputs_height_, param->inputs_width_,
      param->inputs_channel_, param->inputs_stride_, param->max_roi_num_, param->min_size_, param->spatial_scale_,
      param->nms_thresh_, param->filter_thresh_, param->num_before_nms_, reinterpret_cast<char *>(param->rpn_tmp_buf_),
      reinterpret_cast<float *>(param->rpn_bounding_box_.data_), param->rpn_bounding_box_.stride_,
      &param->rpn_bounding_box_.height_);
  // Publish the actual ROI count produced by Rpn() as the output shape.
  std::vector<int64_t> shape{static_cast<int64_t>(param->rpn_bounding_box_.height_), COORDI_NUM};
  (*outputs)[0].SetShape(shape);
  auto output_data = (*outputs)[0].MutableData();
  // Fix: guard the memcpy — MutableData() may fail to allocate and return
  // nullptr; the previous code dereferenced it unconditionally.
  if (output_data == nullptr) {
    LOGE("output tensor data is nullptr.");
    return RET_ERROR;
  }
  memcpy(output_data, param->rpn_bounding_box_.data_, param->rpn_bounding_box_.height_ * COORDI_NUM * sizeof(float));
  return RET_OK;
}
// Releases the RPN scratch buffer allocated by ProposalInit().
// Safe to call repeatedly: the pointer is reset after freeing.
void ProposalDeInit(ProposalParam *param) {
  // Fix: compare/assign the pointer with nullptr instead of the integer 0.
  if (param->rpn_tmp_buf_ != nullptr) {
    free(param->rpn_tmp_buf_);
    param->rpn_tmp_buf_ = nullptr;
  }
}
} // namespace proposal
} // namespace mindspore

View File

@ -0,0 +1,95 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_
#include <vector>
#include "include/api/types.h"
// Tag prepended to every log line emitted by the proposal module.
#define LOG_TAG1 "Proposal"
// Prints an error message with file/line/function context to stderr.
#define LOGE(format, ...) \
  do { \
    if (1) { \
      fprintf(stderr, "\n[ERROR] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
      fprintf(stderr, format, ##__VA_ARGS__); \
    } \
  } while (0)
// Prints a warning message with file/line/function context to stderr.
#define LOGW(format, ...) \
  do { \
    if (1) { \
      fprintf(stderr, "\n[Warning] " LOG_TAG1 " [" __FILE__ ":%d] %s] ", __LINE__, __FUNCTION__); \
      fprintf(stderr, format, ##__VA_ARGS__); \
    } \
  } while (0)
namespace mindspore {
namespace proposal {
// Output ROI blob produced by Rpn(). ProposalInit() configures it as
// num_=1, chn_=1, height_=max_roi_num, width_=COORDI_NUM with
// stride_ = COORDI_NUM * sizeof(float); Rpn() rewrites height_ to the
// actual ROI count it produced.
typedef struct {
  uint32_t stride_;  // bytes per box row
  void *data_;       // box storage; points into the rpn_tmp_buf_ allocation
  uint32_t num_;
  uint32_t width_;
  uint32_t height_;  // ROI capacity before Rpn(), actual ROI count after
  uint32_t chn_;
} RpnBoundingBox;
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define HALF_VAL 0.5f         // the half value
#define COORDI_NUM 4          // coordinate numbers (x1, y1, x2, y2)
#define PROPOSAL_WIDTH 6      // the number of proposal values per row
#define QUANT_BASE 4096       // fixed-point base: QUANT_BASE represents 1.0
#define SCORE_NUM 2           // the num of RPN scores (background, foreground)
#define NUM_SCALE_ANCHORS 9   // anchor scales generated per ratio anchor
#define NUM_NMS 6000          // max candidates kept before NMS
#define MIN_SIZE 16           // proposals narrower/shorter than this are dropped
// Configuration and state for the RPN proposal computation.
typedef struct {
  uint32_t scales_[9];          // anchor scales, quantized by QUANT_BASE
  uint32_t ratios_[9];          // anchor aspect ratios, quantized by QUANT_BASE
  uint32_t inputs_height_[2];   // H of the score / bbox input tensors
  uint32_t inputs_width_[2];    // W of the score / bbox input tensors
  uint32_t inputs_channel_[2];  // C of the score / bbox input tensors
  uint32_t inputs_stride_;      // row stride in bytes (W * sizeof(float) of input 0)
  uint32_t num_ratio_anchors_;
  uint32_t num_scale_anchors_;
  uint32_t ori_image_height_;   // original image height, used to clip boxes
  uint32_t ori_image_width_;    // original image width, used to clip boxes
  uint32_t min_size_;           // minimum proposal side length
  uint32_t spatial_scale_;      // feature-map-to-image scale, quantized by QUANT_BASE
  uint32_t nms_thresh_;         // NMS overlap threshold, quantized by QUANT_BASE
  uint32_t filter_thresh_;      // score threshold for FilterLowScoreBbox
  uint32_t max_roi_num_;        // upper bound on output ROIs
  uint32_t num_before_nms_;     // candidates kept before NMS
  float *inputs_[2];            // raw float32 data of the two input tensors
  void *rpn_tmp_buf_;           // scratch buffer owned by ProposalInit/DeInit
  RpnBoundingBox rpn_bounding_box_;  // output boxes (stored inside rpn_tmp_buf_)
} ProposalParam;
// Sub-range bounds used by NonRecursiveArgQuickSort when ordering proposals.
// NOTE(review): presumably [min_, max_] index bounds of the pending
// partition — confirm against the sort implementation.
typedef struct {
  int32_t min_;
  int32_t max_;
} Stack;
int32_t ProposalInit(ProposalParam *param, const std::vector<mindspore::MSTensor> &inputs, uint32_t max_roi_num,
uint32_t ori_image_height, uint32_t ori_image_width);
int32_t ProposalRun(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
ProposalParam *param);
void ProposalDeInit(ProposalParam *param);
} // namespace proposal
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_H_

View File

@ -0,0 +1,200 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/proposal_fp32.h"
#include <memory>
#include <string>
#include "schema/model_generated.h"
#include "include/registry/register_kernel.h"
#include "include/errorcode.h"
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Custom;
constexpr int kMaxSize = 1024;
constexpr int kNumInput2 = 2;
constexpr int kDecimal = 10;
namespace mindspore {
namespace proposal {
// Validates the tensors, reorders the inputs to the expected
// (rpn_cls_score, rpn_bbox_pred) order, reads the optional MAX_ROI_NUM
// environment override and initializes the proposal parameters.
// Returns RET_ERROR on malformed inputs.
int ProposalCPUKernel::Prepare() {
  if (inputs_.size() < kNumInput2) {
    LOGE("inputs tensor num error.");
    return RET_ERROR;
  }
  if (outputs_.size() != 1) {
    LOGE("outputs tensor num error.");
    return RET_ERROR;
  }
  // Match inputs by their expected tensor names; fall back to positional
  // order (with a warning) when a name is absent in the model.
  std::vector<std::string> inputs_name = {"rpn_cls_score", "rpn_bbox_pred"};
  std::vector<mindspore::MSTensor> inputs;
  for (size_t i = 0; i < inputs_name.size(); i++) {
    bool find_flag = false;
    for (auto &input : inputs_) {
      if (input.Name() == inputs_name[i]) {
        inputs.push_back(input);
        find_flag = true;
        break;
      }
    }
    if (!find_flag) {
      for (auto &input : inputs_) {
        if (std::find(inputs.begin(), inputs.end(), input) != inputs.end()) {
          continue;
        }
        inputs.push_back(input);
        LOGW("input tensor name diff '%s' vs '%s'.", inputs_name[i].c_str(), input.Name().c_str());
        break;
      }
    }
  }
  if (inputs.size() != inputs_name.size()) {
    LOGE("inputs size error.");
    return RET_ERROR;
  }
  this->set_inputs(inputs);
  // Fix: check the shape is non-empty before indexing its batch dimension.
  if (inputs[0].Shape().empty() || inputs[0].Shape()[0] != 1) {
    LOGE("proposal only support input num == 1.");
    return RET_ERROR;
  }
  outputs_[0].SetTensorName("proposal");
  int max_roi_num_int = 300;
  auto *max_roi_num = std::getenv("MAX_ROI_NUM");
  if (max_roi_num != nullptr) {
    // Fix: parse a copy of the environment value. The previous code wrote a
    // '\0' into the buffer returned by getenv(), which the C standard
    // forbids (undefined behavior). Semantics are preserved: the leading
    // run of digits, if any, is parsed; otherwise the default is kept.
    std::string roi_env(max_roi_num);
    size_t digit_num = 0;
    while (digit_num < roi_env.size() && roi_env[digit_num] >= '0' && roi_env[digit_num] <= '9') {
      ++digit_num;
    }
    if (digit_num > 0) {
      max_roi_num_int = atoi(roi_env.substr(0, digit_num).c_str());
    } else {
      LOGW("MAX_ROI_NUM ENV is invalid, now set to default value %d", max_roi_num_int);
    }
  } else {
    LOGW("MAX_ROI_NUM ENV is not set, now set to default value %d", max_roi_num_int);
  }
  return ProposalInit(&proposal_param_, inputs_, max_roi_num_int, image_height_, image_weight_);
}
// Re-validates the batch dimension on resize; only batch size 1 is handled.
int ProposalCPUKernel::ReSize() {
  auto batch_dim = inputs_[0].Shape()[0];
  if (batch_dim == 1) {
    return RET_OK;
  }
  LOGE("proposal only support input num == 1.");
  return RET_ERROR;
}
// Runs the proposal computation over the bound input/output tensors.
int ProposalCPUKernel::Execute() { return ProposalRun(&inputs_, &outputs_, &proposal_param_); }
// Frees the RPN scratch buffer owned by proposal_param_.
ProposalCPUKernel::~ProposalCPUKernel() { ProposalDeInit(&proposal_param_); }
// Copies the payload of the named Custom-op attribute into buf as a
// NUL-terminated C string.
// Returns false when the attribute is absent or its payload (plus the
// terminator) does not fit into buf_size bytes.
bool GetCustomAttr(char *buf, int buf_size, const mindspore::schema::Custom *op, const std::string &attr) {
  for (size_t idx = 0; idx < op->attr()->size(); idx++) {
    if (op->attr()->Get(idx)->name()->str() != attr) {
      continue;
    }
    auto payload = op->attr()->Get(idx)->data();
    int payload_len = static_cast<int>(payload->size());
    if (payload_len >= buf_size) {
      LOGE("attr size too big");
      return false;
    }
    for (int k = 0; k < payload_len; k++) {
      buf[k] = static_cast<char>(payload->Get(k));
    }
    buf[payload_len] = 0;
    return true;
  }
  return false;
}
// Creates a Proposal kernel from a Custom primitive carrying the
// "proposal_id", "image_height" and "image_width" string attributes
// (decimal integers).
// Returns nullptr when the primitive or a required attribute is invalid.
std::shared_ptr<mindspore::kernel::Kernel> ProposalCreateKernel(const std::vector<mindspore::MSTensor> &inputs,
                                                                const std::vector<mindspore::MSTensor> &outputs,
                                                                const mindspore::schema::Primitive *primitive,
                                                                const mindspore::Context *ctx) {
  // Fix: guard against a null primitive before dereferencing it.
  if (primitive == nullptr || primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return nullptr;
  }
  auto op = primitive->value_as_Custom();
  // Fix: op and op->attr() may be null in a malformed flatbuffer; the
  // previous code dereferenced them unconditionally.
  if (op == nullptr || op->attr() == nullptr || op->attr()->size() < 1) {
    LOGE("There are at least 1 attribute of Custom");
    return nullptr;
  }
  int64_t ndims;
  int64_t image_height;
  int64_t image_width;
  char *res = nullptr;
  char buf[kMaxSize];
  if (GetCustomAttr(buf, kMaxSize, op, "proposal_id")) {
    res = nullptr;
    ndims = strtol(buf, &res, kDecimal);
    if ((*res) != 0) {
      LOGE("Get attr id data fail");
      return nullptr;
    }
  } else {
    LOGE("Proposal Custom op should have id");
    return nullptr;
  }
  if (GetCustomAttr(buf, kMaxSize, op, "image_height")) {
    res = nullptr;
    image_height = strtol(buf, &res, kDecimal);
    if ((*res) != 0) {
      LOGE("Get attr id data fail");
      return nullptr;
    }
  } else {
    LOGE("Proposal Custom op should have image_height");
    return nullptr;
  }
  if (GetCustomAttr(buf, kMaxSize, op, "image_width")) {
    res = nullptr;
    image_width = strtol(buf, &res, kDecimal);
    if ((*res) != 0) {
      LOGE("Get attr id data fail");
      return nullptr;
    }
  } else {
    LOGE("Proposal Custom op should have image_width");
    return nullptr;
  }
  auto kernel = std::make_shared<ProposalCPUKernel>(inputs, outputs, primitive, ctx, ndims, image_height, image_width);
  if (kernel == nullptr) {
    LOGE("new custom kernel is nullptr");
    return nullptr;
  }
  return kernel;
}
} // namespace proposal
} // namespace mindspore
namespace mindspore {
namespace kernel {
namespace {
// Data type the Proposal kernel is registered for.
const auto kFloat32 = DataType::kNumberTypeFloat32;
}
// Registers the Proposal custom kernel for the NNIE provider on CPU.
REGISTER_CUSTOM_KERNEL(CPU, NNIE, kFloat32, Proposal, proposal::ProposalCreateKernel)
}  // namespace kernel
}  // namespace mindspore

View File

@ -0,0 +1,51 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_
#include <vector>
#include "schema/model_generated.h"
#include "include/context.h"
#include "include/api/kernel.h"
#include "src/proposal.h"
using mindspore::kernel::Kernel;
namespace mindspore {
namespace proposal {
// CPU kernel wrapping the NNIE RPN proposal operator. Owns the proposal
// parameters and the image geometry passed through Custom attributes.
class ProposalCPUKernel : public Kernel {
 public:
  // id: proposal layer id from the "proposal_id" attribute;
  // image_height / image_width: original image size used when clipping the
  // generated boxes.
  ProposalCPUKernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
                    const mindspore::schema::Primitive *primitive, const mindspore::Context *ctx, int id,
                    int image_height, int image_width)
      : Kernel(inputs, outputs, primitive, ctx), id_(id), image_height_(image_height), image_weight_(image_width) {}
  // Releases the RPN scratch buffer via ProposalDeInit().
  ~ProposalCPUKernel() override;
  int Prepare() override;
  int ReSize() override;
  int Execute() override;
 private:
  proposal::ProposalParam proposal_param_ = {0};
  int64_t id_;            // proposal layer id; not read in the visible code
  int64_t image_height_;
  // NOTE(review): "weight" looks like a typo for "width" — confirm before renaming.
  int64_t image_weight_;
};
} // namespace proposal
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_FP32_H_

View File

@ -0,0 +1,77 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/proposal_infer.h"
#include <memory>
#include <vector>
#include "include/errorcode.h"
#include "src/proposal.h"
#include "include/api/format.h"
#include "include/registry/register_kernel_interface.h"
using mindspore::kernel::KernelInterface;
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_OK;
using mindspore::schema::PrimitiveType_Custom;
namespace mindspore {
namespace proposal {
std::shared_ptr<KernelInterface> ProposalInferCreater() {
auto infer = std::make_shared<ProposalInterface>();
if (infer == nullptr) {
LOGE("new custom infer is nullptr");
return nullptr;
}
return infer;
}
// Shape inference for the Proposal custom op: every output becomes a
// float32 NCHW tensor of shape [-1, COORDI_NUM]; the first dimension (the
// ROI count) is only known after execution.
Status ProposalInterface::Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
                                const mindspore::schema::Primitive *primitive) {
  if (inputs->size() != 2) {
    LOGE("Inputs size less 2");
    return kLiteError;
  }
  if (outputs->size() == 0) {
    LOGE("Outputs size 0");
    return kLiteError;
  }
  if (primitive->value_type() != mindspore::schema::PrimitiveType_Custom) {
    LOGE("Primitive type is not PrimitiveType_Custom");
    return kLiteError;
  }
  size_t id = 0;
  while (id < outputs->size()) {
    // TODO(review): original author marked this "to be completed" — the
    // intent was to propagate the input tensor's format and data type
    // rather than hard-coding them below:
    // outputs[id]->format_ = input->format_;
    // outputs[id]->data_type_ = kNumberTypeFloat32;
    std::vector<int64_t> shape{-1, COORDI_NUM};
    (*outputs)[id].SetShape(shape);
    (*outputs)[id].SetDataType(DataType::kNumberTypeFloat32);
    (*outputs)[id].SetFormat(Format::NCHW);
    id++;
  }
  return kSuccess;
}
} // namespace proposal
} // namespace mindspore
namespace mindspore {
namespace kernel {
// Registers the shape-inference callback for the NNIE Proposal custom op.
REGISTER_CUSTOM_KERNEL_INTERFACE(NNIE, Proposal, proposal::ProposalInferCreater);
}  // namespace kernel
}  // namespace mindspore

View File

@ -0,0 +1,35 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_
#define MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_
#include <vector>
#include "include/kernel_interface.h"
namespace mindspore {
namespace proposal {
// Shape-inference interface registered for the Proposal custom op.
class ProposalInterface : public mindspore::kernel::KernelInterface {
 public:
  ProposalInterface() {}
  ~ProposalInterface() = default;
  // Sets each output to a float32 NCHW tensor of shape [-1, COORDI_NUM].
  Status Infer(std::vector<mindspore::MSTensor> *inputs, std::vector<mindspore::MSTensor> *outputs,
               const mindspore::schema::Primitive *primitive) override;
};
} // namespace proposal
} // namespace mindspore
#endif // MINDSPORE_LITE_TOOLS_BENCHMARK_NNIE_PROPOSAL_PROPOSAL_INFER_H_

View File

@ -26,6 +26,12 @@ namespace lite {
int RunBenchmark(int argc, const char **argv) {
BenchmarkFlags flags;
Option<std::string> err = flags.ParseFlags(argc, argv);
#ifdef SUPPORT_NNIE
if (SvpSysInit() != RET_OK) {
std::cerr << "SVP Init failed" << std::endl;
return RET_ERROR;
}
#endif
if (err.IsSome()) {
std::cerr << err.Get() << std::endl;
std::cerr << flags.Usage() << std::endl;
@ -36,7 +42,9 @@ int RunBenchmark(int argc, const char **argv) {
std::cerr << flags.Usage() << std::endl;
return RET_OK;
}
#ifdef SUPPORT_NNIE
BenchmarkBase *benchmark = new (std::nothrow) Benchmark(&flags);
#else
auto api_type = std::getenv("MSLITE_API_TYPE");
if (api_type != nullptr) {
MS_LOG(INFO) << "MSLITE_API_TYPE = " << api_type;
@ -53,6 +61,7 @@ int RunBenchmark(int argc, const char **argv) {
BENCHMARK_LOG_ERROR("Invalid MSLITE_API_TYPE, (OLD/NEW/C, default:OLD)");
return RET_ERROR;
}
#endif
if (benchmark == nullptr) {
BENCHMARK_LOG_ERROR("new benchmark failed ");
return RET_ERROR;
@ -61,6 +70,7 @@ int RunBenchmark(int argc, const char **argv) {
auto status = benchmark->Init();
if (status != 0) {
BENCHMARK_LOG_ERROR("Benchmark init Error : " << status);
delete benchmark;
return RET_ERROR;
}
auto model_name = flags.model_file_.substr(flags.model_file_.find_last_of(DELIM_SLASH) + 1);
@ -68,6 +78,7 @@ int RunBenchmark(int argc, const char **argv) {
status = benchmark->RunBenchmark();
if (status != 0) {
BENCHMARK_LOG_ERROR("Run Benchmark " << model_name << " Failed : " << status);
delete benchmark;
return RET_ERROR;
}

View File

@ -53,58 +53,6 @@ function Run_Build_x86() {
fi
}
# Build arm32 for nnie
# Build the arm (aarch32/aarch64) MindSpore Lite package with NNIE support:
# unpack the release tarball, copy its runtime/ into the nnie and
# nnie_proposal third-party dirs, cross-compile with the selected toolchain,
# then inject the freshly built nnie/proposal libraries back into the tarball.
# Relies on globals: open_source_ms_path, nnie_code_path, package_name,
# toolchain_name, device_name, task, thread_num.
function Run_Build_arm() {
  # decompress release_pkg
  cd ${open_source_ms_path}/output/ || exit 1
  file_name=$(ls ./*linux-${package_name}.tar.gz)
  # package name layout: mindspore-lite-<version>-linux-<package_name>.tar.gz
  IFS="-" read -r -a file_name_array <<< "$file_name"
  version=${file_name_array[2]}
  tar -xf mindspore-lite-${version}-linux-${package_name}.tar.gz
  # cp runtime folder
  # NOTE(review): "third_patry" looks like a typo for "third_party", but the
  # build tree uses this spelling consistently — do not change one-sided.
  cd ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name} || exit 1
  rm -rf ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/runtime/
  mkdir -p ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/runtime/ || exit 1
  rm -rf ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/runtime/
  mkdir -p ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/runtime/ || exit 1
  cp -r ./runtime/ ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie/third_patry/
  cp -r ./runtime/ ${nnie_code_path}/mindspore/mindspore/lite/tools/benchmark/nnie_proposal/third_patry/
  # compile nnie runtime so
  export TOOLCHAIN_NAME=${toolchain_name}
  export TOOLCHAIN_FILE=${open_source_ms_path}/mindspore/lite/cmake/${toolchain_name}.toolchain.cmake
  export MSLITE_REGISTRY_DEVICE=${device_name}
  # disable gpu & npu & train
  export MSLITE_GPU_BACKEND=off
  export MSLITE_ENABLE_NPU=off
  export MSLITE_ENABLE_TRAIN=off
  export MSLITE_ENABLE_NNIE=on
  bash ${nnie_code_path}/mindspore/build.sh -I ${task} -e cpu -j ${thread_num}
  if [ $? = 0 ]; then
    echo "build arm for nnie success"
    # repack: copy the new benchmark binary and nnie/proposal libraries into
    # the release layout, then regenerate the tarball and its sha256.
    release_path=${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/providers/${device_name}/
    rm -rf ${release_path}
    mkdir -p ${release_path}
    mkdir -p ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/tools/benchmark/
    cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/benchmark ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}/tools/benchmark/ || exit 1
    cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/nnie/libmslite_nnie.so ${release_path}/ || exit 1
    cp ${nnie_code_path}/mindspore/mindspore/lite/build/tools/benchmark/nnie_proposal/libmslite_proposal.so ${release_path}/ || exit 1
    # the micro nnie library only exists for the Hi3516D device
    if [ ${device_name} == "Hi3516D" ]; then
      cp ${nnie_code_path}/mindspore/mindspore/lite/micro/example/hi3516d/libmicro_nnie.so ${release_path}/ || exit 1
    fi
    echo "cp new nnie so to release pkg success"
    cd ${open_source_ms_path}/output/ || exit 1
    rm ${open_source_ms_path}/output/mindspore-lite-${version}-linux-${package_name}.tar.gz
    tar -zcf ./mindspore-lite-${version}-linux-${package_name}.tar.gz ./mindspore-lite-${version}-linux-${package_name}/ || exit 1
    sha256sum ./mindspore-lite-${version}-linux-${package_name}.tar.gz > ./mindspore-lite-${version}-linux-${package_name}.tar.gz.sha256 || exit 1
  else
    echo "build arm for nnie failed"; return 1
  fi
}
# bashpath should be /home/jenkins/agent-working-dir/workspace/Compile_Lite_ARM32_3516D/
basepath=$(pwd)
echo "basepath is ${basepath}"
@ -123,12 +71,8 @@ while getopts "I:b:j:t:d:" opt; do
echo "branch name is ${OPTARG}"
;;
t)
toolchain_name=${OPTARG}
echo "toolchain_name is ${OPTARG}"
;;
d)
device_name=${OPTARG}
echo "device_name is ${OPTARG}"
;;
j)
thread_num=${OPTARG}
@ -163,14 +107,6 @@ fi
if [ ${task} == "x86_64" ]; then
echo "start building x86 for nnie..."
Run_Build_x86
elif [ ${task} == "arm32" ]; then
echo "start building arm32 for nnie..."
package_name=aarch32
Run_Build_arm
elif [ ${task} == "arm64" ]; then
echo "start building arm64 for nnie..."
package_name=aarch64
Run_Build_arm
fi
Run_build_PID=$!

View File

@ -1,81 +0,0 @@
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Unpacks the freshly built release tarball from output/ and copies its
# runtime/ directory into the dpico benchmark third_party directory.
# Exits the whole script on any failure.
# Relies on globals: mindspore_lite_top_dir, mindspore_top_dir.
prepare_third_party() {
  dpico_third_party=${mindspore_lite_top_dir}/tools/benchmark/dpico/third_party
  rm -rf ${dpico_third_party} || exit 1
  mkdir -p ${dpico_third_party} || exit 1
  cd ${mindspore_top_dir}/output || exit 1
  file_name=$(ls *tar.gz)
  # strip the .tar.gz suffix to get the package directory name
  tar_name=${file_name%%.tar.gz}
  tar xzvf ${tar_name}.tar.gz || exit 1
  cd ..
  cp -rf ${mindspore_top_dir}/output/${tar_name}/runtime/ ${dpico_third_party} || exit 1
}
# Build arm64 for dpico
# Injects the dpico adapter library into the unpacked release package under
# providers/SD3403/, then regenerates the tarball and its sha256 checksum.
# Exits the whole script on any failure.
# Relies on globals: mindspore_top_dir, basepath, dpico_third_party.
make_dpico_benchmark_package() {
  cd ${mindspore_top_dir}/output || exit 1
  file_name=$(ls *tar.gz)
  tar_name=${file_name%%.tar.gz}
  dpico_sd3403_release_path=${mindspore_top_dir}/output/${tar_name}/providers/SD3403/
  mkdir -p ${dpico_sd3403_release_path}
  dpico_benchmark_path=${mindspore_top_dir}/mindspore/lite/build/tools/benchmark
  cp ${dpico_benchmark_path}/dpico/libdpico_acl_adapter.so ${dpico_sd3403_release_path} || exit 1
  echo "install dpico adapter so success."
  # repack: replace the original tarball with one containing the adapter
  rm ${tar_name}.tar.gz || exit 1
  tar -zcf ${tar_name}.tar.gz ${tar_name} || exit 1
  rm -rf ${tar_name} || exit 1
  sha256sum ${tar_name}.tar.gz > ${tar_name}.tar.gz.sha256 || exit 1
  echo "generate dpico package success!"
  cd ${basepath}
  rm -rf ${dpico_third_party} || exit 1
}
# Entry point: parse -t <task>; "prepare_third_party" stages the runtime for
# the dpico build, anything else repacks the release with the dpico adapter.
basepath=$(pwd)
echo "basepath is ${basepath}"
#set -e
mindspore_top_dir=${basepath}
mindspore_lite_top_dir=${mindspore_top_dir}/mindspore/lite
while getopts "t:" opt; do
  case ${opt} in
    t)
      task=${OPTARG}
      echo "compile task is ${OPTARG}"
      ;;
    ?)
      echo "unknown para"
      exit 1;;
  esac
done
if [[ ${task} == "prepare_third_party" ]]; then
  prepare_third_party
  if [ $? -eq 1 ]; then
    echo "prepare third party failed"
    # fix: 'return' is only valid inside a function or a sourced script;
    # in an executed script it just prints an error and continues.
    exit 1
  fi
else
  echo "start make package for dpico..."
  # run in the background so the exit status can be collected via wait
  make_dpico_benchmark_package &
  make_dpico_benchmark_package_pid=$!
  sleep 1
  wait ${make_dpico_benchmark_package_pid}
  make_dpico_benchmark_package_status=$?
  exit ${make_dpico_benchmark_package_status}
fi