diff --git a/build.sh b/build.sh index ac398082687..200cff10295 100755 --- a/build.sh +++ b/build.sh @@ -568,6 +568,24 @@ build_lite() cd ${BASEPATH}/mindspore/lite/build write_commit_file + if [[ "${local_lite_platform}" == "arm32" ]]; then + if [[ "${TOOLCHAIN_FILE}" && "${TOOLCHAIN_NAME}" ]]; then + RUN_TESTCASES="off" + COMPILE_MINDDATA_LITE="off" + CMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} + CMAKE_TOOLCHAIN_NAME=${TOOLCHAIN_NAME} + else + CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake + ANDROID_NATIVE_API_LEVEL="19" + CMAKE_ANDROID_NDK=${ANDROID_NDK} + CMAKE_ANDROID_ABI="armeabi-v7a" + CMAKE_ANDROID_TOOLCHAIN_NAME="clang" + CMAKE_ANDROID_STL=${MSLITE_ANDROID_STL} + CMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} + ENABLE_FP16="on" + fi + fi + if [[ "${local_lite_platform}" == "arm64" ]]; then if [ "$(uname)" == "Darwin" ]; then cmake -DCMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake -DARCHS="arm64" -DENABLE_BITCODE=0 \ @@ -593,10 +611,10 @@ build_lite() else checkndk echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so" - cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \ - -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="armeabi-v7a" -DANDROID_TOOLCHAIN_NAME="clang" \ - -DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \ - -DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DENABLE_FP16="on" -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \ + cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DTOOLCHAIN_NAME=${CMAKE_TOOLCHAIN_NAME} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \ + -DANDROID_NDK=${CMAKE_ANDROID_NDK} -DANDROID_ABI=${CMAKE_ANDROID_ABI} -DANDROID_TOOLCHAIN_NAME=${CMAKE_ANDROID_TOOLCHAIN_NAME} \ + -DANDROID_STL=${CMAKE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \ + -DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DENABLE_FP16=${ENABLE_FP16} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \ -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \ -DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite" fi diff --git a/cmake/external_libs/flatbuffers.cmake b/cmake/external_libs/flatbuffers.cmake index 78288263c1d..8ec9d6730f6 100644 --- a/cmake/external_libs/flatbuffers.cmake +++ b/cmake/external_libs/flatbuffers.cmake @@ -18,7 +18,8 @@ mindspore_add_pkg(flatbuffers EXE flatc URL ${REQ_URL} MD5 ${MD5} - CMAKE_OPTION -DFLATBUFFERS_BUILD_TESTS=OFF -DCMAKE_INSTALL_LIBDIR=lib) + CMAKE_OPTION -DCMAKE_C_COMPILER=${FLATC_GCC_COMPILER} -DCMAKE_CXX_COMPILER=${FLATC_GXX_COMPILER} + -DFLATBUFFERS_BUILD_TESTS=OFF -DCMAKE_INSTALL_LIBDIR=lib) include_directories(${flatbuffers_INC}) add_library(mindspore::flatbuffers ALIAS flatbuffers::flatbuffers) diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index 43ae08f1b63..a6e513ef315 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -15,6 +15,7 @@ set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/inference/include/dataset) set(TURBO_DIR ${RUNTIME_PKG_NAME}/inference/third_party/libjpeg-turbo) set(SECUREC_DIR ${RUNTIME_PKG_NAME}/inference/third_party/securec) set(MINDSPORE_LITE_LIB_NAME libmindspore-lite) +set(MINDSPORE_CORE_LIB_NAME libmindspore_core) set(BENCHMARK_NAME benchmark) set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark) @@ -253,6 +254,10 @@ elseif(PLATFORM_ARM32) ${RUNTIME_COMPONENT_NAME}) endif() endif() + if(MSLITE_ENABLE_NNIE AND TARGET_HIMIX200) + install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie.so + DESTINATION ${RUNTIME_PKG_NAME}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME}) + endif() elseif(WIN32) get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH) file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll @@ -393,6 +398,39 @@ else() install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_npu.cfg DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME}) endif() + if(NOT SUPPORT_TRAIN AND MSLITE_ENABLE_NNIE) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/schema/ DESTINATION ${CONVERTER_ROOT_DIR}/include/schema + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(DIRECTORY ${TOP_DIR}/mindspore/core/abstract/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/abstract + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(DIRECTORY ${TOP_DIR}/mindspore/core/base/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/base + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(DIRECTORY ${TOP_DIR}/mindspore/core/ir/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ir + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(DIRECTORY ${TOP_DIR}/mindspore/core/ops/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ops + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(DIRECTORY ${TOP_DIR}/mindspore/core/utils/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/utils + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") + install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/mindspore_core/${MINDSPORE_CORE_LIB_NAME}.a + DESTINATION ${CONVERTER_ROOT_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a DESTINATION ${CONVERTER_ROOT_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + file(GLOB PROTOBUF_LIB_PATH ${TOP_DIR}/mindspore/lite/build/.mslib/protobuf_*/lib/libprotobuf.a) + install(FILES ${PROTOBUF_LIB_PATH} DESTINATION ${CONVERTER_ROOT_DIR}/lib + COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_converter.so + DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/ COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_data_process.so + DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libnnie_mapper.so + DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME}) + install(DIRECTORY ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/ + DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/third_party/opencv-4.2.0 + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.so*") + install(DIRECTORY ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/ + DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/third_party/protobuf-3.9.0 + COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.so*") + endif() endif() if(CMAKE_SYSTEM_NAME MATCHES "Windows") diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/arg_min_max_parameter.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/arg_min_max_parameter.h index bb2cb50a060..ab6f74b29b3 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/arg_min_max_parameter.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/arg_min_max_parameter.h @@ -31,7 +31,9 @@ typedef struct ArgElement { int32_t i_data_; float f_data_; #ifdef ENABLE_ARM +#ifndef SUPPORT_NNIE float16_t f16_data_; +#endif #endif } data_; } ArgElement; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arithmetic_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arithmetic_fp32.c index 1f87f7e921e..8f56c33b80c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arithmetic_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/arithmetic_fp32.c @@ -97,8 +97,13 @@ int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size) uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1)); uint32x4_t zeros = vdupq_n_u32(0); for (; index <= size - 4; index += C4NUM) { +#ifndef SUPPORT_NNIE uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in0 + index)), mask); uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in1 + index)), mask); +#else + uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask); + uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask); +#endif float32x4_t vout = vbslq_f32(vceqq_u32(vandq_u32(vin0, vin1), zeros), vfalse, vtrue); vst1q_f32(out + index, vout); } @@ -133,8 +138,13 @@ int ElementLogicalOr(const float *in0, const float *in1, float *out, int size) { uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1)); uint32x4_t zeros = vdupq_n_u32(0); for (; index <= size - 4; index += C4NUM) { +#ifndef SUPPORT_NNIE uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in0 + index)), mask); uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in1 + index)), mask); +#else + uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask); + uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask); +#endif float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue); vst1q_f32(out + index, vout); } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd_fp32.c index ec493c718ee..24068add231 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/deconv_winograd_fp32.c @@ -219,6 +219,7 @@ void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t #endif #ifdef ENABLE_ARM32 +#ifndef SUPPORT_NNIE void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) { asm volatile( "mov r11, %[src_ptr]\n" @@ -277,6 +278,66 @@ void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, s : "r8", "r10", "r11", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); return; } +#else +void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) { + asm volatile( + "mov r7, %[src_ptr]\n" + "mov r8, %[dst_ptr]\n" + "mov r10, r8\n" + + "vld1.32 {q0}, [r7], %[src_step]\n" + "vld1.32 {q1}, [r8], %[dst_step]\n" + "vld1.32 {q2}, [r7], %[src_step]\n" + "vld1.32 {q3}, [r8], %[dst_step]\n" + + "vadd.f32 q0, q0, q1\n" + "vld1.32 {q8}, [r7], %[src_step]\n" + "vadd.f32 q2, q2, q3\n" + + "vst1.32 {q0}, [r10], %[dst_step]\n" + "vst1.32 {q2}, [r10], %[dst_step]\n" + + "vld1.32 {q9}, [r8], %[dst_step]\n" + + "vld1.32 {q10}, [r7], %[src_step]\n" + + "vadd.f32 q8, q8, q9\n" + "vld1.32 {q11}, [r8], %[dst_step]\n" + "vadd.f32 q10, q10, q11\n" + + "vld1.32 {q0}, [r7], %[src_step]\n" + "vst1.32 {q8}, [r10], %[dst_step]\n" + "vst1.32 {q10}, [r10], %[dst_step]\n" + + "vld1.32 {q1}, [r8], %[dst_step]\n" + + "vld1.32 {q2}, [r7], %[src_step]\n" + "vld1.32 {q3}, [r8], %[dst_step]\n" + + "vadd.f32 q0, q0, q1\n" + "vadd.f32 q2, q2, q3\n" + + "vst1.32 {q0}, [r10], %[dst_step]\n" + "vst1.32 {q2}, [r10], %[dst_step]\n" + + "vld1.32 {q8}, [r7], %[src_step]\n" + "vld1.32 {q9}, [r8], %[dst_step]\n" + + "vld1.32 {q10}, [r7], %[src_step]\n" + "vld1.32 {q11}, [r8], %[dst_step]\n" + + "vadd.f32 q8, q8, q9\n" + "vadd.f32 q10, q10, q11\n" + + "vst1.32 {q8}, [r10], %[dst_step]\n" + "vst1.32 {q10}, [r10], %[dst_step]\n" + + : + : [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"(dst_step) + : "r8", "r10", "r7", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); + return; +} +#endif #endif void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_stride, size_t count) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c index f4cfaccdc34..33328b41f1c 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32/matmul_fp32.c @@ -420,6 +420,7 @@ void RowMajor2Col8Major_arm64(const float *src_c, float *dst_c, size_t col) { } #endif #ifdef ENABLE_ARM32 +#ifndef SUPPORT_NNIE void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) { size_t stride = col * sizeof(float); asm volatile( @@ -462,6 +463,50 @@ void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) { : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); return; } +#else +void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) { + size_t stride = col * sizeof(float); + asm volatile( + "mov r10, %[src_c]\n" + "mov r7, %[dst_c]\n" + + "vld1.32 {q0}, [r10], %[stride]\n" + "vld1.32 {q2}, [r10], %[stride]\n" + "vld1.32 {q4}, [r10], %[stride]\n" + "vld1.32 {q6}, [r10], %[stride]\n" + + "vtrn.32 d0, d4\n" + "vtrn.32 d1, d5\n" + "vtrn.32 d8, d12\n" + "vtrn.32 d9, d13\n" + + "vld1.32 {q1}, [r10], %[stride]\n" + "vld1.32 {q3}, [r10], %[stride]\n" + "vld1.32 {q5}, [r10], %[stride]\n" + "vld1.32 {q7}, [r10], %[stride]\n" + + "vswp d1, d8\n" + "vswp d5, d12\n" + + "vtrn.32 d2, d6\n" + "vtrn.32 d3, d7\n" + "vtrn.32 d10, d14\n" + "vtrn.32 d11, d15\n" + + "vswp d3, d10\n" + "vswp d7, d14\n" + + "vst1.32 {q0, q1}, [r7]!\n" + "vst1.32 {q2, q3}, [r7]!\n" + "vst1.32 {q4, q5}, [r7]!\n" + "vst1.32 {q6, q7}, [r7]!\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "r10", "r7", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); + return; +} +#endif #endif void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, size_t row, size_t col) { size_t row8 = row / C8NUM * C8NUM; diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c index 8815498f930..4de2a8ad69b 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/gemm.c @@ -353,6 +353,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); #elif ENABLE_ARM32 +#ifndef SUPPORT_NNIE /* 8x4 row-major to col-major */ size_t stride = col * sizeof(float); asm volatile( @@ -393,6 +394,48 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_ : : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); +#else + /* 8x4 row-major to col-major */ + size_t stride = col * sizeof(float); + asm volatile( + "mov r10, %[src_c]\n" + "mov r7, %[dst_c]\n" + + "vld1.32 {q0}, [r10], %[stride]\n" + "vld1.32 {q2}, [r10], %[stride]\n" + "vld1.32 {q4}, [r10], %[stride]\n" + "vld1.32 {q6}, [r10], %[stride]\n" + + "vtrn.32 d0, d4\n" + "vtrn.32 d1, d5\n" + "vtrn.32 d8, d12\n" + "vtrn.32 d9, d13\n" + + "vld1.32 {q1}, [r10], %[stride]\n" + "vld1.32 {q3}, [r10], %[stride]\n" + "vld1.32 {q5}, [r10], %[stride]\n" + "vld1.32 {q7}, [r10], %[stride]\n" + + "vswp d1, d8\n" + "vswp d5, d12\n" + + "vtrn.32 d2, d6\n" + "vtrn.32 d3, d7\n" + "vtrn.32 d10, d14\n" + "vtrn.32 d11, d15\n" + + "vswp d3, d10\n" + "vswp d7, d14\n" + + "vst1.32 {q0, q1}, [r7]!\n" + "vst1.32 {q2, q3}, [r7]!\n" + "vst1.32 {q4, q5}, [r7]!\n" + "vst1.32 {q6, q7}, [r7]!\n" + + : + : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride) + : "r10", "r7", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7"); +#endif #else for (int tr = 0; tr < 8; tr++) { for (int tc = 0; tc < 4; tc++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c index 6084d4ab637..f2ac7ccba8f 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/fp32_grad/pooling_grad.c @@ -146,7 +146,8 @@ void MaxPoolingGrad(const float *input_ptr, const float *dy_ptr, float *output_p #ifdef ENABLE_ARM uint32x4_t index = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3}; float32x4_t in = vld1q_f32(inPtr + val_idx); - max_idx = MaxIndex(in, &max_val, index, max_idx); + max_idx = vreinterpretq_u32_s32( + MaxIndex(in, &max_val, vreinterpretq_s32_u32(index), vreinterpretq_s32_u32(max_idx))); #else float val[4] = {inPtr[val_idx], inPtr[val_idx + 1], inPtr[val_idx + 2], inPtr[val_idx + 3]}; for (int i = 0; i < 4; i++) { diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c index ac80749c0cb..ae0c0ef96ff 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nnacl/int8/matmul_int8.c @@ -137,10 +137,10 @@ void RowMajor2Row16x4MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int co "vld1.8 {q2}, [r0], r2 \n" "vld1.8 {q3}, [r0], r2 \n" - "vst1.32 q0, [r1], r3 \n" - "vst1.32 q1, [r1], r3 \n" - "vst1.32 q2, [r1], r3 \n" - "vst1.32 q3, [r1], r3 \n" + "vst1.32 {d0, d1}, [r1], r3 \n" + "vst1.32 {d2, d3}, [r1], r3 \n" + "vst1.32 {d4, d5}, [r1], r3 \n" + "vst1.32 {d6, d7}, [r1], r3 \n" : : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ col_offset ] "r"(col_offset) diff --git a/mindspore/core/base/base.cc b/mindspore/core/base/base.cc index 07ed252e962..94c73127368 100644 --- a/mindspore/core/base/base.cc +++ b/mindspore/core/base/base.cc @@ -17,6 +17,7 @@ #include "base/base.h" #include #include +#include #include namespace mindspore { @@ -32,7 +33,7 @@ uint32_t Base::GetTypeId(const char *const type_name) { if (it != t->map.end()) { return it->second; } - uint32_t tid = ++(t->type_counter); + uint32_t tid = std::hash()(type_name); t->map[type_name] = tid; return tid; } diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h index e1f3a92bb8f..9461cb80675 100644 --- a/mindspore/core/base/core_ops.h +++ b/mindspore/core/base/core_ops.h @@ -589,6 +589,8 @@ inline const PrimitivePtr kPrimIf = std::make_shared("If"); inline const PrimitivePtr kPrimAvgPoolFusion = std::make_shared("AvgPoolFusion"); inline const PrimitivePtr kPrimMaxPoolFusion = std::make_shared("MaxPoolFusion"); inline const PrimitivePtr kPrimActivation = std::make_shared("Activation"); +inline const PrimitivePtr kPrimPReLUFusion = std::make_shared("PReLUFusion"); +inline const PrimitivePtr kPrimCustom = std::make_shared("Custom"); inline const PrimitivePtr kPrimTopKFusion = std::make_shared("TopKFusion"); inline const PrimitivePtr kPrimTileFusion = std::make_shared("TileFusion"); inline const PrimitivePtr kPrimReduceFusion = std::make_shared("ReduceFusion"); diff --git a/mindspore/core/mindrt/src/thread/inter_threadpool.cc b/mindspore/core/mindrt/src/thread/inter_threadpool.cc index b1928e6eeb6..143ef6d1ae8 100644 --- a/mindspore/core/mindrt/src/thread/inter_threadpool.cc +++ b/mindspore/core/mindrt/src/thread/inter_threadpool.cc @@ -42,6 +42,7 @@ void InterThreadPool::ThreadAsyncRun(Worker *worker) { } void InterThreadPool::ActorThreadRun() { +#ifndef SUPPORT_NNIE ActorReference actor; { std::unique_lock _l(actor_mutex_); @@ -54,6 +55,7 @@ void InterThreadPool::ActorThreadRun() { } actor->Run(); finish_cond_var_.notify_one(); +#endif } void InterThreadPool::EnqueReadyActor(const ActorReference &actor) { diff --git a/mindspore/core/mindrt/src/thread/threadpool.cc b/mindspore/core/mindrt/src/thread/threadpool.cc index b8556e9c7a3..a2d5d6555d8 100644 --- a/mindspore/core/mindrt/src/thread/threadpool.cc +++ b/mindspore/core/mindrt/src/thread/threadpool.cc @@ -100,7 +100,7 @@ void ThreadPool::KernelThreadRun(Worker *worker) { int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) { // distribute task to the KernelThread and the free ActorThread, // if the task num is greater than the KernelThread num - Task task = Task(func, content); + Task task = {func, content}; Worker *curr = CurrentWorker(); if (inter_thread_num_ == thread_num_ || curr == nullptr) { SyncRunTask(&task, task_num); diff --git a/mindspore/lite/CMakeLists.txt b/mindspore/lite/CMakeLists.txt index db8f7bebbaf..a648de09456 100644 --- a/mindspore/lite/CMakeLists.txt +++ b/mindspore/lite/CMakeLists.txt @@ -3,7 +3,12 @@ project(Lite) set(BUILD_LITE "on") -if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0) +if(TOOLCHAIN_NAME STREQUAL "himix200") + set(TARGET_HIMIX200 on) + add_compile_definitions(SUPPORT_NNIE) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 AND NOT TARGET_HIMIX200) message(FATAL_ERROR "GCC version ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0") endif() @@ -22,6 +27,7 @@ option(MSLITE_ENABLE_AVX "enable AVX instruction set, only x86_64 support" off) option(MSLITE_ENABLE_CONVERTER "enable converter, only x86_64 support" on) option(MSLITE_ENABLE_TOOLS "enable tools" on) option(MSLITE_ENABLE_TESTCASES "enable testcase" off) +option(MSLITE_ENABLE_NNIE "enable NNIE" off) # Option that can be configured through manually option(ENABLE_ARM82_A32 "if build fp16 on platform_arm32" off) @@ -55,6 +61,9 @@ endif() if(DEFINED ENV{MSLITE_ENABLE_TESTCASES}) set(MSLITE_ENABLE_TESTCASES $ENV{MSLITE_ENABLE_TESTCASES}) endif() +if(DEFINED ENV{MSLITE_ENABLE_NNIE}) + set(MSLITE_ENABLE_NNIE $ENV{MSLITE_ENABLE_NNIE}) +endif() if(PLATFORM_ARM64 OR PLATFORM_ARM32) set(PLATFORM_ARM "on") @@ -197,6 +206,9 @@ else() -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \ -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}") + if(TARGET_HIMIX200) + set(CMAKE_CXX_FLAGS "-Wno-error=maybe-uninitialized ${CMAKE_CXX_FLAGS}") + endif() if(NOT WIN32) set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}") diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index 6db52ab4452..b580fe8bcd9 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -14,6 +14,9 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64) -fdata-sections -ffast-math -fno-rtti -fno-exceptions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \ -fdata-sections -ffast-math -fno-rtti -fno-exceptions") + if(TARGET_HIMIX200) + string(REPLACE "-fno-rtti " "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) + endif() endif() if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND APPLE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections \ @@ -25,7 +28,10 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64) endif() endif() -set(API_SRC +if(TARGET_HIMIX200) + set(API_SRC) +else() + set(API_SRC ${CORE_DIR}/utils/status.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/cell.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/serialization.cc @@ -35,7 +41,8 @@ set(API_SRC ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model/model_impl.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/graph.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc -) + ) +endif() if(SUPPORT_NPU) include_directories(${DDK_PATH}) @@ -138,6 +145,16 @@ if(ENABLE_MINDRT) ${CMAKE_CURRENT_SOURCE_DIR}/lite_mindrt.cc ${CMAKE_CURRENT_SOURCE_DIR}/mindrt_executor.cc ) +elseif(TARGET_HIMIX200) + include_directories(${CORE_DIR}/mindrt) + include_directories(${CORE_DIR}/mindrt/include) + include_directories(${CORE_DIR}/mindrt/src) + set(LITE_SRC + ${LITE_SRC} + ${CORE_DIR}/mindrt/src/thread/core_affinity.cc + ${CORE_DIR}/mindrt/src/thread/inter_threadpool.cc + ${CORE_DIR}/mindrt/src/thread/threadpool.cc + ) endif() add_subdirectory(ops) @@ -199,7 +216,7 @@ if(SUPPORT_NPU) target_link_libraries(mindspore-lite npu_kernel_mid) target_link_libraries(mindspore-lite_static npu_kernel_mid) endif() -if(PLATFORM_ARM32 OR PLATFORM_ARM64) +if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_HIMIX200) target_link_libraries(mindspore-lite log) target_link_libraries(mindspore-lite_static log) endif() @@ -222,12 +239,15 @@ if(SUPPORT_TRAIN) target_link_libraries(mindspore-lite-train_static minddata-lite mindspore-lite) endif() -if("${CMAKE_BUILD_TYPE}" STREQUAL "Release") - if(NOT APPLE AND PLATFORM_ARM) - set(NDK_STRIP - "${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip") +if(NOT APPLE AND PLATFORM_ARM AND NOT TARGET_HIMIX200) + set(NDK_STRIP + "${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip") +endif() + +if(NOT APPLE AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release") + if(PLATFORM_ARM) add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP} - ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so) + ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so) elseif(NOT WIN32) add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so) endif() diff --git a/mindspore/lite/src/common/log_adapter.cc b/mindspore/lite/src/common/log_adapter.cc index 434ddd3522f..44231a23871 100644 --- a/mindspore/lite/src/common/log_adapter.cc +++ b/mindspore/lite/src/common/log_adapter.cc @@ -93,11 +93,9 @@ const char *EnumStrForMsLogLevel(MsLogLevel level) { void LogWriter::OutputLog(const std::ostringstream &msg) const { if (IsPrint(log_level_)) { -#ifdef ENABLE_ARM -#if defined(__ANDROID__) || defined(ANDROID) +#if defined(ENABLE_ARM) && (defined(__ANDROID__) || defined(ANDROID)) __android_log_print(GetAndroidLogLevel(log_level_), ANDROID_LOG_TAG, "[%s:%d] %s] %s", location_.file_, location_.line_, location_.func_, msg.str().c_str()); -#endif #else printf("%s [%s:%d] %s] %s\n", EnumStrForMsLogLevel(log_level_), location_.file_, location_.line_, location_.func_, msg.str().c_str()); diff --git a/mindspore/lite/src/lite_session.h b/mindspore/lite/src/lite_session.h index 7ec7ebe48be..e1b71f5be1b 100644 --- a/mindspore/lite/src/lite_session.h +++ b/mindspore/lite/src/lite_session.h @@ -139,7 +139,7 @@ class LiteSession : public session::LiteSession { std::unordered_map graph_output_map_; /* */ Executor *executor_ = nullptr; Model *model_ = nullptr; - std::atomic is_running_ = false; + std::atomic is_running_ = {false}; bool is_train_session_ = false; friend class TransferSession; #if SUPPORT_NPU diff --git a/mindspore/lite/src/runtime/inner_allocator.h b/mindspore/lite/src/runtime/inner_allocator.h index 7319df38745..0c40677f878 100644 --- a/mindspore/lite/src/runtime/inner_allocator.h +++ b/mindspore/lite/src/runtime/inner_allocator.h @@ -52,7 +52,7 @@ class DefaultAllocator : public Allocator { void UnLock(); bool ReuseMemory(size_t free_size, size_t size); struct MemBuf { - std::atomic_int ref_count_ = 0; + std::atomic_int ref_count_ = {0}; size_t size; void *buf; }; diff --git a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc index 9a69d982109..60f267d1efc 100644 --- a/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc +++ b/mindspore/lite/src/runtime/kernel/arm/fp32/nonzero_fp32.cc @@ -47,7 +47,7 @@ int NonZeroCPUKernel::Run() { } auto non_zero_nums = out_tensor->shape()[1]; int non_zero_count = 0; - std::vector coordiate_values(in_tensor->shape().size(), 0); + std::vector coordiate_values(in_tensor->shape().size(), 0); for (int i = 0; i < in_tensor->ElementsNum(); i += 1) { if (input_data[i]) { for (size_t j = 0; j < input_dim_size; j++) { diff --git a/mindspore/lite/src/sub_graph_kernel.h b/mindspore/lite/src/sub_graph_kernel.h index a117f26a527..8530911e5b1 100644 --- a/mindspore/lite/src/sub_graph_kernel.h +++ b/mindspore/lite/src/sub_graph_kernel.h @@ -145,7 +145,7 @@ class CpuFp32SubGraph : public CpuSubGraph { std::vector nodes, Kernel *kernel) : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) { subgraph_type_ = kCpuFP32SubGraph; - static std::atomic_int index = 0; + static std::atomic_int index = {0}; this->set_name("CpuFP32SubGraph" + std::to_string(index++)); desc_.data_type = kNumberTypeFloat32; } diff --git a/mindspore/lite/src/tensor.h b/mindspore/lite/src/tensor.h index f26fe4f875c..334e670f19f 100644 --- a/mindspore/lite/src/tensor.h +++ b/mindspore/lite/src/tensor.h @@ -217,8 +217,8 @@ class Tensor : public mindspore::tensor::MSTensor { std::vector shape_; schema::Format format_; Category category_; - std::atomic_int ref_count_ = 0; - int init_ref_count_ = 0; + std::atomic_int ref_count_ = {0}; + size_t init_ref_count_ = 0; std::vector quant_params_; std::vector quant_clusters_; AllocatorPtr allocator_ = nullptr; diff --git a/mindspore/lite/tools/benchmark/CMakeLists.txt b/mindspore/lite/tools/benchmark/CMakeLists.txt index 2be7b049b6d..20a74c3217b 100644 --- a/mindspore/lite/tools/benchmark/CMakeLists.txt +++ b/mindspore/lite/tools/benchmark/CMakeLists.txt @@ -12,7 +12,7 @@ add_executable(benchmark add_dependencies(benchmark fbs_src) -if(PLATFORM_ARM32 OR PLATFORM_ARM64) +if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_HIMIX200) if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static") target_link_libraries(benchmark mindspore-lite mindspore::json c++_shared) else() diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 867e6d4cc06..53d030baa48 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -125,6 +125,8 @@ set(LITE_SRC ${SRC_DIR}/tensor.cc ${SRC_DIR}/ms_tensor.cc ${SRC_DIR}/tensorlist.cc + ${SRC_DIR}/registry/kernel_interface_registry.cc + ${SRC_DIR}/registry/kernel_interface.cc ${SRC_DIR}/kernel_registry.cc ${SRC_DIR}/inner_kernel.cc ${SRC_DIR}/lite_kernel.cc diff --git a/mindspore/lite/tools/converter/converter_flags.cc b/mindspore/lite/tools/converter/converter_flags.cc index 0b0863ff948..e3ebb98d1b6 100644 --- a/mindspore/lite/tools/converter/converter_flags.cc +++ b/mindspore/lite/tools/converter/converter_flags.cc @@ -45,7 +45,10 @@ Flags::Flags() { AddFlag(&Flags::bitNumIn, "bitNum", "Weight quantization bitNum", "8"); AddFlag(&Flags::quantWeightSizeStr, "quantWeightSize", "Weight quantization size threshold", "0"); AddFlag(&Flags::quantWeightChannelStr, "quantWeightChannel", "Channel threshold for weight quantization", "16"); - AddFlag(&Flags::configFile, "configFile", "Configuration for post-training, offline split op to parallel", ""); + AddFlag(&Flags::configFile, "configFile", + "Configuration for post-training, offline split op to parallel," + "and converter for nnie", + ""); AddFlag(&Flags::trainModelIn, "trainModel", "whether the model is going to be trained on device. " "true | false", diff --git a/mindspore/lite/tools/optimizer/common/format_utils.cc b/mindspore/lite/tools/optimizer/common/format_utils.cc index 443875247cd..a597df43aca 100644 --- a/mindspore/lite/tools/optimizer/common/format_utils.cc +++ b/mindspore/lite/tools/optimizer/common/format_utils.cc @@ -56,6 +56,7 @@ #include "ops/op_utils.h" #include "ops/quant_dtype_cast.h" #include "ops/resize.h" +#include "ops/roi_pooling.h" #include "ops/sgd.h" #include "ops/space_to_batch.h" #include "ops/space_to_batch_nd.h" @@ -88,6 +89,7 @@ static const std::unordered_map> NHWCOpMap = { {ops::kNamePReLUFusion, {1}}, {ops::kNameResize, {1}}, {ops::kNameResizeGrad, {}}, + {ops::kNameROIPooling, {1}}, {ops::kNameSGD, {2}}, {ops::kNameSpaceToBatch, {1}}, {ops::kNameSpaceToBatchND, {1}}, diff --git a/mindspore/lite/tools/optimizer/graph/node_infershape.cc b/mindspore/lite/tools/optimizer/graph/node_infershape.cc index 382ab530788..1e2812a96ee 100644 --- a/mindspore/lite/tools/optimizer/graph/node_infershape.cc +++ b/mindspore/lite/tools/optimizer/graph/node_infershape.cc @@ -132,45 +132,49 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) { fbb.Clear(); return lite::RET_ERROR; } - auto parameter_gen = lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), lite::SCHEMA_CUR); - if (parameter_gen == nullptr) { - MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << schema::EnumNamePrimitiveType(prim->value_type()); - FreeTensors(&inputs); - FreeTensors(&outputs); - fbb.Clear(); - return lite::RET_ERROR; - } - auto parameter = parameter_gen(prim); - if (parameter == nullptr) { - MS_LOG(ERROR) << "parameter is nullptr."; - FreeTensors(&inputs); - FreeTensors(&outputs); - fbb.Clear(); - return lite::RET_ERROR; - } - RectifyFormat(cnode, inputs, fmk_type_); - auto status = KernelInferShape(inputs, outputs, parameter); - if (status == lite::RET_OK) { - anf_prim->AddAttr(kInferDone, MakeValue(true)); - } - if (status == lite::RET_OK || status == lite::RET_INFER_INVALID) { - auto set_status = SetCNodeAbstract(cnode, outputs, status); - if (set_status != lite::RET_OK) { - MS_LOG(ERROR) << "set CNode abstract failed: " << cnode->fullname_with_scope(); + auto ret = KernelInferShape(inputs, outputs, prim, {}); + if (ret == lite::RET_NOT_SUPPORT) { + auto parameter_gen = + lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), lite::SCHEMA_CUR); + if (parameter_gen == nullptr) { + MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << schema::EnumNamePrimitiveType(prim->value_type()); FreeTensors(&inputs); FreeTensors(&outputs); - free(parameter); fbb.Clear(); - return set_status; + return lite::RET_ERROR; } - } else { - MS_LOG(ERROR) << "infer shape failed."; + auto parameter = parameter_gen(prim); + if (parameter == nullptr) { + MS_LOG(ERROR) << "parameter is nullptr."; + FreeTensors(&inputs); + FreeTensors(&outputs); + fbb.Clear(); + return lite::RET_ERROR; + } + RectifyFormat(cnode, inputs, fmk_type_); + ret = KernelInferShape(inputs, outputs, parameter); + if (ret == lite::RET_OK) { + anf_prim->AddAttr(kInferDone, MakeValue(true)); + } + if (ret == lite::RET_OK || ret == lite::RET_INFER_INVALID) { + auto set_status = SetCNodeAbstract(cnode, outputs, ret); + if (set_status != lite::RET_OK) { + MS_LOG(ERROR) << "set CNode abstract failed: " << cnode->fullname_with_scope(); + FreeTensors(&inputs); + FreeTensors(&outputs); + free(parameter); + fbb.Clear(); + return set_status; + } + } else { + MS_LOG(ERROR) << "infer shape failed."; + } + FreeTensors(&inputs); + FreeTensors(&outputs); + free(parameter); } - FreeTensors(&inputs); - FreeTensors(&outputs); - free(parameter); fbb.Clear(); - return status; + return ret; } std::vector NodeInferShape::GetInputShape(const CNodePtr &cnode, size_t index) { diff --git a/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_converter.so b/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_converter.so new file mode 100755 index 00000000000..054eab0e436 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_converter.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_data_process.so b/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_data_process.so new file mode 100755 index 00000000000..f08583752af Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_data_process.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/libnnie_mapper.so b/mindspore/lite/tools/providers/NNIE/3516D/libnnie_mapper.so new file mode 100755 index 00000000000..84c48f905be Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/libnnie_mapper.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so new file mode 100644 index 00000000000..9a116c05b8b Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2 new file mode 100644 index 00000000000..9a116c05b8b Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2.0 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2.0 new file mode 100644 index 00000000000..9a116c05b8b Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_core.so.4.2.0 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so new file mode 100644 index 00000000000..b07fe08ce29 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2 new file mode 100644 index 00000000000..b07fe08ce29 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2.0 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2.0 new file mode 100644 index 00000000000..b07fe08ce29 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgcodecs.so.4.2.0 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so new file mode 100644 index 00000000000..52b46fda4a9 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2 new file mode 100644 index 00000000000..52b46fda4a9 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2.0 b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2.0 new file mode 100644 index 00000000000..52b46fda4a9 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/libopencv_imgproc.so.4.2.0 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so new file mode 100644 index 00000000000..507bf79a191 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so.3.9.0.0 b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so.3.9.0.0 new file mode 100644 index 00000000000..507bf79a191 Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotobuf.so.3.9.0.0 differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so new file mode 100644 index 00000000000..cf792792cdc Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so differ diff --git a/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so.3.9.0.0 b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so.3.9.0.0 new file mode 100644 index 00000000000..cf792792cdc Binary files /dev/null and b/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/libprotoc.so.3.9.0.0 differ