[MSLITE][NNIE] support nnie.

This commit is contained in:
parent df491ffe53
commit c82f1aee45

build.sh | 26 lines changed
@@ -568,6 +568,24 @@ build_lite()
     cd ${BASEPATH}/mindspore/lite/build
     write_commit_file

+    if [[ "${local_lite_platform}" == "arm32" ]]; then
+      if [[ "${TOOLCHAIN_FILE}" && "${TOOLCHAIN_NAME}" ]]; then
+        RUN_TESTCASES="off"
+        COMPILE_MINDDATA_LITE="off"
+        CMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE}
+        CMAKE_TOOLCHAIN_NAME=${TOOLCHAIN_NAME}
+      else
+        CMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
+        ANDROID_NATIVE_API_LEVEL="19"
+        CMAKE_ANDROID_NDK=${ANDROID_NDK}
+        CMAKE_ANDROID_ABI="armeabi-v7a"
+        CMAKE_ANDROID_TOOLCHAIN_NAME="clang"
+        CMAKE_ANDROID_STL=${MSLITE_ANDROID_STL}
+        CMAKE_BUILD_TYPE=${LITE_BUILD_TYPE}
+        ENABLE_FP16="on"
+      fi
+    fi
+
     if [[ "${local_lite_platform}" == "arm64" ]]; then
       if [ "$(uname)" == "Darwin" ]; then
         cmake -DCMAKE_TOOLCHAIN_FILE=${BASEPATH}/cmake/lite_ios.cmake -DARCHS="arm64" -DENABLE_BITCODE=0 \
@@ -593,10 +611,10 @@ build_lite()
     else
         checkndk
         echo "default link libc++_static.a, export MSLITE_ANDROID_STL=c++_shared to link libc++_shared.so"
-        cmake -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" -DANDROID_NATIVE_API_LEVEL="19" \
-            -DANDROID_NDK="${ANDROID_NDK}" -DANDROID_ABI="armeabi-v7a" -DANDROID_TOOLCHAIN_NAME="clang" \
-            -DANDROID_STL=${MSLITE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${LITE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
-            -DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DENABLE_FP16="on" -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
+        cmake -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DTOOLCHAIN_NAME=${CMAKE_TOOLCHAIN_NAME} -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \
+            -DANDROID_NDK=${CMAKE_ANDROID_NDK} -DANDROID_ABI=${CMAKE_ANDROID_ABI} -DANDROID_TOOLCHAIN_NAME=${CMAKE_ANDROID_TOOLCHAIN_NAME} \
+            -DANDROID_STL=${CMAKE_ANDROID_STL} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DBUILD_MINDDATA=${COMPILE_MINDDATA_LITE} \
+            -DPLATFORM_ARM32="on" -DENABLE_NEON="on" -DENABLE_FP16=${ENABLE_FP16} -DCMAKE_INSTALL_PREFIX=${BASEPATH}/output/tmp \
             -DMS_VERSION_MAJOR=${VERSION_MAJOR} -DMS_VERSION_MINOR=${VERSION_MINOR} -DMS_VERSION_REVISION=${VERSION_REVISION} \
             -DENABLE_ASAN=${ENABLE_ASAN} -DENABLE_VERBOSE=${ENABLE_VERBOSE} "${BASEPATH}/mindspore/lite"
     fi
@@ -18,7 +18,8 @@ mindspore_add_pkg(flatbuffers
         EXE flatc
         URL ${REQ_URL}
         MD5 ${MD5}
-        CMAKE_OPTION -DFLATBUFFERS_BUILD_TESTS=OFF -DCMAKE_INSTALL_LIBDIR=lib)
+        CMAKE_OPTION -DCMAKE_C_COMPILER=${FLATC_GCC_COMPILER} -DCMAKE_CXX_COMPILER=${FLATC_GXX_COMPILER}
+        -DFLATBUFFERS_BUILD_TESTS=OFF -DCMAKE_INSTALL_LIBDIR=lib)

 include_directories(${flatbuffers_INC})
 add_library(mindspore::flatbuffers ALIAS flatbuffers::flatbuffers)
@@ -15,6 +15,7 @@ set(MIND_DATA_INC_DIR ${RUNTIME_PKG_NAME}/inference/include/dataset)
 set(TURBO_DIR ${RUNTIME_PKG_NAME}/inference/third_party/libjpeg-turbo)
 set(SECUREC_DIR ${RUNTIME_PKG_NAME}/inference/third_party/securec)
 set(MINDSPORE_LITE_LIB_NAME libmindspore-lite)
+set(MINDSPORE_CORE_LIB_NAME libmindspore_core)
 set(BENCHMARK_NAME benchmark)
 set(BENCHMARK_ROOT_DIR ${RUNTIME_PKG_NAME}/tools/benchmark)
@@ -253,6 +254,10 @@ elseif(PLATFORM_ARM32)
                 ${RUNTIME_COMPONENT_NAME})
     endif()
     endif()
+    if(MSLITE_ENABLE_NNIE AND TARGET_HIMIX200)
+        install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie.so
+                DESTINATION ${RUNTIME_PKG_NAME}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME})
+    endif()
 elseif(WIN32)
     get_filename_component(CXX_DIR ${CMAKE_CXX_COMPILER} PATH)
     file(GLOB LIB_LIST ${CXX_DIR}/libstdc++-6.dll ${CXX_DIR}/libwinpthread-1.dll
@@ -393,6 +398,39 @@ else()
         install(FILES ${TOP_DIR}/mindspore/lite/build/tools/cropper/cropper_mapping_npu.cfg
                 DESTINATION ${CROPPER_ROOT_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
     endif()
+    if(NOT SUPPORT_TRAIN AND MSLITE_ENABLE_NNIE)
+        install(DIRECTORY ${TOP_DIR}/mindspore/lite/build/schema/ DESTINATION ${CONVERTER_ROOT_DIR}/include/schema
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(DIRECTORY ${TOP_DIR}/mindspore/core/abstract/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/abstract
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(DIRECTORY ${TOP_DIR}/mindspore/core/base/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/base
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(DIRECTORY ${TOP_DIR}/mindspore/core/ir/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ir
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(DIRECTORY ${TOP_DIR}/mindspore/core/ops/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/ops
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(DIRECTORY ${TOP_DIR}/mindspore/core/utils/ DESTINATION ${CONVERTER_ROOT_DIR}/include/core/utils
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
+        install(FILES ${TOP_DIR}/mindspore/lite/build/tools/converter/mindspore_core/${MINDSPORE_CORE_LIB_NAME}.a
+                DESTINATION ${CONVERTER_ROOT_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
+        install(FILES ${TOP_DIR}/mindspore/lite/build/securec/src/libsecurec.a DESTINATION ${CONVERTER_ROOT_DIR}/lib
+                COMPONENT ${RUNTIME_COMPONENT_NAME})
+        file(GLOB PROTOBUF_LIB_PATH ${TOP_DIR}/mindspore/lite/build/.mslib/protobuf_*/lib/libprotobuf.a)
+        install(FILES ${PROTOBUF_LIB_PATH} DESTINATION ${CONVERTER_ROOT_DIR}/lib
+                COMPONENT ${RUNTIME_COMPONENT_NAME})
+        install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_converter.so
+                DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/ COMPONENT ${RUNTIME_COMPONENT_NAME})
+        install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libmslite_nnie_data_process.so
+                DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME})
+        install(FILES ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/libnnie_mapper.so
+                DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D COMPONENT ${RUNTIME_COMPONENT_NAME})
+        install(DIRECTORY ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/opencv-4.2.0/lib/
+                DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/third_party/opencv-4.2.0
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.so*")
+        install(DIRECTORY ${TOP_DIR}/mindspore/lite/tools/providers/NNIE/3516D/protobuf-3.9.0/lib/
+                DESTINATION ${CONVERTER_ROOT_DIR}/providers/3516D/third_party/protobuf-3.9.0
+                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.so*")
+    endif()
 endif()

 if(CMAKE_SYSTEM_NAME MATCHES "Windows")
@@ -31,7 +31,9 @@ typedef struct ArgElement {
     int32_t i_data_;
     float f_data_;
 #ifdef ENABLE_ARM
+#ifndef SUPPORT_NNIE
     float16_t f16_data_;
 #endif
+#endif
   } data_;
 } ArgElement;
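
Note: the new SUPPORT_NNIE guard compiles the fp16 member out of the union, presumably because the arm-himix200-linux GCC used for the NNIE target does not provide float16_t. A sketch of the guarded type in context (the index_ field and union wrapper are assumptions; only the fragment above appears in the hunk):

    #include <stdint.h>
    #ifdef ENABLE_ARM
    #include <arm_neon.h>  // defines float16_t on fp16-capable toolchains
    #endif

    typedef struct ArgElement {
      uint32_t index_;  // assumed field, not shown in the hunk
      union ArgData {
        int32_t i_data_;
        float f_data_;
    #ifdef ENABLE_ARM
    #ifndef SUPPORT_NNIE
        float16_t f16_data_;  // absent when the toolchain lacks fp16 support
    #endif
    #endif
      } data_;
    } ArgElement;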
@@ -97,8 +97,13 @@ int ElementLogicalAnd(const float *in0, const float *in1, float *out, int size)
   uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
   uint32x4_t zeros = vdupq_n_u32(0);
   for (; index <= size - 4; index += C4NUM) {
+#ifndef SUPPORT_NNIE
     uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in0 + index)), mask);
     uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in1 + index)), mask);
+#else
+    uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask);
+    uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask);
+#endif
     float32x4_t vout = vbslq_f32(vceqq_u32(vandq_u32(vin0, vin1), zeros), vfalse, vtrue);
     vst1q_f32(out + index, vout);
   }

@@ -133,8 +138,13 @@ int ElementLogicalOr(const float *in0, const float *in1, float *out, int size) {
   uint32x4_t mask = vmovq_n_u32(((uint32_t)(1u << 31) - 1));
   uint32x4_t zeros = vdupq_n_u32(0);
   for (; index <= size - 4; index += C4NUM) {
+#ifndef SUPPORT_NNIE
     uint32x4_t vin0 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in0 + index)), mask);
     uint32x4_t vin1 = vandq_u32(vreinterpretq_s32_f32(vld1q_f32(in1 + index)), mask);
+#else
+    uint32x4_t vin0 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in0 + index)), mask);
+    uint32x4_t vin1 = vandq_u32(vreinterpretq_u32_f32(vld1q_f32(in1 + index)), mask);
+#endif
     float32x4_t vout = vbslq_f32(vceqq_u32(vorrq_u32(vin0, vin1), zeros), vfalse, vtrue);
     vst1q_f32(out + index, vout);
   }
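
Note on the new SUPPORT_NNIE branches above: vandq_u32 expects uint32x4_t operands, while vreinterpretq_s32_f32 yields int32x4_t, so the original spelling leans on an implicit vector conversion that some toolchains reject (the himix200 GCC, by the look of this hunk). A minimal, correctly typed sketch of the same bit-masking idiom:

    #include <arm_neon.h>

    // Clear the sign bit of four packed floats. All reinterprets here are
    // zero-cost bit casts; the _u32_ variants keep every operand of
    // vandq_u32 at its declared type.
    static inline float32x4_t AbsBits(float32x4_t v) {
      uint32x4_t mask = vdupq_n_u32(0x7FFFFFFFu);
      uint32x4_t bits = vandq_u32(vreinterpretq_u32_f32(v), mask);
      return vreinterpretq_f32_u32(bits);
    }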
@@ -219,6 +219,7 @@ void TiledC4MatmulFp32(float *dst, const float *src, const float *weight, size_t
 #endif

 #ifdef ENABLE_ARM32
+#ifndef SUPPORT_NNIE
 void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) {
   asm volatile(
     "mov r11, %[src_ptr]\n"
@@ -277,6 +278,66 @@ void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, s
     : "r8", "r10", "r11", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
   return;
 }
+#else
+void DeConvWgMergeArm32(const float *src_ptr, float *dst_ptr, size_t src_step, size_t dst_step) {
+  asm volatile(
+    "mov r7, %[src_ptr]\n"
+    "mov r8, %[dst_ptr]\n"
+    "mov r10, r8\n"
+
+    "vld1.32 {q0}, [r7], %[src_step]\n"
+    "vld1.32 {q1}, [r8], %[dst_step]\n"
+    "vld1.32 {q2}, [r7], %[src_step]\n"
+    "vld1.32 {q3}, [r8], %[dst_step]\n"
+
+    "vadd.f32 q0, q0, q1\n"
+    "vld1.32 {q8}, [r7], %[src_step]\n"
+    "vadd.f32 q2, q2, q3\n"
+
+    "vst1.32 {q0}, [r10], %[dst_step]\n"
+    "vst1.32 {q2}, [r10], %[dst_step]\n"
+
+    "vld1.32 {q9}, [r8], %[dst_step]\n"
+
+    "vld1.32 {q10}, [r7], %[src_step]\n"
+
+    "vadd.f32 q8, q8, q9\n"
+    "vld1.32 {q11}, [r8], %[dst_step]\n"
+    "vadd.f32 q10, q10, q11\n"
+
+    "vld1.32 {q0}, [r7], %[src_step]\n"
+    "vst1.32 {q8}, [r10], %[dst_step]\n"
+    "vst1.32 {q10}, [r10], %[dst_step]\n"
+
+    "vld1.32 {q1}, [r8], %[dst_step]\n"
+
+    "vld1.32 {q2}, [r7], %[src_step]\n"
+    "vld1.32 {q3}, [r8], %[dst_step]\n"
+
+    "vadd.f32 q0, q0, q1\n"
+    "vadd.f32 q2, q2, q3\n"
+
+    "vst1.32 {q0}, [r10], %[dst_step]\n"
+    "vst1.32 {q2}, [r10], %[dst_step]\n"
+
+    "vld1.32 {q8}, [r7], %[src_step]\n"
+    "vld1.32 {q9}, [r8], %[dst_step]\n"
+
+    "vld1.32 {q10}, [r7], %[src_step]\n"
+    "vld1.32 {q11}, [r8], %[dst_step]\n"
+
+    "vadd.f32 q8, q8, q9\n"
+    "vadd.f32 q10, q10, q11\n"
+
+    "vst1.32 {q8}, [r10], %[dst_step]\n"
+    "vst1.32 {q10}, [r10], %[dst_step]\n"
+
+    :
+    : [ src_ptr ] "r"(src_ptr), [ dst_ptr ] "r"(dst_ptr), [ src_step ] "r"(src_step), [ dst_step ] "r"(dst_step)
+    : "r8", "r10", "r7", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
+  return;
+}
+#endif
 #endif

 void DeConvWgMerge(const float *src, float *dst, size_t src_stride, size_t dst_stride, size_t count) {
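
Note: the NNIE variant above is the same algorithm with r11 replaced by r7 throughout. Our reading (an assumption; the commit does not say): r11 is the ARM-mode frame pointer, which the himix200 toolchain reserves, so it may not appear in a clobber list there, while r7 is free outside Thumb code. A minimal sketch of an inline-asm block that stays off r11:

    // Adds 1.0f to four floats, staging the pointer in r7 and declaring it
    // clobbered so the compiler never caches a value there.
    static inline void AddOneVec(const float *src, float *dst) {
      asm volatile(
        "mov r7, %[src]\n"
        "vld1.32 {q0}, [r7]\n"
        "vmov.f32 q1, #1.0\n"
        "vadd.f32 q0, q0, q1\n"
        "mov r7, %[dst]\n"
        "vst1.32 {q0}, [r7]\n"
        :
        : [ src ] "r"(src), [ dst ] "r"(dst)
        : "r7", "q0", "q1", "memory");
    }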
@@ -420,6 +420,7 @@ void RowMajor2Col8Major_arm64(const float *src_c, float *dst_c, size_t col) {
 }
 #endif
 #ifdef ENABLE_ARM32
+#ifndef SUPPORT_NNIE
 void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) {
   size_t stride = col * sizeof(float);
   asm volatile(
@@ -462,6 +463,50 @@ void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) {
     : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
   return;
 }
+#else
+void RowMajor2Col8Major_arm32(const float *src_c, float *dst_c, size_t col) {
+  size_t stride = col * sizeof(float);
+  asm volatile(
+    "mov r10, %[src_c]\n"
+    "mov r7, %[dst_c]\n"
+
+    "vld1.32 {q0}, [r10], %[stride]\n"
+    "vld1.32 {q2}, [r10], %[stride]\n"
+    "vld1.32 {q4}, [r10], %[stride]\n"
+    "vld1.32 {q6}, [r10], %[stride]\n"
+
+    "vtrn.32 d0, d4\n"
+    "vtrn.32 d1, d5\n"
+    "vtrn.32 d8, d12\n"
+    "vtrn.32 d9, d13\n"
+
+    "vld1.32 {q1}, [r10], %[stride]\n"
+    "vld1.32 {q3}, [r10], %[stride]\n"
+    "vld1.32 {q5}, [r10], %[stride]\n"
+    "vld1.32 {q7}, [r10], %[stride]\n"
+
+    "vswp d1, d8\n"
+    "vswp d5, d12\n"
+
+    "vtrn.32 d2, d6\n"
+    "vtrn.32 d3, d7\n"
+    "vtrn.32 d10, d14\n"
+    "vtrn.32 d11, d15\n"
+
+    "vswp d3, d10\n"
+    "vswp d7, d14\n"
+
+    "vst1.32 {q0, q1}, [r7]!\n"
+    "vst1.32 {q2, q3}, [r7]!\n"
+    "vst1.32 {q4, q5}, [r7]!\n"
+    "vst1.32 {q6, q7}, [r7]!\n"
+
+    :
+    : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
+    : "r10", "r7", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+  return;
+}
+#endif
 #endif
 void RowMajor2Col8Major(const float *src_ptr, float *dst_ptr, size_t row, size_t col) {
   size_t row8 = row / C8NUM * C8NUM;
@@ -353,6 +353,7 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
     "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
     "v30", "v31");
 #elif ENABLE_ARM32
+#ifndef SUPPORT_NNIE
   /* 8x4 row-major to col-major */
   size_t stride = col * sizeof(float);
   asm volatile(
@@ -393,6 +394,48 @@ static void RowMajor2Col8MajorStride(const float *src_ptr, float *dst_ptr, size_
     :
     : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
     : "r10", "r11", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+#else
+  /* 8x4 row-major to col-major */
+  size_t stride = col * sizeof(float);
+  asm volatile(
+    "mov r10, %[src_c]\n"
+    "mov r7, %[dst_c]\n"
+
+    "vld1.32 {q0}, [r10], %[stride]\n"
+    "vld1.32 {q2}, [r10], %[stride]\n"
+    "vld1.32 {q4}, [r10], %[stride]\n"
+    "vld1.32 {q6}, [r10], %[stride]\n"
+
+    "vtrn.32 d0, d4\n"
+    "vtrn.32 d1, d5\n"
+    "vtrn.32 d8, d12\n"
+    "vtrn.32 d9, d13\n"
+
+    "vld1.32 {q1}, [r10], %[stride]\n"
+    "vld1.32 {q3}, [r10], %[stride]\n"
+    "vld1.32 {q5}, [r10], %[stride]\n"
+    "vld1.32 {q7}, [r10], %[stride]\n"
+
+    "vswp d1, d8\n"
+    "vswp d5, d12\n"
+
+    "vtrn.32 d2, d6\n"
+    "vtrn.32 d3, d7\n"
+    "vtrn.32 d10, d14\n"
+    "vtrn.32 d11, d15\n"
+
+    "vswp d3, d10\n"
+    "vswp d7, d14\n"
+
+    "vst1.32 {q0, q1}, [r7]!\n"
+    "vst1.32 {q2, q3}, [r7]!\n"
+    "vst1.32 {q4, q5}, [r7]!\n"
+    "vst1.32 {q6, q7}, [r7]!\n"
+
+    :
+    : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ stride ] "r"(stride)
+    : "r10", "r7", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7");
+#endif
 #else
   for (int tr = 0; tr < 8; tr++) {
     for (int tc = 0; tc < 4; tc++) {
@@ -146,7 +146,8 @@ void MaxPoolingGrad(const float *input_ptr, const float *dy_ptr, float *output_p
 #ifdef ENABLE_ARM
         uint32x4_t index = {val_idx, val_idx + 1, val_idx + 2, val_idx + 3};
         float32x4_t in = vld1q_f32(inPtr + val_idx);
-        max_idx = MaxIndex(in, &max_val, index, max_idx);
+        max_idx = vreinterpretq_u32_s32(
+          MaxIndex(in, &max_val, vreinterpretq_s32_u32(index), vreinterpretq_s32_u32(max_idx)));
 #else
         float val[4] = {inPtr[val_idx], inPtr[val_idx + 1], inPtr[val_idx + 2], inPtr[val_idx + 3]};
         for (int i = 0; i < 4; i++) {
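
Note: the fix keeps MaxIndex's signed-vector signature and converts at the call boundary instead. A self-contained sketch of the boundary-cast pattern (ClampPositive is an illustrative stand-in, not MindSpore's MaxIndex):

    #include <arm_neon.h>

    // A helper declared in terms of signed vectors...
    static inline int32x4_t ClampPositive(int32x4_t v) {
      return vmaxq_s32(v, vdupq_n_s32(0));
    }

    // ...called with unsigned data: vreinterpretq_* casts the bits both ways
    // at zero cost, so neither side needs to change its declared type.
    static inline uint32x4_t CallWithUnsigned(uint32x4_t idx) {
      return vreinterpretq_u32_s32(ClampPositive(vreinterpretq_s32_u32(idx)));
    }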
@@ -137,10 +137,10 @@ void RowMajor2Row16x4MajorInt8(int8_t *src_ptr, int8_t *dst_ptr, int row, int co
       "vld1.8 {q2}, [r0], r2 \n"
       "vld1.8 {q3}, [r0], r2 \n"

-      "vst1.32 q0, [r1], r3 \n"
-      "vst1.32 q1, [r1], r3 \n"
-      "vst1.32 q2, [r1], r3 \n"
-      "vst1.32 q3, [r1], r3 \n"
+      "vst1.32 {d0, d1}, [r1], r3 \n"
+      "vst1.32 {d2, d3}, [r1], r3 \n"
+      "vst1.32 {d4, d5}, [r1], r3 \n"
+      "vst1.32 {d6, d7}, [r1], r3 \n"

       :
       : [ dst_c ] "r"(dst_c), [ src_c ] "r"(src_c), [ col_offset ] "r"(col_offset)
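
Note: VST1 takes a list of d registers, so `vst1.32 q0, [r1]` names the operand in a form stricter assemblers reject (presumably the himix200 binutils); each q register is rewritten as its two d halves in braces. A minimal sketch of the accepted spelling:

    // q0 aliases d0/d1: the braced d-register list is the portable syntax.
    static inline void StoreFour(float *dst) {
      asm volatile(
        "vld1.32 {d0, d1}, [%[dst]]\n"
        "vst1.32 {d0, d1}, [%[dst]]\n"
        :
        : [ dst ] "r"(dst)
        : "d0", "d1", "memory");
    }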
@@ -17,6 +17,7 @@
 #include "base/base.h"
 #include <atomic>
 #include <mutex>
+#include <string>
 #include <unordered_map>

 namespace mindspore {
@@ -32,7 +33,7 @@ uint32_t Base::GetTypeId(const char *const type_name) {
   if (it != t->map.end()) {
     return it->second;
   }
-  uint32_t tid = ++(t->type_counter);
+  uint32_t tid = std::hash<std::string>()(type_name);
   t->map[type_name] = tid;
   return tid;
 }
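
Note: hashing the type name makes the id a pure function of the name, so separately compiled modules (the runtime and an NNIE provider .so, presumably the motivation here) agree on ids without sharing counter state, at the cost of a theoretical collision risk. A self-contained sketch of the scheme:

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_map>

    // Derive a stable id from the name itself and cache it; two libraries
    // computing this independently get the same value, which a
    // process-local counter cannot guarantee.
    uint32_t StableTypeId(const char *type_name,
                          std::unordered_map<std::string, uint32_t> *cache) {
      auto it = cache->find(type_name);
      if (it != cache->end()) {
        return it->second;
      }
      uint32_t tid = static_cast<uint32_t>(std::hash<std::string>()(type_name));
      (*cache)[type_name] = tid;
      return tid;
    }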
@@ -588,6 +588,8 @@ inline const PrimitivePtr kPrimIf = std::make_shared<Primitive>("If");
 inline const PrimitivePtr kPrimAvgPoolFusion = std::make_shared<Primitive>("AvgPoolFusion");
 inline const PrimitivePtr kPrimMaxPoolFusion = std::make_shared<Primitive>("MaxPoolFusion");
 inline const PrimitivePtr kPrimActivation = std::make_shared<Primitive>("Activation");
+inline const PrimitivePtr kPrimPReLUFusion = std::make_shared<Primitive>("PReLUFusion");
+inline const PrimitivePtr kPrimCustom = std::make_shared<Primitive>("Custom");
 inline const PrimitivePtr kPrimTopKFusion = std::make_shared<Primitive>("TopKFusion");
 inline const PrimitivePtr kPrimTileFusion = std::make_shared<Primitive>("TileFusion");
 inline const PrimitivePtr kPrimReduceFusion = std::make_shared<Primitive>("ReduceFusion");
@@ -42,6 +42,7 @@ void InterThreadPool::ThreadAsyncRun(Worker *worker) {
 }

 void InterThreadPool::ActorThreadRun() {
+#ifndef SUPPORT_NNIE
   ActorReference actor;
   {
     std::unique_lock<std::mutex> _l(actor_mutex_);

@@ -54,6 +55,7 @@ void InterThreadPool::ActorThreadRun() {
   }
   actor->Run();
   finish_cond_var_.notify_one();
+#endif
 }

 void InterThreadPool::EnqueReadyActor(const ActorReference &actor) {
@@ -100,7 +100,7 @@ void ThreadPool::KernelThreadRun(Worker *worker) {
 int ThreadPool::ParallelLaunch(const Func &func, Content content, int task_num) {
   // distribute task to the KernelThread and the free ActorThread,
   // if the task num is greater than the KernelThread num
-  Task task = Task(func, content);
+  Task task = {func, content};
   Worker *curr = CurrentWorker();
   if (inter_thread_num_ == thread_num_ || curr == nullptr) {
     SyncRunTask(&task, task_num);
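
Note: `Task task = Task(func, content);` copy-initializes from a temporary, which fails before C++17's guaranteed copy elision whenever Task is non-copyable; the braced form initializes the object in place. A sketch under the assumption that Task is an aggregate with an atomic member (the real field names may differ):

    #include <atomic>
    #include <functional>

    using Func = std::function<int(void *, int)>;  // assumed signature

    struct Task {
      Func func;
      void *content;
      std::atomic<int> finished{0};  // atomics make the type non-copyable
    };

    void Launch(const Func &func, void *content) {
      Task task = {func, content};  // aggregate init: no copy or move needed
      (void)task;
    }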
@@ -3,7 +3,12 @@ project(Lite)

 set(BUILD_LITE "on")

-if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0)
+if(TOOLCHAIN_NAME STREQUAL "himix200")
+    set(TARGET_HIMIX200 on)
+    add_compile_definitions(SUPPORT_NNIE)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.3.0 AND NOT TARGET_HIMIX200)
     message(FATAL_ERROR "GCC version ${CMAKE_CXX_COMPILER_VERSION} must not be less than 7.3.0")
 endif()
@@ -22,6 +27,7 @@ option(MSLITE_ENABLE_AVX "enable AVX instruction set, only x86_64 support" off)
 option(MSLITE_ENABLE_CONVERTER "enable converter, only x86_64 support" on)
 option(MSLITE_ENABLE_TOOLS "enable tools" on)
 option(MSLITE_ENABLE_TESTCASES "enable testcase" off)
+option(MSLITE_ENABLE_NNIE "enable NNIE" off)

 # Option that can be configured through manually
 option(ENABLE_ARM82_A32 "if build fp16 on platform_arm32" off)
@@ -56,6 +62,9 @@ endif()
 if(DEFINED ENV{MSLITE_ENABLE_TESTCASES})
     set(MSLITE_ENABLE_TESTCASES $ENV{MSLITE_ENABLE_TESTCASES})
 endif()
+if(DEFINED ENV{MSLITE_ENABLE_NNIE})
+    set(MSLITE_ENABLE_NNIE $ENV{MSLITE_ENABLE_NNIE})
+endif()

 if(PLATFORM_ARM64 OR PLATFORM_ARM32)
     set(PLATFORM_ARM "on")
@@ -198,6 +207,9 @@ else()
         -Wno-deprecated-declarations -Wno-missing-braces ${CMAKE_C_FLAGS}")
     set(CMAKE_CXX_FLAGS "-fPIC -fPIE -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -fstack-protector-strong -Wno-attributes \
         -Wno-deprecated-declarations -Wno-missing-braces -Wno-overloaded-virtual ${CMAKE_CXX_FLAGS}")
+    if(TARGET_HIMIX200)
+        set(CMAKE_CXX_FLAGS "-Wno-error=maybe-uninitialized ${CMAKE_CXX_FLAGS}")
+    endif()
     if(NOT WIN32)
         set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_SHARED_LINKER_FLAGS}")
         set(CMAKE_EXE_LINKER_FLAGS "-Wl,-z,relro,-z,now -Wl,-z,noexecstack ${CMAKE_EXE_LINKER_FLAGS}")
@@ -14,6 +14,9 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64)
         -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections \
         -fdata-sections -ffast-math -fno-rtti -fno-exceptions")
+    if(TARGET_HIMIX200)
+        string(REPLACE "-fno-rtti " "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
+    endif()
 endif()
 if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND APPLE)
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fstrict-aliasing -ffunction-sections \
@@ -25,7 +28,10 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64)
     endif()
 endif()

-set(API_SRC
+if(TARGET_HIMIX200)
+    set(API_SRC)
+else()
+    set(API_SRC
     ${CORE_DIR}/utils/status.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/cell.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/serialization.cc
@@ -35,7 +41,8 @@ set(API_SRC
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model/model_impl.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/graph.cc
     ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/tensor_impl.cc
-    )
+        )
+endif()

 if(SUPPORT_NPU)
     include_directories(${DDK_PATH})
@@ -137,6 +144,16 @@ if(ENABLE_MINDRT)
         ${CMAKE_CURRENT_SOURCE_DIR}/lite_mindrt.cc
         ${CMAKE_CURRENT_SOURCE_DIR}/mindrt_executor.cc
     )
+elseif(TARGET_HIMIX200)
+    include_directories(${CORE_DIR}/mindrt)
+    include_directories(${CORE_DIR}/mindrt/include)
+    include_directories(${CORE_DIR}/mindrt/src)
+    set(LITE_SRC
+        ${LITE_SRC}
+        ${CORE_DIR}/mindrt/src/thread/core_affinity.cc
+        ${CORE_DIR}/mindrt/src/thread/inter_threadpool.cc
+        ${CORE_DIR}/mindrt/src/thread/threadpool.cc
+    )
 endif()

 add_subdirectory(ops)
@@ -198,7 +215,7 @@ if(SUPPORT_NPU)
     target_link_libraries(mindspore-lite npu_kernel_mid)
     target_link_libraries(mindspore-lite_static npu_kernel_mid)
 endif()
-if(PLATFORM_ARM32 OR PLATFORM_ARM64)
+if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_HIMIX200)
     target_link_libraries(mindspore-lite log)
     target_link_libraries(mindspore-lite_static log)
 endif()
@@ -221,12 +238,15 @@ if(SUPPORT_TRAIN)
     target_link_libraries(mindspore-lite-train_static minddata-lite mindspore-lite)
 endif()

-if("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
-    if(NOT APPLE AND PLATFORM_ARM)
-        set(NDK_STRIP
-            "${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
+if(NOT APPLE AND PLATFORM_ARM AND NOT TARGET_HIMIX200)
+    set(NDK_STRIP
+        "${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip")
+endif()
+
+if(NOT APPLE AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
+    if(PLATFORM_ARM)
         add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND ${NDK_STRIP}
-            ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so)
+                ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so)
     elseif(NOT WIN32)
         add_custom_command(TARGET mindspore-lite POST_BUILD COMMAND strip ${CMAKE_BINARY_DIR}/src/libmindspore-lite*.so)
     endif()
@@ -83,11 +83,9 @@ const char *EnumStrForMsLogLevel(MsLogLevel level) {

 void LogWriter::OutputLog(const std::ostringstream &msg) const {
   if (IsPrint(log_level_)) {
-#ifdef ENABLE_ARM
-#if defined(__ANDROID__) || defined(ANDROID)
+#if defined(ENABLE_ARM) && (defined(__ANDROID__) || defined(ANDROID))
     __android_log_print(GetAndroidLogLevel(log_level_), ANDROID_LOG_TAG, "[%s:%d] %s] %s", location_.file_,
                         location_.line_, location_.func_, msg.str().c_str());
-#endif
 #else
     printf("%s [%s:%d] %s] %s\n", EnumStrForMsLogLevel(log_level_), location_.file_, location_.line_, location_.func_,
            msg.str().c_str());
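
Note: with the old nested guards, a non-Android ARM build (such as the Hi3516 Linux target) entered #ifdef ENABLE_ARM, failed the Android check, and fell through both branches without printing anything; the combined condition sends it to printf. A compilable sketch of the resulting flow:

    #include <cstdio>
    #if defined(ENABLE_ARM) && (defined(__ANDROID__) || defined(ANDROID))
    #include <android/log.h>
    #endif

    void OutputLogSketch(const char *msg) {
    #if defined(ENABLE_ARM) && (defined(__ANDROID__) || defined(ANDROID))
      __android_log_print(ANDROID_LOG_INFO, "MS_LITE", "%s", msg);
    #else
      printf("%s\n", msg);  // host builds and non-Android ARM both land here
    #endif
    }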
@@ -138,7 +138,7 @@ class LiteSession : public session::LiteSession {
   std::unordered_map<Tensor *, Tensor *> graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
   Executor *executor_ = nullptr;
   Model *model_ = nullptr;
-  std::atomic<bool> is_running_ = false;
+  std::atomic<bool> is_running_ = {false};
   bool is_train_session_ = false;
   friend class TransferSession;
 #if SUPPORT_NPU
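
Note: `std::atomic<bool> is_running_ = false;` is copy-initialization, which needs atomic's deleted copy constructor on pre-C++17 compilers such as the older GCC behind the himix200 target; `= {false}` list-initializes through atomic's converting constructor directly. The same family of fixes recurs in the allocator, subgraph, and tensor hunks below. A sketch:

    #include <atomic>

    struct SessionSketch {
      std::atomic<bool> is_running_ = {false};  // OK on C++11 and later
      // std::atomic<bool> is_running_ = false; // rejected before C++17
    };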
@@ -52,7 +52,7 @@ class DefaultAllocator : public Allocator {
   void UnLock();
   bool ReuseMemory(size_t free_size, size_t size);
   struct MemBuf {
-    std::atomic_int ref_count_ = 0;
+    std::atomic_int ref_count_ = {0};
     size_t size;
     void *buf;
   };
@@ -47,7 +47,7 @@ int NonZeroCPUKernel::Run() {
   }
   auto non_zero_nums = out_tensor->shape()[1];
   int non_zero_count = 0;
-  std::vector coordiate_values(in_tensor->shape().size(), 0);
+  std::vector<int> coordiate_values(in_tensor->shape().size(), 0);
   for (int i = 0; i < in_tensor->ElementsNum(); i += 1) {
     if (input_data[i]) {
       for (size_t j = 0; j < input_dim_size; j++) {
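
Note: `std::vector coordiate_values(...)` relies on C++17 class template argument deduction; naming the element type also compiles in pre-C++17 modes and on older compilers. A sketch:

    #include <vector>

    std::vector<int> MakeZeroCoordinates(size_t rank) {
      return std::vector<int>(rank, 0);  // explicit <int>: portable spelling
    }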
@@ -145,7 +145,7 @@ class CpuFp32SubGraph : public CpuSubGraph {
                   std::vector<LiteKernel *> nodes, Kernel *kernel)
       : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
     subgraph_type_ = kCpuFP32SubGraph;
-    static std::atomic_int index = 0;
+    static std::atomic_int index = {0};
     this->set_name("CpuFP32SubGraph" + std::to_string(index++));
     desc_.data_type = kNumberTypeFloat32;
   }
@@ -217,8 +217,8 @@ class Tensor : public mindspore::tensor::MSTensor {
   std::vector<int> shape_;
   schema::Format format_;
   Category category_;
-  std::atomic_int ref_count_ = 0;
-  int init_ref_count_ = 0;
+  std::atomic_int ref_count_ = {0};
+  size_t init_ref_count_ = 0;
   std::vector<QuantArg> quant_params_;
   std::vector<float> quant_clusters_;
   mindspore::Allocator *allocator_ = nullptr;
@@ -12,7 +12,7 @@ add_executable(benchmark

 add_dependencies(benchmark fbs_src)

-if(PLATFORM_ARM32 OR PLATFORM_ARM64)
+if(PLATFORM_ARM32 OR PLATFORM_ARM64 AND NOT TARGET_HIMIX200)
     if(SUPPORT_NPU AND ANDROID_STL STREQUAL "c++_static")
         target_link_libraries(benchmark mindspore-lite mindspore::json c++_shared)
     else()
@@ -123,6 +123,8 @@ set(LITE_SRC
     ${SRC_DIR}/tensor.cc
     ${SRC_DIR}/ms_tensor.cc
     ${SRC_DIR}/tensorlist.cc
+    ${SRC_DIR}/registry/kernel_interface_registry.cc
+    ${SRC_DIR}/registry/kernel_interface.cc
     ${SRC_DIR}/kernel_registry.cc
     ${SRC_DIR}/inner_kernel.cc
     ${SRC_DIR}/lite_kernel.cc
@@ -45,7 +45,10 @@ Flags::Flags() {
   AddFlag(&Flags::bitNumIn, "bitNum", "Weight quantization bitNum", "8");
   AddFlag(&Flags::quantWeightSizeStr, "quantWeightSize", "Weight quantization size threshold", "0");
   AddFlag(&Flags::quantWeightChannelStr, "quantWeightChannel", "Channel threshold for weight quantization", "16");
-  AddFlag(&Flags::configFile, "configFile", "Configuration for post-training, offline split op to parallel", "");
+  AddFlag(&Flags::configFile, "configFile",
+          "Configuration for post-training, offline split op to parallel,"
+          "and converter for nnie",
+          "");
   AddFlag(&Flags::trainModelIn, "trainModel",
           "whether the model is going to be trained on device. "
           "true | false",
@@ -56,6 +56,7 @@
 #include "ops/op_utils.h"
 #include "ops/quant_dtype_cast.h"
 #include "ops/resize.h"
 #include "ops/roi_pooling.h"
 #include "ops/sgd.h"
 #include "ops/space_to_batch.h"
 #include "ops/space_to_batch_nd.h"

@@ -88,6 +89,7 @@ static const std::unordered_map<std::string, std::vector<size_t>> NHWCOpMap = {
     {ops::kNamePReLUFusion, {1}},
     {ops::kNameResize, {1}},
+    {ops::kNameResizeGrad, {}},
     {ops::kNameROIPooling, {1}},
     {ops::kNameSGD, {2}},
     {ops::kNameSpaceToBatch, {1}},
     {ops::kNameSpaceToBatchND, {1}},
@@ -132,45 +132,49 @@ STATUS NodeInferShape::InferShape(const CNodePtr &cnode) {
     fbb.Clear();
     return lite::RET_ERROR;
   }
-  auto parameter_gen = lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), lite::SCHEMA_CUR);
-  if (parameter_gen == nullptr) {
-    MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << schema::EnumNamePrimitiveType(prim->value_type());
-    FreeTensors(&inputs);
-    FreeTensors(&outputs);
-    fbb.Clear();
-    return lite::RET_ERROR;
-  }
-  auto parameter = parameter_gen(prim);
-  if (parameter == nullptr) {
-    MS_LOG(ERROR) << "parameter is nullptr.";
-    FreeTensors(&inputs);
-    FreeTensors(&outputs);
-    fbb.Clear();
-    return lite::RET_ERROR;
-  }
-  RectifyFormat(cnode, inputs, fmk_type_);
-  auto status = KernelInferShape(inputs, outputs, parameter);
-  if (status == lite::RET_OK) {
-    anf_prim->AddAttr(kInferDone, MakeValue<bool>(true));
-  }
-  if (status == lite::RET_OK || status == lite::RET_INFER_INVALID) {
-    auto set_status = SetCNodeAbstract(cnode, outputs, status);
-    if (set_status != lite::RET_OK) {
-      MS_LOG(ERROR) << "set CNode abstract failed: " << cnode->fullname_with_scope();
+  auto ret = KernelInferShape(inputs, outputs, prim, {});
+  if (ret == lite::RET_NOT_SUPPORT) {
+    auto parameter_gen =
+      lite::PopulateRegistry::GetInstance()->GetParameterCreator(prim->value_type(), lite::SCHEMA_CUR);
+    if (parameter_gen == nullptr) {
+      MS_LOG(ERROR) << "PopulateParameter return nullptr, type: " << schema::EnumNamePrimitiveType(prim->value_type());
       FreeTensors(&inputs);
       FreeTensors(&outputs);
-      free(parameter);
       fbb.Clear();
-      return set_status;
+      return lite::RET_ERROR;
     }
-  } else {
-    MS_LOG(ERROR) << "infer shape failed.";
+    auto parameter = parameter_gen(prim);
+    if (parameter == nullptr) {
+      MS_LOG(ERROR) << "parameter is nullptr.";
+      FreeTensors(&inputs);
+      FreeTensors(&outputs);
+      fbb.Clear();
+      return lite::RET_ERROR;
+    }
+    RectifyFormat(cnode, inputs, fmk_type_);
+    ret = KernelInferShape(inputs, outputs, parameter);
+    if (ret == lite::RET_OK) {
+      anf_prim->AddAttr(kInferDone, MakeValue<bool>(true));
+    }
+    if (ret == lite::RET_OK || ret == lite::RET_INFER_INVALID) {
+      auto set_status = SetCNodeAbstract(cnode, outputs, ret);
+      if (set_status != lite::RET_OK) {
+        MS_LOG(ERROR) << "set CNode abstract failed: " << cnode->fullname_with_scope();
+        FreeTensors(&inputs);
+        FreeTensors(&outputs);
+        free(parameter);
+        fbb.Clear();
+        return set_status;
+      }
+    } else {
+      MS_LOG(ERROR) << "infer shape failed.";
+    }
+    FreeTensors(&inputs);
+    FreeTensors(&outputs);
+    free(parameter);
   }
-  FreeTensors(&inputs);
-  FreeTensors(&outputs);
-  free(parameter);
   fbb.Clear();
-  return status;
+  return ret;
 }

 std::vector<int> NodeInferShape::GetInputShape(const CNodePtr &cnode, size_t index) {
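
Note: the restructuring tries the registry-based KernelInferShape first (the four-argument overload; by our reading this is where a provider such as NNIE registers infer functions for its custom ops) and only falls back to the built-in populate path on RET_NOT_SUPPORT. A self-contained analogue of the control flow, with illustrative names throughout:

    #include <functional>
    #include <iostream>

    enum Status { RET_OK = 0, RET_NOT_SUPPORT = 1, RET_ERROR = 2 };

    // Try the pluggable hook first; fall back to the built-in path only
    // when the hook reports "not supported" for this op.
    Status InferWithFallback(const std::function<Status()> &provider_infer,
                             const std::function<Status()> &builtin_infer) {
      Status ret = provider_infer();
      if (ret == RET_NOT_SUPPORT) {
        ret = builtin_infer();
      }
      return ret;
    }

    int main() {
      auto provider = [] { return RET_NOT_SUPPORT; };  // hook declines the op
      auto builtin = [] { return RET_OK; };
      std::cout << InferWithFallback(provider, builtin) << "\n";  // prints 0
    }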
16 binary files changed (contents not shown).