From a19e6251bc7d77014ce73e530c52a8f490828a39 Mon Sep 17 00:00:00 2001 From: ling Date: Wed, 16 Sep 2020 12:09:23 +0800 Subject: [PATCH] [MSLITE][Develop] modify optimize.so to sdot and fp16 so --- cmake/package_lite.cmake | 3 +- mindspore/lite/nnacl/CMakeLists.txt | 65 +++++++++---------- .../assembly/{opt => fp16}/ConvDwFp16Border.S | 0 .../assembly/{opt => fp16}/ConvDwFp16Center.S | 0 .../assembly/{opt => fp16}/ConvDwFp16Row.S | 0 .../{opt => fp16}/DeconvDwFp16Border.S | 0 .../{opt => fp16}/DeconvDwFp16Center.S | 0 .../assembly/{opt => fp16}/Float16ToFloat32.S | 0 .../assembly/{opt => fp16}/Float32ToFloat16.S | 0 .../{opt => fp16}/IndirectGemmFp16_16x8.S | 0 .../nnacl/assembly/{opt => fp16}/MatmulFp16.S | 0 .../{opt => fp16}/PostFuncBiasReluC8Fp16.S | 0 mindspore/lite/nnacl/optimize/CMakeLists.txt | 28 ++++++++ mindspore/lite/nnacl/optimized_kernel.h | 50 ++++++++++++-- mindspore/lite/src/CMakeLists.txt | 48 +++++++++----- mindspore/lite/src/kernel_registry.cc | 6 ++ .../src/runtime/kernel/arm/CMakeLists.txt | 46 +++++-------- .../kernel/arm/int8/opt_op_handler.cc} | 8 +-- mindspore/lite/test/run_benchmark_nets.sh | 3 +- mindspore/lite/tools/converter/CMakeLists.txt | 1 - 20 files changed, 165 insertions(+), 93 deletions(-) rename mindspore/lite/nnacl/assembly/{opt => fp16}/ConvDwFp16Border.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/ConvDwFp16Center.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/ConvDwFp16Row.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/DeconvDwFp16Border.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/DeconvDwFp16Center.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/Float16ToFloat32.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/Float32ToFloat16.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/IndirectGemmFp16_16x8.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/MatmulFp16.S (100%) rename mindspore/lite/nnacl/assembly/{opt => fp16}/PostFuncBiasReluC8Fp16.S (100%) create mode 100644 mindspore/lite/nnacl/optimize/CMakeLists.txt rename mindspore/lite/{nnacl/opt_op_handler.c => src/runtime/kernel/arm/int8/opt_op_handler.cc} (100%) diff --git a/cmake/package_lite.cmake b/cmake/package_lite.cmake index 7cfa003af6f..56ce91a28fa 100644 --- a/cmake/package_lite.cmake +++ b/cmake/package_lite.cmake @@ -58,7 +58,8 @@ if (PLATFORM_ARM64) install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME}) install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h") install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE) - install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME}) + install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME}) install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME}) elseif (PLATFORM_ARM32) install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME}) diff --git a/mindspore/lite/nnacl/CMakeLists.txt b/mindspore/lite/nnacl/CMakeLists.txt index 2c8baf93ba1..93d11c3e344 100644 --- a/mindspore/lite/nnacl/CMakeLists.txt +++ b/mindspore/lite/nnacl/CMakeLists.txt @@ -1,45 +1,42 @@ project(nnacl) set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) include_directories(NNACL_DIR) -########################### optimized files ########################### -file(GLOB OPTIMIZED_ASSEMBLY - ${NNACL_DIR}/assembly/opt/*.s - ${NNACL_DIR}/assembly/opt/*.S - ) +if (PLATFORM_ARM32 OR PLATFORM_ARM64) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math") + endif() +endif () -file(GLOB FP16_SRC - ${NNACL_DIR}/fp16/*.c - ${TOP_DIR}/src/runtime/kernel/arm/fp16/*.cc - ) +########################### files ########################### +file(GLOB KERNEL_SRC + ${NNACL_DIR}/*.c + ${NNACL_DIR}/fp32/*.c + ${NNACL_DIR}/int8/*.c + ${NNACL_DIR}/quantization/*.c +) -########################### share library build ######################## -set(OPTIMIZED_OPS ${NNACL_DIR}/opt_op_handler.c) - -set_property(SOURCE ${OPTIMIZED_ASSEMBLY} PROPERTY LANGUAGE C) -list(APPEND OPTIMIZED_OPS ${OPTIMIZED_ASSEMBLY} ${FP16_SRC}) +if (SUPPORT_TRAIN) + file (GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c) +endif() if (PLATFORM_ARM64) - string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") - add_library(optimize SHARED ${OPTIMIZED_OPS}) - target_link_libraries( - optimize - mindspore-lite - ) - set_target_properties(optimize PROPERTIES CLEAN_DIRECT_OUTPUT 1) + file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S) + set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) +endif() - if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") - add_custom_command(TARGET optimize POST_BUILD - COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip - ${TOP_DIR}/build/nnacl/liboptimize.so) - endif () +if (PLATFORM_ARM32) + file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S) + set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) +endif() - add_custom_command(TARGET optimize POST_BUILD - COMMAND rm -rf ${TOP_DIR}/output/lib/liboptimize.so - COMMAND mkdir -pv ${TOP_DIR}/output/lib - COMMAND cp ${TOP_DIR}/build/nnacl/liboptimize.so ${TOP_DIR}/output/lib) -endif () +########################### build nnacl static library ######################## +string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +add_library(nnacl STATIC ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC}) + +########################### arm64 build optimize library ######################## +if (PLATFORM_ARM64) + add_subdirectory(${NNACL_DIR}/optimize) +endif() \ No newline at end of file diff --git a/mindspore/lite/nnacl/assembly/opt/ConvDwFp16Border.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/ConvDwFp16Border.S rename to mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Border.S diff --git a/mindspore/lite/nnacl/assembly/opt/ConvDwFp16Center.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/ConvDwFp16Center.S rename to mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Center.S diff --git a/mindspore/lite/nnacl/assembly/opt/ConvDwFp16Row.S b/mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/ConvDwFp16Row.S rename to mindspore/lite/nnacl/assembly/fp16/ConvDwFp16Row.S diff --git a/mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Border.S b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Border.S rename to mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Border.S diff --git a/mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Center.S b/mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/DeconvDwFp16Center.S rename to mindspore/lite/nnacl/assembly/fp16/DeconvDwFp16Center.S diff --git a/mindspore/lite/nnacl/assembly/opt/Float16ToFloat32.S b/mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/Float16ToFloat32.S rename to mindspore/lite/nnacl/assembly/fp16/Float16ToFloat32.S diff --git a/mindspore/lite/nnacl/assembly/opt/Float32ToFloat16.S b/mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/Float32ToFloat16.S rename to mindspore/lite/nnacl/assembly/fp16/Float32ToFloat16.S diff --git a/mindspore/lite/nnacl/assembly/opt/IndirectGemmFp16_16x8.S b/mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/IndirectGemmFp16_16x8.S rename to mindspore/lite/nnacl/assembly/fp16/IndirectGemmFp16_16x8.S diff --git a/mindspore/lite/nnacl/assembly/opt/MatmulFp16.S b/mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/MatmulFp16.S rename to mindspore/lite/nnacl/assembly/fp16/MatmulFp16.S diff --git a/mindspore/lite/nnacl/assembly/opt/PostFuncBiasReluC8Fp16.S b/mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S similarity index 100% rename from mindspore/lite/nnacl/assembly/opt/PostFuncBiasReluC8Fp16.S rename to mindspore/lite/nnacl/assembly/fp16/PostFuncBiasReluC8Fp16.S diff --git a/mindspore/lite/nnacl/optimize/CMakeLists.txt b/mindspore/lite/nnacl/optimize/CMakeLists.txt new file mode 100644 index 00000000000..e6acd66df74 --- /dev/null +++ b/mindspore/lite/nnacl/optimize/CMakeLists.txt @@ -0,0 +1,28 @@ +project(optimize) + +set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) +include_directories(NNACL_DIR) + +########################### optimized files ########################### +file(GLOB SDOT_SRC ${NNACL_DIR}/assembly/opt/*.S) +file(GLOB FP16_C_SRC ${NNACL_DIR}/fp16/*.c) +file(GLOB FP16_NEON_SRC ${NNACL_DIR}/assembly/fp16/*.S) + +set_property(SOURCE ${SDOT_SRC} PROPERTY LANGUAGE C) +set_property(SOURCE ${FP16_C_SRC} PROPERTY LANGUAGE C) +set_property(SOURCE ${FP16_NEON_SRC} PROPERTY LANGUAGE C) + +########################### share library build ######################## +list(APPEND SDOT_FILES ${SDOT_SRC}) +list(APPEND FP16_FILES ${FP16_C_SRC}) +list(APPEND FP16_FILES ${FP16_NEON_SRC}) + +string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16") + +add_library(nnacl_optimize STATIC ${SDOT_FILES}) +target_link_libraries(nnacl_optimize mindspore-lite) + +add_library(nnacl_fp16 STATIC ${FP16_FILES}) +target_link_libraries(nnacl_fp16 mindspore-lite) diff --git a/mindspore/lite/nnacl/optimized_kernel.h b/mindspore/lite/nnacl/optimized_kernel.h index 216005f4d86..bbe83437573 100644 --- a/mindspore/lite/nnacl/optimized_kernel.h +++ b/mindspore/lite/nnacl/optimized_kernel.h @@ -24,14 +24,15 @@ #include #include "nnacl/nnacl_utils.h" #endif +#include "utils/log_adapter.h" -#define OPTIMIZE_SHARED_LIBRARY_PATH "liboptimize.so" +#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so" +#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so" class OptimizeModule { public: OptimizeModule() { bool support_optimize_ops = false; - bool support_fp16 = false; #ifdef __ANDROID__ int hwcap_type = 16; uint32_t hwcap = getHwCap(hwcap_type); @@ -40,8 +41,7 @@ class OptimizeModule { #elif defined(__arm__) if (hwcap & HWCAP_HALF) { #endif - MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap; - support_fp16 = true; + #ifdef ENABLE_ARM64 } #elif defined(__arm__) @@ -57,7 +57,7 @@ class OptimizeModule { } #endif #endif - if (!(support_optimize_ops && support_fp16)) { + if (support_optimize_ops == false) { return; } #ifndef _WIN32 @@ -77,4 +77,44 @@ class OptimizeModule { void *optimized_op_handler_ = nullptr; }; +class Float16Module { + public: + Float16Module() { + bool support_fp16 = false; +#ifdef __ANDROID__ + int hwcap_type = 16; + uint32_t hwcap = getHwCap(hwcap_type); +#ifdef ENABLE_ARM64 + if (hwcap & HWCAP_FPHP) { +#elif defined(__arm__) + if (hwcap & HWCAP_HALF) { +#endif + MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap; + support_fp16 = true; +#ifdef ENABLE_ARM64 + } +#elif defined(__arm__) + } +#endif +#endif + if (support_fp16 == false) { + return; + } +#ifndef _WIN32 + float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY); + if (float16_op_handler_ == nullptr) { + MS_LOG(INFO) << "Open optimize shared library failed: " << dlerror(); + } +#endif + } + + ~Float16Module() = default; + + static Float16Module *GetInstance() { + static Float16Module fp16_module; + return &fp16_module; + } + void *float16_op_handler_ = nullptr; +}; + #endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_ diff --git a/mindspore/lite/src/CMakeLists.txt b/mindspore/lite/src/CMakeLists.txt index e511cbec73b..88e57da86f2 100644 --- a/mindspore/lite/src/CMakeLists.txt +++ b/mindspore/lite/src/CMakeLists.txt @@ -1,7 +1,12 @@ + +set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..) +include_directories(${LITE_DIR}/nnacl/) +include_directories(${LITE_DIR}/nnacl/optimize) + if (PLATFORM_ARM32 OR PLATFORM_ARM64) - # for performance +#for performance if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") - #-fno-rtti -fno-exceptions +#- fno - rtti - fno - exceptions set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math") endif() @@ -65,21 +70,11 @@ set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field") if (SUPPORT_GPU) add_subdirectory(runtime/kernel/opencl) - target_link_libraries(mindspore-lite - cpu_kernel_mid - opencl_kernel_mid - ) - target_link_libraries(mindspore-lite_static - cpu_kernel_mid - opencl_kernel_mid - ) + target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl) + target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl) else () - target_link_libraries(mindspore-lite - cpu_kernel_mid - ) - target_link_libraries(mindspore-lite_static - cpu_kernel_mid - ) + target_link_libraries(mindspore-lite cpu_kernel_mid nnacl) + target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl) endif () if (PLATFORM_ARM32 OR PLATFORM_ARM64) target_link_libraries(mindspore-lite log) @@ -106,3 +101,24 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release") endif () endif () +########################## build optimize and float16 library #################################3 +if (PLATFORM_ARM64) + add_library(mindspore-lite-optimize SHARED) + target_link_libraries(mindspore-lite-optimize cpu_opt_kernel_mid) + target_link_libraries(mindspore-lite-optimize nnacl_optimize) + + add_library(mindspore-lite-fp16 SHARED) + target_link_libraries(mindspore-lite-fp16 cpu_fp16_kernel_mid) + target_link_libraries(mindspore-lite-fp16 nnacl_fp16) +endif () + +if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND (PLATFORM_ARM64)) + add_custom_command(TARGET mindspore-lite-optimize POST_BUILD COMMAND + ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip + ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so) + + add_custom_command(TARGET mindspore-lite-fp16 POST_BUILD COMMAND + ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip + ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so) +endif () + diff --git a/mindspore/lite/src/kernel_registry.cc b/mindspore/lite/src/kernel_registry.cc index 994867b0768..1207680fb0b 100644 --- a/mindspore/lite/src/kernel_registry.cc +++ b/mindspore/lite/src/kernel_registry.cc @@ -43,6 +43,12 @@ int KernelRegistry::Init() { } else { MS_LOG(INFO) << "load optimize lib failed."; } + void *float16_op_handler = Float16Module::GetInstance()->float16_op_handler_; + if (float16_op_handler != nullptr) { + MS_LOG(INFO) << "load float16 lib success."; + } else { + MS_LOG(INFO) << "load float16 lib failed."; + } #endif return RET_OK; } diff --git a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt index 5a136127e83..a108bce633b 100644 --- a/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt +++ b/mindspore/lite/src/runtime/kernel/arm/CMakeLists.txt @@ -1,39 +1,23 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/) file(GLOB KERNEL_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/*.c - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32/*.c - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/int8/*.c - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/quantization/*.c - ${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc - ${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc - ) -list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/opt_op_handler.c) + ${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc + ${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc + ) +list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc) if (SUPPORT_TRAIN) -file (GLOB TRAIN_KERNEL_SRC - ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32_grad/*.c - ) - set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC}) -endif() - -if (PLATFORM_ARM64) - # assembly - file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.s - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.S) - set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) - set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) -endif() - -if (PLATFORM_ARM32) - # assembly - file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.s - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.S - ) - set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C) - set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC}) +file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc) +set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC}) endif() add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC}) + +if (PLATFORM_ARM64) + file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc) + add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC}) + file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc) + add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC}) +endif () + diff --git a/mindspore/lite/nnacl/opt_op_handler.c b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc similarity index 100% rename from mindspore/lite/nnacl/opt_op_handler.c rename to mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc index 7a16c16990f..9ee46d6c731 100644 --- a/mindspore/lite/nnacl/opt_op_handler.c +++ b/mindspore/lite/src/runtime/kernel/arm/int8/opt_op_handler.cc @@ -34,10 +34,6 @@ extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, in int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride, size_t peroc); -#ifdef __cplusplus -} -#endif - #ifdef ENABLE_ARM64 void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias, size_t ksize, size_t ic4, size_t output_channel, size_t offset, @@ -61,3 +57,7 @@ void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst, output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel); } #endif + +#ifdef __cplusplus +} +#endif diff --git a/mindspore/lite/test/run_benchmark_nets.sh b/mindspore/lite/test/run_benchmark_nets.sh index 437490de385..0b1377c2316 100644 --- a/mindspore/lite/test/run_benchmark_nets.sh +++ b/mindspore/lite/test/run_benchmark_nets.sh @@ -293,7 +293,8 @@ function Run_arm64() { fi cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1 - cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/liboptimize.so ${benchmark_test_path}/liboptimize.so || exit 1 + cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-fp16.so ${benchmark_test_path}/libmindspore-lite-fp16.so || exit 1 + cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-optimize.so ${benchmark_test_path}/libmindspore-lite-optimize.so || exit 1 cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1 # adb push all needed files to the phone diff --git a/mindspore/lite/tools/converter/CMakeLists.txt b/mindspore/lite/tools/converter/CMakeLists.txt index 1de0a3896ab..f2c19036184 100644 --- a/mindspore/lite/tools/converter/CMakeLists.txt +++ b/mindspore/lite/tools/converter/CMakeLists.txt @@ -106,7 +106,6 @@ file(GLOB KERNEL_SRC ${ARM_DIR}/fp32/*.cc ${ARM_DIR}/int8/*.cc ) -list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../nnacl/opt_op_handler.c) if (PLATFORM_ARM64) # assembly