forked from mindspore-Ecosystem/mindspore
!6324 [MSLITE][Develop] modify optimize.so to sdot and fp16 so
Merge pull request !6324 from ling/bug
This commit is contained in:
commit
43770c9fda
cmake
mindspore/lite
|
@ -58,7 +58,8 @@ if (PLATFORM_ARM64)
|
|||
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
|
||||
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
elseif (PLATFORM_ARM32)
|
||||
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
|
||||
|
|
|
@ -1,45 +1,42 @@
|
|||
project(nnacl)
|
||||
|
||||
set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
include_directories(NNACL_DIR)
|
||||
|
||||
########################### optimized files ###########################
|
||||
file(GLOB OPTIMIZED_ASSEMBLY
|
||||
${NNACL_DIR}/assembly/opt/*.s
|
||||
${NNACL_DIR}/assembly/opt/*.S
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
########################### files ###########################
|
||||
file(GLOB KERNEL_SRC
|
||||
${NNACL_DIR}/*.c
|
||||
${NNACL_DIR}/fp32/*.c
|
||||
${NNACL_DIR}/int8/*.c
|
||||
${NNACL_DIR}/quantization/*.c
|
||||
)
|
||||
|
||||
file(GLOB FP16_SRC
|
||||
${NNACL_DIR}/fp16/*.c
|
||||
${TOP_DIR}/src/runtime/kernel/arm/fp16/*.cc
|
||||
)
|
||||
|
||||
########################### share library build ########################
|
||||
set(OPTIMIZED_OPS ${NNACL_DIR}/opt_op_handler.c)
|
||||
|
||||
set_property(SOURCE ${OPTIMIZED_ASSEMBLY} PROPERTY LANGUAGE C)
|
||||
list(APPEND OPTIMIZED_OPS ${OPTIMIZED_ASSEMBLY} ${FP16_SRC})
|
||||
if (SUPPORT_TRAIN)
|
||||
file (GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c)
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM32)
|
||||
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
endif()
|
||||
|
||||
########################### build nnacl static library ########################
|
||||
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
|
||||
add_library(optimize SHARED ${OPTIMIZED_OPS})
|
||||
target_link_libraries(
|
||||
optimize
|
||||
mindspore-lite
|
||||
)
|
||||
set_target_properties(optimize PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
add_library(nnacl STATIC ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
add_custom_command(TARGET optimize POST_BUILD
|
||||
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
|
||||
${TOP_DIR}/build/nnacl/liboptimize.so)
|
||||
endif ()
|
||||
|
||||
add_custom_command(TARGET optimize POST_BUILD
|
||||
COMMAND rm -rf ${TOP_DIR}/output/lib/liboptimize.so
|
||||
COMMAND mkdir -pv ${TOP_DIR}/output/lib
|
||||
COMMAND cp ${TOP_DIR}/build/nnacl/liboptimize.so ${TOP_DIR}/output/lib)
|
||||
########################### arm64 build optimize library ########################
|
||||
if (PLATFORM_ARM64)
|
||||
add_subdirectory(${NNACL_DIR}/optimize)
|
||||
endif()
|
|
@ -0,0 +1,28 @@
|
|||
# Build script for the ARMv8.2-A specific nnacl kernels.
#
# Defines two static libraries:
#   nnacl_optimize - assembly sources globbed from ../assembly/opt
#   nnacl_fp16     - C sources from ../fp16 plus assembly from ../assembly/fp16
project(optimize)

set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
# The variable has to be dereferenced: a bare `NNACL_DIR` is taken as a literal
# relative path and adds a non-existent "NNACL_DIR" directory to the include path.
include_directories(${NNACL_DIR})

########################### optimized files ###########################
file(GLOB SDOT_SRC ${NNACL_DIR}/assembly/opt/*.S)
file(GLOB FP16_C_SRC ${NNACL_DIR}/fp16/*.c)
file(GLOB FP16_NEON_SRC ${NNACL_DIR}/assembly/fp16/*.S)

# Route every globbed source (including the .S assembly files) through the C
# compiler driver.
set_property(SOURCE ${SDOT_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_C_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_NEON_SRC} PROPERTY LANGUAGE C)

########################### static library build ########################
list(APPEND SDOT_FILES ${SDOT_SRC})
list(APPEND FP16_FILES ${FP16_C_SRC})
list(APPEND FP16_FILES ${FP16_NEON_SRC})

# Export symbols from these libraries even when the inherited C flags request
# hidden visibility.
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
# Enable the ARMv8.2-A dot-product and half-precision extensions. The flags are
# appended to the directory-scoped flag variables, so they apply to the targets
# created below in this directory.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")

add_library(nnacl_optimize STATIC ${SDOT_FILES})
target_link_libraries(nnacl_optimize mindspore-lite)

add_library(nnacl_fp16 STATIC ${FP16_FILES})
target_link_libraries(nnacl_fp16 mindspore-lite)
|
|
@ -24,14 +24,15 @@
|
|||
#include <asm/hwcap.h>
|
||||
#include "nnacl/nnacl_utils.h"
|
||||
#endif
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
#define OPTIMIZE_SHARED_LIBRARY_PATH "liboptimize.so"
|
||||
#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so"
|
||||
#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so"
|
||||
|
||||
class OptimizeModule {
|
||||
public:
|
||||
OptimizeModule() {
|
||||
bool support_optimize_ops = false;
|
||||
bool support_fp16 = false;
|
||||
#ifdef __ANDROID__
|
||||
int hwcap_type = 16;
|
||||
uint32_t hwcap = getHwCap(hwcap_type);
|
||||
|
@ -40,8 +41,7 @@ class OptimizeModule {
|
|||
#elif defined(__arm__)
|
||||
if (hwcap & HWCAP_HALF) {
|
||||
#endif
|
||||
MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
|
||||
support_fp16 = true;
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
}
|
||||
#elif defined(__arm__)
|
||||
|
@ -57,7 +57,7 @@ class OptimizeModule {
|
|||
}
|
||||
#endif
|
||||
#endif
|
||||
if (!(support_optimize_ops && support_fp16)) {
|
||||
if (support_optimize_ops == false) {
|
||||
return;
|
||||
}
|
||||
#ifndef _WIN32
|
||||
|
@ -77,4 +77,44 @@ class OptimizeModule {
|
|||
void *optimized_op_handler_ = nullptr;
|
||||
};
|
||||
|
||||
class Float16Module {
|
||||
public:
|
||||
Float16Module() {
|
||||
bool support_fp16 = false;
|
||||
#ifdef __ANDROID__
|
||||
int hwcap_type = 16;
|
||||
uint32_t hwcap = getHwCap(hwcap_type);
|
||||
#ifdef ENABLE_ARM64
|
||||
if (hwcap & HWCAP_FPHP) {
|
||||
#elif defined(__arm__)
|
||||
if (hwcap & HWCAP_HALF) {
|
||||
#endif
|
||||
MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
|
||||
support_fp16 = true;
|
||||
#ifdef ENABLE_ARM64
|
||||
}
|
||||
#elif defined(__arm__)
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
if (support_fp16 == false) {
|
||||
return;
|
||||
}
|
||||
#ifndef _WIN32
|
||||
float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY);
|
||||
if (float16_op_handler_ == nullptr) {
|
||||
MS_LOG(INFO) << "Open optimize shared library failed: " << dlerror();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
~Float16Module() = default;
|
||||
|
||||
static Float16Module *GetInstance() {
|
||||
static Float16Module fp16_module;
|
||||
return &fp16_module;
|
||||
}
|
||||
void *float16_op_handler_ = nullptr;
|
||||
};
|
||||
|
||||
#endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
|
||||
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
|
||||
include_directories(${LITE_DIR}/nnacl/)
|
||||
include_directories(${LITE_DIR}/nnacl/optimize)
|
||||
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
#for performance
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
||||
|
@ -66,21 +71,11 @@ set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
|||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
|
||||
if (SUPPORT_GPU)
|
||||
add_subdirectory(runtime/kernel/opencl)
|
||||
target_link_libraries(mindspore-lite
|
||||
cpu_kernel_mid
|
||||
opencl_kernel_mid
|
||||
)
|
||||
target_link_libraries(mindspore-lite_static
|
||||
cpu_kernel_mid
|
||||
opencl_kernel_mid
|
||||
)
|
||||
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl)
|
||||
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl)
|
||||
else ()
|
||||
target_link_libraries(mindspore-lite
|
||||
cpu_kernel_mid
|
||||
)
|
||||
target_link_libraries(mindspore-lite_static
|
||||
cpu_kernel_mid
|
||||
)
|
||||
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl)
|
||||
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl)
|
||||
endif ()
|
||||
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
target_link_libraries(mindspore-lite log)
|
||||
|
@ -107,3 +102,24 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
########################## build optimize and float16 library #################################
|
||||
if (PLATFORM_ARM64)
|
||||
add_library(mindspore-lite-optimize SHARED)
|
||||
target_link_libraries(mindspore-lite-optimize cpu_opt_kernel_mid)
|
||||
target_link_libraries(mindspore-lite-optimize nnacl_optimize)
|
||||
|
||||
add_library(mindspore-lite-fp16 SHARED)
|
||||
target_link_libraries(mindspore-lite-fp16 cpu_fp16_kernel_mid)
|
||||
target_link_libraries(mindspore-lite-fp16 nnacl_fp16)
|
||||
endif ()
|
||||
|
||||
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND (PLATFORM_ARM64))
|
||||
add_custom_command(TARGET mindspore-lite-optimize POST_BUILD COMMAND
|
||||
${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
|
||||
${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so)
|
||||
|
||||
add_custom_command(TARGET mindspore-lite-fp16 POST_BUILD COMMAND
|
||||
${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
|
||||
${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -43,6 +43,12 @@ int KernelRegistry::Init() {
|
|||
} else {
|
||||
MS_LOG(INFO) << "load optimize lib failed.";
|
||||
}
|
||||
void *float16_op_handler = Float16Module::GetInstance()->float16_op_handler_;
|
||||
if (float16_op_handler != nullptr) {
|
||||
MS_LOG(INFO) << "load float16 lib success.";
|
||||
} else {
|
||||
MS_LOG(INFO) << "load float16 lib failed.";
|
||||
}
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -2,38 +2,22 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
|
|||
|
||||
file(GLOB KERNEL_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/*.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32/*.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/int8/*.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/quantization/*.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
|
||||
)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/opt_op_handler.c)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
|
||||
|
||||
if (SUPPORT_TRAIN)
|
||||
file (GLOB TRAIN_KERNEL_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32_grad/*.c
|
||||
)
|
||||
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
|
||||
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
# assembly
|
||||
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.s
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.S)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
|
||||
endif()
|
||||
|
||||
if (PLATFORM_ARM32)
|
||||
# assembly
|
||||
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.s
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.S
|
||||
)
|
||||
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
|
||||
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
|
||||
endif()
|
||||
|
||||
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
|
||||
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
|
||||
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
|
||||
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -34,10 +34,6 @@ extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, in
|
|||
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
|
||||
size_t peroc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_ARM64
|
||||
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
|
||||
size_t ksize, size_t ic4, size_t output_channel, size_t offset,
|
||||
|
@ -61,3 +57,7 @@ void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst,
|
|||
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -293,7 +293,8 @@ function Run_arm64() {
|
|||
fi
|
||||
|
||||
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
|
||||
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/liboptimize.so ${benchmark_test_path}/liboptimize.so || exit 1
|
||||
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-fp16.so ${benchmark_test_path}/libmindspore-lite-fp16.so || exit 1
|
||||
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-optimize.so ${benchmark_test_path}/libmindspore-lite-optimize.so || exit 1
|
||||
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
||||
|
||||
# adb push all needed files to the phone
|
||||
|
|
|
@ -108,7 +108,6 @@ file(GLOB KERNEL_SRC
|
|||
${ARM_DIR}/fp32/*.cc
|
||||
${ARM_DIR}/int8/*.cc
|
||||
)
|
||||
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../nnacl/opt_op_handler.c)
|
||||
|
||||
if (PLATFORM_ARM64)
|
||||
# assembly
|
||||
|
|
Loading…
Reference in New Issue