[MSLITE][Develop] modify optimize.so to sdot and fp16 so

This commit is contained in:
ling 2020-09-16 12:09:23 +08:00
parent 8f0c863efe
commit a19e6251bc
20 changed files with 165 additions and 93 deletions

View File

@ -58,7 +58,8 @@ if (PLATFORM_ARM64)
install(FILES ${TOP_DIR}/mindspore/core/ir/dtype/type_id.h DESTINATION ${INC_DIR}/ir/dtype COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/mindspore/lite/include/ DESTINATION ${INC_DIR} COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h")
install(DIRECTORY ${TOP_DIR}/mindspore/lite/schema/ DESTINATION ${INC_DIR}/schema COMPONENT ${COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "inner" EXCLUDE)
install(FILES ${TOP_DIR}/mindspore/lite/build/nnacl/liboptimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-optimize.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite-fp16.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})
install(DIRECTORY ${TOP_DIR}/third_party/flatbuffers/include DESTINATION ${FLATBF_DIR} COMPONENT ${COMPONENT_NAME})
elseif (PLATFORM_ARM32)
install(FILES ${TOP_DIR}/mindspore/lite/build/src/libmindspore-lite.so DESTINATION ${LIB_DIR} COMPONENT ${COMPONENT_NAME})

View File

@ -1,45 +1,42 @@
project(nnacl)
# NNACL_DIR is the directory holding this script; TOP_DIR is its parent.
set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(TOP_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
# The variable must be expanded: a bare NNACL_DIR is treated as a relative
# path literally named "NNACL_DIR" under the current source directory.
include_directories(${NNACL_DIR})
########################### optimized files ###########################
file(GLOB OPTIMIZED_ASSEMBLY
${NNACL_DIR}/assembly/opt/*.s
${NNACL_DIR}/assembly/opt/*.S
)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
endif ()
file(GLOB FP16_SRC
${NNACL_DIR}/fp16/*.c
${TOP_DIR}/src/runtime/kernel/arm/fp16/*.cc
)
########################### files ###########################
file(GLOB KERNEL_SRC
${NNACL_DIR}/*.c
${NNACL_DIR}/fp32/*.c
${NNACL_DIR}/int8/*.c
${NNACL_DIR}/quantization/*.c
)
########################### share library build ########################
set(OPTIMIZED_OPS ${NNACL_DIR}/opt_op_handler.c)
set_property(SOURCE ${OPTIMIZED_ASSEMBLY} PROPERTY LANGUAGE C)
list(APPEND OPTIMIZED_OPS ${OPTIMIZED_ASSEMBLY} ${FP16_SRC})
if (SUPPORT_TRAIN)
file (GLOB TRAIN_SRC ${NNACL_DIR}/fp32_grad/*.c)
endif()
if (PLATFORM_ARM64)
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
add_library(optimize SHARED ${OPTIMIZED_OPS})
target_link_libraries(
optimize
mindspore-lite
)
set_target_properties(optimize PROPERTIES CLEAN_DIRECT_OUTPUT 1)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
add_custom_command(TARGET optimize POST_BUILD
COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
${TOP_DIR}/build/nnacl/liboptimize.so)
endif ()
if (PLATFORM_ARM32)
file(GLOB ASSEMBLY_SRC ${NNACL_DIR}/assembly/arm32/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
endif()
add_custom_command(TARGET optimize POST_BUILD
COMMAND rm -rf ${TOP_DIR}/output/lib/liboptimize.so
COMMAND mkdir -pv ${TOP_DIR}/output/lib
COMMAND cp ${TOP_DIR}/build/nnacl/liboptimize.so ${TOP_DIR}/output/lib)
endif ()
########################### build nnacl static library ########################
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_library(nnacl STATIC ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
########################### arm64 build optimize library ########################
if (PLATFORM_ARM64)
add_subdirectory(${NNACL_DIR}/optimize)
endif()

View File

@ -0,0 +1,28 @@
project(optimize)
# NNACL_DIR is the parent of the directory holding this script.
set(NNACL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
# The variable must be expanded: a bare NNACL_DIR is treated as a relative
# path literally named "NNACL_DIR" under the current source directory.
include_directories(${NNACL_DIR})
########################### optimized files ###########################
# Collect the three source groups: assembly under assembly/opt, C sources
# under fp16, and assembly under assembly/fp16.
file(GLOB SDOT_SRC ${NNACL_DIR}/assembly/opt/*.S)
file(GLOB FP16_C_SRC ${NNACL_DIR}/fp16/*.c)
file(GLOB FP16_NEON_SRC ${NNACL_DIR}/assembly/fp16/*.S)
# Mark every collected file (including the .S assembly) as C language sources.
set_property(SOURCE ${SDOT_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_C_SRC} PROPERTY LANGUAGE C)
set_property(SOURCE ${FP16_NEON_SRC} PROPERTY LANGUAGE C)
########################### share library build ########################
list(APPEND SDOT_FILES ${SDOT_SRC})
list(APPEND FP16_FILES ${FP16_C_SRC})
list(APPEND FP16_FILES ${FP16_NEON_SRC})
# Switch C symbol visibility from hidden to default for the targets below.
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
# Compile for armv8.2-a with the dotprod and fp16 extensions enabled.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8.2-a+dotprod+fp16")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8.2-a+dotprod+fp16")
# Static archive built from the assembly/opt sources.
add_library(nnacl_optimize STATIC ${SDOT_FILES})
target_link_libraries(nnacl_optimize mindspore-lite)
# Static archive built from the fp16 C and assembly sources.
add_library(nnacl_fp16 STATIC ${FP16_FILES})
target_link_libraries(nnacl_fp16 mindspore-lite)

View File

@ -24,14 +24,15 @@
#include <asm/hwcap.h>
#include "nnacl/nnacl_utils.h"
#endif
#include "utils/log_adapter.h"
#define OPTIMIZE_SHARED_LIBRARY_PATH "liboptimize.so"
#define OPTIMIZE_SHARED_LIBRARY_PATH "libmindspore-lite-optimize.so"
#define FLOAT16_SHARED_LIBRARY_PATH "libmindspore-lite-fp16.so"
class OptimizeModule {
public:
OptimizeModule() {
bool support_optimize_ops = false;
bool support_fp16 = false;
#ifdef __ANDROID__
int hwcap_type = 16;
uint32_t hwcap = getHwCap(hwcap_type);
@ -40,8 +41,7 @@ class OptimizeModule {
#elif defined(__arm__)
if (hwcap & HWCAP_HALF) {
#endif
MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
support_fp16 = true;
#ifdef ENABLE_ARM64
}
#elif defined(__arm__)
@ -57,7 +57,7 @@ class OptimizeModule {
}
#endif
#endif
if (!(support_optimize_ops && support_fp16)) {
if (support_optimize_ops == false) {
return;
}
#ifndef _WIN32
@ -77,4 +77,44 @@ class OptimizeModule {
void *optimized_op_handler_ = nullptr;
};
// Loads the float16 kernel shared library when the CPU reports half-precision
// support.
//
// On Android the constructor queries the hardware capability bits via
// getHwCap() and tests HWCAP_FPHP (ENABLE_ARM64) or HWCAP_HALF (__arm__).
// When FP16 support is detected, and the platform is not Windows, it
// dlopen()s FLOAT16_SHARED_LIBRARY_PATH. float16_op_handler_ holds the
// resulting handle and stays nullptr when FP16 is not detected or the library
// cannot be opened.
class Float16Module {
 public:
  Float16Module() {
    bool support_fp16 = false;
#ifdef __ANDROID__
    // NOTE(review): 16 presumably corresponds to AT_HWCAP - confirm against getHwCap().
    int hwcap_type = 16;
    uint32_t hwcap = getHwCap(hwcap_type);
#ifdef ENABLE_ARM64
    if (hwcap & HWCAP_FPHP) {
#elif defined(__arm__)
    if (hwcap & HWCAP_HALF) {
#endif
      MS_LOG(INFO) << "Hw cap support FP16, hwcap: 0x" << hwcap;
      support_fp16 = true;
      // The closing brace below is emitted under the same conditions as the
      // opening `if` above, so braces stay balanced in every configuration.
#ifdef ENABLE_ARM64
    }
#elif defined(__arm__)
    }
#endif
#endif
    if (!support_fp16) {
      return;
    }
#ifndef _WIN32
    float16_op_handler_ = dlopen(FLOAT16_SHARED_LIBRARY_PATH, RTLD_LAZY);
    if (float16_op_handler_ == nullptr) {
      // Failure is only logged; callers must check the handle for nullptr.
      MS_LOG(INFO) << "Open float16 shared library failed: " << dlerror();
    }
#endif
  }

  ~Float16Module() = default;

  // Returns the single shared instance, constructed on first use.
  static Float16Module *GetInstance() {
    static Float16Module fp16_module;
    return &fp16_module;
  }

  // dlopen() handle of the float16 library, or nullptr when it was not loaded.
  void *float16_op_handler_ = nullptr;
};
#endif // MINDSPORE_LITE_NNACL_OPTIMIZED_KERNEL_H_

View File

@ -1,7 +1,12 @@
set(LITE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/..)
include_directories(${LITE_DIR}/nnacl/)
include_directories(${LITE_DIR}/nnacl/optimize)
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
# for performance
#for performance
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
#-fno-rtti -fno-exceptions
#- fno - rtti - fno - exceptions
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fomit-frame-pointer -fstrict-aliasing -ffunction-sections -fdata-sections -ffast-math")
endif()
@ -65,21 +70,11 @@ set_target_properties(mindspore-lite_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-private-field")
if (SUPPORT_GPU)
add_subdirectory(runtime/kernel/opencl)
target_link_libraries(mindspore-lite
cpu_kernel_mid
opencl_kernel_mid
)
target_link_libraries(mindspore-lite_static
cpu_kernel_mid
opencl_kernel_mid
)
target_link_libraries(mindspore-lite cpu_kernel_mid opencl_kernel_mid nnacl)
target_link_libraries(mindspore-lite_static cpu_kernel_mid opencl_kernel_mid nnacl)
else ()
target_link_libraries(mindspore-lite
cpu_kernel_mid
)
target_link_libraries(mindspore-lite_static
cpu_kernel_mid
)
target_link_libraries(mindspore-lite cpu_kernel_mid nnacl)
target_link_libraries(mindspore-lite_static cpu_kernel_mid nnacl)
endif ()
if (PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(mindspore-lite log)
@ -106,3 +101,24 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
endif ()
endif ()
########################## build optimize and float16 library ##########################
# ARM64 only: assemble the two shared libraries from the kernel object
# libraries and the matching nnacl static archives.
if (PLATFORM_ARM64)
    add_library(mindspore-lite-optimize SHARED)
    target_link_libraries(mindspore-lite-optimize cpu_opt_kernel_mid)
    target_link_libraries(mindspore-lite-optimize nnacl_optimize)

    add_library(mindspore-lite-fp16 SHARED)
    target_link_libraries(mindspore-lite-fp16 cpu_fp16_kernel_mid)
    target_link_libraries(mindspore-lite-fp16 nnacl_fp16)
endif ()

# Release ARM64 builds strip both libraries after linking. $<TARGET_FILE:...>
# resolves to the real output path, so this works for any build directory.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND (PLATFORM_ARM64))
    add_custom_command(TARGET mindspore-lite-optimize POST_BUILD
            COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
                    $<TARGET_FILE:mindspore-lite-optimize>
            VERBATIM)

    add_custom_command(TARGET mindspore-lite-fp16 POST_BUILD
            COMMAND ${ANDROID_NDK}/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/aarch64-linux-android/bin/strip
                    $<TARGET_FILE:mindspore-lite-fp16>
            VERBATIM)
endif ()

View File

@ -43,6 +43,12 @@ int KernelRegistry::Init() {
} else {
MS_LOG(INFO) << "load optimize lib failed.";
}
void *float16_op_handler = Float16Module::GetInstance()->float16_op_handler_;
if (float16_op_handler != nullptr) {
MS_LOG(INFO) << "load float16 lib success.";
} else {
MS_LOG(INFO) << "load float16 lib failed.";
}
#endif
return RET_OK;
}

View File

@ -1,39 +1,23 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/)
file(GLOB KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/int8/*.c
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/quantization/*.c
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/opt_op_handler.c)
${CMAKE_CURRENT_SOURCE_DIR}/base/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/fp32/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
if (SUPPORT_TRAIN)
file (GLOB TRAIN_KERNEL_SRC
${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/fp32_grad/*.c
)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
endif()
if (PLATFORM_ARM64)
# assembly
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.s
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm64/*.S)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
endif()
if (PLATFORM_ARM32)
# assembly
file(GLOB ASSEMBLY_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.s
${CMAKE_CURRENT_SOURCE_DIR}/../../../../nnacl/assembly/arm32/*.S
)
set_property(SOURCE ${ASSEMBLY_SRC} PROPERTY LANGUAGE C)
set(KERNEL_SRC ${KERNEL_SRC} ${ASSEMBLY_SRC})
file (GLOB TRAIN_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp32_grad/*.cc)
set(KERNEL_SRC ${KERNEL_SRC} ${TRAIN_KERNEL_SRC})
endif()
add_library(cpu_kernel_mid OBJECT ${KERNEL_SRC})
if (PLATFORM_ARM64)
file(GLOB FP16_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/fp16/*.cc)
add_library(cpu_fp16_kernel_mid OBJECT ${FP16_KERNEL_SRC})
file(GLOB OPT_KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/int8/opt_op_handler.cc)
add_library(cpu_opt_kernel_mid OBJECT ${OPT_KERNEL_SRC})
endif ()

View File

@ -34,10 +34,6 @@ extern void MatmulInt8DpNeon64(const int8_t *a, const int8_t *b, int8_t *dst, in
int *multiplier, int *left_shift, int *right_shift, int row, int col, int stride,
size_t peroc);
#ifdef __cplusplus
}
#endif
#ifdef ENABLE_ARM64
void IndirectGemmInt8_optimize_handler(int8_t *dst, const int8_t *src, const int8_t *weight, const int32_t *bias,
size_t ksize, size_t ic4, size_t output_channel, size_t offset,
@ -61,3 +57,7 @@ void MatMulRInt8_optimize_handler(const int8_t *a, const int8_t *b, int8_t *dst,
output_zp, multiplier, left_shift, right_shift, row, col, stride, per_channel);
}
#endif
#ifdef __cplusplus
}
#endif

View File

@ -293,7 +293,8 @@ function Run_arm64() {
fi
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite.so ${benchmark_test_path}/libmindspore-lite.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/liboptimize.so ${benchmark_test_path}/liboptimize.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-fp16.so ${benchmark_test_path}/libmindspore-lite-fp16.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/lib/libmindspore-lite-optimize.so ${benchmark_test_path}/libmindspore-lite-optimize.so || exit 1
cp -a ${arm64_path}/mindspore-lite-${version}-runtime-arm64-${process_unit_arm64}/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
# adb push all needed files to the phone

View File

@ -106,7 +106,6 @@ file(GLOB KERNEL_SRC
${ARM_DIR}/fp32/*.cc
${ARM_DIR}/int8/*.cc
)
list(REMOVE_ITEM KERNEL_SRC ${CMAKE_CURRENT_SOURCE_DIR}/../../nnacl/opt_op_handler.c)
if (PLATFORM_ARM64)
# assembly