[CPU] Support avx isa by default

This commit is contained in:
zhanyuan 2021-05-29 15:07:34 +08:00
parent debd27a9f2
commit 57da3c73fe
5 changed files with 28 additions and 25 deletions

View File

@ -60,7 +60,7 @@ usage()
echo " -l Compile with python dependency, default on" echo " -l Compile with python dependency, default on"
echo " -S Enable enable download cmake compile dependency from gitee , default off" echo " -S Enable enable download cmake compile dependency from gitee , default off"
echo " -k Enable make clean, clean up compilation generated cache " echo " -k Enable make clean, clean up compilation generated cache "
echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off" echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off for lite and avx for CPU"
echo " -H Enable hidden" echo " -H Enable hidden"
echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking" echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
} }
@ -387,7 +387,7 @@ build_mindspore()
echo "start build mindspore project." echo "start build mindspore project."
mkdir -pv "${BUILD_PATH}/mindspore" mkdir -pv "${BUILD_PATH}/mindspore"
cd "${BUILD_PATH}/mindspore" cd "${BUILD_PATH}/mindspore"
CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH -DX86_64_SIMD=${X86_64_SIMD}" CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH"
if [[ "X$ENABLE_COVERAGE" = "Xon" ]]; then if [[ "X$ENABLE_COVERAGE" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_COVERAGE=ON" CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_COVERAGE=ON"
fi fi

View File

@ -6,18 +6,13 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/include)
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/src) include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/src)
if(ENABLE_CPU) if(ENABLE_CPU)
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(PLATFORM_ARM64 "on")
set(X86_64_SIMD "off")
elseif("${X86_64_SIMD}" STREQUAL "off")
set(X86_64_SIMD "avx")
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/cpu) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/cpu)
if("${X86_64_SIMD}" STREQUAL "sse")
add_compile_definitions(ENABLE_SSE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2")
endif()
if("${X86_64_SIMD}" STREQUAL "avx")
add_compile_definitions(ENABLE_SSE)
add_compile_definitions(ENABLE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
endif()
add_subdirectory(backend/kernel_compiler/cpu/nnacl) add_subdirectory(backend/kernel_compiler/cpu/nnacl)
endif() endif()

View File

@ -13,9 +13,6 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64)
-ffunction-sections -fdata-sections -ffast-math") -ffunction-sections -fdata-sections -ffast-math")
endif() endif()
endif() endif()
if(ENABLE_CPU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
endif()
if("${X86_64_SIMD}" STREQUAL "avx") if("${X86_64_SIMD}" STREQUAL "avx")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2 -mfma") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2 -mfma")
endif() endif()
@ -59,21 +56,27 @@ if(APPLE)
set_source_files_properties(${ASSEMBLY_SRC} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp") set_source_files_properties(${ASSEMBLY_SRC} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif() endif()
########################### build nnacl static library ######################## ########################### build nnacl library ########################
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
if(ENABLE_CPU) if(ENABLE_CPU)
add_library(nnacl SHARED $<TARGET_OBJECTS:nnacl_mid>) add_library(nnacl SHARED ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
target_compile_definitions(nnacl PRIVATE ENABLE_ARM ENABLE_ARM64 ENABLE_NEON)
target_compile_options(nnacl PRIVATE -ffast-math -flax-vector-conversions)
elseif("${X86_64_SIMD}" STREQUAL "sse")
target_compile_definitions(nnacl PRIVATE ENABLE_SSE)
elseif("${X86_64_SIMD}" STREQUAL "avx")
target_compile_definitions(nnacl PRIVATE ENABLE_SSE ENABLE_AVX)
endif()
target_compile_options(nnacl PRIVATE -fPIC)
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_options(nnacl PRIVATE -Wl,-z,relro,-z,now)
endif()
else() else()
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
add_library(nnacl STATIC $<TARGET_OBJECTS:nnacl_mid>) add_library(nnacl STATIC $<TARGET_OBJECTS:nnacl_mid>)
endif()
if(NOT ENABLE_CPU)
add_dependencies(nnacl_mid fbs_src) add_dependencies(nnacl_mid fbs_src)
endif() endif()
########################### arm fp16 build optimize library ######################## ########################### arm fp16 build optimize library ########################
if(ENABLE_FP16) if(ENABLE_FP16)
add_subdirectory(${NNACL_DIR}/optimize) add_subdirectory(${NNACL_DIR}/optimize)

View File

@ -86,7 +86,11 @@ int LRelu(const float *src, int length, float *dst, float alpha) {
for (; i < length - 4; i += 4) { for (; i < length - 4; i += 4) {
MS_FLOAT32X4 src_tmp = MS_LDQ_F32(src + i); MS_FLOAT32X4 src_tmp = MS_LDQ_F32(src + i);
MS_FLOAT32X4 mul_tmp = MS_MULQ_N_F32(src_tmp, alpha); MS_FLOAT32X4 mul_tmp = MS_MULQ_N_F32(src_tmp, alpha);
#ifdef ENABLE_ARM
MS_UINT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
#else
MS_FLOAT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f)); MS_FLOAT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
#endif
MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask)); MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask));
} }
#endif #endif

View File

@ -29,6 +29,7 @@
#ifdef ENABLE_ARM #ifdef ENABLE_ARM
#define MS_FLOAT32X4 float32x4_t #define MS_FLOAT32X4 float32x4_t
#define MS_INT32X4 int32x4_t #define MS_INT32X4 int32x4_t
#define MS_UINT32X4 uint32x4_t
#define MS_LDQ_F32 vld1q_f32 #define MS_LDQ_F32 vld1q_f32
#define MS_LDQ_EPI32 vld1q_s32 #define MS_LDQ_EPI32 vld1q_s32
#define MS_ADDQ_F32 vaddq_f32 #define MS_ADDQ_F32 vaddq_f32