[CPU] Support the AVX ISA by default

This commit is contained in:
zhanyuan 2021-05-29 15:07:34 +08:00
parent debd27a9f2
commit 57da3c73fe
5 changed files with 28 additions and 25 deletions

View File

@ -60,7 +60,7 @@ usage()
echo " -l Compile with python dependency, default on"
echo " -S Enable enable download cmake compile dependency from gitee , default off"
echo " -k Enable make clean, clean up compilation generated cache "
echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off"
echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off for lite and avx for CPU"
echo " -H Enable hidden"
echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
}
@ -387,7 +387,7 @@ build_mindspore()
echo "start build mindspore project."
mkdir -pv "${BUILD_PATH}/mindspore"
cd "${BUILD_PATH}/mindspore"
CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH -DX86_64_SIMD=${X86_64_SIMD}"
CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH"
if [[ "X$ENABLE_COVERAGE" = "Xon" ]]; then
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_COVERAGE=ON"
fi

View File

@ -6,18 +6,13 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/include)
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/src)
if(ENABLE_CPU)
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
set(PLATFORM_ARM64 "on")
set(X86_64_SIMD "off")
elseif("${X86_64_SIMD}" STREQUAL "off")
set(X86_64_SIMD "avx")
endif()
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/cpu)
if("${X86_64_SIMD}" STREQUAL "sse")
add_compile_definitions(ENABLE_SSE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2")
endif()
if("${X86_64_SIMD}" STREQUAL "avx")
add_compile_definitions(ENABLE_SSE)
add_compile_definitions(ENABLE_AVX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
endif()
add_subdirectory(backend/kernel_compiler/cpu/nnacl)
endif()

View File

@ -13,9 +13,6 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64)
-ffunction-sections -fdata-sections -ffast-math")
endif()
endif()
if(ENABLE_CPU)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
endif()
if("${X86_64_SIMD}" STREQUAL "avx")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2 -mfma")
endif()
@ -59,21 +56,27 @@ if(APPLE)
set_source_files_properties(${ASSEMBLY_SRC} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
endif()
########################### build nnacl static library ########################
########################### build nnacl library ########################
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
if(ENABLE_CPU)
add_library(nnacl SHARED $<TARGET_OBJECTS:nnacl_mid>)
add_library(nnacl SHARED ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
target_compile_definitions(nnacl PRIVATE ENABLE_ARM ENABLE_ARM64 ENABLE_NEON)
target_compile_options(nnacl PRIVATE -ffast-math -flax-vector-conversions)
elseif("${X86_64_SIMD}" STREQUAL "sse")
target_compile_definitions(nnacl PRIVATE ENABLE_SSE)
elseif("${X86_64_SIMD}" STREQUAL "avx")
target_compile_definitions(nnacl PRIVATE ENABLE_SSE ENABLE_AVX)
endif()
target_compile_options(nnacl PRIVATE -fPIC)
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
target_link_options(nnacl PRIVATE -Wl,-z,relro,-z,now)
endif()
else()
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
add_library(nnacl STATIC $<TARGET_OBJECTS:nnacl_mid>)
endif()
if(NOT ENABLE_CPU)
add_dependencies(nnacl_mid fbs_src)
endif()
########################### arm fp16 build optimize library ########################
if(ENABLE_FP16)
add_subdirectory(${NNACL_DIR}/optimize)

View File

@ -86,7 +86,11 @@ int LRelu(const float *src, int length, float *dst, float alpha) {
for (; i < length - 4; i += 4) {
MS_FLOAT32X4 src_tmp = MS_LDQ_F32(src + i);
MS_FLOAT32X4 mul_tmp = MS_MULQ_N_F32(src_tmp, alpha);
#ifdef ENABLE_ARM
MS_UINT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
#else
MS_FLOAT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
#endif
MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask));
}
#endif

View File

@ -29,6 +29,7 @@
#ifdef ENABLE_ARM
#define MS_FLOAT32X4 float32x4_t
#define MS_INT32X4 int32x4_t
#define MS_UINT32X4 uint32x4_t
#define MS_LDQ_F32 vld1q_f32
#define MS_LDQ_EPI32 vld1q_s32
#define MS_ADDQ_F32 vaddq_f32