forked from mindspore-Ecosystem/mindspore
[CPU] Support avx isa by default
This commit is contained in:
parent
debd27a9f2
commit
57da3c73fe
4
build.sh
4
build.sh
|
@ -60,7 +60,7 @@ usage()
|
|||
echo " -l Compile with python dependency, default on"
|
||||
echo " -S Enable enable download cmake compile dependency from gitee , default off"
|
||||
echo " -k Enable make clean, clean up compilation generated cache "
|
||||
echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off"
|
||||
echo " -W Enable x86_64 SSE or AVX instruction set, use [sse|avx|neon|off], default off for lite and avx for CPU"
|
||||
echo " -H Enable hidden"
|
||||
echo " -L Link and specify Tensor-RT library path, default disable Tensor-RT lib linking"
|
||||
}
|
||||
|
@ -387,7 +387,7 @@ build_mindspore()
|
|||
echo "start build mindspore project."
|
||||
mkdir -pv "${BUILD_PATH}/mindspore"
|
||||
cd "${BUILD_PATH}/mindspore"
|
||||
CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH -DX86_64_SIMD=${X86_64_SIMD}"
|
||||
CMAKE_ARGS="-DDEBUG_MODE=$DEBUG_MODE -DBUILD_PATH=$BUILD_PATH"
|
||||
if [[ "X$ENABLE_COVERAGE" = "Xon" ]]; then
|
||||
CMAKE_ARGS="${CMAKE_ARGS} -DENABLE_COVERAGE=ON"
|
||||
fi
|
||||
|
|
|
@ -6,18 +6,13 @@ include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/include)
|
|||
include_directories(${CMAKE_SOURCE_DIR}/mindspore/core/mindrt/src)
|
||||
|
||||
if(ENABLE_CPU)
|
||||
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
set(PLATFORM_ARM64 "on")
|
||||
set(X86_64_SIMD "off")
|
||||
elseif("${X86_64_SIMD}" STREQUAL "off")
|
||||
set(X86_64_SIMD "avx")
|
||||
endif()
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/backend/kernel_compiler/cpu)
|
||||
if("${X86_64_SIMD}" STREQUAL "sse")
|
||||
add_compile_definitions(ENABLE_SSE)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2")
|
||||
endif()
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
add_compile_definitions(ENABLE_SSE)
|
||||
add_compile_definitions(ENABLE_AVX)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1 -msse4.2 -mfma -mavx -mavx2")
|
||||
endif()
|
||||
add_subdirectory(backend/kernel_compiler/cpu/nnacl)
|
||||
endif()
|
||||
|
||||
|
|
|
@ -13,9 +13,6 @@ if(PLATFORM_ARM32 OR PLATFORM_ARM64)
|
|||
-ffunction-sections -fdata-sections -ffast-math")
|
||||
endif()
|
||||
endif()
|
||||
if(ENABLE_CPU)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
|
||||
endif()
|
||||
if("${X86_64_SIMD}" STREQUAL "avx")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.1 -mavx -mavx2 -mfma")
|
||||
endif()
|
||||
|
@ -59,21 +56,27 @@ if(APPLE)
|
|||
set_source_files_properties(${ASSEMBLY_SRC} PROPERTIES COMPILE_FLAGS "-x assembler-with-cpp")
|
||||
endif()
|
||||
|
||||
########################### build nnacl static library ########################
|
||||
########################### build nnacl library ########################
|
||||
string(REPLACE "-fvisibility=hidden" "-fvisibility=default" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
|
||||
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
|
||||
|
||||
if(ENABLE_CPU)
|
||||
add_library(nnacl SHARED $<TARGET_OBJECTS:nnacl_mid>)
|
||||
add_library(nnacl SHARED ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
|
||||
if(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES "aarch64")
|
||||
target_compile_definitions(nnacl PRIVATE ENABLE_ARM ENABLE_ARM64 ENABLE_NEON)
|
||||
target_compile_options(nnacl PRIVATE -ffast-math -flax-vector-conversions)
|
||||
elseif("${X86_64_SIMD}" STREQUAL "sse")
|
||||
target_compile_definitions(nnacl PRIVATE ENABLE_SSE)
|
||||
elseif("${X86_64_SIMD}" STREQUAL "avx")
|
||||
target_compile_definitions(nnacl PRIVATE ENABLE_SSE ENABLE_AVX)
|
||||
endif()
|
||||
target_compile_options(nnacl PRIVATE -fPIC)
|
||||
if(NOT CMAKE_SYSTEM_NAME MATCHES "Windows")
|
||||
target_link_options(nnacl PRIVATE -Wl,-z,relro,-z,now)
|
||||
endif()
|
||||
else()
|
||||
add_library(nnacl_mid OBJECT ${KERNEL_SRC} ${TRAIN_SRC} ${ASSEMBLY_SRC})
|
||||
add_library(nnacl STATIC $<TARGET_OBJECTS:nnacl_mid>)
|
||||
endif()
|
||||
|
||||
if(NOT ENABLE_CPU)
|
||||
add_dependencies(nnacl_mid fbs_src)
|
||||
endif()
|
||||
|
||||
########################### arm fp16 build optimize library ########################
|
||||
if(ENABLE_FP16)
|
||||
add_subdirectory(${NNACL_DIR}/optimize)
|
||||
|
|
|
@ -86,7 +86,11 @@ int LRelu(const float *src, int length, float *dst, float alpha) {
|
|||
for (; i < length - 4; i += 4) {
|
||||
MS_FLOAT32X4 src_tmp = MS_LDQ_F32(src + i);
|
||||
MS_FLOAT32X4 mul_tmp = MS_MULQ_N_F32(src_tmp, alpha);
|
||||
#ifdef ENABLE_ARM
|
||||
MS_UINT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
|
||||
#else
|
||||
MS_FLOAT32X4 mask = MS_CMPGTQ_F32(src_tmp, MS_MOVQ_F32(0.0f));
|
||||
#endif
|
||||
MS_STQ_F32(dst + i, MS_BLENDQ_F32(mul_tmp, src_tmp, mask));
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#ifdef ENABLE_ARM
|
||||
#define MS_FLOAT32X4 float32x4_t
|
||||
#define MS_INT32X4 int32x4_t
|
||||
#define MS_UINT32X4 uint32x4_t
|
||||
#define MS_LDQ_F32 vld1q_f32
|
||||
#define MS_LDQ_EPI32 vld1q_s32
|
||||
#define MS_ADDQ_F32 vaddq_f32
|
||||
|
|
Loading…
Reference in New Issue