forked from OSchip/llvm-project
[libc] Simplifies multi implementations
This is a roll forward of D101895 with two additional fixes.

Original patch description:
> This is a follow-up to D101524 which:
>
> - simplifies CPU feature detection and usage,
> - flattens target-dependent optimizations so it's obvious which implementations are generated,
> - provides an implementation targeting the host (march/mtune=native) for the mem* functions,
> - makes sure all implementations are unit-tested (provided the host can run them).

Additional fixes:
- Fix uninitialized ALL_CPU_FEATURES.
- Use concrete (non-pseudo) microarchitecture names, since pseudo microarchitectures are only supported from Clang 12 on.

Differential Revision: https://reviews.llvm.org/D102233
This commit is contained in:
parent
8aa7f28497
commit
6351993da7
|
@ -2,11 +2,19 @@
|
|||
# Cpu features definition and flags
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# Initialize ALL_CPU_FEATURES as empty list.
|
||||
set(ALL_CPU_FEATURES "")
|
||||
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
set(ALL_CPU_FEATURES SSE SSE2 AVX AVX2 AVX512F)
|
||||
list(SORT ALL_CPU_FEATURES)
|
||||
set(ALL_CPU_FEATURES SSE2 SSE4_2 AVX2 AVX512F)
|
||||
set(LIBC_COMPILE_OPTIONS_NATIVE -march=native)
|
||||
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
|
||||
set(LIBC_COMPILE_OPTIONS_NATIVE -mcpu=native)
|
||||
endif()
|
||||
|
||||
# Making sure ALL_CPU_FEATURES is sorted.
|
||||
list(SORT ALL_CPU_FEATURES)
|
||||
|
||||
# Function to check whether the target CPU supports the provided set of features.
|
||||
# Usage:
|
||||
# cpu_supports(
|
||||
|
@ -22,49 +30,6 @@ function(cpu_supports output_var features)
|
|||
endif()
|
||||
endfunction()
|
||||
|
||||
# Function to compute the flags to pass down to the compiler.
|
||||
# Usage:
|
||||
# compute_flags(
|
||||
# <output variable>
|
||||
# MARCH <arch name or "native">
|
||||
# REQUIRE <list of mandatory features to enable>
|
||||
# REJECT <list of features to disable>
|
||||
# )
|
||||
function(compute_flags output_var)
|
||||
cmake_parse_arguments(
|
||||
"COMPUTE_FLAGS"
|
||||
"" # Optional arguments
|
||||
"MARCH" # Single value arguments
|
||||
"REQUIRE;REJECT" # Multi value arguments
|
||||
${ARGN})
|
||||
# Check that features are not required and rejected at the same time.
|
||||
if(COMPUTE_FLAGS_REQUIRE AND COMPUTE_FLAGS_REJECT)
|
||||
_intersection(var ${COMPUTE_FLAGS_REQUIRE} ${COMPUTE_FLAGS_REJECT})
|
||||
if(var)
|
||||
message(FATAL_ERROR "Cpu Features REQUIRE and REJECT ${var}")
|
||||
endif()
|
||||
endif()
|
||||
# Generate the compiler flags in `current`.
|
||||
if(${CMAKE_CXX_COMPILER_ID} MATCHES "Clang|GNU")
|
||||
if(COMPUTE_FLAGS_MARCH)
|
||||
list(APPEND current "-march=${COMPUTE_FLAGS_MARCH}")
|
||||
endif()
|
||||
foreach(feature IN LISTS COMPUTE_FLAGS_REQUIRE)
|
||||
string(TOLOWER ${feature} lowercase_feature)
|
||||
list(APPEND current "-m${lowercase_feature}")
|
||||
endforeach()
|
||||
foreach(feature IN LISTS COMPUTE_FLAGS_REJECT)
|
||||
string(TOLOWER ${feature} lowercase_feature)
|
||||
list(APPEND current "-mno-${lowercase_feature}")
|
||||
endforeach()
|
||||
else()
|
||||
# In future, we can extend for other compilers.
|
||||
message(FATAL_ERROR "Unkown compiler ${CMAKE_CXX_COMPILER_ID}.")
|
||||
endif()
|
||||
# Export the list of flags.
|
||||
set(${output_var} "${current}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Internal helpers and utilities.
|
||||
# ------------------------------------------------------------------------------
|
||||
|
@ -94,39 +59,7 @@ function(_generate_check_code)
|
|||
endfunction()
|
||||
_generate_check_code()
|
||||
|
||||
# Compiles and runs the code generated above with the specified requirements.
|
||||
# This is helpful to infer which features a particular target supports or if
|
||||
# a specific feature implies other features (e.g. BMI2 implies SSE2 and SSE).
|
||||
function(_check_defined_cpu_feature output_var)
|
||||
cmake_parse_arguments(
|
||||
"CHECK_DEFINED"
|
||||
"" # Optional arguments
|
||||
"MARCH" # Single value arguments
|
||||
"REQUIRE;REJECT" # Multi value arguments
|
||||
${ARGN})
|
||||
compute_flags(
|
||||
flags
|
||||
MARCH ${CHECK_DEFINED_MARCH}
|
||||
REQUIRE ${CHECK_DEFINED_REQUIRE}
|
||||
REJECT ${CHECK_DEFINED_REJECT})
|
||||
try_run(
|
||||
run_result compile_result "${CMAKE_CURRENT_BINARY_DIR}/check_${feature}"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cpu_features/check_cpu_features.cpp"
|
||||
COMPILE_DEFINITIONS ${flags}
|
||||
COMPILE_OUTPUT_VARIABLE compile_output
|
||||
RUN_OUTPUT_VARIABLE run_output)
|
||||
if("${run_result}" EQUAL 0)
|
||||
set(${output_var}
|
||||
"${run_output}"
|
||||
PARENT_SCOPE)
|
||||
elseif(NOT ${compile_result})
|
||||
message(FATAL_ERROR "Failed to compile: ${compile_output}")
|
||||
else()
|
||||
message(FATAL_ERROR "Failed to run: ${run_output}")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
set(LIBC_CPU_FEATURES "" CACHE PATH "supported CPU features")
|
||||
set(LIBC_CPU_FEATURES "" CACHE PATH "Host supported CPU features")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
_intersection(cpu_features "${ALL_CPU_FEATURES}" "${LIBC_CPU_FEATURES}")
|
||||
|
@ -135,13 +68,18 @@ if(CMAKE_CROSSCOMPILING)
|
|||
endif()
|
||||
set(LIBC_CPU_FEATURES "${cpu_features}")
|
||||
else()
|
||||
# Populates the LIBC_CPU_FEATURES list.
|
||||
# Use -march=native only when the compiler supports it.
|
||||
include(CheckCXXCompilerFlag)
|
||||
CHECK_CXX_COMPILER_FLAG("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
|
||||
if(COMPILER_SUPPORTS_MARCH_NATIVE)
|
||||
_check_defined_cpu_feature(LIBC_CPU_FEATURES MARCH native)
|
||||
# Populates the LIBC_CPU_FEATURES list from host.
|
||||
try_run(
|
||||
run_result compile_result "${CMAKE_CURRENT_BINARY_DIR}/check_${feature}"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cpu_features/check_cpu_features.cpp"
|
||||
COMPILE_DEFINITIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
|
||||
COMPILE_OUTPUT_VARIABLE compile_output
|
||||
RUN_OUTPUT_VARIABLE run_output)
|
||||
if("${run_result}" EQUAL 0)
|
||||
set(LIBC_CPU_FEATURES "${run_output}")
|
||||
elseif(NOT ${compile_result})
|
||||
message(FATAL_ERROR "Failed to compile: ${compile_output}")
|
||||
else()
|
||||
_check_defined_cpu_feature(LIBC_CPU_FEATURES)
|
||||
message(FATAL_ERROR "Failed to run: ${run_output}")
|
||||
endif()
|
||||
endif()
|
||||
|
|
|
@ -186,20 +186,15 @@ function(add_implementation name impl_name)
|
|||
cmake_parse_arguments(
|
||||
"ADD_IMPL"
|
||||
"" # Optional arguments
|
||||
"MARCH" # Single value arguments
|
||||
"REQUIRE;REJECT;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
|
||||
"" # Single value arguments
|
||||
"REQUIRE;SRCS;HDRS;DEPENDS;COMPILE_OPTIONS" # Multi value arguments
|
||||
${ARGN})
|
||||
compute_flags(flags
|
||||
MARCH ${ADD_IMPL_MARCH}
|
||||
REQUIRE ${ADD_IMPL_REQUIRE}
|
||||
REJECT ${ADD_IMPL_REJECT}
|
||||
)
|
||||
add_entrypoint_object(${impl_name}
|
||||
NAME ${name}
|
||||
SRCS ${ADD_IMPL_SRCS}
|
||||
HDRS ${ADD_IMPL_HDRS}
|
||||
DEPENDS ${ADD_IMPL_DEPENDS}
|
||||
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags} -O2
|
||||
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS}
|
||||
)
|
||||
get_fq_target_name(${impl_name} fq_target_name)
|
||||
set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
|
||||
|
@ -210,17 +205,6 @@ endfunction()
|
|||
# memcpy
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
# include the relevant architecture specific implementations
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
|
||||
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/${LIBC_TARGET_ARCHITECTURE}/memcpy.cpp)
|
||||
#Disable tail merging as it leads to lower performance
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mllvm --tail-merge-threshold=0")
|
||||
else()
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
|
||||
endif()
|
||||
|
||||
function(add_memcpy memcpy_name)
|
||||
add_implementation(memcpy ${memcpy_name}
|
||||
SRCS ${MEMCPY_SRC}
|
||||
|
@ -235,8 +219,23 @@ function(add_memcpy memcpy_name)
|
|||
endfunction()
|
||||
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
add_memcpy(memcpy MARCH native)
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86_64/memcpy.cpp)
|
||||
add_memcpy(memcpy_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
|
||||
add_memcpy(memcpy_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
|
||||
add_memcpy(memcpy_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
|
||||
add_memcpy(memcpy_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
|
||||
add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_memcpy(memcpy)
|
||||
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memcpy.cpp)
|
||||
# Disable tail merging as it leads to lower performance.
|
||||
# Note that '-mllvm' needs to be prefixed with 'SHELL:' to prevent CMake flag deduplication.
|
||||
add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
|
||||
COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
|
||||
add_memcpy(memcpy COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
|
||||
else()
|
||||
set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
|
||||
add_memcpy(memcpy_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_memcpy(memcpy)
|
||||
endif()
|
||||
|
||||
|
@ -258,8 +257,14 @@ function(add_memset memset_name)
|
|||
endfunction()
|
||||
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
add_memset(memset MARCH native)
|
||||
add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
|
||||
add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
|
||||
add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
|
||||
add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
|
||||
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_memset(memset)
|
||||
else()
|
||||
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_memset(memset)
|
||||
endif()
|
||||
|
||||
|
@ -282,15 +287,13 @@ function(add_bzero bzero_name)
|
|||
endfunction()
|
||||
|
||||
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
|
||||
add_bzero(bzero MARCH native)
|
||||
add_bzero(bzero_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
|
||||
add_bzero(bzero_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
|
||||
add_bzero(bzero_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
|
||||
add_bzero(bzero_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
|
||||
add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_bzero(bzero)
|
||||
else()
|
||||
add_bzero(bzero_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
|
||||
add_bzero(bzero)
|
||||
endif()
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Add all other relevant implementations for the native target.
|
||||
# ------------------------------------------------------------------------------
|
||||
|
||||
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
|
||||
include(${LIBC_TARGET_ARCHITECTURE}/CMakeLists.txt)
|
||||
endif()
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}")
|
|
@ -1,14 +0,0 @@
|
|||
add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
|
||||
add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
|
||||
add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
|
||||
add_memcpy("memcpy_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
|
||||
|
||||
add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
|
||||
add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
|
||||
add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
|
||||
add_memset("memset_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
|
||||
|
||||
add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_none" REJECT "${ALL_CPU_FEATURES}")
|
||||
add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_sse" REQUIRE "SSE" REJECT "SSE2")
|
||||
add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_avx" REQUIRE "AVX" REJECT "AVX2")
|
||||
add_bzero("bzero_${LIBC_TARGET_ARCHITECTURE}_opt_avx512f" REQUIRE "AVX512F")
|
|
@ -196,6 +196,8 @@ function(add_libc_multi_impl_test name)
|
|||
libc_string_unittests
|
||||
DEPENDS
|
||||
${fq_config_name}
|
||||
COMPILE_OPTIONS
|
||||
${LIBC_COMPILE_OPTIONS_NATIVE}
|
||||
${ARGN}
|
||||
)
|
||||
else()
|
||||
|
|
Loading…
Reference in New Issue