From a4ac434c47434d80bca54bab96f295ed4e972cc6 Mon Sep 17 00:00:00 2001 From: Pavel Iliin Date: Fri, 20 Nov 2020 15:02:57 +0000 Subject: [PATCH] [AArch64] Compiler-rt interface for out-of-line atomics. Out-of-line helper functions to support LSE deployment added. This is a port of libgcc implementation: https://gcc.gnu.org/git/?p=gcc.git;h=33befddcb849235353dc263db1c7d07dc15c9faa Differential Revision: https://reviews.llvm.org/D91156 --- compiler-rt/cmake/builtin-config-ix.cmake | 6 + compiler-rt/lib/builtins/CMakeLists.txt | 30 +++ compiler-rt/lib/builtins/aarch64/lse.S | 227 ++++++++++++++++++++++ compiler-rt/lib/builtins/assembly.h | 84 +++++++- compiler-rt/lib/builtins/cpu_model.c | 44 +++-- 5 files changed, 375 insertions(+), 16 deletions(-) create mode 100644 compiler-rt/lib/builtins/aarch64/lse.S diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake index 16d82b127878..2eeedd49e392 100644 --- a/compiler-rt/cmake/builtin-config-ix.cmake +++ b/compiler-rt/cmake/builtin-config-ix.cmake @@ -23,6 +23,12 @@ int foo(int x, int y) { ") +builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE +" +asm(\".arch armv8-a+lse\"); +asm(\"cas w0, w1, [x2]\"); +") + set(ARM64 aarch64) set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k) set(HEXAGON hexagon) diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index 3c29bba612e1..7f3df6ff548d 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -502,9 +502,39 @@ endif() set(aarch64_SOURCES ${GENERIC_TF_SOURCES} ${GENERIC_SOURCES} + cpu_model.c aarch64/fp_mode.c ) +# Generate outline atomics helpers from lse.S base +set(CUSTOM_FLAGS ${CMAKE_C_FLAGS}) +if(NOT ANDROID) + append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN CUSTOM_FLAGS) +endif() +append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS) +string(REPLACE " " "\t" CUSTOM_FLAGS 
"${CUSTOM_FLAGS}") + +foreach(pat cas swp ldadd ldclr ldeor ldset) + foreach(size 1 2 4 8 16) + foreach(model 1 2 3 4) + if(pat STREQUAL "cas" OR NOT size STREQUAL "16") + set(helper_asm outline_atomic_${pat}${size}_${model}.S) + add_custom_command( + OUTPUT ${helper_asm} + COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS} -DL_${pat} -DSIZE=${size} -DMODEL=${model} + ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S -o ${helper_asm} + DEPENDS aarch64/lse.S assembly.h + ) + set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE) + set(aarch64_SOURCES + ${aarch64_SOURCES} + ${helper_asm} + ) + endif() + endforeach(model) + endforeach(size) +endforeach(pat) + if (MINGW) set(aarch64_SOURCES ${aarch64_SOURCES} diff --git a/compiler-rt/lib/builtins/aarch64/lse.S b/compiler-rt/lib/builtins/aarch64/lse.S new file mode 100644 index 000000000000..4c75fa524c44 --- /dev/null +++ b/compiler-rt/lib/builtins/aarch64/lse.S @@ -0,0 +1,227 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../assembly.h" + +// Out-of-line LSE atomics helpers. Ported from libgcc library. +// N = {1, 2, 4, 8} +// M = {1, 2, 4, 8, 16} +// ORDER = {'relax', 'acq', 'rel', 'acq_rel'} +// Routines implemented: +// +// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr) +// iN __aarch64_swpN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldaddN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldclrN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldeorN_ORDER(iN val, iN *ptr) +// iN __aarch64_ldsetN_ORDER(iN val, iN *ptr) +// +// Routines may modify temporary registers tmp0, tmp1, tmp2, +// return value x0 and the flags only. 
+ +#ifdef __aarch64__ + +#ifdef HAS_ASM_LSE +.arch armv8-a+lse +#else +.arch armv8-a +#endif + +HIDDEN(__aarch64_have_lse_atomics) + +// Generate mnemonics for +// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4 +// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4 + +#if SIZE == 1 +#define S b +#define UXT uxtb +#define B 0x00000000 +#elif SIZE == 2 +#define S h +#define UXT uxth +#define B 0x40000000 +#elif SIZE == 4 || SIZE == 8 || SIZE == 16 +#define S +#define UXT mov +#if SIZE == 4 +#define B 0x80000000 +#elif SIZE == 8 +#define B 0xc0000000 +#endif +#else +#error +#endif // SIZE + +#if MODEL == 1 +#define SUFF _relax +#define A +#define L +#define M 0x000000 +#define N 0x000000 +#elif MODEL == 2 +#define SUFF _acq +#define A a +#define L +#define M 0x400000 +#define N 0x800000 +#elif MODEL == 3 +#define SUFF _rel +#define A +#define L l +#define M 0x008000 +#define N 0x400000 +#elif MODEL == 4 +#define SUFF _acq_rel +#define A a +#define L l +#define M 0x408000 +#define N 0xc00000 +#else +#error +#endif // MODEL + +// Define register size. +#define x(N) GLUE2(x, N) +#define w(N) GLUE2(w, N) +#if SIZE < 8 +#define s(N) w(N) +#else +#define s(N) x(N) +#endif + +#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF) +#define LDXR GLUE4(ld, A, xr, S) +#define STXR GLUE4(st, L, xr, S) + +// Define temporary registers. 
+#define tmp0 16 +#define tmp1 17 +#define tmp2 15 + +// Macro for branch to label if no LSE available +.macro JUMP_IF_NOT_LSE label + adrp x(tmp0), __aarch64_have_lse_atomics + ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics] + cbz w(tmp0), \label +.endm + +#ifdef L_cas +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas)) + JUMP_IF_NOT_LSE 8f +#if SIZE < 16 +#ifdef HAS_ASM_LSE +#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2] +#else +#define CAS .inst 0x08a07c41 + B + M +#endif + CAS // s(0), s(1), [x2] + ret +8: + UXT s(tmp0), s(0) +0: + LDXR s(0), [x2] + cmp s(0), s(tmp0) + bne 1f + STXR w(tmp1), s(1), [x2] + cbnz w(tmp1), 0b +1: + ret +#else +#define LDXP GLUE3(ld, A, xp) +#define STXP GLUE3(st, L, xp) +#ifdef HAS_ASM_LSE +#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4] +#else +#define CASP .inst 0x48207c82 + M +#endif + + CASP // x0, x1, x2, x3, [x4] + ret +8: + mov x(tmp0), x0 + mov x(tmp1), x1 +0: + LDXP x0, x1, [x4] + cmp x0, x(tmp0) + ccmp x1, x(tmp1), #0, eq + bne 1f + STXP w(tmp2), x2, x3, [x4] + cbnz w(tmp2), 0b +1: + ret +#endif +END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas)) +#endif // L_cas + +#ifdef L_swp +#ifdef HAS_ASM_LSE +#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1] +#else +#define SWP .inst 0x38208020 + B + N +#endif +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp)) + JUMP_IF_NOT_LSE 8f + SWP // s(0), s(0), [x1] + ret +8: + mov s(tmp0), s(0) +0: + LDXR s(0), [x1] + STXR w(tmp1), s(tmp0), [x1] + cbnz w(tmp1), 0b + ret +END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp)) +#endif // L_swp + +#if defined(L_ldadd) || defined(L_ldclr) || \ + defined(L_ldeor) || defined(L_ldset) + +#ifdef L_ldadd +#define LDNM ldadd +#define OP add +#define OPN 0x0000 +#elif defined(L_ldclr) +#define LDNM ldclr +#define OP bic +#define OPN 0x1000 +#elif defined(L_ldeor) +#define LDNM ldeor +#define OP eor +#define OPN 0x2000 +#elif defined(L_ldset) +#define LDNM ldset +#define OP orr +#define OPN 0x3000 +#else +#error +#endif + +#ifdef HAS_ASM_LSE 
+#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1] +#else +#define LDOP .inst 0x38200020 + OPN + B + N +#endif + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM)) + JUMP_IF_NOT_LSE 8f + LDOP // s(0), s(0), [x1] + ret +8: + mov s(tmp0), s(0) +0: + LDXR s(0), [x1] + OP s(tmp1), s(0), s(tmp0) + STXR w(tmp2), s(tmp1), [x1] + cbnz w(tmp2), 0b + ret +END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM)) +#endif // L_ldadd L_ldclr L_ldeor L_ldset + +NO_EXEC_STACK_DIRECTIVE + +// GNU property note for BTI and PAC +GNU_PROPERTY_BTI_PAC + +#endif // __aarch64__ diff --git a/compiler-rt/lib/builtins/assembly.h b/compiler-rt/lib/builtins/assembly.h index f437cb87f60a..3b7f592fa95c 100644 --- a/compiler-rt/lib/builtins/assembly.h +++ b/compiler-rt/lib/builtins/assembly.h @@ -35,14 +35,18 @@ #define HIDDEN(name) .hidden name #define LOCAL_LABEL(name) .L_##name #define FILE_LEVEL_DIRECTIVE -#if defined(__arm__) +#if defined(__arm__) || defined(__aarch64__) #define SYMBOL_IS_FUNC(name) .type name,%function +#define FUNC_ALIGN \ + .text SEPARATOR \ + .balign 16 SEPARATOR #else #define SYMBOL_IS_FUNC(name) .type name,@function +#define FUNC_ALIGN #endif #define CONST_SECTION .section .rodata -#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ +#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ defined(__linux__) #define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits #else @@ -65,6 +69,58 @@ #endif +// BTI and PAC gnu property note +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 + +#if defined(__ARM_FEATURE_BTI_DEFAULT) +#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +#else +#define BTI_FLAG 0 +#endif + +#if __ARM_FEATURE_PAC_DEFAULT & 3 +#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +#else +#define PAC_FLAG 0 +#endif + +#define GNU_PROPERTY(type, value) \ + .pushsection 
.note.gnu.property, "a" SEPARATOR \ + .p2align 3 SEPARATOR \ + .word 4 SEPARATOR \ + .word 16 SEPARATOR \ + .word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \ + .asciz "GNU" SEPARATOR \ + .word type SEPARATOR \ + .word 4 SEPARATOR \ + .word value SEPARATOR \ + .word 0 SEPARATOR \ + .popsection + +#if BTI_FLAG != 0 +#define BTI_C bti c +#else +#define BTI_C +#endif + +#if (BTI_FLAG | PAC_FLAG) != 0 +#define GNU_PROPERTY_BTI_PAC \ + GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) +#else +#define GNU_PROPERTY_BTI_PAC +#endif + +#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM) +#define CFI_START .cfi_startproc +#define CFI_END .cfi_endproc +#else +#define CFI_START +#define CFI_END +#endif + #if defined(__arm__) // Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros: @@ -131,8 +187,14 @@ #define DEFINE_CODE_STATE #endif -#define GLUE2(a, b) a##b -#define GLUE(a, b) GLUE2(a, b) +#define GLUE2_(a, b) a##b +#define GLUE(a, b) GLUE2_(a, b) +#define GLUE2(a, b) GLUE2_(a, b) +#define GLUE3_(a, b, c) a##b##c +#define GLUE3(a, b, c) GLUE3_(a, b, c) +#define GLUE4_(a, b, c, d) a##b##c##d +#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d) + #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) #ifdef VISIBILITY_HIDDEN @@ -177,6 +239,16 @@ DECLARE_FUNC_ENCODING \ name: +#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \ + DEFINE_CODE_STATE \ + FUNC_ALIGN \ + .globl name SEPARATOR \ + SYMBOL_IS_FUNC(name) SEPARATOR \ + DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \ + CFI_START SEPARATOR \ + DECLARE_FUNC_ENCODING \ + name: SEPARATOR BTI_C + #define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \ .globl SYMBOL_NAME(name) SEPARATOR \ SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ @@ -193,8 +265,12 @@ #ifdef __ELF__ #define END_COMPILERRT_FUNCTION(name) \ .size SYMBOL_NAME(name), . - SYMBOL_NAME(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) \ + CFI_END SEPARATOR \ + .size SYMBOL_NAME(name), . 
- SYMBOL_NAME(name) #else #define END_COMPILERRT_FUNCTION(name) +#define END_COMPILERRT_OUTLINE_FUNCTION(name) #endif #endif // COMPILERRT_ASSEMBLY_H diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index e8b23d5e5381..05ef8492384f 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -8,10 +8,21 @@ // // This file is based on LLVM's lib/Support/Host.cpp. // It implements the operating system Host concept and builtin -// __cpu_model for the compiler_rt library, for x86 only. +// __cpu_model for the compiler_rt library for x86 and +// __aarch64_have_lse_atomics for AArch64. // //===----------------------------------------------------------------------===// +#if defined(HAVE_INIT_PRIORITY) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101))) +#elif __has_attribute(__constructor__) +#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) +#else +// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that +// this runs during initialization. +#define CONSTRUCTOR_ATTRIBUTE +#endif + #if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \ defined(_M_X64)) && \ (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)) @@ -665,16 +676,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, #undef setFeature } -#if defined(HAVE_INIT_PRIORITY) -#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101))) -#elif __has_attribute(__constructor__) -#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__)) -#else -// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that -// this runs during initialization. 
-#define CONSTRUCTOR_ATTRIBUTE
-#endif
-
 #ifndef _WIN32
 __attribute__((visibility("hidden")))
 #endif
@@ -749,5 +750,24 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
 
   return 0;
 }
-
+#elif defined(__aarch64__)
+// Runtime LSE detection for the out-of-line atomics helpers: a function marked
+// CONSTRUCTOR_ATTRIBUTE reads AT_HWCAP via getauxval() and caches HWCAP_ATOMICS.
+_Bool __aarch64_have_lse_atomics
+    __attribute__((visibility("hidden"), nocommon));
+#if defined(__has_include)
+#if __has_include(<sys/auxv.h>)
+#include <sys/auxv.h>
+#ifndef AT_HWCAP
+#define AT_HWCAP 16
 #endif
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1 << 8)
+#endif
+static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
+  unsigned long hwcap = getauxval(AT_HWCAP);
+  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+#endif // __has_include(<sys/auxv.h>)
+#endif // defined(__has_include)
+#endif // defined(__aarch64__)