[AArch64] Compiler-rt interface for out-of-line atomics.

Add out-of-line helper functions to support LSE deployment.
This is a port of the libgcc implementation:
https://gcc.gnu.org/git/?p=gcc.git;h=33befddcb849235353dc263db1c7d07dc15c9faa

Differential Revision: https://reviews.llvm.org/D91156
This commit is contained in:
Pavel Iliin 2020-11-20 15:02:57 +00:00
parent 2c63e7604c
commit a4ac434c47
5 changed files with 375 additions and 16 deletions

View File

@ -23,6 +23,12 @@ int foo(int x, int y) {
")
# Probe a tiny source that switches the assembler to armv8-a+lse and emits a
# single LSE instruction (cas). The outcome is stored in
# COMPILER_RT_HAS_ASM_LSE, which the builtins build later turns into
# -DHAS_ASM_LSE when preprocessing aarch64/lse.S.
# NOTE(review): presumably the check succeeds only if the toolchain can
# assemble the snippet - confirm against builtin_check_c_compiler_source.
builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_LSE
"
asm(\".arch armv8-a+lse\");
asm(\"cas w0, w1, [x2]\");
")
set(ARM64 aarch64)
set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
set(HEXAGON hexagon)

View File

@ -502,9 +502,39 @@ endif()
set(aarch64_SOURCES
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES}
cpu_model.c
aarch64/fp_mode.c
)
# Generate the outline atomics helpers from the aarch64/lse.S template.
#
# lse.S is preprocessed once per (operation, access size, memory model)
# combination; each result is written to the binary directory as
# outline_atomic_<op><size>_<model>.S and appended to aarch64_SOURCES.
#
# CMAKE_C_FLAGS is one space-separated string. Split it into a real list so
# every flag reaches the compiler as its own argument and the custom command
# can be declared VERBATIM (portable escaping) instead of relying on
# whitespace tricks inside a single argument.
separate_arguments(CUSTOM_FLAGS NATIVE_COMMAND "${CMAKE_C_FLAGS}")
if(NOT ANDROID)
  append_list_if(COMPILER_RT_HAS_VISIBILITY_HIDDEN_FLAG -DVISIBILITY_HIDDEN CUSTOM_FLAGS)
endif()
append_list_if(COMPILER_RT_HAS_ASM_LSE -DHAS_ASM_LSE CUSTOM_FLAGS)
foreach(pat cas swp ldadd ldclr ldeor ldset)
  foreach(size 1 2 4 8 16)
    foreach(model 1 2 3 4)
      # Only cas is generated for the 16-byte size.
      if(pat STREQUAL "cas" OR NOT size STREQUAL "16")
        set(helper_asm outline_atomic_${pat}${size}_${model}.S)
        add_custom_command(
          OUTPUT ${helper_asm}
          COMMAND ${CMAKE_C_COMPILER} -E ${CUSTOM_FLAGS}
                  -DL_${pat} -DSIZE=${size} -DMODEL=${model}
                  ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S
                  -o ${CMAKE_CURRENT_BINARY_DIR}/${helper_asm}
          # lse.S includes ../assembly.h, so both files are inputs.
          DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/aarch64/lse.S
                  ${CMAKE_CURRENT_SOURCE_DIR}/assembly.h
          VERBATIM
        )
        set_source_files_properties(${helper_asm} PROPERTIES GENERATED TRUE)
        list(APPEND aarch64_SOURCES ${helper_asm})
      endif()
    endforeach()
  endforeach()
endforeach()
if (MINGW)
set(aarch64_SOURCES
${aarch64_SOURCES}

View File

@ -0,0 +1,227 @@
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#include "../assembly.h"
// Out-of-line LSE atomics helpers. Ported from libgcc library.
// N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
// Routines implemented:
//
// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
// iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
// iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
// iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
// iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
//
// Routines may modify only the temporary registers tmp0, tmp1, tmp2,
// the return value registers (x0, plus x1 for the 16-byte cas) and the flags.
#ifdef __aarch64__
#ifdef HAS_ASM_LSE
.arch armv8-a+lse
#else
.arch armv8-a
#endif
HIDDEN(__aarch64_have_lse_atomics)
// Generate mnemonics for
// L_cas: SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8 MODEL: 1,2,3,4
//
// SIZE is the access width in bytes. It selects three macros:
//   width suffix  - pasted onto instruction mnemonics (b, h or nothing)
//   UXT           - instruction the cas fallback uses to copy the expected
//                   value into a temporary (uxtb, uxth or plain mov)
//   width constant - added to the hand-encoded .inst words that are used when
//                   HAS_ASM_LSE is not defined; it is deliberately left
//                   undefined for the 16-byte size, whose encoding does not
//                   reference it
// Any other SIZE value stops preprocessing with an error.
#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
#define S
#define UXT mov
#if SIZE == 4
#define B 0x80000000
#elif SIZE == 8
#define B 0xc0000000
#endif
#else
#error
#endif // SIZE
// MODEL is the memory ordering (1 relaxed, 2 acquire, 3 release,
// 4 acquire+release). It selects:
//   SUFF - suffix of the generated symbol name
//   two ordering letters - pasted into mnemonics: a for the acquire forms,
//          l for the release forms, nothing otherwise
//   two ordering constants - added to the hand-encoded .inst words; one is
//          used by the cas and casp encodings, the other by the swp and
//          ldadd/ldclr/ldeor/ldset encodings
// Any other MODEL value stops preprocessing with an error.
#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#else
#error
#endif // MODEL
// Define register size.
// The first two helpers turn a register number into a 64-bit or 32-bit
// register name; the third picks whichever matches the data width
// (32-bit names below 8 bytes, 64-bit names otherwise).
#define x(N) GLUE2(x, N)
#define w(N) GLUE2(w, N)
#if SIZE < 8
#define s(N) w(N)
#else
#define s(N) x(N)
#endif
// Exported symbol name: __aarch64_ followed by the operation, the size in
// bytes and the ordering suffix.
#define NAME(BASE) GLUE4(__aarch64_, BASE, SIZE, SUFF)
// Load-exclusive and store-exclusive mnemonics with the ordering letter and
// width suffix pasted in; used by the fallback loops.
#define LDXR GLUE4(ld, A, xr, S)
#define STXR GLUE4(st, L, xr, S)
// Define temporary registers.
// These are register numbers, combined with the register-name helpers above.
#define tmp0 16
#define tmp1 17
#define tmp2 15
// Macro for branch to label if no LSE available
// Loads the byte-sized flag __aarch64_have_lse_atomics (page address via
// adrp, low 12 bits via :lo12:) into tmp0 and branches to the given label
// when the flag is zero. Overwrites tmp0.
.macro JUMP_IF_NOT_LSE label
adrp x(tmp0), __aarch64_have_lse_atomics
ldrb w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
cbz w(tmp0), \label
.endm
// Compare-and-swap helper, emitted only when L_cas is defined.
//
// Sizes below 16 bytes: expected value in register 0, desired value in
// register 1, pointer in x2. The value found in memory is left in register 0.
// 16-byte size: expected value in x0 and x1, desired value in x2 and x3,
// pointer in x4. The value found in memory is left in x0 and x1.
//
// When the runtime flag reports LSE support a single cas or casp instruction
// is executed; otherwise control jumps to label 8 and a load-exclusive and
// store-exclusive retry loop performs the same operation.
#ifdef L_cas
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
JUMP_IF_NOT_LSE 8f
#if SIZE < 16
// Use the real mnemonic when the assembler accepts LSE, otherwise a
// hand-encoded instruction word adjusted for width and ordering.
#ifdef HAS_ASM_LSE
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#else
#define CAS .inst 0x08a07c41 + B + M
#endif
CAS // s(0), s(1), [x2]
ret
// Fallback without LSE.
8:
// Keep a copy of the expected value, since register 0 is reused for the load.
UXT s(tmp0), s(0)
0:
LDXR s(0), [x2]
cmp s(0), s(tmp0)
// Mismatch: return the loaded value without storing.
bne 1f
STXR w(tmp1), s(1), [x2]
// Non-zero status means the exclusive store did not succeed; retry.
cbnz w(tmp1), 0b
1:
ret
#else
// 16-byte variant: operates on register pairs.
#define LDXP GLUE3(ld, A, xp)
#define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L) x0, x1, x2, x3, [x4]
#else
#define CASP .inst 0x48207c82 + M
#endif
CASP // x0, x1, x2, x3, [x4]
ret
// Fallback without LSE.
8:
// Keep copies of both halves of the expected value.
mov x(tmp0), x0
mov x(tmp1), x1
0:
LDXP x0, x1, [x4]
cmp x0, x(tmp0)
// Compare the second half only if the first half matched; otherwise force
// a not-equal result.
ccmp x1, x(tmp1), #0, eq
bne 1f
STXP w(tmp2), x2, x3, [x4]
// Non-zero status means the exclusive store did not succeed; retry.
cbnz w(tmp2), 0b
1:
ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas
// Atomic exchange helper, emitted only when L_swp is defined.
//
// New value in register 0, pointer in x1. The previous memory contents are
// left in register 0.
#ifdef L_swp
// Use the real mnemonic when the assembler accepts LSE, otherwise a
// hand-encoded instruction word adjusted for width and ordering.
#ifdef HAS_ASM_LSE
#define SWP GLUE4(swp, A, L, S) s(0), s(0), [x1]
#else
#define SWP .inst 0x38208020 + B + N
#endif
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
JUMP_IF_NOT_LSE 8f
SWP // s(0), s(0), [x1]
ret
// Fallback without LSE.
8:
// Move the new value aside, since register 0 receives the old contents.
mov s(tmp0), s(0)
0:
LDXR s(0), [x1]
STXR w(tmp1), s(tmp0), [x1]
// Non-zero status means the exclusive store did not succeed; retry.
cbnz w(tmp1), 0b
ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp
// Atomic fetch-and-modify helpers (add, bit clear, exclusive or, or),
// emitted when one of L_ldadd, L_ldclr, L_ldeor or L_ldset is defined.
//
// Operand in register 0, pointer in x1. The previous memory contents are
// left in register 0; memory receives the result of combining the old
// contents with the operand.
#if defined(L_ldadd) || defined(L_ldclr) || \
defined(L_ldeor) || defined(L_ldset)
// Per operation: the LSE mnemonic stem, the plain instruction the fallback
// loop uses to compute the new value, and the constant that selects the
// operation in the hand-encoded instruction word.
#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error
#endif
// Use the real mnemonic when the assembler accepts LSE, otherwise a
// hand-encoded instruction word adjusted for operation, width and ordering.
#ifdef HAS_ASM_LSE
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#else
#define LDOP .inst 0x38200020 + OPN + B + N
#endif
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
JUMP_IF_NOT_LSE 8f
LDOP // s(0), s(0), [x1]
ret
// Fallback without LSE.
8:
// Move the operand aside, since register 0 receives the old contents.
mov s(tmp0), s(0)
0:
LDXR s(0), [x1]
// New value = old contents combined with the operand.
OP s(tmp1), s(0), s(tmp0)
STXR w(tmp2), s(tmp1), [x1]
// Non-zero status means the exclusive store did not succeed; retry.
cbnz w(tmp2), 0b
ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset
NO_EXEC_STACK_DIRECTIVE
// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC
#endif // __aarch64__

View File

@ -35,14 +35,18 @@
#define HIDDEN(name) .hidden name
#define LOCAL_LABEL(name) .L_##name
#define FILE_LEVEL_DIRECTIVE
#if defined(__arm__)
#if defined(__arm__) || defined(__aarch64__)
#define SYMBOL_IS_FUNC(name) .type name,%function
#define FUNC_ALIGN \
.text SEPARATOR \
.balign 16 SEPARATOR
#else
#define SYMBOL_IS_FUNC(name) .type name,@function
#define FUNC_ALIGN
#endif
#define CONST_SECTION .section .rodata
#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
#if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \
defined(__linux__)
#define NO_EXEC_STACK_DIRECTIVE .section .note.GNU-stack,"",%progbits
#else
@ -65,6 +69,58 @@
#endif
// BTI and PAC gnu property note
// Numeric constants used to build the note: the note type, the property
// type, and one bit each for BTI and PAC.
#define NT_GNU_PROPERTY_TYPE_0 5
#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1
#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2
// BTI_FLAG is the BTI bit when the compiler predefines
// __ARM_FEATURE_BTI_DEFAULT, and zero otherwise.
#if defined(__ARM_FEATURE_BTI_DEFAULT)
#define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
#else
#define BTI_FLAG 0
#endif
// PAC_FLAG is the PAC bit when either of the two low bits of
// __ARM_FEATURE_PAC_DEFAULT is set, and zero otherwise (an undefined macro
// evaluates to zero in this test).
#if __ARM_FEATURE_PAC_DEFAULT & 3
#define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
#else
#define PAC_FLAG 0
#endif
// Emits one 8-byte aligned record into .note.gnu.property and then restores
// the previous section. Layout, in 32-bit words: 4, 16, the note type, the
// NUL-terminated name GNU (4 bytes), then the property type, 4, the property
// value and a zero word.
// NOTE(review): the leading 4 and 16 presumably are the byte sizes of the
// name and of the four trailing words - confirm against the ELF note layout.
#define GNU_PROPERTY(type, value) \
.pushsection .note.gnu.property, "a" SEPARATOR \
.p2align 3 SEPARATOR \
.word 4 SEPARATOR \
.word 16 SEPARATOR \
.word NT_GNU_PROPERTY_TYPE_0 SEPARATOR \
.asciz "GNU" SEPARATOR \
.word type SEPARATOR \
.word 4 SEPARATOR \
.word value SEPARATOR \
.word 0 SEPARATOR \
.popsection
// BTI_C expands to a bti c instruction only when BTI is enabled, so it can be
// placed unconditionally at function entry.
#if BTI_FLAG != 0
#define BTI_C bti c
#else
#define BTI_C
#endif
// GNU_PROPERTY_BTI_PAC emits the property note carrying the enabled feature
// bits, or nothing when neither feature is enabled.
#if (BTI_FLAG | PAC_FLAG) != 0
#define GNU_PROPERTY_BTI_PAC \
GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
#else
#define GNU_PROPERTY_BTI_PAC
#endif
// CFI_START and CFI_END open and close a CFI region. They expand to nothing
// unless the compiler is clang or predefines __GCC_HAVE_DWARF2_CFI_ASM.
#if defined(__clang__) || defined(__GCC_HAVE_DWARF2_CFI_ASM)
#define CFI_START .cfi_startproc
#define CFI_END .cfi_endproc
#else
#define CFI_START
#define CFI_END
#endif
#if defined(__arm__)
// Determine actual [ARM][THUMB[1][2]] ISA using compiler predefined macros:
@ -131,8 +187,14 @@
#define DEFINE_CODE_STATE
#endif
#define GLUE2(a, b) a##b
#define GLUE(a, b) GLUE2(a, b)
#define GLUE2_(a, b) a##b
#define GLUE(a, b) GLUE2_(a, b)
#define GLUE2(a, b) GLUE2_(a, b)
#define GLUE3_(a, b, c) a##b##c
#define GLUE3(a, b, c) GLUE3_(a, b, c)
#define GLUE4_(a, b, c, d) a##b##c##d
#define GLUE4(a, b, c, d) GLUE4_(a, b, c, d)
#define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name)
#ifdef VISIBILITY_HIDDEN
@ -177,6 +239,16 @@
DECLARE_FUNC_ENCODING \
name:
// Begins the definition of an out-of-line helper function. The symbol is
// emitted exactly as given, without going through SYMBOL_NAME. The expansion
// applies the function alignment, exports the symbol, marks it as a function,
// applies the configured symbol visibility, opens a CFI region and places the
// optional BTI landing pad directly after the label.
// Close the function with END_COMPILERRT_OUTLINE_FUNCTION.
#define DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(name) \
DEFINE_CODE_STATE \
FUNC_ALIGN \
.globl name SEPARATOR \
SYMBOL_IS_FUNC(name) SEPARATOR \
DECLARE_SYMBOL_VISIBILITY(name) SEPARATOR \
CFI_START SEPARATOR \
DECLARE_FUNC_ENCODING \
name: SEPARATOR BTI_C
#define DEFINE_COMPILERRT_FUNCTION_ALIAS(name, target) \
.globl SYMBOL_NAME(name) SEPARATOR \
SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \
@ -193,8 +265,12 @@
// Function epilogue macros.
//
// END_COMPILERRT_FUNCTION closes a function opened through the SYMBOL_NAME
// based DEFINE macros; on ELF it records the symbol size.
//
// END_COMPILERRT_OUTLINE_FUNCTION closes a function opened with
// DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED. That macro opens a CFI region
// and emits the label without any prefix, so this macro must always close the
// CFI region (also off ELF, otherwise the region is left unterminated) and
// must compute the size from the unprefixed name.
#ifdef __ELF__
#define END_COMPILERRT_FUNCTION(name) \
.size SYMBOL_NAME(name), . - SYMBOL_NAME(name)
#define END_COMPILERRT_OUTLINE_FUNCTION(name) \
CFI_END SEPARATOR \
.size name, . - name
#else
#define END_COMPILERRT_FUNCTION(name)
#define END_COMPILERRT_OUTLINE_FUNCTION(name) \
CFI_END
#endif
#endif // COMPILERRT_ASSEMBLY_H

View File

@ -8,10 +8,21 @@
//
// This file is based on LLVM's lib/Support/Host.cpp.
// It implements the operating system Host concept and builtin
// __cpu_model for the compiler_rt library, for x86 only.
// __cpu_model for the compiler_rt library for x86 and
// __aarch64_have_lse_atomics for AArch64.
//
//===----------------------------------------------------------------------===//
// CONSTRUCTOR_ATTRIBUTE marks a function so that it runs automatically during
// program initialization.
#if defined(HAVE_INIT_PRIORITY)
// Request an explicit priority. The priority is an attribute argument and
// therefore has to be written in parentheses.
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__(101)))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
#if (defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) || \
defined(_M_X64)) && \
(defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER))
@ -665,16 +676,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
#undef setFeature
}
#if defined(HAVE_INIT_PRIORITY)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__ 101))
#elif __has_attribute(__constructor__)
#define CONSTRUCTOR_ATTRIBUTE __attribute__((__constructor__))
#else
// FIXME: For MSVC, we should make a function pointer global in .CRT$X?? so that
// this runs during initialization.
#define CONSTRUCTOR_ATTRIBUTE
#endif
#ifndef _WIN32
__attribute__((visibility("hidden")))
#endif
@ -749,5 +750,24 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
return 0;
}
#elif defined(__aarch64__)
// LSE support detection for out-of-line atomics
// using HWCAP and Auxiliary vector
//
// Flag read by the out-of-line atomics helpers to choose between the LSE
// instruction and the exclusive load/store fallback. It has static storage
// and no initializer, so it is zero (fallback path) until the constructor
// below runs, and stays zero when <sys/auxv.h> is unavailable.
_Bool __aarch64_have_lse_atomics
__attribute__((visibility("hidden"), nocommon));
#if defined(__has_include)
#if __has_include(<sys/auxv.h>)
#include <sys/auxv.h>
// Fallback definitions for headers that do not provide these constants.
#ifndef AT_HWCAP
#define AT_HWCAP 16
#endif
#ifndef HWCAP_ATOMICS
#define HWCAP_ATOMICS (1 << 8)
#endif
// Runs at program initialization: reads the AT_HWCAP entry of the auxiliary
// vector and records whether the HWCAP_ATOMICS bit is set.
static void CONSTRUCTOR_ATTRIBUTE init_have_lse_atomics(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
__aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
}
#endif // __has_include(<sys/auxv.h>)
#endif // defined(__has_include)
#endif // defined(__aarch64__)