[CXX TLS calling convention] Add CXX TLS calling convention.

This commit adds a new target-independent calling convention for C++ TLS
access functions. It aims to minimize overhead in the caller by preserving as
many registers as possible.

The target-specific implementation for X86-64 is defined as follows:
  Arguments are passed as for the default C calling convention
  The same applies for the return value(s)
  The callee preserves all GPRs - except RAX and RDI

The access function makes C-style TLS function calls in the entry and exit
blocks. C-style TLS functions save many more registers than normal calls.
The added calling convention ties into the existing implementation of the
C-style TLS functions, so we can't simply use existing calling conventions
such as preserve_mostcc.

rdar://9001553

llvm-svn: 254737
This commit is contained in:
Manman Ren 2015-12-04 17:40:13 +00:00
parent a75f826117
commit 19c7bbe3b7
9 changed files with 98 additions and 0 deletions

View File

@ -756,6 +756,7 @@ function. The operand fields are:
* ``anyregcc``: code 13
* ``preserve_mostcc``: code 14
* ``preserve_allcc``: code 15
* ``cxx_fast_tlscc``: code 17
* ``x86_stdcallcc``: code 64
* ``x86_fastcallcc``: code 65
* ``arm_apcscc``: code 66

View File

@ -406,6 +406,16 @@ added in the future:
This calling convention, like the `PreserveMost` calling convention, will be
used by a future version of the ObjectiveC runtime and should be considered
experimental at this time.
"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions
This calling convention aims to minimize overhead in the caller by
preserving as many registers as possible. This calling convention behaves
identically to the `C` calling convention in how arguments and return values
are passed, but it uses a different set of caller/callee-saved registers.
Given that C-style TLS on Darwin has its own special CSRs, we can't use the
existing `PreserveMost`.
- On X86-64 the callee preserves all general purpose registers, except for
RDI and RAX.
"``cc <n>``" - Numbered convention
Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific

View File

@ -72,6 +72,9 @@ namespace CallingConv {
// Swift - Calling convention for Swift.
Swift = 16,
// CXX_FAST_TLS - Calling convention for access functions.
CXX_FAST_TLS = 17,
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,

View File

@ -591,6 +591,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(ghccc);
KEYWORD(hhvmcc);
KEYWORD(hhvm_ccc);
KEYWORD(cxx_fast_tlscc);
KEYWORD(cc);
KEYWORD(c);

View File

@ -1544,6 +1544,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'ghccc'
/// ::= 'hhvmcc'
/// ::= 'hhvm_ccc'
/// ::= 'cxx_fast_tlscc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@ -1574,6 +1575,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_ghccc: CC = CallingConv::GHC; break;
case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
case lltok::kw_cc: {
Lex.Lex();
return ParseUInt32(CC);

View File

@ -99,6 +99,7 @@ namespace lltok {
kw_preserve_mostcc, kw_preserve_allcc,
kw_ghccc,
kw_hhvmcc, kw_hhvm_ccc,
kw_cxx_fast_tlscc,
// Attributes:
kw_attributes,

View File

@ -304,6 +304,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AnyReg: Out << "anyregcc"; break;
case CallingConv::PreserveMost: Out << "preserve_mostcc"; break;
case CallingConv::PreserveAll: Out << "preserve_allcc"; break;
case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break;
case CallingConv::GHC: Out << "ghccc"; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;

View File

@ -248,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_SaveList;
return CSR_64_RT_AllRegs_SaveList;
case CallingConv::CXX_FAST_TLS:
if (Is64Bit)
return CSR_64_TLS_Darwin_SaveList;
break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
@ -310,6 +314,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (HasAVX)
return CSR_64_RT_AllRegs_AVX_RegMask;
return CSR_64_RT_AllRegs_RegMask;
case CallingConv::CXX_FAST_TLS:
if (Is64Bit)
return CSR_64_TLS_Darwin_RegMask;
break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;

View File

@ -0,0 +1,71 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
; Types, globals and external declarations used by the access function
; @_ZTW2sg defined later in this test.
; %struct.S is an aggregate holding a single i8.
%struct.S = type { i8 }
; Thread-local, zero-initialized instance whose address @_ZTW2sg returns.
@sg = internal thread_local global %struct.S zeroinitializer, align 1
; External symbol passed as the last argument to @_tlv_atexit.
@__dso_handle = external global i8
; Thread-local i1 flag, initially false, that @_ZTW2sg tests and then sets.
@__tls_guard = internal thread_local unnamed_addr global i1 false
; External functions taking a %struct.S*. @_ZN1SC1Ev is called on @sg and
; @_ZN1SD1Ev is handed to @_tlv_atexit (presumably S's constructor and
; destructor, judging by the mangled names).
declare void @_ZN1SC1Ev(%struct.S*)
declare void @_ZN1SD1Ev(%struct.S*)
; External function taking a void(i8*) function pointer and two i8* values.
declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
; Every GPR should be saved - except rdi, rax, and rsp
; CHECK-LABEL: _ZTW2sg
; CHECK: pushq %r11
; CHECK: pushq %r10
; CHECK: pushq %r9
; CHECK: pushq %r8
; CHECK: pushq %rsi
; CHECK: pushq %rdx
; CHECK: pushq %rcx
; CHECK: pushq %rbx
; CHECK: callq
; CHECK: jne
; CHECK: callq
; CHECK: tlv_atexit
; CHECK: callq
; CHECK: popq %rbx
; CHECK: popq %rcx
; CHECK: popq %rdx
; CHECK: popq %rsi
; CHECK: popq %r8
; CHECK: popq %r9
; CHECK: popq %r10
; CHECK: popq %r11
; SHRINK-LABEL: _ZTW2sg
; SHRINK: callq
; SHRINK: jne
; SHRINK: pushq %r11
; SHRINK: pushq %r10
; SHRINK: pushq %r9
; SHRINK: pushq %r8
; SHRINK: pushq %rsi
; SHRINK: pushq %rdx
; SHRINK: pushq %rcx
; SHRINK: pushq %rbx
; SHRINK: callq
; SHRINK: tlv_atexit
; SHRINK: popq %rbx
; SHRINK: popq %rcx
; SHRINK: popq %rdx
; SHRINK: popq %rsi
; SHRINK: popq %r8
; SHRINK: popq %r9
; SHRINK: popq %r10
; SHRINK: popq %r11
; SHRINK: LBB{{.*}}:
; SHRINK: callq
; Access function for the thread-local @sg, declared with the cxx_fast_tlscc
; calling convention that this test exercises. When the thread-local guard is
; still false it sets the guard, calls @_ZN1SC1Ev on @sg and passes @_ZN1SD1Ev
; to @_tlv_atexit; on every path it returns the address of @sg.
define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
; Read the guard; if it is already set, branch straight to the exit block.
%.b.i = load i1, i1* @__tls_guard, align 1
br i1 %.b.i, label %__tls_init.exit, label %init.i
; Initialization path, taken only while the guard is false.
init.i:
; Set the guard before making the calls below.
store i1 true, i1* @__tls_guard, align 1
tail call void @_ZN1SC1Ev(%struct.S* nonnull @sg) #2
; Pass @_ZN1SD1Ev (bitcast to void(i8*)), the address of @sg's first field and
; @__dso_handle to @_tlv_atexit; the i32 result is unused.
%1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (void (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) #2
br label %__tls_init.exit
; Common exit reached from both paths: return the address of @sg.
__tls_init.exit:
ret %struct.S* @sg
}