diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 948b9ddb5df6..66d92100e637 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -279,6 +279,15 @@ def CSR_AArch64_TLS_Darwin FP, (sequence "Q%u", 0, 31))>; +// We can only handle a register pair with adjacent registers; the registers in +// a pair must also belong to the same class. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. def CSR_AArch64_TLS_ELF diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1aef31baad20..763b2337de12 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -46,6 +46,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_SaveList; else return CSR_AArch64_AAPCS_SaveList; } @@ -58,6 +60,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; + if (CC == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_RegMask; else return CSR_AArch64_AAPCS_RegMask; } diff --git a/llvm/test/CodeGen/AArch64/cxx-tlscc.ll 
b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll new file mode 100644 index 000000000000..39f6c0fbec94 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cxx-tlscc.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; Shrink wrapping currently does not kick in because we have a TLS CALL +; in the entry block and it will clobber the link register. + +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) +declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg) + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} + +; CHECK-LABEL: _ZTW2sg +; CHECK-DAG: stp d31, d30 +; CHECK-DAG: stp d29, d28 +; CHECK-DAG: stp d27, d26 +; CHECK-DAG: stp d25, d24 +; CHECK-DAG: stp d23, d22 +; CHECK-DAG: stp d21, d20 +; CHECK-DAG: stp d19, d18 +; CHECK-DAG: stp d17, d16 +; CHECK-DAG: stp d7, d6 +; CHECK-DAG: stp d5, d4 +; CHECK-DAG: stp d3, d2 +; CHECK-DAG: stp d1, d0 +; CHECK-DAG: stp x20, x19 +; CHECK-DAG: stp x14, x13 +; CHECK-DAG: stp x12, x11 +; CHECK-DAG: stp x10, x9 +; CHECK-DAG: stp x8, x7 +; CHECK-DAG: stp x6, x5 +; CHECK-DAG: stp x4, x3 +; CHECK-DAG: stp x2, x1 +; CHECK-DAG: stp x29, x30 +; CHECK: blr +; CHECK: 
tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK: blr +; CHECK: tlv_atexit +; CHECK: [[BB_end]]: +; CHECK: blr +; CHECK-DAG: ldp x2, x1 +; CHECK-DAG: ldp x4, x3 +; CHECK-DAG: ldp x6, x5 +; CHECK-DAG: ldp x8, x7 +; CHECK-DAG: ldp x10, x9 +; CHECK-DAG: ldp x12, x11 +; CHECK-DAG: ldp x14, x13 +; CHECK-DAG: ldp x20, x19 +; CHECK-DAG: ldp d1, d0 +; CHECK-DAG: ldp d3, d2 +; CHECK-DAG: ldp d5, d4 +; CHECK-DAG: ldp d7, d6 +; CHECK-DAG: ldp d17, d16 +; CHECK-DAG: ldp d19, d18 +; CHECK-DAG: ldp d21, d20 +; CHECK-DAG: ldp d23, d22 +; CHECK-DAG: ldp d25, d24 +; CHECK-DAG: ldp d27, d26 +; CHECK-DAG: ldp d29, d28 +; CHECK-DAG: ldp d31, d30