From 9701053517100045ca9fb0fd81233314ab08f600 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 1 Aug 2022 10:51:30 +0800 Subject: [PATCH] Introduce @llvm.threadlocal.address intrinsic to access TLS variable This belongs to a series of patches which try to solve the thread identification problem in coroutines. See https://discourse.llvm.org/t/address-thread-identification-problems-with-coroutine/62015 for a full background. The problem consists of two concrete problems: TLS variables and readnone functions. This patch tries to convert the TLS problem to a readnone problem by converting the access of a TLS variable to an intrinsic which is marked as readnone. The readnone problem will be addressed in the following patches. Reviewed By: nikic, jyknight, nhaehnle, ychen Differential Revision: https://reviews.llvm.org/D125291 --- llvm/docs/LangRef.rst | 24 +++++++++++ .../llvm/Analysis/TargetTransformInfoImpl.h | 1 + llvm/include/llvm/IR/IRBuilder.h | 3 ++ llvm/include/llvm/IR/Intrinsics.td | 4 ++ .../SelectionDAG/SelectionDAGBuilder.cpp | 4 ++ llvm/lib/IR/IRBuilder.cpp | 7 ++++ llvm/test/CodeGen/X86/threadlocal_address.ll | 41 +++++++++++++++++++ 7 files changed, 84 insertions(+) create mode 100644 llvm/test/CodeGen/X86/threadlocal_address.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 28d4121dd054..117380ace0ed 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -24545,6 +24545,30 @@ information on the *based on* terminology see mask argument does not match the pointer size of the target, the mask is zero-extended or truncated accordingly. +.. _int_threadlocal_address: + +'``llvm.threadlocal.address``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn + +Arguments: +"""""""""" + +The first argument is a pointer, which refers to a thread local global. 
+ +Semantics: +"""""""""" + +The address of a thread local global is not a constant, since it depends on +the calling thread. The ``llvm.threadlocal.address`` intrinsic returns the +address of the given thread local global in the calling thread. + .. _int_vscale: '``llvm.vscale``' Intrinsic diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index eb1e688735d6..289721dc5bc5 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -650,6 +650,7 @@ public: case Intrinsic::coro_align: case Intrinsic::coro_suspend: case Intrinsic::coro_subfn_addr: + case Intrinsic::threadlocal_address: // These intrinsics don't actually represent code after lowering. return 0; } diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 6e559bb8f7f3..879084e726cd 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -753,6 +753,9 @@ public: /// If the pointer isn't i8* it will be converted. CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr); + /// Create a call to llvm.threadlocal.address intrinsic. + CallInst *CreateThreadLocalAddress(Value *Ptr); + /// Create a call to Masked Load intrinsic CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru = nullptr, const Twine &Name = ""); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d46fa4fbf5b5..1a43ac39d521 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1404,6 +1404,10 @@ def int_is_constant : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_any_ty], def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// Intrinsic to wrap a thread local variable. 
+def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 35650b9bd00e..2239148bf447 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7178,6 +7178,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } + case Intrinsic::threadlocal_address: { + setValue(&I, getValue(I.getOperand(0))); + return; + } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 137d6eaa5742..0b11fb6c4f4c 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -526,6 +526,13 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { return CreateCall(TheFn, Ops); } +CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) { + assert(isa<GlobalValue>(Ptr) && cast<GlobalValue>(Ptr)->isThreadLocal() && + "threadlocal_address only applies to thread local variables."); + return CreateIntrinsic(llvm::Intrinsic::threadlocal_address, {Ptr->getType()}, + {Ptr}); +} + CallInst * IRBuilderBase::CreateAssumption(Value *Cond, ArrayRef<OperandBundleDef> OpBundles) { diff --git a/llvm/test/CodeGen/X86/threadlocal_address.ll b/llvm/test/CodeGen/X86/threadlocal_address.ll new file mode 100644 index 000000000000..7a641c7773b6 --- /dev/null +++ b/llvm/test/CodeGen/X86/threadlocal_address.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -stop-after=finalize-isel %s -o - | FileCheck %s + +@i = thread_local global i32 0, align 4 + +define noundef i32 @foo() { +; CHECK: %0:gr64 = 
MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr @llvm.threadlocal.address(ptr @i) + %1 = load i32, ptr %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr %0, align 4 + %2 = call ptr @llvm.threadlocal.address(ptr @i) + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +@j = thread_local addrspace(1) global i32 addrspace(0)* @i, align 4 +define noundef i32 @bar() { +; CHECK: %0:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @j, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0, addrspace 1) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0, addrspace 1) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %1 = load i32, ptr addrspace(1) %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr addrspace(1) %0, align 4 + %2 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %3 = load i32, ptr addrspace(1) %2, align 4 + ret i32 %3 +} + +declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn +declare ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1)) nounwind readnone willreturn