From a8a49923ddf75be9b6166be2f6a1f50a99817841 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Tue, 13 Sep 2022 13:07:46 -0500 Subject: [PATCH] [HLSL] Call global destructors from entries HLSL doesn't have a C++ runtime that supports `atexit` registration. To enable global destructors we instead rely on the `llvm.global_dtors` mechanism. This change disables `atexit` generation for HLSL and updates the HLSL code generation to call global destructors on the exit from entry functions. Depends on D132977. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D133518 --- clang/docs/HLSL/EntryFunctions.rst | 4 +- clang/lib/CodeGen/CGHLSLRuntime.cpp | 28 ++++++--- clang/lib/CodeGen/CGHLSLRuntime.h | 2 +- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 4 ++ .../GlobalConstructorFunction.hlsl | 5 ++ clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 57 +++++++++++++++++++ 6 files changed, 91 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGenHLSL/GlobalDestructors.hlsl diff --git a/clang/docs/HLSL/EntryFunctions.rst b/clang/docs/HLSL/EntryFunctions.rst index 859181477738..518698b9b1f7 100644 --- a/clang/docs/HLSL/EntryFunctions.rst +++ b/clang/docs/HLSL/EntryFunctions.rst @@ -46,7 +46,9 @@ constructors, then instantiations of the user-defined entry parameters with their semantic values populated, and a call to the user-defined function. After the call instruction the return value (if any) is saved using a target-appropriate intrinsic for storing outputs (for DirectX, the -``llvm.dx.store.output``). Global destructors are not supported in HLSL. +``llvm.dx.store.output``). Lastly, any present global destructors will be called +immediately before the return. HLSL does not support C++ ``atexit`` +registrations; instead, calls to global destructors are compile-time generated. ..
note:: diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 2f0c00169af6..591d0c43f182 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -55,7 +55,7 @@ void CGHLSLRuntime::finishCodeGen() { if (T.getArch() == Triple::ArchType::dxil) addDxilValVersion(TargetOpts.DxilValidatorVersion, M); - generateGlobalCtorCalls(); + generateGlobalCtorDtorCalls(); } void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { @@ -146,12 +146,13 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD, B.CreateRetVoid(); } -void CGHLSLRuntime::generateGlobalCtorCalls() { - llvm::Module &M = CGM.getModule(); - const auto *GlobalCtors = M.getNamedGlobal("llvm.global_ctors"); - if (!GlobalCtors) +static void gatherFunctions(SmallVectorImpl &Fns, llvm::Module &M, + bool CtorOrDtor) { + const auto *GV = + M.getNamedGlobal(CtorOrDtor ? "llvm.global_ctors" : "llvm.global_dtors"); + if (!GV) return; - const auto *CA = dyn_cast(GlobalCtors->getInitializer()); + const auto *CA = dyn_cast(GV->getInitializer()); if (!CA) return; // The global_ctor array elements are a struct [Priority, Fn *, COMDat]. @@ -168,8 +169,16 @@ void CGHLSLRuntime::generateGlobalCtorCalls() { "HLSL doesn't support setting priority for global ctors."); assert(isa(CS->getOperand(2)) && "HLSL doesn't support COMDat for global ctors."); - CtorFns.push_back(cast(CS->getOperand(1))); + Fns.push_back(cast(CS->getOperand(1))); } +} + +void CGHLSLRuntime::generateGlobalCtorDtorCalls() { + llvm::Module &M = CGM.getModule(); + SmallVector CtorFns; + SmallVector DtorFns; + gatherFunctions(CtorFns, M, true); + gatherFunctions(DtorFns, M, false); // Insert a call to the global constructor at the beginning of the entry block // to externally exported functions. 
This is a bit of a hack, but HLSL allows @@ -180,5 +189,10 @@ void CGHLSLRuntime::generateGlobalCtorCalls() { IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin()); for (auto *Fn : CtorFns) B.CreateCall(FunctionCallee(Fn)); + + // Insert global dtors before the terminator of the last basic block + B.SetInsertPoint(F.back().getTerminator()); + for (auto *Fn : DtorFns) + B.CreateCall(FunctionCallee(Fn)); } } diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index ee265922c0f5..120f53ffec14 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -46,7 +46,7 @@ public: virtual ~CGHLSLRuntime() {} void annotateHLSLResource(const VarDecl *D, llvm::GlobalVariable *GV); - void generateGlobalCtorCalls(); + void generateGlobalCtorDtorCalls(); void finishCodeGen(); diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index f0c45654f8d9..cc6ba4eba6d7 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -2348,6 +2348,10 @@ void MicrosoftCXXABI::registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, if (D.getTLSKind()) return emitGlobalDtorWithTLRegDtor(CGF, D, Dtor, Addr); + // HLSL doesn't support atexit. + if (CGM.getLangOpts().HLSL) + return CGM.AddCXXDtorEntry(Dtor, Addr); + // The default behavior is to use atexit.
CGF.registerGlobalDtorWithAtExit(D, Dtor, Addr); } diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl index d25e4551c728..db47c5c56580 100644 --- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl +++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl @@ -10,6 +10,10 @@ __attribute__((constructor)) void then_call_me(void) { i = 12; } +__attribute__((destructor)) void call_me_last(void) { + i = 0; +} + [numthreads(1,1,1)] void main(unsigned GI : SV_GroupIndex) {} @@ -19,4 +23,5 @@ void main(unsigned GI : SV_GroupIndex) {} //CHECK-NEXT: call void @"?then_call_me@@YAXXZ"() //CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() //CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"( //CHECK-NEXT: ret void diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl new file mode 100644 index 000000000000..ece040fa633b --- /dev/null +++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -S -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s + +struct Tail { + Tail() { + add(1); + } + + ~Tail() { + add(-1); + } + + void add(int V) { + static int Count = 0; + Count += V; + } +}; + +struct Pupper { + static int Count; + + Pupper() { + Count += 1; // :) + } + + ~Pupper() { + Count -= 1; // :( + } +} GlobalPup; + +void Wag() { + static Tail T; + T.add(0); +} + +int Pupper::Count = 0; + +[numthreads(1,1,1)] +void main(unsigned GI : SV_GroupIndex) { + Wag(); +} + +//CHECK: define void @main() +//CHECK-NEXT: entry: +//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl() +//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group() +//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0) +//CHECK-NEXT: call void @_GLOBAL__D_a() +//CHECK-NEXT: ret void + +// This is really just a sanity check I needed 
for myself to verify that +// function scope static variables also get destroyed properly. + +//CHECK: define internal void @_GLOBAL__D_a() +//CHECK-NEXT: entry: +//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A") +//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A") +//CHECK-NEXT: ret void