From 1e0c540360e826d85e7f048d31895c4028e6499a Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 11 Mar 2020 11:49:03 -0400 Subject: [PATCH] AMDGPU: Don't hard error on LDS globals in functions Instead, emit a trap and a warning. We force inlining of this situation, so any function where this happens should be dead as indirect or external calls are not yet supported. This should avoid erroring on dead code. --- .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 27 +++++++--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 15 +++++- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 13 ++++- .../GlobalISel/lds-global-non-entry-func.ll | 54 +++++++++++++++++-- .../AMDGPU/lds-global-non-entry-func.ll | 41 +++++++++++++- 5 files changed, 135 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index e72b3f4fde63..66801b7af542 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -71,7 +71,8 @@ public: static bool visitConstantExpr(const ConstantExpr *CE); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited); + SmallPtrSet &ConstantExprVisited, bool IsFunc, + bool HasApertureRegs); }; } // end anonymous namespace @@ -93,6 +94,14 @@ static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { return castRequiresQueuePtr(ASC->getSrcAddressSpace()); } +static bool isDSAddress(const Constant *C) { + const GlobalValue *GV = dyn_cast(C); + if (!GV) + return false; + unsigned AS = GV->getAddressSpace(); + return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS; +} + bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); @@ -104,7 +113,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited) { + SmallPtrSet &ConstantExprVisited, + bool IsFunc, bool HasApertureRegs) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -115,9 +125,13 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( while (!Stack.empty()) { const Constant *C = Stack.pop_back_val(); + // We need to trap on DS globals in non-entry functions. + if (IsFunc && isDSAddress(C)) + return true; + // Check this constant expression. if (const auto *CE = dyn_cast(C)) { - if (visitConstantExpr(CE)) + if (!HasApertureRegs && visitConstantExpr(CE)) return true; } @@ -301,11 +315,11 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { } } - if (NeedQueuePtr || HasApertureRegs) + if (NeedQueuePtr || (!IsFunc && HasApertureRegs)) continue; if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { - if (castRequiresQueuePtr(ASC)) { + if (!HasApertureRegs && castRequiresQueuePtr(ASC)) { NeedQueuePtr = true; continue; } @@ -316,7 +330,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) { if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) { + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc, + HasApertureRegs)) { NeedQueuePtr = true; break; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 625db96467ad..7b659d3c61a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1238,10 +1238,23 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { if (!MFI->isEntryFunction()) { + SDLoc DL(Op); const Function &Fn = DAG.getMachineFunction().getFunction(); DiagnosticInfoUnsupported BadLDSDecl( - Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc()); + Fn, "local memory global used by non-kernel function", + DL.getDebugLoc(), DS_Warning); DAG.getContext()->diagnose(BadLDSDecl); + + // We currently don't have a way to correctly allocate LDS objects that + // aren't directly associated with a kernel. We do force inlining of + // functions that use local objects. However, if these dead functions are + // not eliminated, we don't want a compile time error. Just emit a warning + // and a trap, since there should be no callable path here. + SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode()); + SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + Trap, DAG.getRoot()); + DAG.setRoot(OutputChain); + return DAG.getUNDEF(Op.getValueType()); } // XXX: What does the value of G->getOffset() mean? diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 63cc5246168c..99dd11c11b66 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1967,8 +1967,19 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue( if (!MFI->isEntryFunction()) { const Function &Fn = MF.getFunction(); DiagnosticInfoUnsupported BadLDSDecl( - Fn, "local memory global used by non-kernel function", MI.getDebugLoc()); + Fn, "local memory global used by non-kernel function", MI.getDebugLoc(), + DS_Warning); Fn.getContext().diagnose(BadLDSDecl); + + // We currently don't have a way to correctly allocate LDS objects that + // aren't directly associated with a kernel. We do force inlining of + // functions that use local objects. However, if these dead functions are + // not eliminated, we don't want a compile time error. Just emit a warning + // and a trap, since there should be no callable path here. + B.buildIntrinsic(Intrinsic::trap, ArrayRef(), true); + B.buildUndef(DstReg); + MI.eraseFromParent(); + return true; } // TODO: We could emit code to handle the initialization somewhere. diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll index 56571fd077ec..66122aa801a8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -1,13 +1,57 @@ -; Runs original SDAG test with -global-isel +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: FileCheck -check-prefix=ERR %s < %t -; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -o /dev/null < %S/../lds-global-non-entry-func.ll 2>&1 | FileCheck %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: FileCheck -check-prefix=ERR %s < %t @lds = internal addrspace(3) global float undef, align 4 -; CHECK: error: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function -; CHECK-NOT: error -; CHECK-NOT: ERROR +; ERR: warning: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function define void @func_use_lds_global() { +; GFX8-LABEL: func_use_lds_global: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: ds_write_b32 v0, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: ds_write_b32 v0, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store float 0.0, float addrspace(3)* @lds, align 4 ret void } + +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast() { +; GFX8-LABEL: func_use_lds_global_constexpr_cast: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: flat_store_dword v[0:1], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global_constexpr_cast: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: global_store_dword v[0:1], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] + store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index d797466f068f..d81bb8a65ce6 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -1,9 +1,46 @@ -; RUN: not llc -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s +; RUN: FileCheck -check-prefix=ERR %s < %t + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s +; RUN: FileCheck -check-prefix=ERR %s < %t @lds = internal addrspace(3) global float undef, align 4 -; CHECK: error: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function +; ERR: warning: :0:0: in function func_use_lds_global void (): local memory global used by non-kernel function define void @func_use_lds_global() { +; GFX8-LABEL: func_use_lds_global: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: s_mov_b32 m0, -1 +; GFX8-NEXT: ds_write_b32 v0, v0 +; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX8-NEXT: s_trap 2 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: func_use_lds_global: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: ds_write_b32 v0, v0 +; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5] +; GFX9-NEXT: s_trap 2 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] store float 0.0, float addrspace(3)* @lds, align 4 ret void } + +; ERR: warning: :0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function +define void @func_use_lds_global_constexpr_cast() { +; GCN-LABEL: func_use_lds_global_constexpr_cast: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: s_mov_b64 s[0:1], s[4:5] +; GCN-NEXT: s_trap 2 +; GCN-NEXT: s_setpc_b64 s[30:31] + store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + ret void +}