forked from OSchip/llvm-project
AMDGPU: Don't hard error on LDS globals in functions
Instead, emit a trap and a warning. We force inlining of this situation, so any function where this happens should be dead as indirect or external calls are not yet supported. This should avoid erroring on dead code.
This commit is contained in:
parent
8f997b4f01
commit
1e0c540360
|
@ -71,7 +71,8 @@ public:
|
|||
static bool visitConstantExpr(const ConstantExpr *CE);
|
||||
static bool visitConstantExprsRecursively(
|
||||
const Constant *EntryC,
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited);
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
|
||||
bool HasApertureRegs);
|
||||
};
|
||||
|
||||
} // end anonymous namespace
|
||||
|
@ -93,6 +94,14 @@ static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
|
|||
return castRequiresQueuePtr(ASC->getSrcAddressSpace());
|
||||
}
|
||||
|
||||
static bool isDSAddress(const Constant *C) {
|
||||
const GlobalValue *GV = dyn_cast<GlobalValue>(C);
|
||||
if (!GV)
|
||||
return false;
|
||||
unsigned AS = GV->getAddressSpace();
|
||||
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
|
||||
}
|
||||
|
||||
bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
|
||||
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
|
||||
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
|
||||
|
@ -104,7 +113,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
|
|||
|
||||
bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
|
||||
const Constant *EntryC,
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited) {
|
||||
SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
|
||||
bool IsFunc, bool HasApertureRegs) {
|
||||
|
||||
if (!ConstantExprVisited.insert(EntryC).second)
|
||||
return false;
|
||||
|
@ -115,9 +125,13 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
|
|||
while (!Stack.empty()) {
|
||||
const Constant *C = Stack.pop_back_val();
|
||||
|
||||
// We need to trap on DS globals in non-entry functions.
|
||||
if (IsFunc && isDSAddress(C))
|
||||
return true;
|
||||
|
||||
// Check this constant expression.
|
||||
if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
|
||||
if (visitConstantExpr(CE))
|
||||
if (!HasApertureRegs && visitConstantExpr(CE))
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -301,11 +315,11 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
|||
}
|
||||
}
|
||||
|
||||
if (NeedQueuePtr || HasApertureRegs)
|
||||
if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
|
||||
continue;
|
||||
|
||||
if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
|
||||
if (castRequiresQueuePtr(ASC)) {
|
||||
if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
|
||||
NeedQueuePtr = true;
|
||||
continue;
|
||||
}
|
||||
|
@ -316,7 +330,8 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
|
|||
if (!OpC)
|
||||
continue;
|
||||
|
||||
if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) {
|
||||
if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
|
||||
HasApertureRegs)) {
|
||||
NeedQueuePtr = true;
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -1238,10 +1238,23 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
|
|||
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
|
||||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
|
||||
if (!MFI->isEntryFunction()) {
|
||||
SDLoc DL(Op);
|
||||
const Function &Fn = DAG.getMachineFunction().getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
Fn, "local memory global used by non-kernel function", SDLoc(Op).getDebugLoc());
|
||||
Fn, "local memory global used by non-kernel function",
|
||||
DL.getDebugLoc(), DS_Warning);
|
||||
DAG.getContext()->diagnose(BadLDSDecl);
|
||||
|
||||
// We currently don't have a way to correctly allocate LDS objects that
|
||||
// aren't directly associated with a kernel. We do force inlining of
|
||||
// functions that use local objects. However, if these dead functions are
|
||||
// not eliminated, we don't want a compile time error. Just emit a warning
|
||||
// and a trap, since there should be no callable path here.
|
||||
SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
|
||||
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
|
||||
Trap, DAG.getRoot());
|
||||
DAG.setRoot(OutputChain);
|
||||
return DAG.getUNDEF(Op.getValueType());
|
||||
}
|
||||
|
||||
// XXX: What does the value of G->getOffset() mean?
|
||||
|
|
|
@ -1967,8 +1967,19 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
|
|||
if (!MFI->isEntryFunction()) {
|
||||
const Function &Fn = MF.getFunction();
|
||||
DiagnosticInfoUnsupported BadLDSDecl(
|
||||
Fn, "local memory global used by non-kernel function", MI.getDebugLoc());
|
||||
Fn, "local memory global used by non-kernel function", MI.getDebugLoc(),
|
||||
DS_Warning);
|
||||
Fn.getContext().diagnose(BadLDSDecl);
|
||||
|
||||
// We currently don't have a way to correctly allocate LDS objects that
|
||||
// aren't directly associated with a kernel. We do force inlining of
|
||||
// functions that use local objects. However, if these dead functions are
|
||||
// not eliminated, we don't want a compile time error. Just emit a warning
|
||||
// and a trap, since there should be no callable path here.
|
||||
B.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
|
||||
B.buildUndef(DstReg);
|
||||
MI.eraseFromParent();
|
||||
return true;
|
||||
}
|
||||
|
||||
// TODO: We could emit code to handle the initialization somewhere.
|
||||
|
|
|
@ -1,13 +1,57 @@
|
|||
; Runs original SDAG test with -global-isel
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
; RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
|
||||
; RUN: not llc -global-isel -mtriple=amdgcn-amd-amdhsa -o /dev/null < %S/../lds-global-non-entry-func.ll 2>&1 | FileCheck %s
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
|
||||
@lds = internal addrspace(3) global float undef, align 4
|
||||
|
||||
; CHECK: error: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
|
||||
; CHECK-NOT: error
|
||||
; CHECK-NOT: ERROR
|
||||
; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
|
||||
define void @func_use_lds_global() {
|
||||
; GFX8-LABEL: func_use_lds_global:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 m0, -1
|
||||
; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX8-NEXT: s_trap 2
|
||||
; GFX8-NEXT: ds_write_b32 v0, v0
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: func_use_lds_global:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX9-NEXT: s_trap 2
|
||||
; GFX9-NEXT: ds_write_b32 v0, v0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
store float 0.0, float addrspace(3)* @lds, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
|
||||
define void @func_use_lds_global_constexpr_cast() {
|
||||
; GFX8-LABEL: func_use_lds_global_constexpr_cast:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX8-NEXT: s_trap 2
|
||||
; GFX8-NEXT: flat_store_dword v[0:1], v0
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: func_use_lds_global_constexpr_cast:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX9-NEXT: s_trap 2
|
||||
; GFX9-NEXT: global_store_dword v[0:1], v0, off
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -1,9 +1,46 @@
|
|||
; RUN: not llc -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX8 %s
|
||||
; RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s 2> %t | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: FileCheck -check-prefix=ERR %s < %t
|
||||
|
||||
@lds = internal addrspace(3) global float undef, align 4
|
||||
|
||||
; CHECK: error: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
|
||||
; ERR: warning: <unknown>:0:0: in function func_use_lds_global void (): local memory global used by non-kernel function
|
||||
define void @func_use_lds_global() {
|
||||
; GFX8-LABEL: func_use_lds_global:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX8-NEXT: s_mov_b32 m0, -1
|
||||
; GFX8-NEXT: ds_write_b32 v0, v0
|
||||
; GFX8-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX8-NEXT: s_trap 2
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX9-LABEL: func_use_lds_global:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX9-NEXT: ds_write_b32 v0, v0
|
||||
; GFX9-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GFX9-NEXT: s_trap 2
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
store float 0.0, float addrspace(3)* @lds, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
|
||||
define void @func_use_lds_global_constexpr_cast() {
|
||||
; GCN-LABEL: func_use_lds_global_constexpr_cast:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_mov_b64 s[0:1], s[4:5]
|
||||
; GCN-NEXT: s_trap 2
|
||||
; GCN-NEXT: s_setpc_b64 s[30:31]
|
||||
store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue