forked from OSchip/llvm-project
AMDGPU/SI: Make sure not to fold offsets into local address space globals
Summary: Offset folding only works if you are emitting relocations, and we don't emit relocations for local address space globals. Reviewers: arsenm, nhaustov Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: http://reviews.llvm.org/D21647 llvm-svn: 273765
This commit is contained in:
parent
f63768cbfc
commit
b164a9843b
|
@ -1422,6 +1422,14 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
|
|||
return DAG.getUNDEF(ASC->getValueType(0));
|
||||
}
|
||||
|
||||
/// Reports whether a constant offset may be folded into global address \p GA.
///
/// Offset folding relies on a relocation carrying the offset, and no
/// relocations are emitted for local address space globals. Only globals in
/// AMDGPUAS::GLOBAL_ADDRESS are therefore eligible; for those the decision is
/// delegated to the generic TargetLowering implementation.
bool
|
||||
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
|
||||
// Any address space other than GLOBAL_ADDRESS is rejected outright.
if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
|
||||
return false;
|
||||
|
||||
// Global address space: defer to the target-independent check.
return TargetLowering::isOffsetFoldingLegal(GA);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
|
|
|
@ -105,6 +105,8 @@ public:
|
|||
|
||||
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
|
||||
|
||||
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
|
||||
|
||||
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -relocation-model=static < %s | FileCheck %s
|
||||
|
||||
@lds = external addrspace(3) global [4 x i32]
|
||||
|
||||
; Function Attrs: nounwind
|
||||
|
||||
; Offset folding is an optimization done for global variables with relocations,
|
||||
; which allows you to store the offset in the r_addend of the relocation entry.
|
||||
; The offset is applied to the variable's address at link time, which eliminates
|
||||
; the need to emit shader instructions to do this calculation.
|
||||
; We don't use relocations for local memory, so we should never fold offsets
|
||||
; for local memory globals.
|
||||
|
||||
; CHECK-LABEL: lds_no_offset:
|
||||
; CHECK: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:4
|
||||
; Stores to element 1 of @lds, i.e. 4 bytes past the start of the
; [4 x i32] local-memory (addrspace(3)) global.
define void @lds_no_offset() {
|
||||
entry:
|
||||
%ptr = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1
|
||||
store i32 0, i32 addrspace(3)* %ptr
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue