AMDGPU/SI: Make sure not to fold offsets into local address space globals

Summary:
Offset folding only works if you are emitting relocations, and we don't
emit relocations for local address space globals.

Reviewers: arsenm, nhaustov

Subscribers: arsenm, llvm-commits, kzhuravl

Differential Revision: http://reviews.llvm.org/D21647

llvm-svn: 273765
This commit is contained in:
Tom Stellard 2016-06-25 01:59:16 +00:00
parent f63768cbfc
commit b164a9843b
3 changed files with 31 additions and 0 deletions

View File

@ -1422,6 +1422,14 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
// Decide whether a constant offset may be folded into the global address node
// GA. Any global outside AMDGPUAS::GLOBAL_ADDRESS is rejected outright; for
// the rest, the decision is left to the generic TargetLowering implementation.
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const bool IsGlobalAS = GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
  // Short-circuit keeps the base-class query from running for other spaces.
  return IsGlobalAS && TargetLowering::isOffsetFoldingLegal(GA);
}
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {

View File

@ -105,6 +105,8 @@ public:
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
// Target override of the offset-folding query for the global address node GA:
// returns whether a constant offset may be folded into that node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,

View File

@ -0,0 +1,21 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -relocation-model=static < %s | FileCheck %s
; Externally defined array of four i32 values placed in address space 3.
@lds = external addrspace(3) global [4 x i32]
; Function Attrs: nounwind
; Offset folding is an optimization done for global variables with relocations,
; which allows you to store the offset in the r_addend of the relocation entry.
; The offset is applied to the variable's address at link time, which eliminates
; the need to emit shader instructions to do this calculation.
; We don't use relocations for local memory, so we should never fold offsets
; for local memory globals.
; Store to element 1 of @lds (byte offset 4). The expected output carries that
; displacement as the "offset:4" operand on the ds_write instruction.
; NOTE: FileCheck only honours directives of the form "CHECK:"; without the
; colon the line is an ordinary comment and nothing is verified.
; CHECK-LABEL: lds_no_offset:
; CHECK: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:4
define void @lds_no_offset() {
entry:
  %ptr = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds, i32 0, i32 1
  store i32 0, i32 addrspace(3)* %ptr
  ret void
}