AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA

Reviewers: arsenm

Subscribers: arsenm, wdng, nhaehnle, llvm-commits

Differential Revision: https://reviews.llvm.org/D24405

llvm-svn: 281080
This commit is contained in:
Tom Stellard 2016-09-09 19:28:00 +00:00
parent 88f6407542
commit b2869eb6e9
3 changed files with 7 additions and 2 deletions

View File

@ -2690,7 +2690,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), 4);
unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
switch (Param) {
case GRID_DIM:
return ArgOffset;

View File

@ -276,6 +276,10 @@ public:
return isAmdHsaOS() ? 0 : 36;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
unsigned getStackAlignment() const {
// Scratch is allocated in 256 dword per wave blocks.
return 4 * 256 / getWavefrontSize();

View File

@ -29,7 +29,7 @@ define void @test_implicit(i32 addrspace(1)* %out) #1 {
; ALL-LABEL: {{^}}test_implicit_alignment
; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
; MESA: buffer_store_dword [[V_VAL]]
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]