forked from OSchip/llvm-project
AMDGPU/SI: Make sure llvm.amdgcn.implicitarg.ptr() is 8-byte aligned for HSA
Reviewers: arsenm
Subscribers: arsenm, wdng, nhaehnle, llvm-commits
Differential Revision: https://reviews.llvm.org/D24405
llvm-svn: 281080
This commit is contained in:
parent
88f6407542
commit
b2869eb6e9
|
@ -2690,7 +2690,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
|
|||
|
||||
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
|
||||
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
|
||||
-  uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), 4);
+  unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
+  uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
|
||||
switch (Param) {
|
||||
case GRID_DIM:
|
||||
return ArgOffset;
|
||||
|
|
|
@ -276,6 +276,10 @@ public:
|
|||
return isAmdHsaOS() ? 0 : 36;
|
||||
}
|
||||
|
||||
unsigned getAlignmentForImplicitArgPtr() const {
|
||||
return isAmdHsaOS() ? 8 : 4;
|
||||
}
|
||||
|
||||
unsigned getStackAlignment() const {
|
||||
// Scratch is allocated in 256 dword per wave blocks.
|
||||
return 4 * 256 / getWavefrontSize();
|
||||
|
|
|
@ -29,7 +29,7 @@ define void @test_implicit(i32 addrspace(1)* %out) #1 {
|
|||
|
||||
; ALL-LABEL: {{^}}test_implicit_alignment
|
||||
; MESA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
|
||||
-; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
+; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
|
||||
; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
|
||||
; MESA: buffer_store_dword [[V_VAL]]
|
||||
; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
|
||||
|
|
Loading…
Reference in New Issue