[AMDGPU] Avoid offset register in MUBUF for direct stack object accesses
We use an absolute address for stack objects, so a constant 0 is needed in
the soffset field.

Fixes: SWDEV-228562

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D89234
This commit is contained in:
parent
82974e0114
commit
5a061041ec
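Background for the change, as a minimal standalone sketch (not LLVM code; the function name, register ids, and offsets below are illustrative): a MUBUF scratch access forms its address roughly as soffset (an SGPR) plus the VGPR address (when offen is set) plus the immediate offset field. Once a stack object's base has been rebased into an absolute stack address held in the VGPR, keeping the stack pointer in soffset would add it a second time, so the selected soffset must be the constant 0.

// Minimal standalone sketch of the MUBUF scratch address computation.
// Not LLVM code; names and the values in main() are illustrative only.
#include <cassert>
#include <cstdint>

// effective address = soffset (SGPR) + voffset (VGPR, when offen) + imm offset
uint64_t mubufScratchAddress(uint64_t SOffset, uint64_t VOffset,
                             uint64_t ImmOffset) {
  return SOffset + VOffset + ImmOffset;
}

int main() {
  const uint64_t StackPtr = 0x1000; // hypothetical wave stack pointer value
  const uint64_t ObjOffset = 16;    // hypothetical frame-object offset

  // The VGPR already holds the absolute stack address of the object.
  const uint64_t AbsBase = StackPtr + ObjOffset;

  // With soffset = 0 the object is addressed correctly; keeping the stack
  // pointer in soffset would count it twice.
  assert(mubufScratchAddress(/*SOffset=*/0, AbsBase, /*ImmOffset=*/0) ==
         StackPtr + ObjOffset);
  assert(mubufScratchAddress(StackPtr, AbsBase, /*ImmOffset=*/0) !=
         StackPtr + ObjOffset);
  return 0;
}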
@@ -1468,22 +1468,14 @@ static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
 
 std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
   SDLoc DL(N);
-  const MachineFunction &MF = CurDAG->getMachineFunction();
-  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
 
-  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
-                                              FI->getValueType(0));
+  auto *FI = dyn_cast<FrameIndexSDNode>(N);
+  SDValue TFI =
+      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;
 
-    // If we can resolve this to a frame index access, this will be relative to
-    // either the stack or frame pointer SGPR.
-    return std::make_pair(
-        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
-  }
-
-  // If we don't know this private access is a local stack object, it needs to
-  // be relative to the entry point's scratch wave offset.
-  return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
+  // We rebase the base address into an absolute stack address and hence
+  // use constant 0 for soffset.
+  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
 }
 
 bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
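For readability, here is how foldFrameIndex reads with this patch applied, assembled from the added lines of the hunk above: both the FrameIndex case and the already-lowered base case now pair the base with the immediate 0 for soffset, where the old FrameIndex path returned the stack-pointer SGPR.

std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

  auto *FI = dyn_cast<FrameIndexSDNode>(N);
  SDValue TFI =
      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

  // We rebase the base address into an absolute stack address and hence
  // use constant 0 for soffset.
  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}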
@@ -478,9 +478,10 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
   assert(TII->isMUBUF(MI));
 
   MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
-  assert(SOffset->getReg() ==
-         MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
-         "should only be seeing stack pointer offset relative FrameIndex");
+  assert((SOffset->isReg() &&
+          SOffset->getReg() ==
+              MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg()) ||
+         (SOffset->isImm() && SOffset->getImm() == 0));
 
   MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
   int64_t NewOffset = OffsetOp->getImm() + Offset;
@@ -489,10 +490,6 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
   FIOp->ChangeToRegister(BaseReg, false);
   OffsetOp->setImm(NewOffset);
-
-  // The move materializing the base address will be an absolute stack address,
-  // so clear the base offset.
-  SOffset->ChangeToImmediate(0);
 }
 
 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
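Taken together, the two resolveFrameIndex hunks relax the old requirement that soffset be the stack-pointer SGPR: the assert now also accepts an immediate 0, which is what the new selection path emits, and the explicit SOffset->ChangeToImmediate(0) at the end of the function is removed. A standalone illustration of the relaxed invariant follows (stand-in types, not LLVM's MachineOperand/Register API):

// Standalone illustration of the relaxed soffset invariant checked in
// resolveFrameIndex. SOp and the register id are stand-ins, not LLVM types.
#include <cassert>
#include <cstdint>

struct SOp {
  bool IsReg;   // true: operand holds a register, false: an immediate
  unsigned Reg; // valid when IsReg
  int64_t Imm;  // valid when !IsReg
};

// Mirrors the updated assert: soffset is either the stack-pointer SGPR or
// the immediate 0 produced by the new selection path.
bool isAcceptableSOffset(const SOp &SOffset, unsigned StackPtrReg) {
  return (SOffset.IsReg && SOffset.Reg == StackPtrReg) ||
         (!SOffset.IsReg && SOffset.Imm == 0);
}

int main() {
  const unsigned StackPtrReg = 32; // hypothetical stack-pointer register id
  assert(isAcceptableSOffset({/*IsReg=*/true, StackPtrReg, 0}, StackPtrReg));
  assert(isAcceptableSOffset({/*IsReg=*/false, 0, 0}, StackPtrReg));
  assert(!isAcceptableSOffset({/*IsReg=*/false, 0, 8}, StackPtrReg));
  return 0;
}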
@@ -1455,6 +1452,8 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
         } else {
           SOffset.setReg(FrameReg);
         }
+      } else if (SOffset.isImm() && FrameReg != AMDGPU::NoRegister) {
+        SOffset.ChangeToRegister(FrameReg, false);
       }
 
       int64_t Offset = FrameInfo.getObjectOffset(Index);
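The eliminateFrameIndex hunk adds a branch for the case where soffset was selected as an immediate (now 0) but a frame register is available, switching the operand back to that register. A standalone sketch of just that branch, with stand-in types instead of LLVM's MachineOperand (the surrounding register-handling logic is elided):

// Stand-in model of the added soffset fix-up in eliminateFrameIndex.
// Types and the NoRegister sentinel are illustrative, not LLVM's.
#include <cassert>
#include <cstdint>

constexpr unsigned NoRegister = 0;

struct SOffsetOp {
  bool IsReg;
  unsigned Reg; // valid when IsReg
  int64_t Imm;  // valid when !IsReg
};

// Mirrors only the added else-if: an immediate soffset is rewritten to the
// frame register when one exists (ChangeToRegister(FrameReg, false)).
void rewriteImmediateSOffset(SOffsetOp &SOffset, unsigned FrameReg) {
  if (!SOffset.IsReg && FrameReg != NoRegister)
    SOffset = {/*IsReg=*/true, FrameReg, 0};
}

int main() {
  SOffsetOp S{/*IsReg=*/false, 0, 0};
  rewriteImmediateSOffset(S, /*FrameReg=*/34); // hypothetical register id
  assert(S.IsReg && S.Reg == 34);
  return 0;
}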
@@ -744,13 +744,13 @@ entry:
 ; GCN-LABEL: {{^}}tail_call_byval_align16:
 ; GCN-NOT: s32
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12
-; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
+; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
 
 ; GCN: s_getpc_b64
 
-; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
-; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}}
+; GCN: buffer_store_dword [[VREG2]], off, s[0:3], s32 offset:4
+; GCN: buffer_store_dword [[VREG1]], off, s[0:3], s32{{$}}
 ; GCN-NOT: s32
 ; GCN: s_setpc_b64
 define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
@@ -611,12 +611,10 @@ define void @too_many_args_use_workitem_id_x_byval(
 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
 ; VARABI: enable_vgpr_workitem_id = 0
 ; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
 ; VARABI: s_movk_i32 s32, 0x400{{$}}
-
 ; VARABI-NOT: s32
+; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
 ; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
-
+; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
 ; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
 ; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
@@ -660,8 +658,8 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1
 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
 ; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
 ; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
 ; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
 ; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
 ; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
 ; VARABI: s_swappc_b64