forked from OSchip/llvm-project
AMDGPU: Materialize frame index before add
It isn't generally safe to fold the frame index directly into the operand, since it may no longer be an inline immediate after it is expanded. Surprisingly, materializing the frame index first seems to produce better code, since the FI doesn't prevent folding other immediate operands.
llvm-svn: 288185
This commit is contained in:
parent
ff8bb49bf4
commit
f96eeec005
|
@ -245,12 +245,17 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
|
|||
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||
|
||||
unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
|
||||
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
|
||||
.addImm(Offset);
|
||||
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
|
||||
.addFrameIndex(FrameIdx);
|
||||
|
||||
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
|
||||
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
|
||||
.addReg(OffsetReg, RegState::Kill)
|
||||
.addFrameIndex(FrameIdx);
|
||||
.addReg(FIReg);
|
||||
}
|
||||
|
||||
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
|
||||
|
|
|
@ -7,15 +7,16 @@
|
|||
;
|
||||
; CHECK-LABEL: {{^}}main:
|
||||
|
||||
; CHECK: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}
|
||||
|
||||
; FIXME: add 0?
|
||||
; CHECK-DAG: s_movk_i32 [[K0:s[0-9]+]], 0x140
|
||||
; CHECK-DAG: v_add_i32_e64 [[ADD_K0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, [[K0]], 0
|
||||
; CHECK-DAG: v_add_i32_e32 [[ADD_K0:v[0-9]+]], vcc, 0x140, [[BASE_FI]]
|
||||
|
||||
; CHECK-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
|
||||
; CHECK-DAG: buffer_store_dword {{v[0-9]+}}, [[ADD_K0]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; CHECK-DAG: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
|
||||
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
|
||||
; CHECK-DAG: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BASE_FI]], [[BYTES]]
|
||||
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
|
||||
|
|
Loading…
Reference in New Issue