forked from OSchip/llvm-project
[AMDGPU] Override PSet for M0
This change returns an empty PSet list for the M0 register. Otherwise its PSet, as defined by tablegen, is SReg_32, which results in an incorrect register pressure calculation every time an instruction uses M0: such uses are counted against the SReg_32 PSet and unduly increase the pressure on SGPRs. Differential Revision: https://reviews.llvm.org/D29798 llvm-svn: 294691
This commit is contained in:
parent
87c87f4c30
commit
6dec24316b
|
@ -1329,3 +1329,11 @@ unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
|
|||
|
||||
return AMDGPURegisterInfo::getRegPressureSetLimit(MF, Idx);
|
||||
}
|
||||
|
||||
const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
|
||||
static const int Empty[] = { -1 };
|
||||
|
||||
if (hasRegUnit(AMDGPU::M0, RegUnit))
|
||||
return Empty;
|
||||
return AMDGPURegisterInfo::getRegUnitPressureSets(RegUnit);
|
||||
}
|
||||
|
|
|
@ -211,6 +211,8 @@ public:
|
|||
/// Pressure-set limit query for set \p Idx in \p MF; overrides the
/// base-class implementation.
unsigned getRegPressureSetLimit(const MachineFunction &MF,
|
||||
unsigned Idx) const override;
|
||||
|
||||
/// Pressure-set list for \p RegUnit. The override returns a list holding only
/// -1 for register units of M0 and defers to AMDGPURegisterInfo otherwise.
const int *getRegUnitPressureSets(unsigned RegUnit) const override;
|
||||
|
||||
private:
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
# RUN: llc -march=amdgcn -misched=converge -run-pass machine-scheduler %s -o - -debug-only=misched 2>&1 | FileCheck %s
|
||||
# REQUIRES: asserts
|
||||
|
||||
# Check that DS_* instructions create no SReg_32 pressure through their use of M0.
|
||||
|
||||
# CHECK: ScheduleDAGMILive::schedule starting
|
||||
# CHECK: SU({{.*}} = DS_READ_B32 {{.*}} %M0<imp-use>, %EXEC<imp-use>
|
||||
# CHECK: Pressure Diff : {{$}}
|
||||
# CHECK: SU({{.*}} DS_WRITE_B32
|
||||
|
||||
# MIR function for the M0 pressure-set check: %m0 is written once by
# S_MOV_B32 and then read implicitly by DS_READ_B32 and DS_WRITE_B32 (the
# latter marks it killed). tracksRegLiveness is enabled for this function.
---
|
||||
name: mo_pset
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
tracksRegLiveness: true
|
||||
registers:
|
||||
- { id: 0, class: sreg_128 }
|
||||
- { id: 1, class: sgpr_64 }
|
||||
- { id: 2, class: sreg_32_xm0 }
|
||||
- { id: 3, class: sgpr_32 }
|
||||
- { id: 4, class: vgpr_32 }
|
||||
- { id: 5, class: sreg_32_xm0_xexec }
|
||||
- { id: 6, class: vgpr_32 }
|
||||
- { id: 7, class: vgpr_32 }
|
||||
- { id: 8, class: vgpr_32 }
|
||||
liveins:
|
||||
- { reg: '%sgpr4_sgpr5', virtual-reg: '%1' }
|
||||
frameInfo:
|
||||
isFrameAddressTaken: false
|
||||
isReturnAddressTaken: false
|
||||
hasStackMap: false
|
||||
hasPatchPoint: false
|
||||
stackSize: 0
|
||||
offsetAdjustment: 0
|
||||
maxAlignment: 0
|
||||
adjustsStack: false
|
||||
hasCalls: false
|
||||
maxCallFrameSize: 0
|
||||
hasOpaqueSPAdjustment: false
|
||||
hasVAStart: false
|
||||
hasMustTailInVarArgFunc: false
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: %sgpr4_sgpr5
|
||||
|
||||
%1 = COPY %sgpr4_sgpr5
|
||||
%5 = S_LOAD_DWORD_IMM %1, 0, 0 :: (non-temporal dereferenceable invariant load 4 from `i32 addrspace(2)* undef`)
|
||||
%m0 = S_MOV_B32 -1
|
||||
%7 = COPY %5
|
||||
%6 = DS_READ_B32 %7, 0, 0, implicit %m0, implicit %exec
|
||||
DS_WRITE_B32 %7, %6, 4, 0, implicit killed %m0, implicit %exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
Loading…
Reference in New Issue