[AMDGPU] Adapt GCNRegBankReassign for 16 bit subregs

This lets the pass analyze 16 bit subregs without crashing if they
appear in instructions. At the same time it does not attempt
to reassign them. It can still correctly identify their register
banks so that larger registers can be reassigned.

More work, and more tests, will be needed here once real
instructions use these registers.

Differential Revision: https://reviews.llvm.org/D78772
This commit is contained in:
Stanislav Mekhanoshin 2020-04-23 15:10:25 -07:00
parent 6b01964271
commit 26777ad7a0
5 changed files with 178 additions and 18 deletions

View File

@ -280,7 +280,9 @@ unsigned GCNRegBankReassign::getPhysRegBank(unsigned Reg) const {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC);
if (Size > 32)
if (Size == 16)
Reg = TRI->get32BitRegister(Reg);
else if (Size > 32)
Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
if (TRI->hasVGPRs(RC)) {
@ -306,9 +308,16 @@ uint32_t GCNRegBankReassign::getRegBankMask(unsigned Reg, unsigned SubReg,
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
unsigned Size = TRI->getRegSizeInBits(*RC);
if (Size == 16) {
Reg = TRI->get32BitRegister(Reg);
Size = 1;
} else {
Size /= 32;
if (Size > 1)
Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
}
if (TRI->hasVGPRs(RC)) {
// VGPRs have 4 banks assigned in a round-robin fashion.
@ -440,10 +449,19 @@ bool GCNRegBankReassign::isReassignable(unsigned Reg) const {
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
unsigned Size = TRI->getRegSizeInBits(*RC);
// TODO: Support 16 bit registers. Those need to be moved with their
// parent VGPR_32 and potentially a sibling 16 bit sub-register.
if (Size < 32)
return false;
if (TRI->hasVGPRs(RC))
return true;
unsigned Size = TRI->getRegSizeInBits(*RC);
if (Size == 16)
return AMDGPU::SGPR_LO16RegClass.contains(PhysReg);
if (Size > 32)
PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);

View File

@ -694,18 +694,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
: IsAGPRDst ? &AMDGPU::AGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
: IsAGPRSrc ? &AMDGPU::AGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
MCRegister NewDestReg =
RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
DstRC);
MCRegister NewSrcReg =
RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
SrcRC);
MCRegister NewDestReg = RI.get32BitRegister(DestReg);
MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
if (IsSGPRDst) {
if (!IsSGPRSrc) {

View File

@ -1798,3 +1798,21 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
return Def;
}
// Looks up the 32 bit register that contains the 16 bit register \p Reg.
//
// \p Reg is first searched for as the lo16 half of a register in
// VGPR_32, SReg_32 or AGPR_32 (in that order), then as the hi16 half of a
// register in VGPR_32.
//
// \returns the first matching 32 bit super-register, or AMDGPU::NoRegister
// if none of the lookups succeeds.
MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
  // The register class is queried inside the assert itself, so builds with
  // assertions disabled are not left with an unused local variable.
  assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);

  // Iterate over pointers to the register classes rather than over copies of
  // the class objects, and avoid shadowing any outer name.
  for (const TargetRegisterClass *SuperRC : { &AMDGPU::VGPR_32RegClass,
                                              &AMDGPU::SReg_32RegClass,
                                              &AMDGPU::AGPR_32RegClass }) {
    if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, SuperRC))
      return Super;
  }

  // High halves are only looked up in the VGPR_32 class.
  if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
                                            &AMDGPU::VGPR_32RegClass))
    return Super;

  return AMDGPU::NoRegister;
}

View File

@ -283,7 +283,7 @@ public:
// \returns a DWORD offset of a \p SubReg
// The value from getSubRegIdxOffset() (presumably a bit offset - TODO
// confirm) is divided by 32 and rounded up. A zero \p SubReg yields 0.
// NOTE(review): two return statements are listed here; they appear to be the
// removed and the added line of the same hunk. As written only the first one
// is reachable, and both compute the same rounded-up quotient.
unsigned getChannelFromSubReg(unsigned SubReg) const {
return SubReg ? divideCeil(getSubRegIdxOffset(SubReg), 32) : 0;
return SubReg ? (getSubRegIdxOffset(SubReg) + 31) / 32 : 0;
}
// \returns a DWORD size of a \p SubReg
@ -291,6 +291,10 @@ public:
return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
}
// For a given 16 bit \p Reg \returns a 32 bit register holding it.
// \returns \p Reg otherwise.
// NOTE(review): the out-of-line definition ends with
// `return AMDGPU::NoRegister` when no lo16/hi16 super-register is found, and
// has no visible path that returns \p Reg itself - confirm which behavior is
// intended for inputs that are not 16 bit registers.
MCPhysReg get32BitRegister(MCPhysReg Reg) const;
private:
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,

View File

@ -364,3 +364,133 @@ body: |
DS_WRITE2_B32_gfx9 %2, %1.sub14, %1.sub15, 14, 15, 0, implicit $exec
S_ENDPGM 0
...
# GCN-LABEL: vgpr_lo16_sub{{$}}
# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16
---
name: vgpr_lo16_sub
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_lo16 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = V_AND_B32_e32 %1, %0, implicit $exec
%3 = COPY %2.lo16
$vgpr1_lo16 = COPY %3
SI_RETURN_TO_EPILOG $vgpr1_lo16
...
# GCN-LABEL: vgpr_lo16{{$}}
# GCN: $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16
---
name: vgpr_lo16
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_lo16, preferred-register: '$vgpr4_lo16' }
body: |
bb.0:
liveins: $vgpr0_lo16
%0 = COPY $vgpr0_lo16
$vgpr1_lo16 = COPY %0
SI_RETURN_TO_EPILOG $vgpr1_lo16
...
# GCN-LABEL: vgpr_hi16_sub{{$}}
# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16
---
name: vgpr_hi16_sub
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 2, class: vgpr_32 }
- { id: 3, class: vgpr_hi16 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = V_AND_B32_e32 %1, %0, implicit $exec
%3 = COPY %2.hi16
$vgpr1_hi16 = COPY %3
SI_RETURN_TO_EPILOG $vgpr1_hi16
...
# GCN-LABEL: vgpr_hi16{{$}}
# GCN: $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
---
name: vgpr_hi16
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_hi16, preferred-register: '$vgpr4_hi16' }
body: |
bb.0:
liveins: $vgpr0_hi16
%0 = COPY $vgpr0_hi16
$vgpr1_hi16 = COPY %0
SI_RETURN_TO_EPILOG $vgpr1_hi16
...
# GCN-LABEL: sgpr_lo16_sub{{$}}
# GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
---
name: sgpr_lo16_sub
tracksRegLiveness: true
registers:
- { id: 0, class: sgpr_32, preferred-register: '$sgpr16' }
- { id: 1, class: sgpr_32 }
- { id: 2, class: sgpr_lo16 }
body: |
bb.0:
%0 = IMPLICIT_DEF
$sgpr0 = IMPLICIT_DEF
%1 = S_AND_B32 %0, $sgpr0, implicit-def $scc
%2 = COPY %1.lo16
$sgpr1_lo16 = COPY %2
SI_RETURN_TO_EPILOG $sgpr1_lo16
...
# GCN-LABEL: sgpr_lo16{{$}}
# GCN: $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
---
name: sgpr_lo16
tracksRegLiveness: true
registers:
- { id: 0, class: sgpr_lo16, preferred-register: '$sgpr4_lo16' }
body: |
bb.0:
liveins: $sgpr0_lo16
%0 = COPY $sgpr0_lo16
$sgpr1_lo16 = COPY %0
SI_RETURN_TO_EPILOG $sgpr1_lo16
...
# Check that we do not use VGPR3 which we would use otherwise.
# We cannot use it because of interference with VGPR3_LO16.
# GCN-LABEL: v1_vs_v5_src_interence{{$}}
# GCN: V_AND_B32_e32 killed $vgpr7, killed $vgpr1,
---
name: v1_vs_v5_src_interence
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
- { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
$vgpr3_lo16 = IMPLICIT_DEF
%2 = V_AND_B32_e32 %1, %0, implicit $exec
S_ENDPGM 0
...