[AMDGPU] New helper functions to get a register class of a given width

Summary:
Introduce new helper functions getVGPRClassForBitWidth,
getAGPRClassForBitWidth, getSGPRClassForBitWidth and use them to
refactor various other functions that all contained their own lists of
valid register class widths. NFC.

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D78311
This commit is contained in:
Jay Foad 2020-04-09 12:01:05 +01:00
parent 61ba1481e2
commit 96b61571d0
2 changed files with 127 additions and 202 deletions

View File

@ -1274,6 +1274,75 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
return AMDGPUInstPrinter::getRegisterName(Reg);
}
static const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) {
switch (BitWidth) {
case 1:
return &AMDGPU::VReg_1RegClass;
case 16:
return &AMDGPU::VGPR_LO16RegClass;
case 32:
return &AMDGPU::VGPR_32RegClass;
case 64:
return &AMDGPU::VReg_64RegClass;
case 96:
return &AMDGPU::VReg_96RegClass;
case 128:
return &AMDGPU::VReg_128RegClass;
case 160:
return &AMDGPU::VReg_160RegClass;
case 256:
return &AMDGPU::VReg_256RegClass;
case 512:
return &AMDGPU::VReg_512RegClass;
case 1024:
return &AMDGPU::VReg_1024RegClass;
default:
return nullptr;
}
}
static const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) {
switch (BitWidth) {
case 32:
return &AMDGPU::AGPR_32RegClass;
case 64:
return &AMDGPU::AReg_64RegClass;
case 128:
return &AMDGPU::AReg_128RegClass;
case 512:
return &AMDGPU::AReg_512RegClass;
case 1024:
return &AMDGPU::AReg_1024RegClass;
default:
return nullptr;
}
}
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth) {
switch (BitWidth) {
case 16:
return &AMDGPU::SGPR_LO16RegClass;
case 32:
return &AMDGPU::SReg_32RegClass;
case 64:
return &AMDGPU::SReg_64RegClass;
case 96:
return &AMDGPU::SReg_96RegClass;
case 128:
return &AMDGPU::SReg_128RegClass;
case 160:
return &AMDGPU::SReg_160RegClass;
case 256:
return &AMDGPU::SReg_256RegClass;
case 512:
return &AMDGPU::SReg_512RegClass;
case 1024:
return &AMDGPU::SReg_1024RegClass;
default:
return nullptr;
}
}
// FIXME: This is very slow. It might be worth creating a map from physreg to
// register class.
const TargetRegisterClass *
@ -1320,129 +1389,56 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
switch (Size) {
case 16:
if (Size == 16) {
return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
case 32:
return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
case 64:
return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
case 96:
return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
case 128:
return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
case 160:
return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr;
case 256:
return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
case 512:
return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
case 1024:
return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr;
case 1:
return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr;
default:
}
const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
if (!VRC) {
assert(Size < 32 && "Invalid register class size");
return false;
}
return getCommonSubClass(VRC, RC) != nullptr;
}
bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
if (Size < 32)
return false;
switch (Size) {
case 32:
return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr;
case 64:
return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr;
case 96:
const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
if (!ARC) {
assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
return false;
case 128:
return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr;
case 160:
case 256:
return false;
case 512:
return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr;
case 1024:
return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr;
default:
llvm_unreachable("Invalid register class size");
}
return getCommonSubClass(ARC, RC) != nullptr;
}
const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const {
switch (getRegSizeInBits(*SRC)) {
case 16:
return &AMDGPU::VGPR_LO16RegClass;
case 32:
return &AMDGPU::VGPR_32RegClass;
case 64:
return &AMDGPU::VReg_64RegClass;
case 96:
return &AMDGPU::VReg_96RegClass;
case 128:
return &AMDGPU::VReg_128RegClass;
case 160:
return &AMDGPU::VReg_160RegClass;
case 256:
return &AMDGPU::VReg_256RegClass;
case 512:
return &AMDGPU::VReg_512RegClass;
case 1024:
return &AMDGPU::VReg_1024RegClass;
case 1:
return &AMDGPU::VReg_1RegClass;
default:
llvm_unreachable("Invalid register class size");
}
const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
unsigned Size = getRegSizeInBits(*SRC);
const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
assert(VRC && "Invalid register class size");
return VRC;
}
const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass(
const TargetRegisterClass *SRC) const {
switch (getRegSizeInBits(*SRC)) {
case 32:
return &AMDGPU::AGPR_32RegClass;
case 64:
return &AMDGPU::AReg_64RegClass;
case 128:
return &AMDGPU::AReg_128RegClass;
case 512:
return &AMDGPU::AReg_512RegClass;
case 1024:
return &AMDGPU::AReg_1024RegClass;
default:
llvm_unreachable("Invalid register class size");
}
const TargetRegisterClass *
SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
unsigned Size = getRegSizeInBits(*SRC);
const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
assert(ARC && "Invalid register class size");
return ARC;
}
const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass(
const TargetRegisterClass *VRC) const {
switch (getRegSizeInBits(*VRC)) {
case 16:
return &AMDGPU::SGPR_LO16RegClass;
case 32:
const TargetRegisterClass *
SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
unsigned Size = getRegSizeInBits(*VRC);
if (Size == 32)
return &AMDGPU::SGPR_32RegClass;
case 64:
return &AMDGPU::SReg_64RegClass;
case 96:
return &AMDGPU::SReg_96RegClass;
case 128:
if (Size == 128)
return &AMDGPU::SGPR_128RegClass;
case 160:
return &AMDGPU::SReg_160RegClass;
case 256:
return &AMDGPU::SReg_256RegClass;
case 512:
return &AMDGPU::SReg_512RegClass;
case 1024:
return &AMDGPU::SReg_1024RegClass;
default:
llvm_unreachable("Invalid register class size");
}
const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
assert(SRC && "Invalid register class size");
return SRC;
}
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@ -1451,62 +1447,21 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return RC;
// We can assume that each lane corresponds to one 32-bit register.
unsigned Count = getNumChannelsFromSubReg(SubIdx);
unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
if (isSGPRClass(RC)) {
switch (Count) {
case 1:
return &AMDGPU::SGPR_32RegClass;
case 2:
return &AMDGPU::SReg_64RegClass;
case 3:
return &AMDGPU::SReg_96RegClass;
case 4:
return &AMDGPU::SGPR_128RegClass;
case 5:
return &AMDGPU::SReg_160RegClass;
case 8:
return &AMDGPU::SReg_256RegClass;
case 16:
return &AMDGPU::SReg_512RegClass;
case 32: /* fall-through */
default:
llvm_unreachable("Invalid sub-register class size");
}
if (Size == 32)
RC = &AMDGPU::SGPR_32RegClass;
else if (Size == 128)
RC = &AMDGPU::SGPR_128RegClass;
else
RC = getSGPRClassForBitWidth(Size);
} else if (hasAGPRs(RC)) {
switch (Count) {
case 1:
return &AMDGPU::AGPR_32RegClass;
case 2:
return &AMDGPU::AReg_64RegClass;
case 4:
return &AMDGPU::AReg_128RegClass;
case 16:
return &AMDGPU::AReg_512RegClass;
case 32: /* fall-through */
default:
llvm_unreachable("Invalid sub-register class size");
}
RC = getAGPRClassForBitWidth(Size);
} else {
switch (Count) {
case 1:
return &AMDGPU::VGPR_32RegClass;
case 2:
return &AMDGPU::VReg_64RegClass;
case 3:
return &AMDGPU::VReg_96RegClass;
case 4:
return &AMDGPU::VReg_128RegClass;
case 5:
return &AMDGPU::VReg_160RegClass;
case 8:
return &AMDGPU::VReg_256RegClass;
case 16:
return &AMDGPU::VReg_512RegClass;
case 32: /* fall-through */
default:
llvm_unreachable("Invalid sub-register class size");
}
RC = getVGPRClassForBitWidth(Size);
}
assert(RC && "Invalid sub-register class size");
return RC;
}
bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
@ -1714,49 +1669,19 @@ const TargetRegisterClass *
SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
const RegisterBank &RB,
const MachineRegisterInfo &MRI) const {
switch (Size) {
case 1: {
switch (RB.getID()) {
case AMDGPU::VGPRRegBankID:
return &AMDGPU::VGPR_32RegClass;
case AMDGPU::VCCRegBankID:
return isWave32 ?
&AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass;
case AMDGPU::SGPRRegBankID:
return &AMDGPU::SReg_32RegClass;
default:
llvm_unreachable("unknown register bank");
}
}
case 32:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
&AMDGPU::SReg_32RegClass;
case 64:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass :
&AMDGPU::SReg_64RegClass;
case 96:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass :
&AMDGPU::SReg_96RegClass;
case 128:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass :
&AMDGPU::SGPR_128RegClass;
case 160:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass :
&AMDGPU::SReg_160RegClass;
case 256:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass :
&AMDGPU::SReg_256RegClass;
case 512:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
&AMDGPU::SReg_512RegClass;
case 1024:
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass :
&AMDGPU::SReg_1024RegClass;
switch (RB.getID()) {
case AMDGPU::VGPRRegBankID:
return getVGPRClassForBitWidth(std::max(32u, Size));
case AMDGPU::VCCRegBankID:
assert(Size == 1);
return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
: &AMDGPU::SReg_64_XEXECRegClass;
case AMDGPU::SGPRRegBankID:
if (Size == 128)
return &AMDGPU::SGPR_128RegClass;
return getSGPRClassForBitWidth(std::max(32u, Size));
default:
if (Size < 32)
return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
&AMDGPU::SReg_32RegClass;
return nullptr;
llvm_unreachable("unknown register bank");
}
}

View File

@ -156,16 +156,16 @@ public:
}
/// \returns A VGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const;
const TargetRegisterClass *
getEquivalentVGPRClass(const TargetRegisterClass *SRC) const;
/// \returns An AGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentAGPRClass(
const TargetRegisterClass *SRC) const;
const TargetRegisterClass *
getEquivalentAGPRClass(const TargetRegisterClass *SRC) const;
/// \returns A SGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentSGPRClass(
const TargetRegisterClass *VRC) const;
const TargetRegisterClass *
getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
/// \returns The register class that is used for a sub-register of \p RC for
/// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will