RegUsageInfoCollector: Skip AMDGPU entry point functions

I'm not sure if it's worth it or not to add a hook to disable the pass
for an arbitrary function.

This pass is taking up to 5% of compile time in tiny programs by
iterating through all of the physical registers in every register
class. This pass should be rewritten in terms of regunits. For now,
skip doing anything for entry point functions. The vast majority of
functions in the real world aren't callable, so just not running this
will give the majority of the benefit.

llvm-svn: 365255
This commit is contained in:
Matt Arsenault 2019-07-05 23:33:43 +00:00
parent b1ff896e92
commit 705e46f449
4 changed files with 50 additions and 6 deletions

View File

@ -77,14 +77,45 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
// TODO: Move to hook somewhere?
// Return true if it is useful to track the used registers for IPRA / no CSR
// optimizations. This is not useful for entry points, and computing the
// register usage information is expensive.
//
// The decision is made purely from the calling convention of the IR function
// backing \p MF: every AMDGPU shader-stage / kernel calling convention is
// treated as an entry point (returns false); any other calling convention is
// considered callable (returns true).
static bool isCallableFunction(const MachineFunction &MF) {
  switch (MF.getFunction().getCallingConv()) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  // HS / LS / ES are hardware shader stages as well, so they are entry points
  // just like the conventions above and gain nothing from the analysis.
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_KERNEL:
    return false;
  default:
    return true;
  }
}
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
<< " -------------------- \n");
LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
<< " -------------------- \nFunction Name : "
<< MF.getName() << '\n');
// Analyzing the register usage may be expensive on some targets.
if (!isCallableFunction(MF)) {
LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
return false;
}
// If there are no callers, there's no point in computing more precise
// register usage here.
if (MF.getFunction().use_empty()) {
LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
return false;
}
std::vector<uint32_t> RegMask;
@ -110,6 +141,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
};
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
// FIXME: Rewrite to use regunits.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
// Don't count registers that are saved and restored.
if (SavedRegs.test(PReg))
@ -135,11 +167,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
<< " function optimized for not having CSR.\n");
}
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
LLVM_DEBUG(
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
dbgs() << printReg(PReg, TRI) << " ";
}
LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
dbgs() << " \n----------------------------------------\n";
);
PRUI.storeUpdateRegUsageInfo(F, RegMask);
@ -165,6 +200,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
}
// Insert any register fully saved via subregisters.
// FIXME: Rewrite to use regunits.
for (const TargetRegisterClass *RC : TRI.regclasses()) {
if (!RC->CoveredBySubRegs)
continue;

View File

@ -11,5 +11,8 @@ define void @foo()#0 {
call void @bar2()
ret void
}
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
declare void @bar2()
attributes #0 = {nounwind}

View File

@ -17,4 +17,6 @@ define void @foo() #0 {
ret void
}
@llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @foo to i8*), i8* bitcast (void ()* @bar to i8*)]
attributes #0 = { nounwind }

View File

@ -9,4 +9,7 @@ define preserve_allcc void @foo()#0 {
ret void
}
declare void @bar2()
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
attributes #0 = {nounwind}