forked from OSchip/llvm-project
RegUsageInfoCollector: Skip AMDGPU entry point functions
I'm not sure if it's worth it or not to add a hook to disable the pass for an arbitrary function. This pass is taking up to 5% of compile time in tiny programs by iterating through all of the physical registers in every register class. This pass should be rewritten in terms of regunits. For now, skip doing anything for entry point functions. The vast majority of functions in the real world aren't callable, so just not running this will give the majority of the benefit. llvm-svn: 365255
This commit is contained in:
parent
b1ff896e92
commit
705e46f449
|
@ -77,14 +77,45 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
|
|||
return new RegUsageInfoCollector();
|
||||
}
|
||||
|
||||
// TODO: Move to hook somwehere?
|
||||
|
||||
// Return true if it is useful to track the used registers for IPRA / no CSR
|
||||
// optimizations. This is not useful for entry points, and computing the
|
||||
// register usage information is expensive.
|
||||
static bool isCallableFunction(const MachineFunction &MF) {
|
||||
switch (MF.getFunction().getCallingConv()) {
|
||||
case CallingConv::AMDGPU_VS:
|
||||
case CallingConv::AMDGPU_GS:
|
||||
case CallingConv::AMDGPU_PS:
|
||||
case CallingConv::AMDGPU_CS:
|
||||
case CallingConv::AMDGPU_KERNEL:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
||||
MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
const LLVMTargetMachine &TM = MF.getTarget();
|
||||
|
||||
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
|
||||
<< " -------------------- \n");
|
||||
LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
|
||||
<< " -------------------- \nFunction Name : "
|
||||
<< MF.getName() << '\n');
|
||||
|
||||
// Analyzing the register usage may be expensive on some targets.
|
||||
if (!isCallableFunction(MF)) {
|
||||
LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// If there are no callers, there's no point in computing more precise
|
||||
// register usage here.
|
||||
if (MF.getFunction().use_empty()) {
|
||||
LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<uint32_t> RegMask;
|
||||
|
||||
|
@ -110,6 +141,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
};
|
||||
// Scan all the physical registers. When a register is defined in the current
|
||||
// function set it and all the aliasing registers as defined in the regmask.
|
||||
// FIXME: Rewrite to use regunits.
|
||||
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
|
||||
// Don't count registers that are saved and restored.
|
||||
if (SavedRegs.test(PReg))
|
||||
|
@ -135,11 +167,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
<< " function optimized for not having CSR.\n");
|
||||
}
|
||||
|
||||
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
|
||||
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
|
||||
LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
|
||||
LLVM_DEBUG(
|
||||
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
|
||||
if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
|
||||
dbgs() << printReg(PReg, TRI) << " ";
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
|
||||
dbgs() << " \n----------------------------------------\n";
|
||||
);
|
||||
|
||||
PRUI.storeUpdateRegUsageInfo(F, RegMask);
|
||||
|
||||
|
@ -165,6 +200,7 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
|
|||
}
|
||||
|
||||
// Insert any register fully saved via subregisters.
|
||||
// FIXME: Rewrite to use regunits.
|
||||
for (const TargetRegisterClass *RC : TRI.regclasses()) {
|
||||
if (!RC->CoveredBySubRegs)
|
||||
continue;
|
||||
|
|
|
@ -11,5 +11,8 @@ define void @foo()#0 {
|
|||
call void @bar2()
|
||||
ret void
|
||||
}
|
||||
|
||||
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
|
||||
|
||||
declare void @bar2()
|
||||
attributes #0 = {nounwind}
|
||||
|
|
|
@ -17,4 +17,6 @@ define void @foo() #0 {
|
|||
ret void
|
||||
}
|
||||
|
||||
@llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @foo to i8*), i8* bitcast (void ()* @bar to i8*)]
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
|
|
@ -9,4 +9,7 @@ define preserve_allcc void @foo()#0 {
|
|||
ret void
|
||||
}
|
||||
declare void @bar2()
|
||||
|
||||
@llvm.used = appending global [1 x i8*] [i8* bitcast (void ()* @foo to i8*)]
|
||||
|
||||
attributes #0 = {nounwind}
|
||||
|
|
Loading…
Reference in New Issue