[RegUsageInfoCollector] Bugfix for callee saved registers.

Previously, this pass would look at the (static) set returned by
getCallPreservedMask() and add those back as preserved in the case when
isSafeForNoCSROpt() returns false.

A problem is that a target may have to save some registers even when NoCSROpt
takes place. For instance, on SystemZ, the return register is needed upon
return from a function.

Furthermore, getCallPreservedMask() only includes the registers that the
target actually wishes to emit save/restore instructions for. This means that
subregs and (fully saved) superregs are missing.

This patch instead takes the (dynamic) set returned by the target for the
function from determineCalleeSaves() and then adds sub/super regs to build
the set to be used when building the RegMask for the function.

Review: Quentin Colombet, Ulrich Weigand
https://reviews.llvm.org/D46315

llvm-svn: 333261
This commit is contained in:
Jonas Paulsson 2018-05-25 08:42:02 +00:00
parent ad8b7c1190
commit 7d484fae2b
2 changed files with 93 additions and 11 deletions

View File

@ -56,6 +56,10 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
// Call determineCalleeSaves and then also set the bits for subregs and
// fully saved superregs.
static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
static char ID;
};
} // end of anonymous namespace
@ -103,6 +107,9 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
BitVector SavedRegs;
computeCalleeSavedRegs(SavedRegs, MF);
const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
@ -110,11 +117,15 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
// Don't count registers that are saved and restored.
if (SavedRegs.test(PReg))
continue;
// If a register is defined by an instruction mark it as defined together
// with all it's aliases.
// with all its unsaved aliases.
if (!MRI->def_empty(PReg)) {
for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
SetRegAsDefined(*AI);
if (!SavedRegs.test(*AI))
SetRegAsDefined(*AI);
continue;
}
// If a register is in the UsedPhysRegsMask set then mark it as defined.
@ -124,15 +135,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
SetRegAsDefined(PReg);
}
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
TRI->getCallPreservedMask(MF, F.getCallingConv());
if (CallPreservedMask) {
// Set callee saved register as preserved.
for (unsigned i = 0; i < RegMaskSize; ++i)
RegMask[i] = RegMask[i] | CallPreservedMask[i];
}
} else {
if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
++NumCSROpt;
LLVM_DEBUG(dbgs() << MF.getName()
<< " function optimized for not having CSR.\n");
@ -148,3 +151,48 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
return false;
}
// Compute the set of registers that are saved and restored by MF.
//
// The starting point is whatever the target's frame lowering reports through
// determineCalleeSaves(). That set is then widened in two steps:
//   1. every sub-register of a saved callee-saved register is marked saved;
//   2. every register that belongs to a register class flagged as
//      CoveredBySubRegs is marked saved once all of its sub-registers are.
//
// SavedRegs is cleared on entry and holds the widened set on return.
void RegUsageInfoCollector::
computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();

  // Target will return the set of registers that it saves/restores as needed.
  SavedRegs.clear();
  TFI->determineCalleeSaves(MF, SavedRegs);

  // Nothing is saved, so there is nothing to widen. Bail out before the
  // register scans below; this also avoids indexing into SavedRegs when it
  // holds no bits at all.
  if (SavedRegs.none())
    return;

  // Insert subregs.
  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    if (!SavedRegs.test(Reg))
      continue;
    for (MCSubRegIterator SR(Reg, TRI, false); SR.isValid(); ++SR)
      SavedRegs.set(*SR);
  }

  // Insert any register fully saved via subregisters.
  for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
    if (SavedRegs.test(PReg))
      continue;

    // Check if PReg is fully covered by its subregs.
    bool CoveredBySubRegs = false;
    for (const TargetRegisterClass *RC : TRI->regclasses())
      if (RC->CoveredBySubRegs && RC->contains(PReg)) {
        CoveredBySubRegs = true;
        break;
      }
    if (!CoveredBySubRegs)
      continue;

    // Add PReg to SavedRegs if all subregs are saved.
    bool AllSubRegsSaved = true;
    for (MCSubRegIterator SR(PReg, TRI, false); SR.isValid(); ++SR)
      if (!SavedRegs.test(*SR)) {
        AllSubRegsSaved = false;
        break;
      }
    if (AllSubRegsSaved)
      SavedRegs.set(PReg);
  }
}

View File

@ -0,0 +1,34 @@
; Test that the updated regmask on the call to @fun1 preserves %r14 and
; %r15. @fun1 will save and restore these registers since it contains a call.
;
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -enable-ipra -print-regmask-num-regs=-1 \
; RUN: -debug-only=ip-regalloc 2>&1 < %s | FileCheck --check-prefix=DBG %s
; REQUIRES: asserts
;
; DBG: fun1 function optimized for not having CSR
; DBG: Call Instruction After Register Usage Info Propagation : CallBRASL @fun1{{.*}} $r14d $r15d
declare dso_local fastcc signext i32 @foo(i16*, i32 signext) unnamed_addr
; Callee under test: an internal fastcc function whose body contains a call to
; @foo. The DBG lines above expect it to be reported as "optimized for not
; having CSR" while the regmask at its call site still lists $r14d and $r15d.
define internal fastcc void @fun1(i16* %arg, i16* nocapture %arg1) unnamed_addr #0 {
bb:
%tmp = load i16, i16* undef, align 2
%tmp2 = shl i16 %tmp, 4
; The call inside @fun1 that the test's header comment refers to.
%tmp3 = tail call fastcc signext i32 @foo(i16* nonnull %arg, i32 signext 5)
%tmp4 = or i16 0, %tmp2
%tmp5 = or i16 %tmp4, 0
store i16 %tmp5, i16* undef, align 2
; Copy the i16 at element index 5 of %arg into %arg1.
%tmp6 = getelementptr inbounds i16, i16* %arg, i64 5
%tmp7 = load i16, i16* %tmp6, align 2
store i16 %tmp7, i16* %arg1, align 2
ret void
}
; Caller of @fun1. The call below is the call site matched by the
; "CallBRASL @fun1" DBG line, i.e. the one whose regmask is inspected.
define fastcc void @fun0(i8* nocapture readonly %arg, i16* nocapture %arg1, i32 signext %arg2) unnamed_addr {
bb:
; NOTE(review): alloca with an undef element count; presumably present to
; affect the caller's frame setup -- confirm against the original reproducer.
%a = alloca i8, i64 undef
call fastcc void @fun1(i16* nonnull undef, i16* %arg1)
ret void
}
attributes #0 = { norecurse nounwind "no-frame-pointer-elim"="false" }