forked from OSchip/llvm-project
[RegUsageInfoCollector] Bugfix for callee saved registers.
Previously, this pass would look at the (static) set returned by getCallPreservedMask() and add those back as preserved in the case when isSafeForNoCSROpt() returns false. A problem is that a target may have to save some registers even when NoCSROpt takes place. For instance, on SystemZ, the return register is needed upon return from a function. Furthermore, getCallPreservedMask() only includes the registers that the target actually wishes to emit save/restore instructions for. This means that subregs and (fully saved) superregs are missing. This patch instead takes the (dynamic) set returned by the target for the function from determineCalleeSaves() and then adds sub/super regs to build the set to be used when building the RegMask for the function. Review: Quentin Colombet, Ulrich Weigand https://reviews.llvm.org/D46315 llvm-svn: 333261
This commit is contained in:
parent
ad8b7c1190
commit
7d484fae2b
|
@ -56,6 +56,10 @@ public:
|
|||
|
||||
bool runOnMachineFunction(MachineFunction &MF) override;
|
||||
|
||||
// Call determineCalleeSaves and then also set the bits for subregs and
|
||||
// fully saved superregs.
|
||||
static void computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF);
|
||||
|
||||
static char ID;
|
||||
};
|
||||
} // end of anonymous namespace
|
||||
|
@ -103,6 +107,9 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
LLVM_DEBUG(dbgs() << "Clobbered Registers: ");
|
||||
|
||||
BitVector SavedRegs;
|
||||
computeCalleeSavedRegs(SavedRegs, MF);
|
||||
|
||||
const BitVector &UsedPhysRegsMask = MRI->getUsedPhysRegsMask();
|
||||
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
|
||||
RegMask[Reg / 32] &= ~(1u << Reg % 32);
|
||||
|
@ -110,11 +117,15 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
// Scan all the physical registers. When a register is defined in the current
|
||||
// function set it and all the aliasing registers as defined in the regmask.
|
||||
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
|
||||
// Don't count registers that are saved and restored.
|
||||
if (SavedRegs.test(PReg))
|
||||
continue;
|
||||
// If a register is defined by an instruction mark it as defined together
|
||||
// with all it's aliases.
|
||||
// with all its unsaved aliases.
|
||||
if (!MRI->def_empty(PReg)) {
|
||||
for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
|
||||
SetRegAsDefined(*AI);
|
||||
if (!SavedRegs.test(*AI))
|
||||
SetRegAsDefined(*AI);
|
||||
continue;
|
||||
}
|
||||
// If a register is in the UsedPhysRegsMask set then mark it as defined.
|
||||
|
@ -124,15 +135,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
SetRegAsDefined(PReg);
|
||||
}
|
||||
|
||||
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
|
||||
const uint32_t *CallPreservedMask =
|
||||
TRI->getCallPreservedMask(MF, F.getCallingConv());
|
||||
if (CallPreservedMask) {
|
||||
// Set callee saved register as preserved.
|
||||
for (unsigned i = 0; i < RegMaskSize; ++i)
|
||||
RegMask[i] = RegMask[i] | CallPreservedMask[i];
|
||||
}
|
||||
} else {
|
||||
if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
|
||||
++NumCSROpt;
|
||||
LLVM_DEBUG(dbgs() << MF.getName()
|
||||
<< " function optimized for not having CSR.\n");
|
||||
|
@ -148,3 +151,48 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
void RegUsageInfoCollector::
|
||||
computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
|
||||
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
|
||||
// Target will return the set of registers that it saves/restores as needed.
|
||||
SavedRegs.clear();
|
||||
TFI->determineCalleeSaves(MF, SavedRegs);
|
||||
|
||||
// Insert subregs.
|
||||
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
unsigned Reg = CSRegs[i];
|
||||
if (SavedRegs.test(Reg))
|
||||
for (MCSubRegIterator SR(Reg, TRI, false); SR.isValid(); ++SR)
|
||||
SavedRegs.set(*SR);
|
||||
}
|
||||
|
||||
// Insert any register fully saved via subregisters.
|
||||
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
|
||||
if (SavedRegs.test(PReg))
|
||||
continue;
|
||||
|
||||
// Check if PReg is fully covered by its subregs.
|
||||
bool CoveredBySubRegs = false;
|
||||
for (const TargetRegisterClass *RC : TRI->regclasses())
|
||||
if (RC->CoveredBySubRegs && RC->contains(PReg)) {
|
||||
CoveredBySubRegs = true;
|
||||
break;
|
||||
}
|
||||
if (!CoveredBySubRegs)
|
||||
continue;
|
||||
|
||||
// Add PReg to SavedRegs if all subregs are saved.
|
||||
bool AllSubRegsSaved = true;
|
||||
for (MCSubRegIterator SR(PReg, TRI, false); SR.isValid(); ++SR)
|
||||
if (!SavedRegs.test(*SR)) {
|
||||
AllSubRegsSaved = false;
|
||||
break;
|
||||
}
|
||||
if (AllSubRegsSaved)
|
||||
SavedRegs.set(PReg);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
; Test that the updated regmask on the call to @fun1 preserves %r14 and
|
||||
; %r15. @fun1 will save and restore these registers since it contains a call.
|
||||
;
|
||||
; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -enable-ipra -print-regmask-num-regs=-1 \
|
||||
; RUN: -debug-only=ip-regalloc 2>&1 < %s | FileCheck --check-prefix=DBG %s
|
||||
; REQUIRES: asserts
|
||||
;
|
||||
; DBG: fun1 function optimized for not having CSR
|
||||
; DBG: Call Instruction After Register Usage Info Propagation : CallBRASL @fun1{{.*}} $r14d $r15d
|
||||
|
||||
declare dso_local fastcc signext i32 @foo(i16*, i32 signext) unnamed_addr
|
||||
|
||||
; @fun1: the internal fastcc callee whose register usage is under test.
; According to this file's header comment, it saves and restores %r14 and
; %r15 because it contains a call (the call to @foo below).
define internal fastcc void @fun1(i16* %arg, i16* nocapture %arg1) unnamed_addr #0 {
|
||||
bb:
|
||||
%tmp = load i16, i16* undef, align 2
|
||||
%tmp2 = shl i16 %tmp, 4
|
||||
; The call inside @fun1 that the file header refers to.
%tmp3 = tail call fastcc signext i32 @foo(i16* nonnull %arg, i32 signext 5)
|
||||
%tmp4 = or i16 0, %tmp2
|
||||
%tmp5 = or i16 %tmp4, 0
|
||||
store i16 %tmp5, i16* undef, align 2
|
||||
; Copy the i16 at element index 5 of %arg into %arg1.
%tmp6 = getelementptr inbounds i16, i16* %arg, i64 5
|
||||
%tmp7 = load i16, i16* %tmp6, align 2
|
||||
store i16 %tmp7, i16* %arg1, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
; @fun0: the caller of @fun1. The DBG check at the top of this file matches
; the call instruction to @fun1 that is emitted for this function.
define fastcc void @fun0(i8* nocapture readonly %arg, i16* nocapture %arg1, i32 signext %arg2) unnamed_addr {
|
||||
bb:
|
||||
; Alloca with an undef element count.
; NOTE(review): presumably present to influence @fun0's frame setup - confirm.
%a = alloca i8, i64 undef
|
||||
; The call whose updated regmask the test inspects.
call fastcc void @fun1(i16* nonnull undef, i16* %arg1)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind "no-frame-pointer-elim"="false" }
|
Loading…
Reference in New Issue