[MachineOutliner][AArch64] Add support for saving LR to a register

This teaches the outliner to save LR to a register rather than the stack when
possible. This allows us to avoid bumping the stack in outlined functions in
some cases. By doing this, in a later patch, we can teach the outliner to do
something like this:

f1:
  ...
  bl OUTLINED_FUNCTION
  ...

f2:
  ...
  move LR's contents to a register
  bl OUTLINED_FUNCTION
  move the register's contents back

instead of falling back to saving LR to the stack in both cases.

llvm-svn: 338278
This commit is contained in:
Jessica Paquette 2018-07-30 17:45:28 +00:00
parent 8692e142b3
commit fa3bee4756
6 changed files with 301 additions and 96 deletions

View File

@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
namespace llvm {
namespace outliner {
@ -74,6 +75,13 @@ public:
/// cost model information.
LiveRegUnits LRU;
/// Contains the accumulated register liveness information for the
/// instructions in this \p Candidate.
///
/// This is optionally used by the target to determine which registers have
/// been used across the sequence.
LiveRegUnits UsedInSequence;
/// Return the number of instructions in this Candidate.
unsigned getLength() const { return Len; }
@ -137,6 +145,12 @@ public:
// outlining candidate.
std::for_each(MBB->rbegin(), (MachineBasicBlock::reverse_iterator)front(),
[this](MachineInstr &MI) { LRU.stepBackward(MI); });
// Walk over the sequence itself and figure out which registers were used
// in the sequence.
UsedInSequence.init(TRI);
std::for_each(front(), std::next(back()),
[this](MachineInstr &MI) { UsedInSequence.accumulate(MI); });
}
};

View File

@ -4851,75 +4851,92 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION I1
/// RET I2
/// RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// BL f I2
/// B f
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind of
/// frame should be emitted for that outlined function.
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? Yes
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION I1
/// RET I2
/// RET
///
/// * Call construction overhead: 1 (B)
/// * Frame construction overhead: 0 (Return included in sequence)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 I2
/// I3
/// RET
///
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 1 (RET)
/// * Requires stack fixups? No
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// BL f I2
/// B f
/// * Call construction overhead: 1 (BL)
/// * Frame construction overhead: 0
/// * Requires stack fixups? No
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1 Save LR OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION I1
/// I3 Restore LR I2
/// I3
/// RET
///
/// * Call construction overhead: 3 (save + BL + restore)
/// * Frame construction overhead: 1 (ret)
/// * Requires stack fixups? No
enum MachineOutlinerClass {
MachineOutlinerDefault, ///< Emit a save, restore, call, and return.
MachineOutlinerTailCall, ///< Only emit a branch.
MachineOutlinerNoLRSave, ///< Emit a call and return.
MachineOutlinerThunk, ///< Emit a call and tail-call.
MachineOutlinerRegSave ///< Same as default, but save to a register.
};
enum MachineOutlinerMBBFlags {
@ -4927,6 +4944,27 @@ enum MachineOutlinerMBBFlags {
HasCalls = 0x4
};
/// Search for a 64-bit general-purpose register that can hold LR while the
/// outlined call for candidate \p C executes.
///
/// A register qualifies when it is not reserved, is none of LR, X16 or X17,
/// and is reported as available by both \c C.LRU and \c C.UsedInSequence.
///
/// \returns the first qualifying register in the iteration order of
/// \c AArch64::GPR64RegClass, or 0 when no register qualifies.
unsigned
AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
  MachineFunction *Fn = C.getMF();
  const AArch64RegisterInfo *RegInfo =
      static_cast<const AArch64RegisterInfo *>(
          Fn->getSubtarget().getRegisterInfo());

  for (unsigned PhysReg : AArch64::GPR64RegClass) {
    // Reserved registers are never eligible.
    if (RegInfo->isReservedReg(*Fn, PhysReg))
      continue;

    // LR is not reserved, but it is the register being saved, so skip it.
    if (PhysReg == AArch64::LR)
      continue;

    // Neither X16 nor X17 is guaranteed to be preserved.
    if (PhysReg == AArch64::X16)
      continue;
    if (PhysReg == AArch64::X17)
      continue;

    // The register has to be available according to the candidate's liveness
    // information and must not be touched by the sequence itself.
    if (!C.LRU.available(PhysReg))
      continue;
    if (!C.UsedInSequence.available(PhysReg))
      continue;

    return PhysReg;
  }

  // Nothing suitable was found; 0 signals "no register".
  return 0u;
}
outliner::OutlinedFunction
AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
@ -5015,11 +5053,27 @@ AArch64InstrInfo::getOutliningCandidateInfo(
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
}
// LR is live, so we need to save it to the stack.
// LR is live, so we need to save it. Decide whether it should be saved to
// the stack, or if it can be saved to a register.
else {
FrameID = MachineOutlinerDefault;
NumBytesToCreateFrame = 4;
SetCandidateCallInfo(MachineOutlinerDefault, 12);
if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
[this](outliner::Candidate &C) {
return findRegisterToSaveLRTo(C);
})) {
// Every candidate has an available callee-saved register for the save.
// We can save LR to a register.
FrameID = MachineOutlinerRegSave;
NumBytesToCreateFrame = 4;
SetCandidateCallInfo(MachineOutlinerRegSave, 12);
}
else {
// At least one candidate does not have an available callee-saved
// register. We must save LR to the stack.
FrameID = MachineOutlinerDefault;
NumBytesToCreateFrame = 4;
SetCandidateCallInfo(MachineOutlinerDefault, 12);
}
}
// Check if the range contains a call. These require a save + restore of the
@ -5424,7 +5478,7 @@ void AArch64InstrInfo::buildOutlinedFrame(
MBB.insert(MBB.end(), ret);
// Did we have to modify the stack by saving the link register?
if (OF.FrameConstructionID == MachineOutlinerNoLRSave)
if (OF.FrameConstructionID != MachineOutlinerDefault)
return;
// We modified the stack.
@ -5457,13 +5511,41 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
// We want to return the spot where we inserted the call.
MachineBasicBlock::iterator CallPt;
// We have a default call. Save the link register.
MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
It = MBB.insert(It, STRXpre);
// Instructions for saving and restoring LR around the call instruction we're
// going to insert.
MachineInstr *Save;
MachineInstr *Restore;
// Can we save to a register?
if (C.CallConstructionID == MachineOutlinerRegSave) {
// FIXME: This logic should be sunk into a target-specific interface so that
// we don't have to recompute the register.
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
// Save and restore LR from that register.
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)
.addImm(0);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
.addReg(AArch64::XZR)
.addReg(Reg)
.addImm(0);
} else {
// We have the default case. Save and restore from SP.
Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR)
.addReg(AArch64::SP)
.addImm(-16);
Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
}
It = MBB.insert(It, Save);
It++;
// Insert the call.
@ -5472,14 +5554,7 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
CallPt = It;
It++;
// Restore the link register.
MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
.addReg(AArch64::SP, RegState::Define)
.addReg(AArch64::LR, RegState::Define)
.addReg(AArch64::SP)
.addImm(16);
It = MBB.insert(It, LDRXpost);
It = MBB.insert(It, Restore);
return CallPt;
}

View File

@ -272,6 +272,10 @@ private:
ArrayRef<MachineOperand> Cond) const;
bool substituteCmpToZero(MachineInstr &CmpInstr, unsigned SrcReg,
const MachineRegisterInfo *MRI) const;
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
///
/// \param C The outlining candidate whose register liveness information
/// (\c LRU and \c UsedInSequence) is consulted to decide whether a register
/// is unused.
unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const;
};
/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg

View File

@ -0,0 +1,112 @@
# RUN: llc -mtriple=aarch64-apple-darwin -run-pass=prologepilog \
# RUN: -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
# Check that we save LR to a callee-saved register when possible.
# foo() should use a callee-saved register. However, bar() should not.
--- |
define void @foo() #0 {
ret void
}
define void @bar() #0 {
ret void
}
attributes #0 = { minsize noinline noredzone "no-frame-pointer-elim"="true" }
...
---
# Make sure that when we outline and a register is available, we
# use it to save + restore LR instead of SP.
# CHECK: name: foo
# CHECK-DAG: bb.0
# CHECK-DAG: $x[[REG:[0-9]+]] = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-DAG: bb.1
# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-DAG: bb.2
# CHECK-DAG: $x[[REG]] = ORRXrs $xzr, $lr, 0
# CHECK-NEXT: BL
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
name: foo
tracksRegLiveness: true
fixedStack:
body: |
bb.0:
liveins: $lr, $w9
$x25 = ORRXri $xzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 2
bb.1:
liveins: $lr, $w9
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 2
bb.2:
liveins: $lr, $w9
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 1
$w9 = ORRWri $wzr, 2
RET undef $lr
...
---
# Convoluted case that shows that we'll still save to the stack when there are
# no appropriate registers available.
# The live-in lists do not contain x16 or x17 since including them would cause
# nothing to be outlined.
# They also deliberately don't contain x18 to show that on Darwin we won't store
# to that.
# CHECK-LABEL: name: bar
# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
# CHECK-NEXT: BL
# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
# CHECK-NEXT: BL
# CHECK-DAG: early-clobber $sp, $lr = LDRXpost $sp, 16
# CHECK: early-clobber $sp = STRXpre $lr, $sp, -16
# CHECK-NEXT: BL
# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
name: bar
tracksRegLiveness: true
body: |
bb.0:
liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 2
bb.1:
liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 2
bb.2:
liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w10 = ORRWri $wzr, 1
$w12 = ORRWri $wzr, 2
bb.3:
liveins: $lr, $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x19, $x20, $x21, $x22, $x23, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28
RET undef $lr

View File

@ -82,17 +82,17 @@ define void @dog() #0 {
; CHECK: .p2align 2
; CHECK-NEXT: [[OUTLINED]]:
; CHECK: orr w8, wzr, #0x1
; CHECK-NEXT: str w8, [sp, #44]
; CHECK-NEXT: orr w8, wzr, #0x2
; CHECK-NEXT: str w8, [sp, #40]
; CHECK-NEXT: orr w8, wzr, #0x3
; CHECK-NEXT: str w8, [sp, #36]
; CHECK-NEXT: orr w8, wzr, #0x4
; CHECK-NEXT: str w8, [sp, #32]
; CHECK-NEXT: mov w8, #5
; CHECK-NEXT: str w8, [sp, #28]
; CHECK-NEXT: orr w8, wzr, #0x6
; CHECK-NEXT: orr w8, wzr, #0x2
; CHECK-NEXT: str w8, [sp, #24]
; CHECK-NEXT: orr w8, wzr, #0x3
; CHECK-NEXT: str w8, [sp, #20]
; CHECK-NEXT: orr w8, wzr, #0x4
; CHECK-NEXT: str w8, [sp, #16]
; CHECK-NEXT: mov w8, #5
; CHECK-NEXT: str w8, [sp, #12]
; CHECK-NEXT: orr w8, wzr, #0x6
; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: ret
attributes #0 = { noredzone "target-cpu"="cyclone" }

View File

@ -28,19 +28,19 @@
# CHECK-LABEL: name: main
# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]]
# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG:[0-9]+]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1
# CHECK: BL @OUTLINED_FUNCTION_[[F0]]
# CHECK-NEXT: early-clobber $sp, $lr = LDRXpost $sp, 16
# CHECK-NEXT: $lr = ORRXrs $xzr, $x[[REG]], 0
# CHECK-NEXT: $x16 = ADDXri $sp, 48, 0
# CHECK-NEXT: STRHHroW $w16, $x9, $w30, 1, 1
# CHECK-NEXT: $lr = ORRXri $xzr, 1