forked from OSchip/llvm-project
[MachineOutliner][NFC] Make Candidates own their call information
Before this change, TargetCostInfo (TCI) contained all of the call information for each Candidate. This patch moves that information onto the Candidates themselves, so each Candidate can now supply how it ought to be called. Thus, Candidates will be able to, for example, call the same outlined function in cheaper ways when possible. The call information is also removed from TCI, since it is no longer used there. A follow-up patch for the AArch64 outliner will demonstrate this. llvm-svn: 337840
This commit is contained in:
parent
1cc52a0079
commit
fca55129b1
|
@ -39,27 +39,18 @@ struct TargetCostInfo {
|
|||
/// widely in size, so just counting the instructions isn't very useful.)
|
||||
unsigned SequenceSize;
|
||||
|
||||
/// Number of instructions to call an outlined function for this candidate.
|
||||
unsigned CallOverhead;
|
||||
|
||||
/// Number of instructions to construct an outlined function frame
|
||||
/// for this candidate.
|
||||
unsigned FrameOverhead;
|
||||
|
||||
/// Represents the specific instructions that must be emitted to
|
||||
/// construct a call to this candidate.
|
||||
unsigned CallConstructionID;
|
||||
|
||||
/// Represents the specific instructions that must be emitted to
|
||||
/// construct a frame for this candidate's outlined function.
|
||||
unsigned FrameConstructionID;
|
||||
|
||||
TargetCostInfo() {}
|
||||
TargetCostInfo(unsigned SequenceSize, unsigned CallOverhead,
|
||||
unsigned FrameOverhead, unsigned CallConstructionID,
|
||||
TargetCostInfo(unsigned SequenceSize, unsigned FrameOverhead,
|
||||
unsigned FrameConstructionID)
|
||||
: SequenceSize(SequenceSize), CallOverhead(CallOverhead),
|
||||
FrameOverhead(FrameOverhead), CallConstructionID(CallConstructionID),
|
||||
: SequenceSize(SequenceSize), FrameOverhead(FrameOverhead),
|
||||
FrameConstructionID(FrameConstructionID) {}
|
||||
};
|
||||
|
||||
|
@ -82,6 +73,10 @@ private:
|
|||
// The basic block that contains this Candidate.
|
||||
MachineBasicBlock *MBB;
|
||||
|
||||
/// Cost of calling an outlined function from this point as defined by the
|
||||
/// target.
|
||||
unsigned CallOverhead;
|
||||
|
||||
public:
|
||||
/// The index of this \p Candidate's \p OutlinedFunction in the list of
|
||||
/// \p OutlinedFunctions.
|
||||
|
@ -90,8 +85,9 @@ public:
|
|||
/// Set to false if the candidate overlapped with another candidate.
|
||||
bool InCandidateList = true;
|
||||
|
||||
/// Contains all target-specific information for this \p Candidate.
|
||||
TargetCostInfo TCI;
|
||||
/// Identifier denoting the instructions to emit to call an outlined function
|
||||
/// from this point. Defined by the target.
|
||||
unsigned CallConstructionID;
|
||||
|
||||
/// Contains physical register liveness information for the MBB containing
|
||||
/// this \p Candidate.
|
||||
|
@ -109,6 +105,18 @@ public:
|
|||
/// Return the end index of this candidate.
|
||||
unsigned getEndIdx() const { return StartIdx + Len - 1; }
|
||||
|
||||
/// Set the CallConstructionID and CallOverhead of this candidate to CID and
|
||||
/// CO respectively.
|
||||
void setCallInfo(unsigned CID, unsigned CO) {
|
||||
CallConstructionID = CID;
|
||||
CallOverhead = CO;
|
||||
}
|
||||
|
||||
/// Returns the call overhead of this candidate if it is still in the
/// candidate list (InCandidateList is true), and 0 otherwise.
|
||||
unsigned getCallOverhead() const {
|
||||
return InCandidateList ? CallOverhead : 0;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator &front() { return FirstInst; }
|
||||
MachineBasicBlock::iterator &back() { return LastInst; }
|
||||
MachineFunction *getMF() const { return MBB->getParent(); }
|
||||
|
@ -193,8 +201,10 @@ public:
|
|||
/// Return the number of bytes it would take to outline this
|
||||
/// function.
|
||||
unsigned getOutliningCost() {
|
||||
return (OccurrenceCount * TCI.CallOverhead) + TCI.SequenceSize +
|
||||
TCI.FrameOverhead;
|
||||
unsigned CallOverhead = 0;
|
||||
for (std::shared_ptr<Candidate> &C : Candidates)
|
||||
CallOverhead += C->getCallOverhead();
|
||||
return CallOverhead + TCI.SequenceSize + TCI.FrameOverhead;
|
||||
}
|
||||
|
||||
/// Return the size in bytes of the unoutlined sequences.
|
||||
|
@ -217,10 +227,8 @@ public:
|
|||
Candidates.push_back(std::make_shared<outliner::Candidate>(C));
|
||||
|
||||
unsigned B = getBenefit();
|
||||
for (std::shared_ptr<Candidate> &C : Candidates) {
|
||||
for (std::shared_ptr<Candidate> &C : Candidates)
|
||||
C->Benefit = B;
|
||||
C->TCI = TCI;
|
||||
}
|
||||
}
|
||||
};
|
||||
} // namespace outliner
|
||||
|
|
|
@ -1637,7 +1637,7 @@ public:
|
|||
virtual MachineBasicBlock::iterator
|
||||
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It, MachineFunction &MF,
|
||||
const outliner::TargetCostInfo &TCI) const {
|
||||
const outliner::Candidate &C) const {
|
||||
llvm_unreachable(
|
||||
"Target didn't implement TargetInstrInfo::insertOutlinedCall!");
|
||||
}
|
||||
|
|
|
@ -1279,7 +1279,7 @@ bool MachineOutliner::outline(
|
|||
const TargetInstrInfo &TII = *STI.getInstrInfo();
|
||||
|
||||
// Insert a call to the new function and erase the old sequence.
|
||||
auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C.TCI);
|
||||
auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *OF.MF, C);
|
||||
|
||||
// If the caller tracks liveness, then we need to make sure that anything
|
||||
// we outline doesn't break liveness assumptions.
|
||||
|
|
|
@ -4936,10 +4936,6 @@ AArch64InstrInfo::getOutliningCandidateInfo(
|
|||
0, [this](unsigned Sum, const MachineInstr &MI) {
|
||||
return Sum + getInstSizeInBytes(MI);
|
||||
});
|
||||
unsigned CallID = MachineOutlinerDefault;
|
||||
unsigned FrameID = MachineOutlinerDefault;
|
||||
unsigned NumBytesForCall = 12;
|
||||
unsigned NumBytesToCreateFrame = 4;
|
||||
|
||||
// Compute liveness information for each candidate.
|
||||
const TargetRegisterInfo &TRI = getRegisterInfo();
|
||||
|
@ -4976,21 +4972,29 @@ AArch64InstrInfo::getOutliningCandidateInfo(
|
|||
|
||||
unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
|
||||
|
||||
// Helper lambda which sets call information for every candidate.
|
||||
auto SetCandidateCallInfo =
|
||||
[&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
|
||||
for (outliner::Candidate &C : RepeatedSequenceLocs)
|
||||
C.setCallInfo(CallID, NumBytesForCall);
|
||||
};
|
||||
|
||||
unsigned FrameID = MachineOutlinerDefault;
|
||||
unsigned NumBytesToCreateFrame = 4;
|
||||
|
||||
// If the last instruction in any candidate is a terminator, then we should
|
||||
// tail call all of the candidates.
|
||||
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
|
||||
CallID = MachineOutlinerTailCall;
|
||||
FrameID = MachineOutlinerTailCall;
|
||||
NumBytesForCall = 4;
|
||||
NumBytesToCreateFrame = 0;
|
||||
SetCandidateCallInfo(MachineOutlinerTailCall, 4);
|
||||
}
|
||||
|
||||
else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
|
||||
// FIXME: Do we need to check if the code after this uses the value of LR?
|
||||
CallID = MachineOutlinerThunk;
|
||||
FrameID = MachineOutlinerThunk;
|
||||
NumBytesForCall = 4;
|
||||
NumBytesToCreateFrame = 0;
|
||||
SetCandidateCallInfo(MachineOutlinerThunk, 4);
|
||||
}
|
||||
|
||||
// Make sure that LR isn't live on entry to this candidate. The only
|
||||
|
@ -5002,10 +5006,16 @@ AArch64InstrInfo::getOutliningCandidateInfo(
|
|||
[](outliner::Candidate &C) {
|
||||
return C.LRU.available(AArch64::LR);
|
||||
})) {
|
||||
CallID = MachineOutlinerNoLRSave;
|
||||
FrameID = MachineOutlinerNoLRSave;
|
||||
NumBytesForCall = 4;
|
||||
NumBytesToCreateFrame = 4;
|
||||
SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
|
||||
}
|
||||
|
||||
// LR is live, so we need to save it to the stack.
|
||||
else {
|
||||
FrameID = MachineOutlinerDefault;
|
||||
NumBytesToCreateFrame = 4;
|
||||
SetCandidateCallInfo(MachineOutlinerDefault, 12);
|
||||
}
|
||||
|
||||
// Check if the range contains a call. These require a save + restore of the
|
||||
|
@ -5024,8 +5034,7 @@ AArch64InstrInfo::getOutliningCandidateInfo(
|
|||
RepeatedSequenceLocs[0].back()->isCall())
|
||||
NumBytesToCreateFrame += 8;
|
||||
|
||||
return outliner::TargetCostInfo(SequenceSize, NumBytesForCall,
|
||||
NumBytesToCreateFrame, CallID, FrameID);
|
||||
return outliner::TargetCostInfo(SequenceSize, NumBytesToCreateFrame, FrameID);
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
|
||||
|
@ -5420,10 +5429,10 @@ void AArch64InstrInfo::buildOutlinedFrame(
|
|||
|
||||
MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
|
||||
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
|
||||
MachineFunction &MF, const outliner::TargetCostInfo &TCI) const {
|
||||
MachineFunction &MF, const outliner::Candidate &C) const {
|
||||
|
||||
// Are we tail calling?
|
||||
if (TCI.CallConstructionID == MachineOutlinerTailCall) {
|
||||
if (C.CallConstructionID == MachineOutlinerTailCall) {
|
||||
// If yes, then we can just branch to the label.
|
||||
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
|
||||
.addGlobalAddress(M.getNamedValue(MF.getName()))
|
||||
|
@ -5432,8 +5441,8 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
|
|||
}
|
||||
|
||||
// Are we saving the link register?
|
||||
if (TCI.CallConstructionID == MachineOutlinerNoLRSave ||
|
||||
TCI.CallConstructionID == MachineOutlinerThunk) {
|
||||
if (C.CallConstructionID == MachineOutlinerNoLRSave ||
|
||||
C.CallConstructionID == MachineOutlinerThunk) {
|
||||
// No, so just insert the call.
|
||||
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
|
||||
.addGlobalAddress(M.getNamedValue(MF.getName())));
|
||||
|
|
|
@ -248,7 +248,7 @@ public:
|
|||
MachineBasicBlock::iterator
|
||||
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It, MachineFunction &MF,
|
||||
const outliner::TargetCostInfo &TCI) const override;
|
||||
const outliner::Candidate &C) const override;
|
||||
/// Returns true if the instruction sets to an immediate value that can be
|
||||
/// executed more efficiently.
|
||||
bool isExynosResetFast(const MachineInstr &MI) const;
|
||||
|
|
|
@ -7557,30 +7557,35 @@ enum MachineOutlinerClass {
|
|||
MachineOutlinerTailCall
|
||||
};
|
||||
|
||||
outliner::TargetCostInfo
|
||||
X86InstrInfo::getOutliningCandidateInfo(
|
||||
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
|
||||
unsigned SequenceSize = std::accumulate(
|
||||
RepeatedSequenceLocs[0].front(), std::next(RepeatedSequenceLocs[0].back()),
|
||||
0, [](unsigned Sum, const MachineInstr &MI) {
|
||||
// FIXME: x86 doesn't implement getInstSizeInBytes, so we can't
|
||||
// tell the cost. Just assume each instruction is one byte.
|
||||
if (MI.isDebugInstr() || MI.isKill())
|
||||
return Sum;
|
||||
return Sum + 1;
|
||||
});
|
||||
outliner::TargetCostInfo X86InstrInfo::getOutliningCandidateInfo(
|
||||
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
|
||||
unsigned SequenceSize =
|
||||
std::accumulate(RepeatedSequenceLocs[0].front(),
|
||||
std::next(RepeatedSequenceLocs[0].back()), 0,
|
||||
[](unsigned Sum, const MachineInstr &MI) {
|
||||
// FIXME: x86 doesn't implement getInstSizeInBytes, so
|
||||
// we can't tell the cost. Just assume each instruction
|
||||
// is one byte.
|
||||
if (MI.isDebugInstr() || MI.isKill())
|
||||
return Sum;
|
||||
return Sum + 1;
|
||||
});
|
||||
|
||||
// FIXME: Use real size in bytes for call and ret instructions.
|
||||
if (RepeatedSequenceLocs[0].back()->isTerminator())
|
||||
return outliner::TargetCostInfo(SequenceSize,
|
||||
1, // Number of bytes to emit call.
|
||||
0, // Number of bytes to emit frame.
|
||||
MachineOutlinerTailCall, // Type of call.
|
||||
MachineOutlinerTailCall // Type of frame.
|
||||
);
|
||||
if (RepeatedSequenceLocs[0].back()->isTerminator()) {
|
||||
for (outliner::Candidate &C : RepeatedSequenceLocs)
|
||||
C.setCallInfo(MachineOutlinerTailCall, 1);
|
||||
|
||||
return outliner::TargetCostInfo(SequenceSize, 1, 1, MachineOutlinerDefault,
|
||||
MachineOutlinerDefault);
|
||||
return outliner::TargetCostInfo(SequenceSize,
|
||||
0, // Number of bytes to emit frame.
|
||||
MachineOutlinerTailCall // Type of frame.
|
||||
);
|
||||
}
|
||||
|
||||
for (outliner::Candidate &C : RepeatedSequenceLocs)
|
||||
C.setCallInfo(MachineOutlinerDefault, 1);
|
||||
|
||||
return outliner::TargetCostInfo(SequenceSize, 1, MachineOutlinerDefault);
|
||||
}
|
||||
|
||||
bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
|
||||
|
@ -7683,9 +7688,9 @@ MachineBasicBlock::iterator
|
|||
X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It,
|
||||
MachineFunction &MF,
|
||||
const outliner::TargetCostInfo &TCI) const {
|
||||
const outliner::Candidate &C) const {
|
||||
// Is it a tail call?
|
||||
if (TCI.CallConstructionID == MachineOutlinerTailCall) {
|
||||
if (C.CallConstructionID == MachineOutlinerTailCall) {
|
||||
// Yes, just insert a JMP.
|
||||
It = MBB.insert(It,
|
||||
BuildMI(MF, DebugLoc(), get(X86::JMP_1))
|
||||
|
|
|
@ -559,7 +559,7 @@ public:
|
|||
MachineBasicBlock::iterator
|
||||
insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &It, MachineFunction &MF,
|
||||
const outliner::TargetCostInfo &TCI) const override;
|
||||
const outliner::Candidate &C) const override;
|
||||
|
||||
protected:
|
||||
/// Commutes the operands in the given instruction by changing the operands
|
||||
|
|
Loading…
Reference in New Issue