[MachineOutliner] NFC: Split up getOutliningBenefit
This is some more cleanup in preparation for some actual functional changes. This splits getOutliningBenefit into two cost functions: getOutliningCallOverhead and getOutliningFrameOverhead. These functions return the number of instructions that would be required to call a specific function and the number of instructions that would be required to construct a frame for a specific function. The actual outlining benefit logic is moved into the outliner, which calls these functions.

The goal of refactoring getOutliningBenefit is to:

- Get us closer to getting rid of the IsTailCall flag
- Further split up "target-specific" things and "general algorithm" things

llvm-svn: 309356
parent 75a001ba78
commit 809d708b8a
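The cost model this change moves into the outliner (see the MachineOutliner::findCandidates hunk below) boils down to comparing the cost of leaving every occurrence in place against the cost of one outlined body plus a call at each occurrence. A minimal standalone sketch of that arithmetic, using illustrative names rather than the in-tree types:

```cpp
#include <cstddef>
#include <vector>

// Per-candidate instruction counts, as the outliner would gather them from
// the two new target hooks. The names here are illustrative, not the patch's.
struct CandidateCosts {
  std::size_t SequenceLen;                // instructions in the repeated sequence
  std::vector<std::size_t> CallOverheads; // getOutliningCallOverhead, one per occurrence
  std::size_t FrameOverhead;              // getOutliningFrameOverhead for the outlined body
};

// Instructions saved by outlining, or 0 if outlining is not profitable.
std::size_t outliningBenefit(const CandidateCosts &C) {
  std::size_t NotOutliningCost = C.SequenceLen * C.CallOverheads.size();
  std::size_t OutliningCost = C.FrameOverhead + C.SequenceLen; // the outlined function itself
  for (std::size_t CallCost : C.CallOverheads)
    OutliningCost += CallCost; // a call replaces each original occurrence
  return NotOutliningCost > OutliningCost ? NotOutliningCost - OutliningCost : 0;
}
```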
@ -55,7 +55,7 @@ class TargetRegisterInfo;
|
|||
class TargetSchedModel;
|
||||
class TargetSubtargetInfo;
|
||||
|
||||
template<class T> class SmallVectorImpl;
|
||||
template <class T> class SmallVectorImpl;
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
///
|
||||
|
@ -66,8 +66,7 @@ public:
|
|||
TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u,
|
||||
unsigned CatchRetOpcode = ~0u, unsigned ReturnOpcode = ~0u)
|
||||
: CallFrameSetupOpcode(CFSetupOpcode),
|
||||
CallFrameDestroyOpcode(CFDestroyOpcode),
|
||||
CatchRetOpcode(CatchRetOpcode),
|
||||
CallFrameDestroyOpcode(CFDestroyOpcode), CatchRetOpcode(CatchRetOpcode),
|
||||
ReturnOpcode(ReturnOpcode) {}
|
||||
TargetInstrInfo(const TargetInstrInfo &) = delete;
|
||||
TargetInstrInfo &operator=(const TargetInstrInfo &) = delete;
|
||||
|
@ -79,8 +78,7 @@ public:
|
|||
|
||||
/// Given a machine instruction descriptor, returns the register
|
||||
/// class constraint for OpNum, or NULL.
|
||||
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
|
||||
unsigned OpNum,
|
||||
const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
|
||||
const TargetRegisterInfo *TRI,
|
||||
const MachineFunction &MF) const;
|
||||
|
||||
|
@ -139,8 +137,7 @@ protected:
|
|||
/// the fixed result pair is equal to or equivalent to the source pair of
|
||||
/// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that
|
||||
/// the pairs (x,y) and (y,x) are equivalent.
|
||||
static bool fixCommutedOpIndices(unsigned &ResultIdx1,
|
||||
unsigned &ResultIdx2,
|
||||
static bool fixCommutedOpIndices(unsigned &ResultIdx1, unsigned &ResultIdx2,
|
||||
unsigned CommutableOpIdx1,
|
||||
unsigned CommutableOpIdx2);
|
||||
|
||||
|
@ -164,7 +161,7 @@ public:
|
|||
/// Returns true if the argument is a frame pseudo instruction.
|
||||
bool isFrameInstr(const MachineInstr &I) const {
|
||||
return I.getOpcode() == getCallFrameSetupOpcode() ||
|
||||
I.getOpcode() == getCallFrameDestroyOpcode();
|
||||
I.getOpcode() == getCallFrameDestroyOpcode();
|
||||
}
|
||||
|
||||
/// Returns true if the argument is a frame setup pseudo instruction.
|
||||
|
@ -191,7 +188,8 @@ public:
|
|||
/// prior to the pair.
|
||||
int64_t getFrameTotalSize(const MachineInstr &I) const {
|
||||
if (isFrameSetup(I)) {
|
||||
assert(I.getOperand(1).getImm() >= 0 && "Frame size must not be negative");
|
||||
assert(I.getOperand(1).getImm() >= 0 &&
|
||||
"Frame size must not be negative");
|
||||
return getFrameSize(I) + I.getOperand(1).getImm();
|
||||
}
|
||||
return getFrameSize(I);
|
||||
|
@ -211,9 +209,8 @@ public:
|
|||
/// destination. e.g. X86::MOVSX64rr32. If this returns true, then it's
|
||||
/// expected the pre-extension value is available as a subreg of the result
|
||||
/// register. This also returns the sub-register index in SubIdx.
|
||||
virtual bool isCoalescableExtInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SubIdx) const {
|
||||
virtual bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DstReg, unsigned &SubIdx) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -315,9 +312,7 @@ public:
|
|||
/// MachineSink determines on its own whether the instruction is safe to sink;
|
||||
/// this gives the target a hook to override the default behavior with regards
|
||||
/// to which instructions should be sunk.
|
||||
virtual bool shouldSink(const MachineInstr &MI) const {
|
||||
return true;
|
||||
}
|
||||
virtual bool shouldSink(const MachineInstr &MI) const { return true; }
|
||||
|
||||
/// Re-issue the specified 'original' instruction at the
|
||||
/// specific location targeting a new destination register.
|
||||
|
@ -456,9 +451,8 @@ public:
|
|||
/// \note The generic implementation does not provide any support for
|
||||
/// MI.isExtractSubregLike(). In other words, one has to override
|
||||
/// getExtractSubregLikeInputs for target specific instructions.
|
||||
bool
|
||||
getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx,
|
||||
RegSubRegPairAndIdx &InputReg) const;
|
||||
bool getExtractSubregInputs(const MachineInstr &MI, unsigned DefIdx,
|
||||
RegSubRegPairAndIdx &InputReg) const;
|
||||
|
||||
/// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI
|
||||
/// and \p DefIdx.
|
||||
|
@ -476,10 +470,9 @@ public:
|
|||
/// \note The generic implementation does not provide any support for
|
||||
/// MI.isInsertSubregLike(). In other words, one has to override
|
||||
/// getInsertSubregLikeInputs for target specific instructions.
|
||||
bool
|
||||
getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx,
|
||||
RegSubRegPair &BaseReg,
|
||||
RegSubRegPairAndIdx &InsertedReg) const;
|
||||
bool getInsertSubregInputs(const MachineInstr &MI, unsigned DefIdx,
|
||||
RegSubRegPair &BaseReg,
|
||||
RegSubRegPairAndIdx &InsertedReg) const;
|
||||
|
||||
/// Return true if two machine instructions would produce identical values.
|
||||
/// By default, this is only true when the two instructions
|
||||
|
@ -625,8 +618,8 @@ public:
|
|||
MachineBasicBlock *DestBB,
|
||||
const DebugLoc &DL,
|
||||
int *BytesAdded = nullptr) const {
|
||||
return insertBranch(MBB, DestBB, nullptr,
|
||||
ArrayRef<MachineOperand>(), DL, BytesAdded);
|
||||
return insertBranch(MBB, DestBB, nullptr, ArrayRef<MachineOperand>(), DL,
|
||||
BytesAdded);
|
||||
}
|
||||
|
||||
/// Analyze the loop code, return true if it cannot be understoo. Upon
|
||||
|
@ -641,8 +634,8 @@ public:
|
|||
/// finished. Return the value/register of the the new loop count. We need
|
||||
/// this function when peeling off one or more iterations of a loop. This
|
||||
/// function assumes the nth iteration is peeled first.
|
||||
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB,
|
||||
MachineInstr *IndVar, MachineInstr &Cmp,
|
||||
virtual unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
|
||||
MachineInstr &Cmp,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
SmallVectorImpl<MachineInstr *> &PrevInsts,
|
||||
unsigned Iter, unsigned MaxIter) const {
|
||||
|
@ -667,10 +660,9 @@ public:
|
|||
/// of the specified basic block, where the probability of the instructions
|
||||
/// being executed is given by Probability, and Confidence is a measure
|
||||
/// of our confidence that it will be properly predicted.
|
||||
virtual
|
||||
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
|
||||
unsigned ExtraPredCycles,
|
||||
BranchProbability Probability) const {
|
||||
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
|
||||
unsigned ExtraPredCycles,
|
||||
BranchProbability Probability) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -680,12 +672,11 @@ public:
|
|||
/// predicates, where the probability of the true path being taken is given
|
||||
/// by Probability, and Confidence is a measure of our confidence that it
|
||||
/// will be properly predicted.
|
||||
virtual bool
|
||||
isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||
unsigned NumTCycles, unsigned ExtraTCycles,
|
||||
MachineBasicBlock &FMBB,
|
||||
unsigned NumFCycles, unsigned ExtraFCycles,
|
||||
BranchProbability Probability) const {
|
||||
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles,
|
||||
unsigned ExtraTCycles,
|
||||
MachineBasicBlock &FMBB, unsigned NumFCycles,
|
||||
unsigned ExtraFCycles,
|
||||
BranchProbability Probability) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -695,9 +686,9 @@ public:
|
|||
/// The probability of the instructions being executed is given by
|
||||
/// Probability, and Confidence is a measure of our confidence that it
|
||||
/// will be properly predicted.
|
||||
virtual bool
|
||||
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
|
||||
BranchProbability Probability) const {
|
||||
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
|
||||
unsigned NumCycles,
|
||||
BranchProbability Probability) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -735,9 +726,8 @@ public:
|
|||
/// @param TrueCycles Latency from TrueReg to select output.
|
||||
/// @param FalseCycles Latency from FalseReg to select output.
|
||||
virtual bool canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles,
|
||||
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
|
||||
unsigned FalseReg, int &CondCycles,
|
||||
int &TrueCycles, int &FalseCycles) const {
|
||||
return false;
|
||||
}
|
||||
|
@ -953,8 +943,7 @@ public:
|
|||
/// Set special operand attributes for new instructions after reassociation.
|
||||
virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
|
||||
MachineInstr &NewMI1,
|
||||
MachineInstr &NewMI2) const {
|
||||
}
|
||||
MachineInstr &NewMI2) const {}
|
||||
|
||||
/// Return true when a target supports MachineCombiner.
|
||||
virtual bool useMachineCombiner() const { return false; }
|
||||
|
@ -1007,9 +996,9 @@ protected:
|
|||
/// \pre MI.isExtractSubregLike().
|
||||
///
|
||||
/// \see TargetInstrInfo::getExtractSubregInputs.
|
||||
virtual bool getExtractSubregLikeInputs(
|
||||
const MachineInstr &MI, unsigned DefIdx,
|
||||
RegSubRegPairAndIdx &InputReg) const {
|
||||
virtual bool getExtractSubregLikeInputs(const MachineInstr &MI,
|
||||
unsigned DefIdx,
|
||||
RegSubRegPairAndIdx &InputReg) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1040,7 +1029,7 @@ public:
|
|||
}
|
||||
|
||||
virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
|
||||
SmallVectorImpl<SDNode*> &NewNodes) const {
|
||||
SmallVectorImpl<SDNode *> &NewNodes) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1050,9 +1039,9 @@ public:
|
|||
/// possible. If LoadRegIndex is non-null, it is filled in with the operand
|
||||
/// index of the operand which will hold the register holding the loaded
|
||||
/// value.
|
||||
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
|
||||
bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex = nullptr) const {
|
||||
virtual unsigned
|
||||
getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore,
|
||||
unsigned *LoadRegIndex = nullptr) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1061,7 +1050,8 @@ public:
|
|||
/// pointers are the same and the only differences between the two addresses
|
||||
/// are the offset. It also returns the offsets by reference.
|
||||
virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
|
||||
int64_t &Offset1, int64_t &Offset2) const {
|
||||
int64_t &Offset1,
|
||||
int64_t &Offset2) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1115,8 +1105,8 @@ public:
|
|||
|
||||
/// Reverses the branch condition of the specified condition list,
|
||||
/// returning false on success and true if it cannot be reversed.
|
||||
virtual
|
||||
bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
|
||||
virtual bool
|
||||
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1128,14 +1118,10 @@ public:
|
|||
virtual void getNoop(MCInst &NopInst) const;
|
||||
|
||||
/// Return true for post-incremented instructions.
|
||||
virtual bool isPostIncrement(const MachineInstr &MI) const {
|
||||
return false;
|
||||
}
|
||||
virtual bool isPostIncrement(const MachineInstr &MI) const { return false; }
|
||||
|
||||
/// Returns true if the instruction is already predicated.
|
||||
virtual bool isPredicated(const MachineInstr &MI) const {
|
||||
return false;
|
||||
}
|
||||
virtual bool isPredicated(const MachineInstr &MI) const { return false; }
|
||||
|
||||
/// Returns true if the instruction is a
|
||||
/// terminator instruction that has not been predicated.
|
||||
|
@ -1147,9 +1133,8 @@ public:
|
|||
}
|
||||
|
||||
/// Returns true if the tail call can be made conditional on BranchCond.
|
||||
virtual bool
|
||||
canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
|
||||
const MachineInstr &TailCall) const {
|
||||
virtual bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
|
||||
const MachineInstr &TailCall) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1167,9 +1152,8 @@ public:
|
|||
|
||||
/// Returns true if the first specified predicate
|
||||
/// subsumes the second, e.g. GE subsumes GT.
|
||||
virtual
|
||||
bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
|
||||
ArrayRef<MachineOperand> Pred2) const {
|
||||
virtual bool SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
|
||||
ArrayRef<MachineOperand> Pred2) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1207,25 +1191,25 @@ public:
|
|||
|
||||
/// Allocate and return a hazard recognizer to use for this target when
|
||||
/// scheduling the machine instructions before register allocation.
|
||||
virtual ScheduleHazardRecognizer*
|
||||
virtual ScheduleHazardRecognizer *
|
||||
CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
|
||||
const ScheduleDAG *DAG) const;
|
||||
|
||||
/// Allocate and return a hazard recognizer to use for this target when
|
||||
/// scheduling the machine instructions before register allocation.
|
||||
virtual ScheduleHazardRecognizer*
|
||||
CreateTargetMIHazardRecognizer(const InstrItineraryData*,
|
||||
virtual ScheduleHazardRecognizer *
|
||||
CreateTargetMIHazardRecognizer(const InstrItineraryData *,
|
||||
const ScheduleDAG *DAG) const;
|
||||
|
||||
/// Allocate and return a hazard recognizer to use for this target when
|
||||
/// scheduling the machine instructions after register allocation.
|
||||
virtual ScheduleHazardRecognizer*
|
||||
CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
|
||||
virtual ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *,
|
||||
const ScheduleDAG *DAG) const;
|
||||
|
||||
/// Allocate and return a hazard recognizer to use for by non-scheduling
|
||||
/// passes.
|
||||
virtual ScheduleHazardRecognizer*
|
||||
virtual ScheduleHazardRecognizer *
|
||||
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -1502,7 +1486,7 @@ public:
|
|||
|
||||
/// \brief Return the value to use for the MachineCSE's LookAheadLimit,
|
||||
/// which is a heuristic used for CSE'ing phys reg defs.
|
||||
virtual unsigned getMachineCSELookAheadLimit () const {
|
||||
virtual unsigned getMachineCSELookAheadLimit() const {
|
||||
// The default lookahead is small to prevent unprofitable quadratic
|
||||
// behavior.
|
||||
return 5;
|
||||
|
@ -1569,13 +1553,24 @@ public:
return false;
}

/// \brief Return how many instructions would be saved by outlining a
/// sequence containing \p SequenceSize instructions that appears
/// \p Occurrences times in a module.
virtual unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
bool CanBeTailCall) const {
/// \brief Returns the number of instructions that will be taken to call a
/// function defined by the sequence on the closed interval [ \p StartIt, \p
/// EndIt].
virtual size_t
getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningBenefit!");
"Target didn't implement TargetInstrInfo::getOutliningCallOverhead!");
}

/// \brief Returns the number of instructions that will be taken to construct
/// an outlined function frame for a function defined on the closed interval
/// [ \p StartIt, \p EndIt].
virtual size_t
getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
MachineBasicBlock::iterator &EndIt) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCallOverhead!");
}

/// Represents how an instruction should be mapped by the outliner.
@ -1583,7 +1578,7 @@ public:
/// \p Illegal instructions are those which cannot be outlined.
/// \p Invisible instructions are instructions which can be outlined, but
/// shouldn't actually impact the outlining result.
enum MachineOutlinerInstrType {Legal, Illegal, Invisible};
enum MachineOutlinerInstrType { Legal, Illegal, Invisible };

/// Returns how or if \p MI should be outlined.
virtual MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const {
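For illustration, a backend would override the new hook roughly as follows. This is a hypothetical fragment (the class name and the instruction counts are not part of the patch, and it presupposes the usual TargetInstrInfo subclass boilerplate); it simply mirrors the AArch64 implementation that appears later in this commit:

```cpp
// Hypothetical target override, mirroring AArch64InstrInfo::getOutliningCallOverhead
// later in this diff. The returned instruction counts are illustrative.
size_t MyTargetInstrInfo::getOutliningCallOverhead(
    MachineBasicBlock::iterator &StartIt,
    MachineBasicBlock::iterator &EndIt) const {
  // A sequence that already ends in a terminator can be outlined as a tail
  // call: the call site is a single branch and LR is never clobbered.
  if (EndIt->isTerminator())
    return 1;
  // Otherwise assume one call instruction plus saving and restoring the link
  // register around it.
  return 3;
}
```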
@ -1635,25 +1630,23 @@ private:
|
|||
};
|
||||
|
||||
/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair.
|
||||
template<>
|
||||
struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
|
||||
template <> struct DenseMapInfo<TargetInstrInfo::RegSubRegPair> {
|
||||
using RegInfo = DenseMapInfo<unsigned>;
|
||||
|
||||
static inline TargetInstrInfo::RegSubRegPair getEmptyKey() {
|
||||
return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(),
|
||||
RegInfo::getEmptyKey());
|
||||
RegInfo::getEmptyKey());
|
||||
}
|
||||
|
||||
static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() {
|
||||
return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(),
|
||||
RegInfo::getTombstoneKey());
|
||||
RegInfo::getTombstoneKey());
|
||||
}
|
||||
|
||||
/// \brief Reuse getHashValue implementation from
|
||||
/// std::pair<unsigned, unsigned>.
|
||||
static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) {
|
||||
std::pair<unsigned, unsigned> PairVal =
|
||||
std::make_pair(Val.Reg, Val.SubReg);
|
||||
std::pair<unsigned, unsigned> PairVal = std::make_pair(Val.Reg, Val.SubReg);
|
||||
return DenseMapInfo<std::pair<unsigned, unsigned>>::getHashValue(PairVal);
|
||||
}
|
||||
|
||||
|
|
|
@ -114,7 +114,7 @@ struct OutlinedFunction {
/// This is initialized after we go through and create the actual function.
MachineFunction *MF = nullptr;

/// A number assigned to this function which appears at the end of its name.
/// A numbefr assigned to this function which appears at the end of its name.
size_t Name;

/// The number of candidates for this OutlinedFunction.
@ -813,11 +813,13 @@ struct MachineOutliner : public ModulePass {
///
/// \param[in,out] CandidateList A list of outlining candidates.
/// \param[in,out] FunctionList A list of functions to be outlined.
/// \param Mapper Contains instruction mapping info for outlining.
/// \param MaxCandidateLen The length of the longest candidate.
/// \param TII TargetInstrInfo for the module.
void pruneOverlaps(std::vector<Candidate> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
unsigned MaxCandidateLen, const TargetInstrInfo &TII);
InstructionMapper &Mapper, unsigned MaxCandidateLen,
const TargetInstrInfo &TII);

/// Construct a suffix tree on the instructions in \p M and outline repeated
/// strings from that tree.
@ -859,23 +861,40 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,
if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree)
continue;

// How many instructions would outlining this string save?
// Figure out if this candidate is beneficial.
size_t StringLen = Leaf->ConcatLen - Leaf->size();
unsigned EndVal = ST.Str[Leaf->SuffixIdx + StringLen - 1];
size_t CallOverhead = 0;
size_t FrameOverhead = 0;
size_t SequenceOverhead = StringLen;

// Determine if this is going to be tail called.
// FIXME: The target should decide this. The outlining pass shouldn't care
// about things like tail calling. It should be representation-agnostic.
MachineInstr *LastInstr = Mapper.IntegerInstructionMap[EndVal];
assert(LastInstr && "Last instruction in sequence was unmapped!");
bool IsTailCall = LastInstr->isTerminator();
unsigned Benefit =
TII.getOutliningBenefit(StringLen, Parent.OccurrenceCount, IsTailCall);
// Figure out the call overhead for each instance of the sequence.
for (auto &ChildPair : Parent.Children) {
SuffixTreeNode *M = ChildPair.second;

// If it's not beneficial, skip it.
if (Benefit < 1)
if (M && M->IsInTree && M->isLeaf()) {
// Each sequence is over [StartIt, EndIt].
MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[M->SuffixIdx + StringLen - 1];
CallOverhead += TII.getOutliningCallOverhead(StartIt, EndIt);
}
}

// Figure out how many instructions it'll take to construct an outlined
// function frame for this sequence.
MachineBasicBlock::iterator StartIt = Mapper.InstrList[Leaf->SuffixIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[Leaf->SuffixIdx + StringLen - 1];
FrameOverhead = TII.getOutliningFrameOverhead(StartIt, EndIt);

size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead;
size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount;

if (NotOutliningCost <= OutliningCost)
continue;

size_t Benefit = NotOutliningCost - OutliningCost;

if (StringLen > MaxLen)
MaxLen = StringLen;
@ -910,6 +929,7 @@ MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII,

void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper,
unsigned MaxCandidateLen,
const TargetInstrInfo &TII) {
// TODO: Experiment with interval trees or other interval-checking structures
@ -993,8 +1013,18 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
assert(F2.OccurrenceCount > 0 &&
"Can't remove OutlinedFunction with no occurrences!");
F2.OccurrenceCount--;
F2.Benefit = TII.getOutliningBenefit(F2.Sequence.size(),
F2.OccurrenceCount, F2.IsTailCall);

// Remove the call overhead from the removed sequence.
MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[C2.StartIdx + C2.Len - 1];
F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt);
// Add back one instance of the sequence.

if (F2.Sequence.size() > F2.Benefit)
F2.Benefit = 0;
else
F2.Benefit -= F2.Sequence.size();

C2.InCandidateList = false;
@ -1009,8 +1039,19 @@ void MachineOutliner::pruneOverlaps(std::vector<Candidate> &CandidateList,
assert(F1.OccurrenceCount > 0 &&
"Can't remove OutlinedFunction with no occurrences!");
F1.OccurrenceCount--;
F1.Benefit = TII.getOutliningBenefit(F1.Sequence.size(),
F1.OccurrenceCount, F1.IsTailCall);

// Remove the call overhead from the removed sequence.
MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx];
MachineBasicBlock::iterator EndIt =
Mapper.InstrList[C1.StartIdx + C1.Len - 1];
F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt);

// Add back one instance of the sequence.
if (F1.Sequence.size() > F1.Benefit)
F1.Benefit = 0;
else
F1.Benefit -= F1.Sequence.size();

C1.InCandidateList = false;

DEBUG(dbgs() << "- Removed C1. \n";
@ -1206,7 +1247,7 @@ bool MachineOutliner::runOnModule(Module &M) {
buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII);

// Remove candidates that overlap with other candidates.
pruneOverlaps(CandidateList, FunctionList, MaxCandidateLen, *TII);
pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII);

// Outline each of the candidates and return true if something was outlined.
return outline(M, CandidateList, FunctionList, Mapper);
|
|
|
@ -52,17 +52,17 @@ using namespace llvm;
|
|||
#define GET_INSTRINFO_CTOR_DTOR
|
||||
#include "AArch64GenInstrInfo.inc"
|
||||
|
||||
static cl::opt<unsigned>
|
||||
TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
|
||||
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
|
||||
static cl::opt<unsigned> TBZDisplacementBits(
|
||||
"aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
|
||||
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
|
||||
|
||||
static cl::opt<unsigned> CBZDisplacementBits(
|
||||
"aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
|
||||
|
||||
static cl::opt<unsigned>
|
||||
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
|
||||
BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
|
||||
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
|
||||
|
||||
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
|
||||
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
|
||||
|
@ -172,8 +172,8 @@ bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
|
|||
return isIntN(Bits, BrOffset / 4);
|
||||
}
|
||||
|
||||
MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
|
||||
const MachineInstr &MI) const {
|
||||
MachineBasicBlock *
|
||||
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
llvm_unreachable("unexpected opcode!");
|
||||
|
@ -374,12 +374,9 @@ void AArch64InstrInfo::instantiateCondBranch(
|
|||
}
|
||||
}
|
||||
|
||||
unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
const DebugLoc &DL,
|
||||
int *BytesAdded) const {
|
||||
unsigned AArch64InstrInfo::insertBranch(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
|
||||
ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
|
||||
// Shouldn't be a fall through.
|
||||
assert(TBB && "insertBranch must not be told to insert a fallthrough");
|
||||
|
||||
|
@ -485,10 +482,11 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
|
|||
return Opc;
|
||||
}
|
||||
|
||||
bool AArch64InstrInfo::canInsertSelect(
|
||||
const MachineBasicBlock &MBB, ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
|
||||
int &FalseCycles) const {
|
||||
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
|
||||
ArrayRef<MachineOperand> Cond,
|
||||
unsigned TrueReg, unsigned FalseReg,
|
||||
int &CondCycles, int &TrueCycles,
|
||||
int &FalseCycles) const {
|
||||
// Check register classes.
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
const TargetRegisterClass *RC =
|
||||
|
@ -656,8 +654,10 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
|
|||
MRI.constrainRegClass(FalseReg, RC);
|
||||
|
||||
// Insert the csel.
|
||||
BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
|
||||
CC);
|
||||
BuildMI(MBB, I, DL, get(Opc), DstReg)
|
||||
.addReg(TrueReg)
|
||||
.addReg(FalseReg)
|
||||
.addImm(CC);
|
||||
}
|
||||
|
||||
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
|
||||
|
@ -1078,11 +1078,7 @@ static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
|
|||
}
|
||||
}
|
||||
|
||||
enum AccessKind {
|
||||
AK_Write = 0x01,
|
||||
AK_Read = 0x10,
|
||||
AK_All = 0x11
|
||||
};
|
||||
enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
|
||||
|
||||
/// True when condition flags are accessed (either by writing or reading)
|
||||
/// on the instruction trace starting at From and ending at To.
|
||||
|
@ -1111,21 +1107,24 @@ static bool areCFlagsAccessedBetweenInstrs(
|
|||
for (--To; To != From; --To) {
|
||||
const MachineInstr &Instr = *To;
|
||||
|
||||
if ( ((AccessToCheck & AK_Write) && Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
|
||||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
|
||||
if (((AccessToCheck & AK_Write) &&
|
||||
Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
|
||||
((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Try to optimize a compare instruction. A compare instruction is an
|
||||
/// instruction which produces AArch64::NZCV. It can be truly compare instruction
|
||||
/// instruction which produces AArch64::NZCV. It can be truly compare
|
||||
/// instruction
|
||||
/// when there are no uses of its destination register.
|
||||
///
|
||||
/// The following steps are tried in order:
|
||||
/// 1. Convert CmpInstr into an unconditional version.
|
||||
/// 2. Remove CmpInstr if above there is an instruction producing a needed
|
||||
/// condition code or an instruction which can be converted into such an instruction.
|
||||
/// condition code or an instruction which can be converted into such an
|
||||
/// instruction.
|
||||
/// Only comparison with zero is supported.
|
||||
bool AArch64InstrInfo::optimizeCompareInstr(
|
||||
MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
|
||||
|
@ -1187,20 +1186,34 @@ static unsigned sForm(MachineInstr &Instr) {
|
|||
case AArch64::SUBSXri:
|
||||
return Instr.getOpcode();
|
||||
|
||||
case AArch64::ADDWrr: return AArch64::ADDSWrr;
|
||||
case AArch64::ADDWri: return AArch64::ADDSWri;
|
||||
case AArch64::ADDXrr: return AArch64::ADDSXrr;
|
||||
case AArch64::ADDXri: return AArch64::ADDSXri;
|
||||
case AArch64::ADCWr: return AArch64::ADCSWr;
|
||||
case AArch64::ADCXr: return AArch64::ADCSXr;
|
||||
case AArch64::SUBWrr: return AArch64::SUBSWrr;
|
||||
case AArch64::SUBWri: return AArch64::SUBSWri;
|
||||
case AArch64::SUBXrr: return AArch64::SUBSXrr;
|
||||
case AArch64::SUBXri: return AArch64::SUBSXri;
|
||||
case AArch64::SBCWr: return AArch64::SBCSWr;
|
||||
case AArch64::SBCXr: return AArch64::SBCSXr;
|
||||
case AArch64::ANDWri: return AArch64::ANDSWri;
|
||||
case AArch64::ANDXri: return AArch64::ANDSXri;
|
||||
case AArch64::ADDWrr:
|
||||
return AArch64::ADDSWrr;
|
||||
case AArch64::ADDWri:
|
||||
return AArch64::ADDSWri;
|
||||
case AArch64::ADDXrr:
|
||||
return AArch64::ADDSXrr;
|
||||
case AArch64::ADDXri:
|
||||
return AArch64::ADDSXri;
|
||||
case AArch64::ADCWr:
|
||||
return AArch64::ADCSWr;
|
||||
case AArch64::ADCXr:
|
||||
return AArch64::ADCSXr;
|
||||
case AArch64::SUBWrr:
|
||||
return AArch64::SUBSWrr;
|
||||
case AArch64::SUBWri:
|
||||
return AArch64::SUBSWri;
|
||||
case AArch64::SUBXrr:
|
||||
return AArch64::SUBSXrr;
|
||||
case AArch64::SUBXri:
|
||||
return AArch64::SUBSXri;
|
||||
case AArch64::SBCWr:
|
||||
return AArch64::SBCSWr;
|
||||
case AArch64::SBCXr:
|
||||
return AArch64::SBCSXr;
|
||||
case AArch64::ANDWri:
|
||||
return AArch64::ANDSWri;
|
||||
case AArch64::ANDXri:
|
||||
return AArch64::ANDSXri;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1222,7 +1235,7 @@ struct UsedNZCV {
|
|||
|
||||
UsedNZCV() = default;
|
||||
|
||||
UsedNZCV& operator |=(const UsedNZCV& UsedFlags) {
|
||||
UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
|
||||
this->N |= UsedFlags.N;
|
||||
this->Z |= UsedFlags.Z;
|
||||
this->C |= UsedFlags.C;
|
||||
|
@ -1238,29 +1251,29 @@ struct UsedNZCV {
|
|||
/// codes or we don't optimize CmpInstr in the presence of such instructions.
|
||||
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
|
||||
switch (Instr.getOpcode()) {
|
||||
default:
|
||||
return AArch64CC::Invalid;
|
||||
default:
|
||||
return AArch64CC::Invalid;
|
||||
|
||||
case AArch64::Bcc: {
|
||||
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
|
||||
assert(Idx >= 2);
|
||||
return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
|
||||
}
|
||||
case AArch64::Bcc: {
|
||||
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
|
||||
assert(Idx >= 2);
|
||||
return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
|
||||
}
|
||||
|
||||
case AArch64::CSINVWr:
|
||||
case AArch64::CSINVXr:
|
||||
case AArch64::CSINCWr:
|
||||
case AArch64::CSINCXr:
|
||||
case AArch64::CSELWr:
|
||||
case AArch64::CSELXr:
|
||||
case AArch64::CSNEGWr:
|
||||
case AArch64::CSNEGXr:
|
||||
case AArch64::FCSELSrrr:
|
||||
case AArch64::FCSELDrrr: {
|
||||
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
|
||||
assert(Idx >= 1);
|
||||
return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
|
||||
}
|
||||
case AArch64::CSINVWr:
|
||||
case AArch64::CSINVXr:
|
||||
case AArch64::CSINCWr:
|
||||
case AArch64::CSINCXr:
|
||||
case AArch64::CSELWr:
|
||||
case AArch64::CSELXr:
|
||||
case AArch64::CSNEGWr:
|
||||
case AArch64::CSNEGXr:
|
||||
case AArch64::FCSELSrrr:
|
||||
case AArch64::FCSELDrrr: {
|
||||
int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
|
||||
assert(Idx >= 1);
|
||||
return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1268,42 +1281,42 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
|
|||
assert(CC != AArch64CC::Invalid);
|
||||
UsedNZCV UsedFlags;
|
||||
switch (CC) {
|
||||
default:
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
||||
case AArch64CC::EQ: // Z set
|
||||
case AArch64CC::NE: // Z clear
|
||||
UsedFlags.Z = true;
|
||||
break;
|
||||
case AArch64CC::EQ: // Z set
|
||||
case AArch64CC::NE: // Z clear
|
||||
UsedFlags.Z = true;
|
||||
break;
|
||||
|
||||
case AArch64CC::HI: // Z clear and C set
|
||||
case AArch64CC::LS: // Z set or C clear
|
||||
UsedFlags.Z = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64CC::HS: // C set
|
||||
case AArch64CC::LO: // C clear
|
||||
UsedFlags.C = true;
|
||||
break;
|
||||
case AArch64CC::HI: // Z clear and C set
|
||||
case AArch64CC::LS: // Z set or C clear
|
||||
UsedFlags.Z = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64CC::HS: // C set
|
||||
case AArch64CC::LO: // C clear
|
||||
UsedFlags.C = true;
|
||||
break;
|
||||
|
||||
case AArch64CC::MI: // N set
|
||||
case AArch64CC::PL: // N clear
|
||||
UsedFlags.N = true;
|
||||
break;
|
||||
case AArch64CC::MI: // N set
|
||||
case AArch64CC::PL: // N clear
|
||||
UsedFlags.N = true;
|
||||
break;
|
||||
|
||||
case AArch64CC::VS: // V set
|
||||
case AArch64CC::VC: // V clear
|
||||
UsedFlags.V = true;
|
||||
break;
|
||||
case AArch64CC::VS: // V set
|
||||
case AArch64CC::VC: // V clear
|
||||
UsedFlags.V = true;
|
||||
break;
|
||||
|
||||
case AArch64CC::GT: // Z clear, N and V the same
|
||||
case AArch64CC::LE: // Z set, N and V differ
|
||||
UsedFlags.Z = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64CC::GE: // N and V the same
|
||||
case AArch64CC::LT: // N and V differ
|
||||
UsedFlags.N = true;
|
||||
UsedFlags.V = true;
|
||||
break;
|
||||
case AArch64CC::GT: // Z clear, N and V the same
|
||||
case AArch64CC::LE: // Z set, N and V differ
|
||||
UsedFlags.Z = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case AArch64CC::GE: // N and V the same
|
||||
case AArch64CC::LT: // N and V differ
|
||||
UsedFlags.N = true;
|
||||
UsedFlags.V = true;
|
||||
break;
|
||||
}
|
||||
return UsedFlags;
|
||||
}
|
||||
|
@ -1328,7 +1341,7 @@ static bool isSUBSRegImm(unsigned Opcode) {
|
|||
/// nor uses of flags between MI and CmpInstr.
|
||||
/// - and C/V flags are not used after CmpInstr
|
||||
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
const TargetRegisterInfo *TRI) {
|
||||
assert(MI);
|
||||
assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
|
||||
assert(CmpInstr);
|
||||
|
@ -1350,7 +1363,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
|
|||
return false;
|
||||
|
||||
UsedNZCV NZCVUsedAfterCmp;
|
||||
for (auto I = std::next(CmpInstr->getIterator()), E = CmpInstr->getParent()->instr_end();
|
||||
for (auto I = std::next(CmpInstr->getIterator()),
|
||||
E = CmpInstr->getParent()->instr_end();
|
||||
I != E; ++I) {
|
||||
const MachineInstr &Instr = *I;
|
||||
if (Instr.readsRegister(AArch64::NZCV, TRI)) {
|
||||
|
@ -1363,7 +1377,7 @@ static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
|
|||
if (Instr.modifiesRegister(AArch64::NZCV, TRI))
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
|
||||
}
|
||||
|
||||
|
@ -1421,16 +1435,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||
.addMemOperand(*MI.memoperands_begin());
|
||||
} else if (TM.getCodeModel() == CodeModel::Large) {
|
||||
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0);
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
|
||||
.addImm(0);
|
||||
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
|
||||
.addReg(Reg, RegState::Kill)
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16);
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
|
||||
.addImm(16);
|
||||
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
|
||||
.addReg(Reg, RegState::Kill)
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32);
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
|
||||
.addImm(32);
|
||||
BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
|
||||
.addReg(Reg, RegState::Kill)
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48);
|
||||
.addGlobalAddress(GV, 0, AArch64II::MO_G3)
|
||||
.addImm(48);
|
||||
BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
|
||||
.addReg(Reg, RegState::Kill)
|
||||
.addImm(0)
|
||||
|
@ -1812,7 +1830,7 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
|
|||
} else
|
||||
return false;
|
||||
|
||||
// Get the scaling factor for the instruction and set the width for the
|
||||
// Get the scaling factor for the instruction and set the width for the
|
||||
// instruction.
|
||||
unsigned Scale = 0;
|
||||
int64_t Dummy1, Dummy2;
|
||||
|
@ -1835,10 +1853,10 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
|
|||
return true;
|
||||
}
|
||||
|
||||
MachineOperand&
|
||||
MachineOperand &
|
||||
AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
|
||||
assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
|
||||
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1);
|
||||
MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
|
||||
assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
|
||||
return OfsOp;
|
||||
}
|
||||
|
@ -1847,7 +1865,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
|
|||
unsigned &Width, int64_t &MinOffset,
|
||||
int64_t &MaxOffset) const {
|
||||
switch (Opcode) {
|
||||
// Not a memory operation or something we want to handle.
|
||||
// Not a memory operation or something we want to handle.
|
||||
default:
|
||||
Scale = Width = 0;
|
||||
MinOffset = MaxOffset = 0;
|
||||
|
@ -2102,12 +2120,13 @@ static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
|
|||
return ((DestReg - SrcReg) & 0x1f) < NumRegs;
|
||||
}
|
||||
|
||||
void AArch64InstrInfo::copyPhysRegTuple(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
|
||||
unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
|
||||
ArrayRef<unsigned> Indices) const {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register copy without NEON");
|
||||
void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
const DebugLoc &DL, unsigned DestReg,
|
||||
unsigned SrcReg, bool KillSrc,
|
||||
unsigned Opcode,
|
||||
ArrayRef<unsigned> Indices) const {
|
||||
assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
|
||||
const TargetRegisterInfo *TRI = &getRegisterInfo();
|
||||
uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
|
||||
uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
|
||||
|
@ -2160,8 +2179,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
}
|
||||
} else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
|
||||
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
|
||||
AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
|
||||
.addImm(0)
|
||||
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
} else {
|
||||
if (Subtarget.hasZeroCycleRegMove()) {
|
||||
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
|
||||
|
@ -2196,8 +2216,9 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
.addImm(0)
|
||||
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
} else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
|
||||
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
|
||||
AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
|
||||
.addImm(0)
|
||||
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
|
||||
} else {
|
||||
// Otherwise, expand to ORR XZR.
|
||||
BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
|
||||
|
@ -2210,8 +2231,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a DDDD register quad by copying the individual sub-registers.
|
||||
if (AArch64::DDDDRegClass.contains(DestReg) &&
|
||||
AArch64::DDDDRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
|
||||
AArch64::dsub2, AArch64::dsub3 };
|
||||
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
|
||||
AArch64::dsub2, AArch64::dsub3};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2220,8 +2241,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a DDD register triple by copying the individual sub-registers.
|
||||
if (AArch64::DDDRegClass.contains(DestReg) &&
|
||||
AArch64::DDDRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
|
||||
AArch64::dsub2 };
|
||||
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
|
||||
AArch64::dsub2};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2230,7 +2251,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a DD register pair by copying the individual sub-registers.
|
||||
if (AArch64::DDRegClass.contains(DestReg) &&
|
||||
AArch64::DDRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
|
||||
static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2239,8 +2260,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a QQQQ register quad by copying the individual sub-registers.
|
||||
if (AArch64::QQQQRegClass.contains(DestReg) &&
|
||||
AArch64::QQQQRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
|
||||
AArch64::qsub2, AArch64::qsub3 };
|
||||
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
|
||||
AArch64::qsub2, AArch64::qsub3};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2249,8 +2270,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a QQQ register triple by copying the individual sub-registers.
|
||||
if (AArch64::QQQRegClass.contains(DestReg) &&
|
||||
AArch64::QQQRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
|
||||
AArch64::qsub2 };
|
||||
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
|
||||
AArch64::qsub2};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2259,7 +2280,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
// Copy a QQ register pair by copying the individual sub-registers.
|
||||
if (AArch64::QQRegClass.contains(DestReg) &&
|
||||
AArch64::QQRegClass.contains(SrcReg)) {
|
||||
static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
|
||||
static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
|
||||
copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
|
||||
Indices);
|
||||
return;
|
||||
|
@ -2267,28 +2288,28 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
|
||||
if (AArch64::FPR128RegClass.contains(DestReg) &&
|
||||
AArch64::FPR128RegClass.contains(SrcReg)) {
|
||||
if(Subtarget.hasNEON()) {
|
||||
if (Subtarget.hasNEON()) {
|
||||
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
|
||||
.addReg(SrcReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
} else {
|
||||
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
|
||||
.addReg(AArch64::SP, RegState::Define)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc))
|
||||
.addReg(AArch64::SP)
|
||||
.addImm(-16);
|
||||
.addReg(AArch64::SP, RegState::Define)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc))
|
||||
.addReg(AArch64::SP)
|
||||
.addImm(-16);
|
||||
BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
|
||||
.addReg(AArch64::SP, RegState::Define)
|
||||
.addReg(DestReg, RegState::Define)
|
||||
.addReg(AArch64::SP)
|
||||
.addImm(16);
|
||||
.addReg(AArch64::SP, RegState::Define)
|
||||
.addReg(DestReg, RegState::Define)
|
||||
.addReg(AArch64::SP)
|
||||
.addImm(16);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (AArch64::FPR64RegClass.contains(DestReg) &&
|
||||
AArch64::FPR64RegClass.contains(SrcReg)) {
|
||||
if(Subtarget.hasNEON()) {
|
||||
if (Subtarget.hasNEON()) {
|
||||
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
|
||||
&AArch64::FPR128RegClass);
|
||||
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
|
||||
|
@ -2305,7 +2326,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
|
||||
if (AArch64::FPR32RegClass.contains(DestReg) &&
|
||||
AArch64::FPR32RegClass.contains(SrcReg)) {
|
||||
if(Subtarget.hasNEON()) {
|
||||
if (Subtarget.hasNEON()) {
|
||||
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
|
||||
&AArch64::FPR128RegClass);
|
||||
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
|
||||
|
@ -2322,7 +2343,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
|
||||
if (AArch64::FPR16RegClass.contains(DestReg) &&
|
||||
AArch64::FPR16RegClass.contains(SrcReg)) {
|
||||
if(Subtarget.hasNEON()) {
|
||||
if (Subtarget.hasNEON()) {
|
||||
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
|
||||
&AArch64::FPR128RegClass);
|
||||
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
|
||||
|
@ -2343,7 +2364,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
|
||||
if (AArch64::FPR8RegClass.contains(DestReg) &&
|
||||
AArch64::FPR8RegClass.contains(SrcReg)) {
|
||||
if(Subtarget.hasNEON()) {
|
||||
if (Subtarget.hasNEON()) {
|
||||
DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
|
||||
&AArch64::FPR128RegClass);
|
||||
SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
|
||||
|
@ -2392,17 +2413,17 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
if (DestReg == AArch64::NZCV) {
|
||||
assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
|
||||
BuildMI(MBB, I, DL, get(AArch64::MSR))
|
||||
.addImm(AArch64SysReg::NZCV)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc))
|
||||
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
|
||||
.addImm(AArch64SysReg::NZCV)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc))
|
||||
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
|
||||
return;
|
||||
}
|
||||
|
||||
if (SrcReg == AArch64::NZCV) {
|
||||
assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
|
||||
BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
|
||||
.addImm(AArch64SysReg::NZCV)
|
||||
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
|
||||
.addImm(AArch64SysReg::NZCV)
|
||||
.addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2458,45 +2479,39 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
|
||||
Opc = AArch64::STRQui;
|
||||
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Twov1d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Threev1d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Fourv1d;
|
||||
Offset = false;
|
||||
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Twov2d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 48:
|
||||
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Threev2d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register store without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
|
||||
Opc = AArch64::ST1Fourv2d;
|
||||
Offset = false;
|
||||
}
|
||||
|
@ -2505,8 +2520,8 @@ void AArch64InstrInfo::storeRegToStackSlot(
|
|||
assert(Opc && "Unknown register class");
|
||||
|
||||
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FI);
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FI);
|
||||
|
||||
if (Offset)
|
||||
MI.addImm(0);
|
||||
|
@ -2562,45 +2577,39 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
if (AArch64::FPR128RegClass.hasSubClassEq(RC))
|
||||
Opc = AArch64::LDRQui;
|
||||
else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Twov1d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 24:
|
||||
if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Threev1d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 32:
|
||||
if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Fourv1d;
|
||||
Offset = false;
|
||||
} else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Twov2d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 48:
|
||||
if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Threev2d;
|
||||
Offset = false;
|
||||
}
|
||||
break;
|
||||
case 64:
|
||||
if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
|
||||
assert(Subtarget.hasNEON() &&
|
||||
"Unexpected register load without NEON");
|
||||
assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
|
||||
Opc = AArch64::LD1Fourv2d;
|
||||
Offset = false;
|
||||
}
|
||||
|
@ -2609,8 +2618,8 @@ void AArch64InstrInfo::loadRegFromStackSlot(
|
|||
assert(Opc && "Unknown register class");
|
||||
|
||||
const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
|
||||
.addReg(DestReg, getDefRegState(true))
|
||||
.addFrameIndex(FI);
|
||||
.addReg(DestReg, getDefRegState(true))
|
||||
.addFrameIndex(FI);
|
||||
if (Offset)
|
||||
MI.addImm(0);
|
||||
MI.addMemOperand(MMO);
|
||||
|
@ -2755,7 +2764,7 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
|||
|
||||
if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
|
||||
assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
|
||||
TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
|
||||
TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
|
||||
"Mismatched register size in non subreg COPY");
|
||||
if (IsSpill)
|
||||
storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
|
||||
|
@ -3138,10 +3147,7 @@ void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
|
|||
}
|
||||
|
||||
// AArch64 supports MachineCombiner.
|
||||
bool AArch64InstrInfo::useMachineCombiner() const {
|
||||
|
||||
return true;
|
||||
}
|
||||
bool AArch64InstrInfo::useMachineCombiner() const { return true; }
|
||||
|
||||
// True when Opc sets flag
|
||||
static bool isCombineInstrSettingFlag(unsigned Opc) {
|
||||
|
@ -3275,7 +3281,8 @@ static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
|
|||
// 1. Other data types (integer, vectors)
|
||||
// 2. Other math / logic operations (xor, or)
|
||||
// 3. Other forms of the same operation (intrinsics and other variants)
|
||||
bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
|
||||
bool AArch64InstrInfo::isAssociativeAndCommutative(
|
||||
const MachineInstr &Inst) const {
|
||||
switch (Inst.getOpcode()) {
|
||||
case AArch64::FADDDrr:
|
||||
case AArch64::FADDSrr:
|
||||
|
@@ -3595,8 +3602,8 @@ static bool getFMAPatterns(MachineInstr &Root,
 /// Return true when a code sequence can improve throughput. It
 /// should be called only for instructions in loops.
 /// \param Pattern - combiner pattern
-bool
-AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const {
+bool AArch64InstrInfo::isThroughputPattern(
+    MachineCombinerPattern Pattern) const {
   switch (Pattern) {
   default:
     break;

@@ -3747,8 +3754,8 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
                               const TargetInstrInfo *TII, MachineInstr &Root,
                               SmallVectorImpl<MachineInstr *> &InsInstrs,
-                              unsigned IdxMulOpd, unsigned MaddOpc,
-                              unsigned VR, const TargetRegisterClass *RC) {
+                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
+                              const TargetRegisterClass *RC) {
   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
 
   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());

@@ -3767,11 +3774,11 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
   if (TargetRegisterInfo::isVirtualRegister(VR))
     MRI.constrainRegClass(VR, RC);
 
-  MachineInstrBuilder MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc),
-                                    ResultReg)
-                                .addReg(SrcReg0, getKillRegState(Src0IsKill))
-                                .addReg(SrcReg1, getKillRegState(Src1IsKill))
-                                .addReg(VR);
+  MachineInstrBuilder MIB =
+      BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
+          .addReg(SrcReg0, getKillRegState(Src0IsKill))
+          .addReg(SrcReg1, getKillRegState(Src1IsKill))
+          .addReg(VR);
   // Insert the MADD
   InsInstrs.push_back(MIB);
   return MUL;

@@ -4401,12 +4408,9 @@ AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
   using namespace AArch64II;
 
   static const std::pair<unsigned, const char *> TargetFlags[] = {
-      {MO_PAGE, "aarch64-page"},
-      {MO_PAGEOFF, "aarch64-pageoff"},
-      {MO_G3, "aarch64-g3"},
-      {MO_G2, "aarch64-g2"},
-      {MO_G1, "aarch64-g1"},
-      {MO_G0, "aarch64-g0"},
+      {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
+      {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
+      {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
       {MO_HI12, "aarch64-hi12"}};
   return makeArrayRef(TargetFlags);
 }

@@ -4416,9 +4420,7 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   using namespace AArch64II;
 
   static const std::pair<unsigned, const char *> TargetFlags[] = {
-      {MO_GOT, "aarch64-got"},
-      {MO_NC, "aarch64-nc"},
-      {MO_TLS, "aarch64-tls"}};
+      {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
   return makeArrayRef(TargetFlags);
 }
 
@@ -4430,26 +4432,27 @@ AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   return makeArrayRef(TargetFlags);
 }
 
-unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize,
-                                               size_t Occurrences,
-                                               bool CanBeTailCall) const {
-  unsigned NotOutlinedSize = SequenceSize * Occurrences;
-  unsigned OutlinedSize;
-
-  // Is this candidate something we can outline as a tail call?
-  if (CanBeTailCall) {
-    // If yes, then we just outline the sequence and replace each of its
-    // occurrences with a branch instruction.
-    OutlinedSize = SequenceSize + Occurrences;
-  } else {
-    // If no, then we outline the sequence (SequenceSize), add a return (+1),
-    // and replace each occurrence with a save/restore to LR and a call
-    // (3 * Occurrences)
-    OutlinedSize = (SequenceSize + 1) + (3 * Occurrences);
-  }
-
-  // Return the number of instructions saved by outlining this sequence.
-  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+size_t AArch64InstrInfo::getOutliningCallOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 1; // Yes, so we don't need to save/restore LR.
+
+  // No, so save + restore LR.
+  return 3;
+}
+
+size_t AArch64InstrInfo::getOutliningFrameOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 0; // Yes, so we already have a return.
+
+  // No, so we have to add a return to the end.
+  return 1;
 }
 
 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {

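The benefit calculation that used to live in getOutliningBenefit now belongs to the generic outliner, which queries these two hooks. A minimal sketch of how a caller could recombine them into the old "instructions saved" number follows; computeBenefit, CallOverheads, and FrameOverhead are illustrative names, not the actual MachineOutliner code.

#include <cstddef>
#include <vector>

// Sketch only: recover the old "benefit" value from the two new hooks.
// SequenceSize  - length of the candidate sequence, in instructions
// CallOverheads - getOutliningCallOverhead(...) for each occurrence
// FrameOverhead - getOutliningFrameOverhead(...) for the outlined function
static size_t computeBenefit(size_t SequenceSize,
                             const std::vector<size_t> &CallOverheads,
                             size_t FrameOverhead) {
  size_t NotOutlinedCost = SequenceSize * CallOverheads.size();
  size_t OutlinedCost = SequenceSize + FrameOverhead; // one copy of the body
  for (size_t CallCost : CallOverheads)
    OutlinedCost += CallCost; // each occurrence becomes a call or branch
  return NotOutlinedCost > OutlinedCost ? NotOutlinedCost - OutlinedCost : 0;
}

Plugging in the AArch64 values reproduces both branches of the deleted function: a tail-callable candidate costs SequenceSize + Occurrences (call overhead 1, frame overhead 0), while a regular one costs (SequenceSize + 1) + 3 * Occurrences.
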
@@ -4475,7 +4478,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
 
   // Is this the end of a function?
   if (MI.getParent()->succ_empty())
-      return MachineOutlinerInstrType::Legal;
+    return MachineOutlinerInstrType::Legal;
 
   // It's not, so don't outline it.
   return MachineOutlinerInstrType::Illegal;

@@ -4494,7 +4497,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
   // Don't outline anything that uses the link register.
   if (MI.modifiesRegister(AArch64::LR, &RI) ||
       MI.readsRegister(AArch64::LR, &RI))
-      return MachineOutlinerInstrType::Illegal;
+    return MachineOutlinerInstrType::Illegal;
 
   // Does this use the stack?
   if (MI.modifiesRegister(AArch64::SP, &RI) ||

@@ -4502,13 +4505,13 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
 
   // Is it a memory operation?
   if (MI.mayLoadOrStore()) {
-    unsigned Base;   // Filled with the base register of MI.
+    unsigned Base; // Filled with the base register of MI.
     int64_t Offset; // Filled with the offset of MI.
     unsigned DummyWidth;
 
     // Does it allow us to offset the base register and is the base SP?
     if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
-         Base != AArch64::SP)
+        Base != AArch64::SP)
       return MachineOutlinerInstrType::Illegal;
 
     // Find the minimum/maximum offset for this instruction and check if

@@ -4522,7 +4525,7 @@ AArch64InstrInfo::getOutliningType(MachineInstr &MI) const {
     // This is tricky to test with IR tests, but when the outliner is moved
     // to a MIR test, it really ought to be checked.
     if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset)
-        return MachineOutlinerInstrType::Illegal;
+      return MachineOutlinerInstrType::Illegal;
 
     // It's in range, so we can outline it.
     return MachineOutlinerInstrType::Legal;

@@ -4558,7 +4561,7 @@ void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
     // We've pushed the return address to the stack, so add 16 to the offset.
     // This is safe, since we already checked if it would overflow when we
    // checked if this instruction was legal to outline.
-    int64_t NewImm = (Offset + 16)/Scale;
+    int64_t NewImm = (Offset + 16) / Scale;
     StackOffsetOperand.setImm(NewImm);
   }
 }

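As a worked example of that fixup (illustrative numbers, assuming a 64-bit unsigned-scaled load whose immediate is scaled by 8): an access to [sp, #32] inside the outlined body is encoded with an immediate of 32 / 8 = 4, and because the outlined function keeps the return address in a 16-byte stack slot, the operand is rewritten to (32 + 16) / 8 = 6, i.e. [sp, #48].
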
@@ -4624,4 +4627,3 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
 
   return It;
 }
-

@@ -299,8 +299,10 @@ public:
       getSerializableMachineMemOperandTargetFlags() const override;
 
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
-  unsigned getOutliningBenefit(size_t SequenceSize, size_t Occurrences,
-                               bool CanBeTailCall) const override;
+  size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
+                                  MachineBasicBlock::iterator &EndIt) const override;
+  size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
+                                   MachineBasicBlock::iterator &EndIt) const override;
   AArch64GenInstrInfo::MachineOutlinerInstrType
   getOutliningType(MachineInstr &MI) const override;
   void insertOutlinerEpilogue(MachineBasicBlock &MBB,

@@ -10537,25 +10537,22 @@ char LDTLSCleanup::ID = 0;
 FunctionPass*
 llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
 
-unsigned X86InstrInfo::getOutliningBenefit(size_t SequenceSize,
-                                           size_t Occurrences,
-                                           bool CanBeTailCall) const {
-  unsigned NotOutlinedSize = SequenceSize * Occurrences;
-  unsigned OutlinedSize;
-
-  // Is it a tail call?
-  if (CanBeTailCall) {
-    // If yes, we don't have to include a return instruction-- it's already in
-    // our sequence. So we have one occurrence of the sequence + #Occurrences
-    // calls.
-    OutlinedSize = SequenceSize + Occurrences;
-  } else {
-    // If not, add one for the return instruction.
-    OutlinedSize = (SequenceSize + 1) + Occurrences;
-  }
-
-  // Return the number of instructions saved by outlining this sequence.
-  return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0;
+size_t X86InstrInfo::getOutliningCallOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // We just have to emit a call, so return 1.
+  return 1;
+}
+
+size_t X86InstrInfo::getOutliningFrameOverhead(
+    MachineBasicBlock::iterator &StartIt,
+    MachineBasicBlock::iterator &EndIt) const {
+  // Is this a tail-call?
+  if (EndIt->isTerminator())
+    return 0; // Yes, so we already have a return.
+
+  // No, so we have to add a return to the end.
+  return 1;
 }
 
 bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const {

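With illustrative numbers, and assuming the generic outliner combines the costs as in the sketch above, the split preserves the old per-target arithmetic: for a 10-instruction candidate occurring 3 times, X86 (call overhead 1, frame overhead 1 unless the candidate already ends in a terminator) saves 3 * 10 - (10 + 1 + 3 * 1) = 16 instructions, while AArch64 as a non-tail call (call overhead 3 for the LR save, the call, and the LR restore; frame overhead 1 for the added return) saves 3 * 10 - (10 + 1 + 3 * 3) = 10.
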
@@ -566,9 +566,11 @@ public:
   ArrayRef<std::pair<unsigned, const char *>>
   getSerializableDirectMachineOperandTargetFlags() const override;
 
-  unsigned getOutliningBenefit(size_t SequenceSize,
-                               size_t Occurrences,
-                               bool CanBeTailCall) const override;
+  size_t getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt,
+                                  MachineBasicBlock::iterator &EndIt) const override;
+
+  size_t getOutliningFrameOverhead(MachineBasicBlock::iterator &StartIt,
+                                   MachineBasicBlock::iterator &EndIt) const override;
 
   bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override;
 