[MachineScheduler] Order more stores by ascending address

Following D125377, we order STP Q's by ascending address. On some targets,
paired 128-bit loads and stores are slow, so the STP is split into STRQ and
STUR; we want those stores to be ordered by ascending address as well.
Also add the subtarget feature ascend-store-address to control this more
aggressive ordering.
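
As an illustration (not part of this commit), a 64-byte copy like the one
below can lower to four independent 128-bit stores on a subtarget with
slow-paired-128; the goal is that the post-RA scheduler then emits them at
ascending offsets ([x0], [x0, #16], [x0, #32], [x0, #48]) rather than in
whatever order the default heuristics pick:

// Hypothetical source pattern: with paired Q stores avoided, the backend
// keeps four STR Q / STUR Q stores to consecutive offsets of dst.
void copy64(unsigned char *dst, const unsigned char *src) {
  __builtin_memcpy(dst, src, 64);
}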

Reviewed By: dmgreen, fhahn

Differential Revision: https://reviews.llvm.org/D126700
zhongyunde 2022-06-13 17:24:59 +08:00
parent 6119053dab
commit c42a225545
5 changed files with 165 additions and 93 deletions


@ -215,6 +215,10 @@ def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
"IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
"IsStoreAddressAscend", "false",
"Schedule scalar stores by ascending address">;
def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
"true", "STR of Q register with register offset is slow">;


@ -3152,6 +3152,41 @@ bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
return isPreLd(MI) || isPreSt(MI);
}
bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPDi:
case AArch64::LDPQi:
case AArch64::LDPWi:
case AArch64::LDPXi:
case AArch64::STPSi:
case AArch64::STPDi:
case AArch64::STPQi:
case AArch64::STPWi:
case AArch64::STPXi:
case AArch64::STGPi:
return true;
}
}
const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
: 1;
return MI.getOperand(Idx);
}
const MachineOperand &
AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
: 2;
return MI.getOperand(Idx);
}
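
These helpers encode the usual AArch64 load/store operand layouts: an
unpaired reg+imm access (e.g. STRXui rt, rn, imm) keeps its base at operand 1
and its offset at operand 2, a paired access (e.g. STPXi rt, rt2, rn, imm) at
2 and 3, and an unpaired pre-indexed form gains a write-back def at operand 0,
which also shifts base/offset to 2 and 3. A minimal caller-side sketch,
assuming it lives in the AArch64 backend where AArch64InstrInfo.h is visible
(the helper name is hypothetical):

// Recover the byte offset of a reg+imm load/store via the new static helpers.
static int64_t getByteOffset(const MachineInstr &MI,
                             const AArch64InstrInfo *TII) {
  const MachineOperand &Off = AArch64InstrInfo::getLdStOffsetOp(MI);
  assert(Off.isImm() && "expected reg+imm addressing, not a relocation");
  // Scaled forms encode the offset in units of the access size.
  int Scale = TII->hasUnscaledLdStOffset(MI) ? 1 : TII->getMemScale(MI);
  return Off.getImm() * Scale;
}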
static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
Register Reg) {
if (MI.getParent() == nullptr)


@ -103,6 +103,15 @@ public:
/// Returns whether the instruction is a pre-indexed load/store.
static bool isPreLdSt(const MachineInstr &MI);
/// Returns whether the instruction is a paired load/store.
static bool isPairedLdSt(const MachineInstr &MI);
/// Returns the base register operand of a load/store.
static const MachineOperand &getLdStBaseOp(const MachineInstr &MI);
/// Returns the immediate offset operand of a load/store.
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI);
/// Returns whether the instruction is FP or NEON.
static bool isFpOrNEON(const MachineInstr &MI);


@ -556,26 +556,6 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
}
}
static bool isPairedLdSt(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
case AArch64::LDPSi:
case AArch64::LDPSWi:
case AArch64::LDPDi:
case AArch64::LDPQi:
case AArch64::LDPWi:
case AArch64::LDPXi:
case AArch64::STPSi:
case AArch64::STPDi:
case AArch64::STPQi:
case AArch64::STPWi:
case AArch64::STPXi:
case AArch64::STGPi:
return true;
}
}
static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
unsigned OpcA = FirstMI.getOpcode();
@ -610,7 +590,7 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
// Returns the scale and offset range of pre/post indexed variants of MI.
static void getPrePostIndexedMemOpInfo(const MachineInstr &MI, int &Scale,
int &MinOffset, int &MaxOffset) {
bool IsPaired = isPairedLdSt(MI);
bool IsPaired = AArch64InstrInfo::isPairedLdSt(MI);
bool IsTagStore = isTagStore(MI);
// ST*G and all paired ldst have the same scale in pre/post-indexed variants
// as in the "unsigned offset" variant.
@ -632,17 +612,8 @@ static MachineOperand &getLdStRegOp(MachineInstr &MI,
bool IsPreLdSt = AArch64InstrInfo::isPreLdSt(MI);
if (IsPreLdSt)
PairedRegOp += 1;
unsigned Idx = isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
return MI.getOperand(Idx);
}
static const MachineOperand &getLdStBaseOp(const MachineInstr &MI) {
unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2 : 1;
return MI.getOperand(Idx);
}
static const MachineOperand &getLdStOffsetOp(const MachineInstr &MI) {
unsigned Idx = isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3 : 2;
unsigned Idx =
AArch64InstrInfo::isPairedLdSt(MI) || IsPreLdSt ? PairedRegOp : 0;
return MI.getOperand(Idx);
}
@ -652,12 +623,14 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st.");
int LoadSize = TII->getMemScale(LoadInst);
int StoreSize = TII->getMemScale(StoreInst);
int UnscaledStOffset = TII->hasUnscaledLdStOffset(StoreInst)
? getLdStOffsetOp(StoreInst).getImm()
: getLdStOffsetOp(StoreInst).getImm() * StoreSize;
int UnscaledLdOffset = TII->hasUnscaledLdStOffset(LoadInst)
? getLdStOffsetOp(LoadInst).getImm()
: getLdStOffsetOp(LoadInst).getImm() * LoadSize;
int UnscaledStOffset =
TII->hasUnscaledLdStOffset(StoreInst)
? AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm()
: AArch64InstrInfo::getLdStOffsetOp(StoreInst).getImm() * StoreSize;
int UnscaledLdOffset =
TII->hasUnscaledLdStOffset(LoadInst)
? AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm()
: AArch64InstrInfo::getLdStOffsetOp(LoadInst).getImm() * LoadSize;
return (UnscaledStOffset <= UnscaledLdOffset) &&
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
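
A worked instance of the containment check above (hypothetical values for the
scaled case, not taken from the patch):

// STRXui with imm 2 writes 8 bytes at byte offset 16; LDRWui with imm 5
// reads 4 bytes at byte offset 20, which lie entirely inside those 8 bytes.
constexpr int StoreSize = 8, LoadSize = 4;
constexpr int UnscaledStOffset = 2 * StoreSize; // 16
constexpr int UnscaledLdOffset = 5 * LoadSize;  // 20
static_assert(UnscaledStOffset <= UnscaledLdOffset &&
                  UnscaledLdOffset + LoadSize <= UnscaledStOffset + StoreSize,
              "load reads only bytes produced by the store");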
@ -736,7 +709,7 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
case AArch64::STPWi:
case AArch64::STPXi:
// Make sure this is a reg+imm (as opposed to an address reloc).
if (!getLdStOffsetOp(MI).isImm())
if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
return true;
@ -770,17 +743,18 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
MergeForward ? AArch64InstrInfo::getLdStBaseOp(*MergeMI)
: AArch64InstrInfo::getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
MachineInstr *RtMI;
if (getLdStOffsetOp(*I).getImm() ==
getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;
else
RtMI = &*I;
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.
if (IsScaled) {
assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
@ -944,10 +918,11 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
// Also based on MergeForward is from where we copy the base register operand
// so we get the flags compatible with the input code.
const MachineOperand &BaseRegOp =
MergeForward ? getLdStBaseOp(*Paired) : getLdStBaseOp(*I);
MergeForward ? AArch64InstrInfo::getLdStBaseOp(*Paired)
: AArch64InstrInfo::getLdStBaseOp(*I);
int Offset = getLdStOffsetOp(*I).getImm();
int PairedOffset = getLdStOffsetOp(*Paired).getImm();
int Offset = AArch64InstrInfo::getLdStOffsetOp(*I).getImm();
int PairedOffset = AArch64InstrInfo::getLdStOffsetOp(*Paired).getImm();
bool PairedIsUnscaled = TII->hasUnscaledLdStOffset(Paired->getOpcode());
if (IsUnscaled != PairedIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled. If
@ -982,7 +957,7 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
RtMI = &*I;
Rt2MI = &*Paired;
}
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
// Scale the immediate offset, if necessary.
if (TII->hasUnscaledLdStOffset(RtMI->getOpcode())) {
assert(!(OffsetImm % TII->getMemScale(*RtMI)) &&
@ -1140,12 +1115,14 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
assert(IsUnscaled == TII->hasUnscaledLdStOffset(*StoreI) &&
"Unsupported ld/st match");
assert(LoadSize <= StoreSize && "Invalid load size");
int UnscaledLdOffset = IsUnscaled
? getLdStOffsetOp(*LoadI).getImm()
: getLdStOffsetOp(*LoadI).getImm() * LoadSize;
int UnscaledStOffset = IsUnscaled
? getLdStOffsetOp(*StoreI).getImm()
: getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int UnscaledLdOffset =
IsUnscaled
? AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm()
: AArch64InstrInfo::getLdStOffsetOp(*LoadI).getImm() * LoadSize;
int UnscaledStOffset =
IsUnscaled
? AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm()
: AArch64InstrInfo::getLdStOffsetOp(*StoreI).getImm() * StoreSize;
int Width = LoadSize * 8;
Register DestReg =
IsStoreXReg ? Register(TRI->getMatchingSuperReg(
@ -1243,7 +1220,7 @@ bool AArch64LoadStoreOpt::findMatchingStore(
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator MBBI = I;
MachineInstr &LoadMI = *I;
Register BaseReg = getLdStBaseOp(LoadMI).getReg();
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(LoadMI).getReg();
// If the load is the first instruction in the block, there's obviously
// not any matching store.
@ -1272,7 +1249,8 @@ bool AArch64LoadStoreOpt::findMatchingStore(
// Also we can't handle stores without an immediate offset operand,
// while the operand might be the address for a global variable.
if (MI.mayStore() && isMatchingStore(LoadMI, MI) &&
BaseReg == getLdStBaseOp(MI).getReg() && getLdStOffsetOp(MI).isImm() &&
BaseReg == AArch64InstrInfo::getLdStBaseOp(MI).getReg() &&
AArch64InstrInfo::getLdStOffsetOp(MI).isImm() &&
isLdOffsetInRangeOfSt(LoadMI, MI, TII) &&
ModifiedRegUnits.available(getLdStRegOp(MI).getReg())) {
StoreI = MBBI;
@ -1539,8 +1517,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
bool MayLoad = FirstMI.mayLoad();
bool IsUnscaled = TII->hasUnscaledLdStOffset(FirstMI);
Register Reg = getLdStRegOp(FirstMI).getReg();
Register BaseReg = getLdStBaseOp(FirstMI).getReg();
int Offset = getLdStOffsetOp(FirstMI).getImm();
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(FirstMI).getReg();
int Offset = AArch64InstrInfo::getLdStOffsetOp(FirstMI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(FirstMI) : 1;
bool IsPromotableZeroStore = isPromotableZeroStoreInst(FirstMI);
@ -1575,7 +1553,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
getLdStOffsetOp(MI).isImm()) {
AArch64InstrInfo::getLdStOffsetOp(MI).isImm()) {
assert(MI.mayLoadOrStore() && "Expected memory operation.");
// If we've found another instruction with the same opcode, check to see
// if the base and offset are compatible with our starting instruction.
@ -1583,8 +1561,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// check for +1/-1. Make sure to check the new instruction offset is
// actually an immediate and not a symbolic reference destined for
// a relocation.
Register MIBaseReg = getLdStBaseOp(MI).getReg();
int MIOffset = getLdStOffsetOp(MI).getImm();
Register MIBaseReg = AArch64InstrInfo::getLdStBaseOp(MI).getReg();
int MIOffset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
bool MIIsUnscaled = TII->hasUnscaledLdStOffset(MI);
if (IsUnscaled != MIIsUnscaled) {
// We're trying to pair instructions that differ in how they are scaled.
@ -1615,15 +1593,16 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// can't be paired: bail and keep looking.
if (IsPreLdSt) {
bool IsOutOfBounds = MIOffset != TII->getMemScale(MI);
bool IsBaseRegUsed =
!UsedRegUnits.available(getLdStBaseOp(MI).getReg());
bool IsBaseRegModified =
!ModifiedRegUnits.available(getLdStBaseOp(MI).getReg());
bool IsBaseRegUsed = !UsedRegUnits.available(
AArch64InstrInfo::getLdStBaseOp(MI).getReg());
bool IsBaseRegModified = !ModifiedRegUnits.available(
AArch64InstrInfo::getLdStBaseOp(MI).getReg());
// If the stored value and the address of the second instruction is
// the same, it needs to be using the updated register and therefore
// it must not be folded.
bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
getLdStBaseOp(MI).getReg());
bool IsMIRegTheSame =
TRI->regsOverlap(getLdStRegOp(MI).getReg(),
AArch64InstrInfo::getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
@ -1776,7 +1755,7 @@ maybeMoveCFI(MachineInstr &MI, MachineBasicBlock::iterator MaybeCFI) {
MaybeCFI->getOpcode() != TargetOpcode::CFI_INSTRUCTION ||
!(MI.getFlag(MachineInstr::FrameSetup) ||
MI.getFlag(MachineInstr::FrameDestroy)) ||
getLdStBaseOp(MI).getReg() != AArch64::SP)
AArch64InstrInfo::getLdStBaseOp(MI).getReg() != AArch64::SP)
return End;
const MachineFunction &MF = *MI.getParent()->getParent();
@ -1823,12 +1802,12 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineInstrBuilder MIB;
int Scale, MinOffset, MaxOffset;
getPrePostIndexedMemOpInfo(*I, Scale, MinOffset, MaxOffset);
if (!isPairedLdSt(*I)) {
if (!AArch64InstrInfo::isPairedLdSt(*I)) {
// Non-paired instruction.
MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I))
.add(getLdStBaseOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
@ -1838,7 +1817,7 @@ AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I,
.add(getLdStRegOp(*Update))
.add(getLdStRegOp(*I, 0))
.add(getLdStRegOp(*I, 1))
.add(getLdStBaseOp(*I))
.add(AArch64InstrInfo::getLdStBaseOp(*I))
.addImm(Value / Scale)
.setMemRefs(I->memoperands())
.setMIFlags(I->mergeFlagsWith(*Update));
@ -1928,8 +1907,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
MachineInstr &MemMI = *I;
MachineBasicBlock::iterator MBBI = I;
Register BaseReg = getLdStBaseOp(MemMI).getReg();
int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * TII->getMemScale(MemMI);
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int MIUnscaledOffset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm() *
TII->getMemScale(MemMI);
// Scan forward looking for post-index opportunities. Updating instructions
// can't be formed if the memory instruction doesn't have the offset we're
@ -1944,7 +1924,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
// behavior in this case unlike normal stores, and always performs writeback
// after reading the source register value.
if (!isTagStore(MemMI) && MemMI.getOpcode() != AArch64::STGPi) {
bool IsPairedInsn = isPairedLdSt(MemMI);
bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
@ -2005,8 +1985,8 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
MachineBasicBlock::iterator MBBI = I;
MachineFunction &MF = *MemMI.getMF();
Register BaseReg = getLdStBaseOp(MemMI).getReg();
int Offset = getLdStOffsetOp(MemMI).getImm();
Register BaseReg = AArch64InstrInfo::getLdStBaseOp(MemMI).getReg();
int Offset = AArch64InstrInfo::getLdStOffsetOp(MemMI).getImm();
// If the load/store is the first instruction in the block, there's obviously
// not any matching update. Ditto if the memory offset isn't zero.
@ -2015,7 +1995,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
// If the base register overlaps a destination register, we can't
// merge the update.
if (!isTagStore(MemMI)) {
bool IsPairedInsn = isPairedLdSt(MemMI);
bool IsPairedInsn = AArch64InstrInfo::isPairedLdSt(MemMI);
for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) {
Register DestReg = getLdStRegOp(MemMI, i).getReg();
if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg))
@ -2085,7 +2065,7 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
// Make sure this is a reg+imm.
// FIXME: It is possible to extend it to handle reg+reg cases.
if (!getLdStOffsetOp(MI).isImm())
if (!AArch64InstrInfo::getLdStOffsetOp(MI).isImm())
return false;
// Look backward up to LdStLimit instructions.
@ -2139,7 +2119,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
// range, plus allow an extra one in case we find a later insn that matches
// with Offset-1)
bool IsUnscaled = TII->hasUnscaledLdStOffset(MI);
int Offset = getLdStOffsetOp(MI).getImm();
int Offset = AArch64InstrInfo::getLdStOffsetOp(MI).getImm();
int OffsetStride = IsUnscaled ? TII->getMemScale(MI) : 1;
// Allow one more for offset.
if (Offset > 0)
@ -2206,7 +2186,8 @@ bool AArch64LoadStoreOpt::tryToMergeLdStUpdate
// The immediate in the load/store is scaled by the size of the memory
// operation. The immediate in the add we're looking for,
// however, is not, so adjust here.
int UnscaledOffset = getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
int UnscaledOffset =
AArch64InstrInfo::getLdStOffsetOp(MI).getImm() * TII->getMemScale(MI);
// Look forward to try to find a pre-index instruction. For example,
// ldr x1, [x0, #64]


@ -7,10 +7,57 @@
//===----------------------------------------------------------------------===//
#include "AArch64MachineScheduler.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
using namespace llvm;
static bool needReorderStoreMI(const MachineInstr *MI) {
if (!MI)
return false;
switch (MI->getOpcode()) {
default:
return false;
case AArch64::STURQi:
case AArch64::STRQui:
if (MI->getMF()->getSubtarget<AArch64Subtarget>().isStoreAddressAscend())
return false;
LLVM_FALLTHROUGH;
case AArch64::STPQi:
return AArch64InstrInfo::getLdStOffsetOp(*MI).getType() == MachineOperand::MO_Immediate;
}
return false;
}
// Return true if two stores with the same base address may have overlapping writes.
static bool mayOverlapWrite(const MachineInstr &MI0, const MachineInstr &MI1,
int64_t &Off0, int64_t &Off1) {
const MachineOperand &Base0 = AArch64InstrInfo::getLdStBaseOp(MI0);
const MachineOperand &Base1 = AArch64InstrInfo::getLdStBaseOp(MI1);
// Conservatively assume the writes may overlap when the two stores do not use the same base register.
if (!Base0.isIdenticalTo(Base1))
return true;
int StoreSize0 = AArch64InstrInfo::getMemScale(MI0);
int StoreSize1 = AArch64InstrInfo::getMemScale(MI1);
Off0 = AArch64InstrInfo::hasUnscaledLdStOffset(MI0.getOpcode())
? AArch64InstrInfo::getLdStOffsetOp(MI0).getImm()
: AArch64InstrInfo::getLdStOffsetOp(MI0).getImm() * StoreSize0;
Off1 = AArch64InstrInfo::hasUnscaledLdStOffset(MI1.getOpcode())
? AArch64InstrInfo::getLdStOffsetOp(MI1).getImm()
: AArch64InstrInfo::getLdStOffsetOp(MI1).getImm() * StoreSize1;
const MachineInstr &MI = (Off0 < Off1) ? MI0 : MI1;
int Multiples = AArch64InstrInfo::isPairedLdSt(MI) ? 2 : 1;
int StoreSize = AArch64InstrInfo::getMemScale(MI) * Multiples;
return llabs(Off0 - Off1) < StoreSize;
}
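
A worked instance of the overlap test (hypothetical values): two STPQi stores
to the same base with scaled immediates 0 and 2, i.e. byte offsets 0 and 32.
The lower store is paired, so it writes 16 * 2 = 32 bytes; the ranges touch
but do not overlap, which lets tryCandidate below order the two stores by
ascending offset:

constexpr int64_t Off0 = 0 * 16, Off1 = 2 * 16; // byte offsets 0 and 32
constexpr int64_t Delta = Off1 > Off0 ? Off1 - Off0 : Off0 - Off1;
constexpr int StoreSize = 16 * 2;               // paired Q store writes 32 bytes
static_assert(!(Delta < StoreSize), "writes do not overlap");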
bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
SchedCandidate &TryCand) {
bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
@ -18,20 +65,16 @@ bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
if (Cand.isValid()) {
MachineInstr *Instr0 = TryCand.SU->getInstr();
MachineInstr *Instr1 = Cand.SU->getInstr();
// When dealing with two STPqi's.
if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode() &&
Instr0->getOpcode() == AArch64::STPQi)
{
MachineOperand &Base0 = Instr0->getOperand(2);
MachineOperand &Base1 = Instr1->getOperand(2);
int64_t Off0 = Instr0->getOperand(3).getImm();
int64_t Off1 = Instr1->getOperand(3).getImm();
// With the same base address and non-overlapping writes.
if (Base0.isIdenticalTo(Base1) && llabs(Off0 - Off1) >= 2) {
TryCand.Reason = NodeOrder;
// Order them by ascending offsets.
return Off0 < Off1;
}
if (!needReorderStoreMI(Instr0) || !needReorderStoreMI(Instr1))
return OriginalResult;
int64_t Off0, Off1;
// With the same base address and non-overlapping writes.
if (!mayOverlapWrite(*Instr0, *Instr1, Off0, Off1)) {
TryCand.Reason = NodeOrder;
// Order them by ascending offsets.
return Off0 < Off1;
}
}