forked from OSchip/llvm-project
ARMLoadStoreOpt: Merge subs/adds into LDRD/STRD; Factor out common code
Re-apply of r241928 which had to be reverted because of the r241926 revert. This commit factors out common code from MergeBaseUpdateLoadStore() and MergeBaseUpdateLSMultiple() and introduces a new function MergeBaseUpdateLSDouble() which merges adds/subs preceding/following a strd/ldrd instruction into an strd/ldrd instruction with writeback where possible. Differential Revision: http://reviews.llvm.org/D10676 llvm-svn: 242743
This commit is contained in:
parent
e40d89ef9b
commit
a50d2203fa
|
@ -118,6 +118,7 @@ namespace {
|
|||
};
|
||||
SpecificBumpPtrAllocator<MergeCandidate> Allocator;
|
||||
SmallVector<const MergeCandidate*,4> Candidates;
|
||||
SmallVector<MachineInstr*,4> MergeBaseCandidates;
|
||||
|
||||
void moveLiveRegsBefore(const MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::const_iterator Before);
|
||||
|
@ -140,6 +141,7 @@ namespace {
|
|||
MachineBasicBlock::iterator &MBBI);
|
||||
bool MergeBaseUpdateLoadStore(MachineInstr *MI);
|
||||
bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
|
||||
bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
|
||||
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
|
||||
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
|
||||
};
|
||||
|
@ -997,76 +999,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
|
|||
} while (SIndex < EIndex);
|
||||
}
|
||||
|
||||
/// Returns true when \p MI is a subtract-immediate that lowers \p Base by
/// exactly \p Bytes under the predicate (\p Pred, \p PredReg).
///
/// A null \p MI, an opcode other than tSUBi8 / t2SUBri / SUBri / tSUBspi, a
/// zero \p Bytes, or a \p Bytes that reaches a non-zero \p Limit all yield
/// false. Every accepted opcode except tSUBspi is additionally rejected when
/// the instruction defines CPSR.
static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  if (MI == nullptr)
    return false;

  // Classify the opcode: the multiplier applied to its immediate, and whether
  // a CPSR definition disqualifies it.
  unsigned ImmScale;
  bool RejectIfDefsCPSR;
  switch (MI->getOpcode()) {
  case ARM::tSUBi8:
    ImmScale = 4; // FIXME (scale kept exactly as in the previous version)
    RejectIfDefsCPSR = true;
    break;
  case ARM::t2SUBri:
  case ARM::SUBri:
    ImmScale = 1;
    RejectIfDefsCPSR = true;
    break;
  case ARM::tSUBspi:
    ImmScale = 4;
    RejectIfDefsCPSR = false;
    break;
  default:
    return false;
  }

  // A zero adjustment never matches; a non-zero Limit is an exclusive upper
  // bound on the adjustment.
  if (Bytes == 0)
    return false;
  if (Limit != 0 && Bytes >= Limit)
    return false;

  // Destination and source must both be the base register.
  if (MI->getOperand(0).getReg() != Base)
    return false;
  if (MI->getOperand(1).getReg() != Base)
    return false;

  // The scaled immediate must equal the requested byte count.
  if ((MI->getOperand(2).getImm() * ImmScale) != Bytes)
    return false;

  // The predicate must be the one the caller asked for.
  unsigned InstrPredReg = 0;
  if (getInstrPredicate(MI, InstrPredReg) != Pred)
    return false;
  if (InstrPredReg != PredReg)
    return false;

  if (RejectIfDefsCPSR && definesCPSR(MI))
    return false;
  return true;
}
|
||||
|
||||
/// Returns true when \p MI is an add-immediate that raises \p Base by exactly
/// \p Bytes under the predicate (\p Pred, \p PredReg).
///
/// A null \p MI, an opcode other than tADDi8 / t2ADDri / ADDri / tADDspi, a
/// zero \p Bytes, or a \p Bytes that reaches a non-zero \p Limit all yield
/// false. Every accepted opcode except tADDspi is additionally rejected when
/// the instruction defines CPSR.
static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
                                unsigned Bytes, unsigned Limit,
                                ARMCC::CondCodes Pred, unsigned PredReg) {
  if (MI == nullptr)
    return false;

  // Classify the opcode: the multiplier applied to its immediate, and whether
  // a CPSR definition disqualifies it.
  unsigned ImmScale;
  bool RejectIfDefsCPSR;
  switch (MI->getOpcode()) {
  case ARM::tADDi8:
    ImmScale = 4; // FIXME (scale kept exactly as in the previous version)
    RejectIfDefsCPSR = true;
    break;
  case ARM::t2ADDri:
  case ARM::ADDri:
    ImmScale = 1;
    RejectIfDefsCPSR = true;
    break;
  case ARM::tADDspi:
    ImmScale = 4;
    RejectIfDefsCPSR = false;
    break;
  default:
    return false;
  }

  // A zero adjustment never matches; a non-zero Limit is an exclusive upper
  // bound on the adjustment.
  if (Bytes == 0)
    return false;
  if (Limit != 0 && Bytes >= Limit)
    return false;

  // Destination and source must both be the base register.
  if (MI->getOperand(0).getReg() != Base)
    return false;
  if (MI->getOperand(1).getReg() != Base)
    return false;

  // The scaled immediate must equal the requested byte count.
  if ((MI->getOperand(2).getImm() * ImmScale) != Bytes)
    return false;

  // The predicate must be the one the caller asked for.
  unsigned InstrPredReg = 0;
  if (getInstrPredicate(MI, InstrPredReg) != Pred)
    return false;
  if (InstrPredReg != PredReg)
    return false;

  if (RejectIfDefsCPSR && definesCPSR(MI))
    return false;
  return true;
}
|
||||
|
||||
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
|
||||
ARM_AM::AMSubMode Mode) {
|
||||
switch (Opc) {
|
||||
|
@ -1134,6 +1066,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
|
|||
}
|
||||
}
|
||||
|
||||
/// Check if the given instruction increments or decrements a register and
|
||||
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
|
||||
/// generated by the instruction are possibly read as well.
|
||||
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg) {
|
||||
bool CheckCPSRDef;
|
||||
int Scale;
|
||||
switch (MI.getOpcode()) {
|
||||
case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
|
||||
case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
|
||||
case ARM::t2SUBri:
|
||||
case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
|
||||
case ARM::t2ADDri:
|
||||
case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
|
||||
case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
|
||||
case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
|
||||
default: return 0;
|
||||
}
|
||||
|
||||
unsigned MIPredReg;
|
||||
if (MI.getOperand(0).getReg() != Reg ||
|
||||
MI.getOperand(1).getReg() != Reg ||
|
||||
getInstrPredicate(&MI, MIPredReg) != Pred ||
|
||||
MIPredReg != PredReg)
|
||||
return 0;
|
||||
|
||||
if (CheckCPSRDef && definesCPSR(&MI))
|
||||
return 0;
|
||||
return MI.getOperand(2).getImm() * Scale;
|
||||
}
|
||||
|
||||
/// Searches for an increment or decrement of \p Reg before \p MBBI.
|
||||
static MachineBasicBlock::iterator
|
||||
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
|
||||
Offset = 0;
|
||||
MachineBasicBlock &MBB = *MBBI->getParent();
|
||||
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
|
||||
MachineBasicBlock::iterator EndMBBI = MBB.end();
|
||||
if (MBBI == BeginMBBI)
|
||||
return EndMBBI;
|
||||
|
||||
// Skip debug values.
|
||||
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
|
||||
while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
|
||||
--PrevMBBI;
|
||||
|
||||
Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
|
||||
return Offset == 0 ? EndMBBI : PrevMBBI;
|
||||
}
|
||||
|
||||
/// Searches for a increment or decrement of \p Reg after \p MBBI.
|
||||
static MachineBasicBlock::iterator
|
||||
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
|
||||
ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
|
||||
Offset = 0;
|
||||
MachineBasicBlock &MBB = *MBBI->getParent();
|
||||
MachineBasicBlock::iterator EndMBBI = MBB.end();
|
||||
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
|
||||
// Skip debug values.
|
||||
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
|
||||
++NextMBBI;
|
||||
if (NextMBBI == EndMBBI)
|
||||
return EndMBBI;
|
||||
|
||||
Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
|
||||
return Offset == 0 ? EndMBBI : NextMBBI;
|
||||
}
|
||||
|
||||
/// Fold preceding/trailing inc/dec of base register into the
|
||||
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
|
||||
///
|
||||
|
@ -1153,7 +1154,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
|
|||
const MachineOperand &BaseOP = MI->getOperand(0);
|
||||
unsigned Base = BaseOP.getReg();
|
||||
bool BaseKill = BaseOP.isKill();
|
||||
unsigned Bytes = getLSMultipleTransferSize(MI);
|
||||
unsigned PredReg = 0;
|
||||
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
|
@ -1165,49 +1165,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
|
|||
if (MI->getOperand(i).getReg() == Base)
|
||||
return false;
|
||||
|
||||
bool DoMerge = false;
|
||||
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
|
||||
|
||||
// Try merging with the previous instruction.
|
||||
int Bytes = getLSMultipleTransferSize(MI);
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
|
||||
MachineBasicBlock::iterator MBBI(MI);
|
||||
if (MBBI != BeginMBBI) {
|
||||
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
|
||||
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
|
||||
--PrevMBBI;
|
||||
if (Mode == ARM_AM::ia &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
Mode = ARM_AM::db;
|
||||
DoMerge = true;
|
||||
} else if (Mode == ARM_AM::ib &&
|
||||
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
Mode = ARM_AM::da;
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge)
|
||||
MBB.erase(PrevMBBI);
|
||||
int Offset;
|
||||
MachineBasicBlock::iterator MergeInstr
|
||||
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
|
||||
ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
|
||||
if (Mode == ARM_AM::ia && Offset == -Bytes) {
|
||||
Mode = ARM_AM::db;
|
||||
} else if (Mode == ARM_AM::ib && Offset == -Bytes) {
|
||||
Mode = ARM_AM::da;
|
||||
} else {
|
||||
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
|
||||
if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
|
||||
((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try merging with the next instruction.
|
||||
MachineBasicBlock::iterator EndMBBI = MBB.end();
|
||||
if (!DoMerge && MBBI != EndMBBI) {
|
||||
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
|
||||
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
|
||||
++NextMBBI;
|
||||
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
|
||||
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
} else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
|
||||
isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge)
|
||||
MBB.erase(NextMBBI);
|
||||
}
|
||||
|
||||
if (!DoMerge)
|
||||
return false;
|
||||
MBB.erase(MergeInstr);
|
||||
|
||||
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
|
||||
|
@ -1285,7 +1260,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
|
||||
unsigned Base = getLoadStoreBaseOp(*MI).getReg();
|
||||
bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
|
||||
unsigned Bytes = getLSMultipleTransferSize(MI);
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
DebugLoc DL = MI->getDebugLoc();
|
||||
bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
|
||||
|
@ -1297,7 +1271,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
|
||||
return false;
|
||||
|
||||
bool isLd = isLoadSingle(Opcode);
|
||||
// Can't do the merge if the destination register is the same as the would-be
|
||||
// writeback register.
|
||||
if (MI->getOperand(0).getReg() == Base)
|
||||
|
@ -1305,55 +1278,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
|
||||
unsigned PredReg = 0;
|
||||
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
|
||||
bool DoMerge = false;
|
||||
ARM_AM::AddrOpc AddSub = ARM_AM::add;
|
||||
unsigned NewOpc = 0;
|
||||
// AM2 - 12 bits, thumb2 - 8 bits.
|
||||
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
|
||||
|
||||
// Try merging with the previous instruction.
|
||||
int Bytes = getLSMultipleTransferSize(MI);
|
||||
MachineBasicBlock &MBB = *MI->getParent();
|
||||
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
|
||||
MachineBasicBlock::iterator MBBI(MI);
|
||||
if (MBBI != BeginMBBI) {
|
||||
MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
|
||||
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
|
||||
--PrevMBBI;
|
||||
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
AddSub = ARM_AM::sub;
|
||||
} else if (!isAM5 &&
|
||||
isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge) {
|
||||
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
|
||||
MBB.erase(PrevMBBI);
|
||||
}
|
||||
int Offset;
|
||||
MachineBasicBlock::iterator MergeInstr
|
||||
= findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
|
||||
unsigned NewOpc;
|
||||
if (!isAM5 && Offset == Bytes) {
|
||||
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
|
||||
} else if (Offset == -Bytes) {
|
||||
NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
|
||||
} else {
|
||||
MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
|
||||
if (Offset == Bytes) {
|
||||
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
|
||||
} else if (!isAM5 && Offset == -Bytes) {
|
||||
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
MBB.erase(MergeInstr);
|
||||
|
||||
// Try merging with the next instruction.
|
||||
MachineBasicBlock::iterator EndMBBI = MBB.end();
|
||||
if (!DoMerge && MBBI != EndMBBI) {
|
||||
MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
|
||||
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
|
||||
++NextMBBI;
|
||||
if (!isAM5 &&
|
||||
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
|
||||
DoMerge = true;
|
||||
AddSub = ARM_AM::sub;
|
||||
} else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
|
||||
DoMerge = true;
|
||||
}
|
||||
if (DoMerge) {
|
||||
NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
|
||||
MBB.erase(NextMBBI);
|
||||
}
|
||||
}
|
||||
|
||||
if (!DoMerge)
|
||||
return false;
|
||||
ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
|
||||
|
||||
bool isLd = isLoadSingle(Opcode);
|
||||
if (isAM5) {
|
||||
// VLDM[SD]_UPD, VSTM[SD]_UPD
|
||||
// (There are no base-updating versions of VLDR/VSTR instructions, but the
|
||||
|
@ -1370,18 +1319,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
if (isAM2) {
|
||||
// LDR_PRE, LDR_POST
|
||||
if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
|
||||
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
|
||||
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
|
||||
.addReg(Base, RegState::Define)
|
||||
.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
|
||||
} else {
|
||||
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
|
||||
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
|
||||
.addReg(Base, RegState::Define)
|
||||
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
|
||||
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
|
||||
}
|
||||
} else {
|
||||
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
|
||||
// t2LDR_PRE, t2LDR_POST
|
||||
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
|
||||
.addReg(Base, RegState::Define)
|
||||
|
@ -1393,13 +1340,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
// the vestigial zero-reg offset register. When that's fixed, this clause
|
||||
// can be removed entirely.
|
||||
if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
|
||||
int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
|
||||
int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
|
||||
// STR_PRE, STR_POST
|
||||
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
|
||||
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
|
||||
.addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
|
||||
.addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
|
||||
} else {
|
||||
int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
|
||||
// t2STR_PRE, t2STR_POST
|
||||
BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
|
||||
.addReg(MO.getReg(), getKillRegState(MO.isKill()))
|
||||
|
@ -1411,6 +1357,66 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Try to fold an adjacent add/sub of the base register into a t2LDRDi8 or
/// t2STRDi8, replacing the pair with the pre- or post-indexed writeback form.
///
/// \p MI must be a t2LDRDi8 or t2STRDi8 (asserted). The fold is attempted only
/// when the instruction's immediate offset is 0 and neither transferred
/// register is the base register. An adjustment of +8 or -8 found directly
/// before \p MI selects the _PRE opcode; failing that, one found directly
/// after \p MI selects the _POST opcode.
///
/// \returns true if the add/sub and \p MI were erased and replaced by a new
/// writeback instruction; false if nothing was changed.
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");

  // Only a plain [base] access (zero immediate) is considered.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  const MachineOperand &BaseOp = MI.getOperand(2);
  const unsigned BaseReg = BaseOp.getReg();

  // Behaviour for writeback is undefined if the base register is the same as
  // one of the transferred registers.
  if (BaseReg == Reg0Op.getReg() || BaseReg == Reg1Op.getReg())
    return false;

  unsigned PredReg;
  const ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);

  MachineBasicBlock &Block = *MI.getParent();
  MachineBasicBlock::iterator InsertPos(MI);
  const bool IsLoad = Opcode == ARM::t2LDRDi8;

  // Prefer an update in front of MI (pre-indexed); otherwise try the one
  // behind it (post-indexed). Only +/-8 byte adjustments qualify.
  int Offset;
  unsigned NewOpc;
  MachineBasicBlock::iterator BaseUpdate =
      findIncDecBefore(InsertPos, BaseReg, Pred, PredReg, Offset);
  if (Offset == 8 || Offset == -8) {
    NewOpc = IsLoad ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    BaseUpdate = findIncDecAfter(InsertPos, BaseReg, Pred, PredReg, Offset);
    if (Offset != 8 && Offset != -8)
      return false;
    NewOpc = IsLoad ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  }

  // The add/sub is now redundant; drop it before emitting the replacement.
  Block.erase(BaseUpdate);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(Block, InsertPos, DL, TII->get(NewOpc));

  // Loads list the transferred registers first and the written-back base
  // afterwards; stores list the written-back base first.
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op);
    MIB.addOperand(Reg1Op);
    MIB.addReg(BaseReg, RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseReg, RegState::Define);
    MIB.addOperand(Reg0Op);
    MIB.addOperand(Reg1Op);
  }

  // Base register use, folded offset, then the predicate operands.
  MIB.addReg(BaseReg, RegState::Kill);
  MIB.addImm(Offset);
  MIB.addImm(Pred);
  MIB.addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Carry the implicit operands and memory operands over to the new
  // instruction.
  for (const MachineOperand &ImplicitOp : MI.implicit_operands())
    MIB.addOperand(ImplicitOp);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  // Finally remove the original non-writeback instruction.
  Block.erase(InsertPos);
  return true;
}
|
||||
|
||||
/// Returns true if instruction is a memory operation that this pass is capable
|
||||
/// of operating on.
|
||||
static bool isMemoryOp(const MachineInstr *MI) {
|
||||
|
@ -1618,6 +1624,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||
ARMCC::CondCodes CurrPred = ARMCC::AL;
|
||||
unsigned Position = 0;
|
||||
assert(Candidates.size() == 0);
|
||||
assert(MergeBaseCandidates.size() == 0);
|
||||
LiveRegsValid = false;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
|
||||
|
@ -1696,8 +1703,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||
MBBI = I;
|
||||
--Position;
|
||||
// Fallthrough to look into existing chain.
|
||||
} else if (MBBI->isDebugValue())
|
||||
} else if (MBBI->isDebugValue()) {
|
||||
continue;
|
||||
} else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
|
||||
MBBI->getOpcode() == ARM::t2STRDi8) {
|
||||
// ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
|
||||
// remember them because we may still be able to merge add/sub into them.
|
||||
MergeBaseCandidates.push_back(MBBI);
|
||||
}
|
||||
|
||||
|
||||
// If we are here then the chain is broken; Extract candidates for a merge.
|
||||
if (MemOps.size() > 0) {
|
||||
|
@ -1728,7 +1742,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||
if (Merged) {
|
||||
Changed = true;
|
||||
unsigned Opcode = Merged->getOpcode();
|
||||
if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
|
||||
if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
|
||||
MergeBaseUpdateLSDouble(*Merged);
|
||||
else
|
||||
MergeBaseUpdateLSMultiple(Merged);
|
||||
} else {
|
||||
for (MachineInstr *MI : Candidate->Instrs) {
|
||||
|
@ -1743,6 +1759,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
|
|||
}
|
||||
}
|
||||
Candidates.clear();
|
||||
// Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
|
||||
for (MachineInstr *MI : MergeBaseCandidates)
|
||||
MergeBaseUpdateLSDouble(*MI);
|
||||
MergeBaseCandidates.clear();
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
|
|
@ -112,10 +112,10 @@ entry:
|
|||
}
|
||||
|
||||
; CHECK-LABEL: strd_spill_ldrd_reload:
|
||||
; A8: strd r1, r0, [sp]
|
||||
; M3: strd r1, r0, [sp]
|
||||
; BASIC: strd r1, r0, [sp]
|
||||
; GREEDY: strd r0, r1, [sp]
|
||||
; A8: strd r1, r0, [sp, #-8]!
|
||||
; M3: strd r1, r0, [sp, #-8]!
|
||||
; BASIC: strd r1, r0, [sp, #-8]!
|
||||
; GREEDY: strd r0, r1, [sp, #-8]!
|
||||
; CHECK: @ InlineAsm Start
|
||||
; CHECK: @ InlineAsm End
|
||||
; A8: ldrd r2, r1, [sp]
|
||||
|
@ -131,5 +131,53 @@ define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
|
|||
ret void
|
||||
}
|
||||
|
||||
declare void @extfunc2(i32*, i32, i32)
|
||||
|
||||
; Loads two adjacent i32 values (%p0 and %p0 + 1 element) and passes a pointer
; two i32 elements (8 bytes) below %p0 to the call. The expected output is a
; single post-indexed ldrd whose writeback is #-8.
; CHECK-LABEL: ldrd_postupdate_dec:
|
||||
; CHECK: ldrd r1, r2, [r0], #-8
|
||||
; CHECK-NEXT: bl{{x?}} _extfunc
|
||||
define void @ldrd_postupdate_dec(i32* %p0) {
|
||||
%p0.1 = getelementptr i32, i32* %p0, i32 1
|
||||
%v0 = load i32, i32* %p0
|
||||
|
||||
%v1 = load i32, i32* %p0.1
|
||||
; Pointer adjusted downwards by two i32 elements, used only after the loads.
%p1 = getelementptr i32, i32* %p0, i32 -2
|
||||
call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Loads two adjacent i32 values (%p0 and %p0 + 1 element) and passes a pointer
; two i32 elements (8 bytes) above %p0 to the call. The expected output is a
; single post-indexed ldrd whose writeback is #8.
; CHECK-LABEL: ldrd_postupdate_inc:
|
||||
; CHECK: ldrd r1, r2, [r0], #8
|
||||
; CHECK-NEXT: bl{{x?}} _extfunc
|
||||
define void @ldrd_postupdate_inc(i32* %p0) {
|
||||
%p0.1 = getelementptr i32, i32* %p0, i32 1
|
||||
%v0 = load i32, i32* %p0
|
||||
|
||||
%v1 = load i32, i32* %p0.1
|
||||
; Pointer adjusted upwards by two i32 elements, used only after the loads.
%p1 = getelementptr i32, i32* %p0, i32 2
|
||||
call void @extfunc2(i32* %p1, i32 %v0, i32 %v1)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Stores %v0 and %v1 to two adjacent i32 slots (%p0 and %p0 + 1 element) and
; returns a pointer two i32 elements (8 bytes) below %p0. The expected output
; is a single post-indexed strd whose writeback is #-8, followed directly by
; the return.
; CHECK-LABEL: strd_postupdate_dec:
|
||||
; CHECK: strd r1, r2, [r0], #-8
|
||||
; CHECK-NEXT: bx lr
|
||||
define i32* @strd_postupdate_dec(i32* %p0, i32 %v0, i32 %v1) {
|
||||
%p0.1 = getelementptr i32, i32* %p0, i32 1
|
||||
store i32 %v0, i32* %p0
|
||||
|
||||
store i32 %v1, i32* %p0.1
|
||||
; Pointer adjusted downwards by two i32 elements; this is the return value.
%p1 = getelementptr i32, i32* %p0, i32 -2
|
||||
ret i32* %p1
|
||||
}
|
||||
|
||||
; Stores %v0 and %v1 to two adjacent i32 slots (%p0 and %p0 + 1 element) and
; returns a pointer two i32 elements (8 bytes) above %p0. The expected output
; is a single post-indexed strd whose writeback is #8, followed directly by
; the return.
; CHECK-LABEL: strd_postupdate_inc:
|
||||
; CHECK: strd r1, r2, [r0], #8
|
||||
; CHECK-NEXT: bx lr
|
||||
define i32* @strd_postupdate_inc(i32* %p0, i32 %v0, i32 %v1) {
|
||||
%p0.1 = getelementptr i32, i32* %p0, i32 1
|
||||
store i32 %v0, i32* %p0
|
||||
|
||||
store i32 %v1, i32* %p0.1
|
||||
; Pointer adjusted upwards by two i32 elements; this is the return value.
%p1 = getelementptr i32, i32* %p0, i32 2
|
||||
ret i32* %p1
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
|
||||
|
|
Loading…
Reference in New Issue