Avoid partial CPSR dependency from loop backedges. rdar://10357570

llvm-svn: 143145
This commit is contained in:
Evan Cheng 2011-10-27 21:21:05 +00:00
parent a054790390
commit f4807a19e8
2 changed files with 79 additions and 26 deletions

View File

@ -146,7 +146,8 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap; DenseMap<unsigned, unsigned> ReduceOpcodeMap;
bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred, bool is2Addr, ARMCC::CondCodes Pred,
@ -157,19 +158,21 @@ namespace {
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, bool LiveCPSR, const ReduceEntry &Entry, bool LiveCPSR,
MachineInstr *CPSRDef); MachineInstr *CPSRDef, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction. /// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef); bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction. /// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef); bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop);
/// ReduceMBB - Reduce width of instructions in the specified basic block. /// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB); bool ReduceMBB(MachineBasicBlock &MBB);
@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there /// In this case it would have been ok to narrow the mul.w to muls since there
/// are indirect RAW dependency between the muls and the mul.w /// are indirect RAW dependency between the muls and the mul.w
bool bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
if (!Def || !STI->avoidCPSRPartialUpdate()) bool FirstInSelfLoop) {
// FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
if (!STI->avoidCPSRPartialUpdate())
return false; return false;
if (!Def)
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
return FirstInSelfLoop;
SmallSet<unsigned, 2> Defs; SmallSet<unsigned, 2> Defs;
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = Def->getOperand(i); const MachineOperand &MO = Def->getOperand(i);
@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef) { bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop) {
unsigned Opc = MI->getOpcode(); unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) { if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise // If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce. // it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) { if (MI->getOperand(1).getReg() != ARM::SP) {
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true; return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
} }
// Try to reduce to tADDrSPi. // Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm(); unsigned Imm = MI->getOperand(2).getImm();
@ -535,12 +546,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) { switch (Opc) {
default: break; default: break;
case ARM::t2ADDSri: { case ARM::t2ADDSri: {
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true; return true;
// fallthrough // fallthrough
} }
case ARM::t2ADDSrr: case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
} }
} }
break; break;
@ -552,13 +563,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB: case ARM::t2UXTB:
case ARM::t2UXTH: case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0) if (MI->getOperand(2).getImm() == 0)
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break; break;
case ARM::t2MOVi16: case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as // Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses. // globals' addresses.
if (MI->getOperand(1).isImm()) if (MI->getOperand(1).isImm())
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break; break;
case ARM::t2CMPrr: { case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two // Try to reduce to the lo-reg only version first. Why there are two
@ -568,9 +579,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// source insn opcode. So for now, we hack a local entry record to use. // source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry = static const ReduceEntry NarrowEntry =
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true; return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
} }
} }
return false; return false;
@ -579,7 +590,8 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef) { bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false; return false;
@ -637,7 +649,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit // Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set. // instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
canAddPseudoFlagDep(CPSRDef, MI)) canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false; return false;
// Add the 16-bit instruction. // Add the 16-bit instruction.
@ -674,7 +686,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef) { bool LiveCPSR, MachineInstr *CPSRDef,
bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false; return false;
@ -727,7 +740,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit // Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set. // instructions which has the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
canAddPseudoFlagDep(CPSRDef, MI)) canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false; return false;
// Add the 16-bit instruction. // Add the 16-bit instruction.
@ -818,6 +831,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0; MachineInstr *CPSRDef = 0;
// If this BB loops back to itself, conservatively avoid narrowing the
// first instruction that does partial flag update.
bool IsSelfLoop = MBB.isSuccessor(&MBB);
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII; MachineBasicBlock::iterator NextMII;
for (; MII != E; MII = NextMII) { for (; MII != E; MII = NextMII) {
@ -832,7 +848,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
const ReduceEntry &Entry = ReduceTable[OPI->second]; const ReduceEntry &Entry = ReduceTable[OPI->second];
// Ignore "special" cases for now. // Ignore "special" cases for now.
if (Entry.Special) { if (Entry.Special) {
if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true; Modified = true;
MachineBasicBlock::iterator I = prior(NextMII); MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I; MI = &*I;
@ -842,7 +858,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Try to transform to a 16-bit two-address instruction. // Try to transform to a 16-bit two-address instruction.
if (Entry.NarrowOpc2 && if (Entry.NarrowOpc2 &&
ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true; Modified = true;
MachineBasicBlock::iterator I = prior(NextMII); MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I; MI = &*I;
@ -851,7 +867,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Try to transform to a 16-bit non-two-address instruction. // Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 && if (Entry.NarrowOpc1 &&
ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true; Modified = true;
MachineBasicBlock::iterator I = prior(NextMII); MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I; MI = &*I;
@ -861,12 +877,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
ProcessNext: ProcessNext:
bool DefCPSR = false; bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
if (MI->getDesc().isCall()) if (MI->getDesc().isCall()) {
// Calls don't really set CPSR. // Calls don't really set CPSR.
CPSRDef = 0; CPSRDef = 0;
else if (DefCPSR) IsSelfLoop = false;
} else if (DefCPSR) {
// This is the last CPSR defining instruction. // This is the last CPSR defining instruction.
CPSRDef = MI; CPSRDef = MI;
IsSelfLoop = false;
}
} }
return Modified; return Modified;

View File

@ -3,9 +3,9 @@
; dependency) when it isn't dependent on last CPSR defining instruction. ; dependency) when it isn't dependent on last CPSR defining instruction.
; rdar://8928208 ; rdar://8928208
define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
entry: entry:
; CHECK: t: ; CHECK: t1:
; CHECK: muls [[REG:(r[0-9]+)]], r2, r3 ; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1 ; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1
; CHECK-NEXT: muls r0, [[REG2]], [[REG]] ; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
@ -14,3 +14,37 @@ define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
%2 = mul nsw i32 %0, %1 %2 = mul nsw i32 %0, %1
ret i32 %2 ret i32 %2
} }
; Avoid partial CPSR dependency via loop backedge.
; rdar://10357570
; Regression input: a loop whose body block branches back to itself.
; Each iteration loads four consecutive i32 values starting at %ptr1,
; multiplies them together with three chained multiplies, stores the product
; through %ptr2, then advances %ptr1 by four elements and moves %ptr2 back by
; one element. The loop is skipped entirely when %ptr2 is null on entry and
; exits once the stepped-back %ptr2 compares equal to null.
; %c is not referenced anywhere in the body.
define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
entry:
; CHECK: t2:
; Guard: do not enter the loop at all if %ptr2 is null.
%tobool7 = icmp eq i32* %ptr2, null
br i1 %tobool7, label %while.end, label %while.body
while.body:
; The FileCheck directives below expect a `mul r<N>` to appear after the
; while.body label in the output, and no `muls` anywhere after that match.
; NOTE(review): presumably this is because the flag-setting `muls` would add a
; partial CPSR dependency carried around the backedge -- confirm against the
; size-reduction pass.
; CHECK: while.body
; CHECK: mul r{{[0-9]+}}
; CHECK-NOT: muls
; Loop-carried pointers: the first incoming value comes from the previous
; iteration (the backedge), the second from the entry block.
%ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
%ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
; Load elements 0..3 relative to the current %ptr1 position.
%0 = load i32* %ptr1.addr.09, align 4
%arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
%1 = load i32* %arrayidx1, align 4
%arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
%2 = load i32* %arrayidx3, align 4
%arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
%3 = load i32* %arrayidx4, align 4
; Next iteration's %ptr1 position: four elements further on.
%add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
; Three dependent multiplies; none of their results feeds a comparison.
%mul = mul i32 %1, %0
%mul5 = mul i32 %mul, %2
%mul6 = mul i32 %mul5, %3
store i32 %mul6, i32* %ptr2.addr.08, align 4
; Step %ptr2 back by one element and loop until it compares equal to null.
%incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
%tobool = icmp eq i32* %incdec.ptr, null
br i1 %tobool, label %while.end, label %while.body
while.end:
ret void
}