[ARM][LowOverheadLoops] Use iterator for InsertPt.

Use a MachineBasicBlock::iterator instead of a MachineInstr* for the
position of our LoopStart instruction. NFCish, as it change debug
info.
This commit is contained in:
Sam Parker 2020-09-30 15:15:42 +01:00
parent 1e8fbb3b74
commit dfa2c14b8f
2 changed files with 60 additions and 45 deletions

View File

@ -355,7 +355,8 @@ namespace {
const TargetRegisterInfo &TRI; const TargetRegisterInfo &TRI;
const ARMBaseInstrInfo &TII; const ARMBaseInstrInfo &TII;
MachineFunction *MF = nullptr; MachineFunction *MF = nullptr;
MachineInstr *InsertPt = nullptr; MachineBasicBlock::iterator StartInsertPt;
MachineBasicBlock *StartInsertBB = nullptr;
MachineInstr *Start = nullptr; MachineInstr *Start = nullptr;
MachineInstr *Dec = nullptr; MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr; MachineInstr *End = nullptr;
@ -402,7 +403,7 @@ namespace {
// Check that the predication in the loop will be equivalent once we // Check that the predication in the loop will be equivalent once we
// perform the conversion. Also ensure that we can provide the number // perform the conversion. Also ensure that we can provide the number
// of elements to the loop start instruction. // of elements to the loop start instruction.
bool ValidateTailPredicate(MachineInstr *StartInsertPt); bool ValidateTailPredicate();
// Check that any values available outside of the loop will be the same // Check that any values available outside of the loop will be the same
// after tail predication conversion. // after tail predication conversion.
@ -585,10 +586,7 @@ static bool TryRemove(MachineInstr *MI, ReachingDefAnalysis &RDA,
return false; return false;
} }
bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) { bool LowOverheadLoop::ValidateTailPredicate() {
if (!StartInsertPt)
return false;
if (!IsTailPredicationLegal()) { if (!IsTailPredicationLegal()) {
LLVM_DEBUG(if (VCTPs.empty()) LLVM_DEBUG(if (VCTPs.empty())
dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n"; dbgs() << "ARM Loops: Didn't find a VCTP instruction.\n";
@ -632,19 +630,19 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
// The element count register maybe defined after InsertPt, in which case we // The element count register maybe defined after InsertPt, in which case we
// need to try to move either InsertPt or the def so that the [w|d]lstp can // need to try to move either InsertPt or the def so that the [w|d]lstp can
// use the value. // use the value.
MachineBasicBlock *InsertBB = StartInsertPt->getParent();
if (!RDA.isReachingDefLiveOut(StartInsertPt, NumElements)) { if (StartInsertPt != StartInsertBB->end() &&
if (auto *ElemDef = RDA.getLocalLiveOutMIDef(InsertBB, NumElements)) { !RDA.isReachingDefLiveOut(&*StartInsertPt, NumElements)) {
if (RDA.isSafeToMoveForwards(ElemDef, StartInsertPt)) { if (auto *ElemDef = RDA.getLocalLiveOutMIDef(StartInsertBB, NumElements)) {
if (RDA.isSafeToMoveForwards(ElemDef, &*StartInsertPt)) {
ElemDef->removeFromParent(); ElemDef->removeFromParent();
InsertBB->insert(MachineBasicBlock::iterator(StartInsertPt), ElemDef); StartInsertBB->insert(StartInsertPt, ElemDef);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: "
<< *ElemDef); << *ElemDef);
} else if (RDA.isSafeToMoveBackwards(StartInsertPt, ElemDef)) { } else if (RDA.isSafeToMoveBackwards(&*StartInsertPt, ElemDef)) {
StartInsertPt->removeFromParent(); StartInsertPt->removeFromParent();
InsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef), StartInsertBB->insertAfter(MachineBasicBlock::iterator(ElemDef),
StartInsertPt); &*StartInsertPt);
LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef);
} else { } else {
// If we fail to move an instruction and the element count is provided // If we fail to move an instruction and the element count is provided
@ -653,7 +651,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
MachineOperand Operand = ElemDef->getOperand(1); MachineOperand Operand = ElemDef->getOperand(1);
if (isMovRegOpcode(ElemDef->getOpcode()) && if (isMovRegOpcode(ElemDef->getOpcode()) &&
RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) == RDA.getUniqueReachingMIDef(ElemDef, Operand.getReg()) ==
RDA.getUniqueReachingMIDef(StartInsertPt, Operand.getReg())) { RDA.getUniqueReachingMIDef(&*StartInsertPt, Operand.getReg())) {
TPNumElements = Operand; TPNumElements = Operand;
NumElements = TPNumElements.getReg(); NumElements = TPNumElements.getReg();
} else { } else {
@ -683,7 +681,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
return false; return false;
}; };
if (CannotInsertWDLSTPBetween(StartInsertPt, InsertBB->end())) if (CannotInsertWDLSTPBetween(StartInsertPt, StartInsertBB->end()))
return false; return false;
// Especially in the case of while loops, InsertBB may not be the // Especially in the case of while loops, InsertBB may not be the
@ -704,7 +702,7 @@ bool LowOverheadLoop::ValidateTailPredicate(MachineInstr *StartInsertPt) {
// Search backwards for a def, until we get to InsertBB. // Search backwards for a def, until we get to InsertBB.
MachineBasicBlock *MBB = Preheader; MachineBasicBlock *MBB = Preheader;
while (MBB && MBB != InsertBB) { while (MBB && MBB != StartInsertBB) {
if (CannotProvideElements(MBB, NumElements)) { if (CannotProvideElements(MBB, NumElements)) {
LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n"); LLVM_DEBUG(dbgs() << "ARM Loops: Unable to provide element count.\n");
return false; return false;
@ -1017,10 +1015,15 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
// Find a suitable position to insert the loop start instruction. It needs to // Find a suitable position to insert the loop start instruction. It needs to
// be able to safely define LR. // be able to safely define LR.
auto FindStartInsertionPoint = [](MachineInstr *Start, auto FindStartInsertionPoint = [](MachineInstr *Start,
ReachingDefAnalysis &RDA) -> MachineInstr* { MachineBasicBlock::iterator &InsertPt,
MachineBasicBlock *&InsertBB,
ReachingDefAnalysis &RDA) {
// We can define LR because LR already contains the same value. // We can define LR because LR already contains the same value.
if (Start->getOperand(0).getReg() == ARM::LR) if (Start->getOperand(0).getReg() == ARM::LR) {
return Start; InsertPt = MachineBasicBlock::iterator(Start);
InsertBB = Start->getParent();
return true;
}
unsigned CountReg = Start->getOperand(0).getReg(); unsigned CountReg = Start->getOperand(0).getReg();
auto IsMoveLR = [&CountReg](MachineInstr *MI) { auto IsMoveLR = [&CountReg](MachineInstr *MI) {
@ -1035,32 +1038,41 @@ void LowOverheadLoop::Validate(ARMBasicBlockUtils *BBUtils) {
// Find an insertion point: // Find an insertion point:
// - Is there a (mov lr, Count) before Start? If so, and nothing else // - Is there a (mov lr, Count) before Start? If so, and nothing else
// writes to Count before Start, we can insert at that mov. // writes to Count before Start, we can insert at that mov.
if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) if (auto *LRDef = RDA.getUniqueReachingMIDef(Start, ARM::LR)) {
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) {
return LRDef; InsertPt = MachineBasicBlock::iterator(LRDef);
InsertBB = LRDef->getParent();
return true;
}
}
// - Is there a (mov lr, Count) after Start? If so, and nothing else writes // - Is there a (mov lr, Count) after Start? If so, and nothing else writes
// to Count after Start, we can insert at that mov. // to Count after Start, we can insert at that mov.
if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) if (auto *LRDef = RDA.getLocalLiveOutMIDef(MBB, ARM::LR)) {
if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) if (IsMoveLR(LRDef) && RDA.hasSameReachingDef(Start, LRDef, CountReg)) {
return LRDef; InsertPt = MachineBasicBlock::iterator(LRDef);
InsertBB = LRDef->getParent();
return true;
}
}
// We've found no suitable LR def and Start doesn't use LR directly. Can we // We've found no suitable LR def and Start doesn't use LR directly. Can we
// just define LR anyway? // just define LR anyway?
return RDA.isSafeToDefRegAt(Start, ARM::LR) ? Start : nullptr; if (!RDA.isSafeToDefRegAt(Start, ARM::LR))
return false;
InsertPt = MachineBasicBlock::iterator(Start);
InsertBB = Start->getParent();
return true;
}; };
InsertPt = FindStartInsertionPoint(Start, RDA); if (!FindStartInsertionPoint(Start, StartInsertPt, StartInsertBB, RDA)) {
Revert = !ValidateRanges(Start, End, BBUtils, ML) || !InsertPt; LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n");
CannotTailPredicate = !ValidateTailPredicate(InsertPt); Revert = true;
return;
LLVM_DEBUG(if (!InsertPt) }
dbgs() << "ARM Loops: Unable to find safe insertion point.\n"; Revert = !ValidateRanges(Start, End, BBUtils, ML);
else CannotTailPredicate = !ValidateTailPredicate();
dbgs() << "ARM Loops: Start insertion point: " << *InsertPt;
if (CannotTailPredicate)
dbgs() << "ARM Loops: Couldn't validate tail predicate.\n"
);
} }
bool LowOverheadLoop::AddVCTP(MachineInstr *MI) { bool LowOverheadLoop::AddVCTP(MachineInstr *MI) {
@ -1398,7 +1410,10 @@ void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
// Collect and remove the users of iteration count. // Collect and remove the users of iteration count.
SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec, SmallPtrSet<MachineInstr*, 4> Killed = { LoLoop.Start, LoLoop.Dec,
LoLoop.End, LoLoop.InsertPt }; LoLoop.End };
if (LoLoop.StartInsertPt != LoLoop.StartInsertBB->end())
Killed.insert(&*LoLoop.StartInsertPt);
if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed)) if (!TryRemove(Def, *RDA, LoLoop.ToRemove, Killed))
LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n"); LLVM_DEBUG(dbgs() << "ARM Loops: Unsafe to remove loop iteration count.\n");
} }
@ -1409,15 +1424,15 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
// calculate the number of loop iterations. // calculate the number of loop iterations.
IterationCountDCE(LoLoop); IterationCountDCE(LoLoop);
MachineInstr *InsertPt = LoLoop.InsertPt; MachineBasicBlock::iterator InsertPt = LoLoop.StartInsertPt;
MachineInstr *Start = LoLoop.Start; MachineInstr *Start = LoLoop.Start;
MachineBasicBlock *MBB = InsertPt->getParent(); MachineBasicBlock *MBB = LoLoop.StartInsertBB;
bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart; bool IsDo = Start->getOpcode() == ARM::t2DoLoopStart;
unsigned Opc = LoLoop.getStartOpcode(); unsigned Opc = LoLoop.getStartOpcode();
MachineOperand &Count = LoLoop.getLoopStartOperand(); MachineOperand &Count = LoLoop.getLoopStartOperand();
MachineInstrBuilder MIB = MachineInstrBuilder MIB =
BuildMI(*MBB, InsertPt, InsertPt->getDebugLoc(), TII->get(Opc)); BuildMI(*MBB, InsertPt, Start->getDebugLoc(), TII->get(Opc));
MIB.addDef(ARM::LR); MIB.addDef(ARM::LR);
MIB.add(Count); MIB.add(Count);
@ -1425,8 +1440,8 @@ MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
MIB.add(Start->getOperand(1)); MIB.add(Start->getOperand(1));
// If we're inserting at a mov lr, then remove it as it's redundant. // If we're inserting at a mov lr, then remove it as it's redundant.
if (InsertPt != Start) if (InsertPt != MBB->end())
LoLoop.ToRemove.insert(InsertPt); LoLoop.ToRemove.insert(&*InsertPt);
LoLoop.ToRemove.insert(Start); LoLoop.ToRemove.insert(Start);
LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB);
return &*MIB; return &*MIB;

View File

@ -249,7 +249,7 @@ body: |
; CHECK: renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (load 4 from %ir.arrayidx7.us) ; CHECK: renamable $r2 = t2LDRs renamable $r9, renamable $r1, 2, 14 /* CC::al */, $noreg, debug-location !41 :: (load 4 from %ir.arrayidx7.us)
; CHECK: $r3 = tMOVr $r5, 14 /* CC::al */, $noreg, debug-location !32 ; CHECK: $r3 = tMOVr $r5, 14 /* CC::al */, $noreg, debug-location !32
; CHECK: $r0 = tMOVr $r8, 14 /* CC::al */, $noreg, debug-location !32 ; CHECK: $r0 = tMOVr $r8, 14 /* CC::al */, $noreg, debug-location !32
; CHECK: $lr = t2DLS renamable $r10, debug-location !32 ; CHECK: $lr = t2DLS renamable $r10, debug-location !42
; CHECK: bb.3.for.body3.us: ; CHECK: bb.3.for.body3.us:
; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000) ; CHECK: successors: %bb.3(0x7c000000), %bb.4(0x04000000)
; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r8, $r9, $r10, $r12 ; CHECK: liveins: $lr, $r0, $r1, $r2, $r3, $r5, $r8, $r9, $r10, $r12