Recommit r129383. PreRA scheduler heuristic fixes: VRegCycle, TokenFactor latency.

Additional fixes:
Do something reasonable for subtargets with generic
itineraries by handle node latency the same as for an empty
itinerary. Now nodes default to unit latency unless an itinerary
explicitly specifies a zero cycle stage or it is a TokenFactor chain.

Original fixes:
UnitsSharePred was a source of randomness in the scheduler: node
priority depended on the queue data structure. I rewrote the recent
VRegCycle heuristics to completely replace the old heuristic without
any randomness. To make the ndoe latency adjustments work, I also
needed to do something a little more reasonable with TokenFactor. I
gave it zero latency to its consumers and always schedule it as low as
possible.

llvm-svn: 129421
This commit is contained in:
Andrew Trick 2011-04-13 00:38:32 +00:00
parent 3137d3cb49
commit b53a00d2cb
9 changed files with 225 additions and 189 deletions

View File

@ -155,9 +155,13 @@ public:
/// in the itinerary.
///
unsigned getStageLatency(unsigned ItinClassIndx) const {
// If the target doesn't provide itinerary information, use a
// simple non-zero default value for all instructions.
if (isEmpty())
// If the target doesn't provide itinerary information, use a simple
// non-zero default value for all instructions. Some target's provide a
// dummy (Generic) itinerary which should be handled as if it's itinerary is
// empty. We identify this by looking for a reference to stage zero (invalid
// stage). This is different from beginStage == endState != 0, which could
// be used for zero-latency pseudo ops.
if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
return 1;
// Calculate the maximum completion time for any stage.

View File

@ -102,11 +102,11 @@ static cl::opt<unsigned> AvgIPC(
#ifndef NDEBUG
namespace {
// For sched=list-ilp, Count the number of times each factor comes into play.
enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
FactOther, NumFactors };
enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
FactStatic, FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG
@ -463,6 +463,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
if (DisableSchedCycles)
return;
// FIXME: Nodes such as CopyFromReg probably should not advance the current
// cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
// has predecessors the cycle will be advanced when they are scheduled.
// But given the crude nature of modeling latency though such nodes, we
// currently need to treat these nodes like real instructions.
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
// Bump CurCycle to account for latency. We assume the latency of other
@ -533,6 +540,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
}
}
static void resetVRegCycle(SUnit *SU);
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
@ -542,7 +551,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
#ifndef NDEBUG
if (CurCycle < SU->getHeight())
DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
DEBUG(dbgs() << " Height [" << SU->getHeight()
<< "] pipeline stall!\n");
#endif
// FIXME: Do not modify node height. It may interfere with
@ -559,7 +569,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
AvailableQueue->ScheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useles pushed to
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
// PendingQueue for schedulers that implement HasReadyFilter.
if (!HazardRec->isEnabled() && AvgIPC < 2)
AdvanceToCycle(CurCycle + 1);
@ -580,20 +590,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
}
resetVRegCycle(SU);
SU->isScheduled = true;
// Conditions under which the scheduler should eagerly advance the cycle:
// (1) No available instructions
// (2) All pipelines full, so available instructions must have hazards.
//
// If HazardRec is disabled, the cycle was advanced earlier.
// If HazardRec is disabled, the cycle was pre-advanced before calling
// ReleasePredecessors. In that case, IssueCount should remain 0.
//
// Check AvailableQueue after ReleasePredecessors in case of zero latency.
++IssueCount;
if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
|| (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
|| AvailableQueue->empty())
AdvanceToCycle(CurCycle + 1);
if (HazardRec->isEnabled() || AvgIPC > 1) {
if (SU->getNode() && SU->getNode()->isMachineOpcode())
++IssueCount;
if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
|| (!HazardRec->isEnabled() && IssueCount == AvgIPC))
AdvanceToCycle(CurCycle + 1);
}
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@ -1220,7 +1235,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
DEBUG(dbgs() << "\n*** Examining Available\n";
DEBUG(dbgs() << "\nExamining Available:\n";
AvailableQueue->dump(this));
// Pick the best node to schedule taking all constraints into
@ -1661,17 +1676,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
}
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
if (!TracksRegPressure)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
}
void RegReductionPQBase::addNode(const SUnit *SU) {
unsigned SUSize = SethiUllmanNumbers.size();
if (SUnits->size() > SUSize)
@ -2008,7 +2012,29 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
/// CopyFromReg from a virtual register.
static bool hasOnlyLiveInOpers(const SUnit *SU) {
bool RetVal = false;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue;
const SUnit *PredSU = I->getSUnit();
if (PredSU->getNode() &&
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
unsigned Reg =
cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
}
return false;
}
return RetVal;
}
/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
/// CopyToReg to a virtual register. This SU def is probably a liveout and
/// it has no other use. It should be scheduled closer to the terminator.
static bool hasOnlyLiveOutUses(const SUnit *SU) {
@ -2030,62 +2056,71 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
return RetVal;
}
/// UnitsSharePred - Return true if the two scheduling units share a common
/// data predecessor.
static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
SmallSet<const SUnit*, 4> Preds;
for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
// Set isVRegCycle for a node with only live in opers and live out uses. Also
// set isVRegCycle for its CopyFromReg operands.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
static void initVRegCycle(SUnit *SU) {
if (DisableSchedVRegCycle)
return;
if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
return;
DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
SU->isVRegCycle = true;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
Preds.insert(I->getSUnit());
if (I->isCtrl()) continue;
I->getSUnit()->isVRegCycle = true;
}
for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
if (Preds.count(I->getSUnit()))
return true;
}
return false;
}
// Return true if the virtual register defined by VRCycleSU may interfere with
// VRUseSU.
//
// Note: We may consider two SU's that use the same value live into a loop as
// interferng even though the value is not an induction variable. This is an
// unfortunate consequence of scheduling on the selection DAG.
static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
const SUnit *VRUseSU) {
for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
E = VRCycleSU->Preds.end(); I != E; ++I) {
// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
static void resetVRegCycle(SUnit *SU) {
if (!SU->isVRegCycle)
return;
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
SDNode *InNode = I->getSUnit()->getNode();
if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
continue;
for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
EE = VRUseSU->Preds.end(); II != EE; ++II) {
if (II->getSUnit() == I->getSUnit())
return true;
SUnit *PredSU = I->getSUnit();
if (PredSU->isVRegCycle) {
assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
"VRegCycle def must be CopyFromReg");
I->getSUnit()->isVRegCycle = 0;
}
}
}
// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
// means a node that defines the VRegCycle has not been scheduled yet.
static bool hasVRegCycleUse(const SUnit *SU) {
// If this SU also defines the VReg, don't hoist it as a "use".
if (SU->isVRegCycle)
return false;
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl()) continue; // ignore chain preds
if (I->getSUnit()->isVRegCycle &&
I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
return true;
}
}
return false;
}
// Compare the VRegCycle properties of the nodes.
// Return -1 if left has higher priority, 1 if right has higher priority.
// Return 0 if priority is equivalent.
static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
if (left->isVRegCycle && !right->isVRegCycle) {
if (checkVRegCycleInterference(left, right))
return -1;
}
else if (!left->isVRegCycle && right->isVRegCycle) {
if (checkVRegCycleInterference(right, left))
return 1;
}
return 0;
}
// Check for either a dependence (latency) or resource (hazard) stall.
//
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
@ -2101,23 +2136,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
// Return 0 if latency-based priority is equivalent.
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
RegReductionPQBase *SPQ) {
// If the two nodes share an operand and one of them has a single
// use that is a live out copy, favor the one that is live out. Otherwise
// it will be difficult to eliminate the copy if the instruction is a
// loop induction variable update. e.g.
// BB:
// sub r1, r3, #1
// str r0, [r2, r3]
// mov r3, r1
// cmp
// bne BB
bool SharePred = UnitsSharePred(left, right);
// FIXME: Only adjust if BB is a loop back edge.
// FIXME: What's the cost of a copy?
int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
int LHeight = (int)left->getHeight() - LBonus;
int RHeight = (int)right->getHeight() - RBonus;
// Scheduling an instruction that uses a VReg whose postincrement has not yet
// been scheduled will induce a copy. Model this as an extra cycle of latency.
int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
int LHeight = (int)left->getHeight() + LPenalty;
int RHeight = (int)right->getHeight() + RPenalty;
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
BUHasStall(left, LHeight, SPQ);
@ -2128,36 +2152,47 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
if (!RStall)
if (!RStall) {
DEBUG(++FactorCount[FactStall]);
return 1;
if (LHeight != RHeight)
}
if (LHeight != RHeight) {
DEBUG(++FactorCount[FactStall]);
return LHeight > RHeight ? 1 : -1;
} else if (RStall)
}
} else if (RStall) {
DEBUG(++FactorCount[FactStall]);
return -1;
}
// If either node is scheduling for latency, sort them by height/depth
// and latency.
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
right->SchedulingPref == Sched::Latency)) {
if (DisableSchedCycles) {
if (LHeight != RHeight)
if (LHeight != RHeight) {
DEBUG(++FactorCount[FactHeight]);
return LHeight > RHeight ? 1 : -1;
}
}
else {
// If neither instruction stalls (!LStall && !RStall) then
// its height is already covered so only its depth matters. We also reach
// this if both stall but have the same height.
unsigned LDepth = left->getDepth();
unsigned RDepth = right->getDepth();
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
if (left->Latency != right->Latency)
if (left->Latency != right->Latency) {
DEBUG(++FactorCount[FactOther]);
return left->Latency > right->Latency ? 1 : -1;
}
}
return 0;
}
@ -2169,7 +2204,19 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
DEBUG(++FactorCount[FactStatic]);
return LPriority > RPriority;
}
DEBUG(++FactorCount[FactOther]);
else if(LPriority == 0) {
// Schedule zero-latency TokenFactor below any other special
// nodes. The alternative may be to avoid artificially boosting the
// TokenFactor's height when it is scheduled, but we currently rely on an
// instruction's final height to equal the cycle in which it is scheduled,
// so heights are monotonically increasing.
unsigned LOpc = left->getNode() ? left->getNode()->getOpcode() : 0;
unsigned ROpc = right->getNode() ? right->getNode()->getOpcode() : 0;
if (LOpc == ISD::TokenFactor)
return false;
if (ROpc == ISD::TokenFactor)
return true;
}
// Try schedule def + use closer when Sethi-Ullman numbers are the same.
// e.g.
@ -2190,14 +2237,18 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
if (LDist != RDist)
if (LDist != RDist) {
DEBUG(++FactorCount[FactOther]);
return LDist < RDist;
}
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
if (LScratch != RScratch)
if (LScratch != RScratch) {
DEBUG(++FactorCount[FactOther]);
return LScratch > RScratch;
}
if (!DisableSchedCycles) {
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
@ -2205,15 +2256,20 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
if (left->getHeight() != right->getHeight())
if (left->getHeight() != right->getHeight()) {
DEBUG(++FactorCount[FactHeight]);
return left->getHeight() > right->getHeight();
}
if (left->getDepth() != right->getDepth())
if (left->getDepth() != right->getDepth()) {
DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@ -2264,24 +2320,22 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
}
int result = 0;
if (!DisableSchedVRegCycle) {
result = BUCompareVRegCycle(left, right);
if (!LHigh && !RHigh) {
int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
if (result != 0)
return result > 0;
}
if (result == 0 && !LHigh && !RHigh) {
result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
}
if (result != 0)
return result > 0;
return BURRSort(left, right, SPQ);
}
@ -2347,12 +2401,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (RReduce && !LReduce) return true;
}
if (!DisableSchedVRegCycle) {
int result = BUCompareVRegCycle(left, right);
if (result != 0)
return result > 0;
}
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
@ -2391,6 +2439,24 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
if (!TracksRegPressure)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
// For single block loops, mark nodes that look like canonical IV increments.
if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
initVRegCycle(&sunits[i]);
}
}
}
//===----------------------------------------------------------------------===//
// Preschedule for Register Pressure
//===----------------------------------------------------------------------===//

View File

@ -342,10 +342,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
// Set isVRegCycle if the node operands are live into and value is live out
// of a single block loop.
InitVRegCycleFlag(NodeSUnit);
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
InitNumRegDefsLeft(NodeSUnit);
@ -417,6 +413,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// If this is a ctrl dep, latency is 1.
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
// Special-case TokenFactor chains as zero-latency.
if(isChain && OpN->getOpcode() == ISD::TokenFactor)
OpLatency = 0;
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
@ -512,47 +512,6 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
}
}
// Set isVRegCycle if this node's single use is CopyToReg and its only active
// data operands are CopyFromReg.
//
// This is only relevant for single-block loops, in which case the VRegCycle
// node is likely an induction variable in which the operand and target virtual
// registers should be coalesced (e.g. pre/post increment values). Setting the
// isVRegCycle flag helps the scheduler prioritize other uses of the same
// CopyFromReg so that this node becomes the virtual register "kill". This
// avoids interference between the values live in and out of the block and
// eliminates a copy inside the loop.
void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) {
if (!BB->isSuccessor(BB))
return;
SDNode *N = SU->getNode();
if (N->getGluedNode())
return;
if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg)
return;
bool FoundLiveIn = false;
for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) {
EVT OpVT = OI->getValueType();
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
if (OpVT == MVT::Other)
continue; // ignore chain operands
if (isPassiveNode(OI->getNode()))
continue; // ignore constants and such
if (OI->getNode()->getOpcode() != ISD::CopyFromReg)
return;
FoundLiveIn = true;
}
if (FoundLiveIn)
SU->isVRegCycle = true;
}
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
@ -562,6 +521,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
// (e.g. Top-Down list) may rely on the fact that operand latency is nonzero
// whenever node latency is nonzero.
if (N && N->getOpcode() == ISD::TokenFactor) {
SU->Latency = 0;
return;
}
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
SU->Latency = 1;
@ -569,7 +538,6 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
if (!InstrItins || InstrItins->isEmpty()) {
SDNode *N = SU->getNode();
if (N && N->isMachineOpcode() &&
TII->isHighLatencyDef(N->getMachineOpcode()))
SU->Latency = HighLatencyCycles;

View File

@ -1,10 +1,8 @@
; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
; The ARM magic hinting works best with linear scan.
; CHECK: ldmia
; CHECK: stmia
; CHECK: ldrh
; CHECK: ldrd
; CHECK: strd
; CHECK: ldrb
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s
define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
;CHECK: vrecpe.f32

View File

@ -1,10 +1,10 @@
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s
; Test that we correctly align elements when using va_arg
; CHECK: test1:
; CHECK-NOT: bfc
; CHECK: add r0, r0, #7
; CHECK: bfc r0, #0, #3
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
; CHECK: bfc [[REG]], #0, #3
; CHECK-NOT: bfc
define i64 @test1(i32 %i, ...) nounwind optsize {
@ -19,8 +19,8 @@ entry:
; CHECK: test2:
; CHECK-NOT: bfc
; CHECK: add r0, r0, #7
; CHECK: bfc r0, #0, #3
; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7
; CHECK: bfc [[REG]], #0, #3
; CHECK-NOT: bfc
; CHECK: bx lr

View File

@ -40,8 +40,8 @@ define void @test_add(float* %P, double* %D) {
define void @test_ext_round(float* %P, double* %D) {
;CHECK: test_ext_round:
%a = load float* %P ; <float> [#uses=1]
;CHECK: vcvt.f32.f64
;CHECK: vcvt.f64.f32
;CHECK: vcvt.f32.f64
%b = fpext float %a to double ; <double> [#uses=1]
%A = load double* %D ; <double> [#uses=1]
%B = fptrunc double %A to float ; <float> [#uses=1]

View File

@ -1,12 +1,12 @@
; RUN: llc -march=mipsel -mcpu=mips2 < %s | FileCheck %s
; RUN: llc -march=mipsel -mcpu=mips2 < %s -regalloc=basic | FileCheck %s
; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s
; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s
; All test functions do the same thing - they return the first variable
; argument.
; All CHECK's do the same thing - they check whether variable arguments from
; registers are placed on correct stack locations, and whether the first
; All CHECK's do the same thing - they check whether variable arguments from
; registers are placed on correct stack locations, and whether the first
; variable argument is returned from the correct stack location.
@ -31,14 +31,14 @@ entry:
; CHECK: va1:
; CHECK: addiu $sp, $sp, -32
; CHECK: sw $5, 36($sp)
; CHECK: sw $6, 40($sp)
; CHECK: sw $7, 44($sp)
; CHECK: sw $6, 40($sp)
; CHECK: sw $5, 36($sp)
; CHECK: lw $2, 36($sp)
}
; check whether the variable double argument will be accessed from the 8-byte
; aligned location (i.e. whether the address is computed by adding 7 and
; check whether the variable double argument will be accessed from the 8-byte
; aligned location (i.e. whether the address is computed by adding 7 and
; clearing lower 3 bits)
define double @va2(i32 %a, ...) nounwind {
entry:
@ -57,10 +57,10 @@ entry:
; CHECK: va2:
; CHECK: addiu $sp, $sp, -40
; CHECK: addiu $[[R0:[0-9]+]], $sp, 44
; CHECK: sw $5, 44($sp)
; CHECK: sw $6, 48($sp)
; CHECK: sw $7, 52($sp)
; CHECK: sw $6, 48($sp)
; CHECK: sw $5, 44($sp)
; CHECK: addiu $[[R0:[0-9]+]], $sp, 44
; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7
; CHECK: addiu $[[R2:[0-9]+]], $zero, -8
; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]]
@ -85,8 +85,8 @@ entry:
; CHECK: va3:
; CHECK: addiu $sp, $sp, -40
; CHECK: sw $6, 48($sp)
; CHECK: sw $7, 52($sp)
; CHECK: sw $6, 48($sp)
; CHECK: lw $2, 48($sp)
}
@ -108,8 +108,8 @@ entry:
; CHECK: va4:
; CHECK: addiu $sp, $sp, -48
; CHECK: sw $6, 56($sp)
; CHECK: sw $7, 60($sp)
; CHECK: sw $6, 56($sp)
; CHECK: addiu $[[R0:[0-9]+]], $sp, 56
; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7
; CHECK: addiu $[[R2:[0-9]+]], $zero, -8

View File

@ -128,9 +128,9 @@ define i32 @test10(i32 %p0) {
; ARMv7M: test10
; ARMv7M: mov.w r1, #16253176
; ARMv7M: mov.w r2, #458759
; ARMv7M: and.w r0, r1, r0, lsr #7
; ARMv7M: mov.w r1, #458759
; ARMv7M: and.w r1, r1, r0, lsr #5
; ARMv7M: and.w r1, r2, r0, lsr #5
; ARMv7M: orrs r0, r1
%tmp1 = lshr i32 %p0, 7 ; <i32> [#uses=1]
%tmp2 = and i32 %tmp1, 16253176 ; <i32> [#uses=2]