forked from OSchip/llvm-project
Added a check in the preRA scheduler for potential interference on a
induction variable. The preRA scheduler is unaware of induction vars, so we look for potential "virtual register cycles" instead. Fixes <rdar://problem/8946719> Bad scheduling prevents coalescing llvm-svn: 129100
This commit is contained in:
parent
d6f1c58914
commit
2ad0b37318
|
@ -250,6 +250,7 @@ namespace llvm {
|
|||
unsigned NumSuccsLeft; // # of succs not scheduled.
|
||||
unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use.
|
||||
unsigned short Latency; // Node latency.
|
||||
bool isVRegCycle : 1; // May use and def the same vreg.
|
||||
bool isCall : 1; // Is a function call.
|
||||
bool isTwoAddress : 1; // Is a two-address instruction.
|
||||
bool isCommutable : 1; // Is a commutable instruction.
|
||||
|
@ -278,8 +279,8 @@ namespace llvm {
|
|||
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
|
||||
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
|
||||
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isPending(false), isAvailable(false), isScheduled(false),
|
||||
isScheduleHigh(false), isCloned(false),
|
||||
SchedulingPref(Sched::None),
|
||||
|
@ -292,8 +293,8 @@ namespace llvm {
|
|||
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
|
||||
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
|
||||
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isPending(false), isAvailable(false), isScheduled(false),
|
||||
isScheduleHigh(false), isCloned(false),
|
||||
SchedulingPref(Sched::None),
|
||||
|
@ -305,8 +306,8 @@ namespace llvm {
|
|||
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
|
||||
NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||
NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
|
||||
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isVRegCycle(false), isCall(false), isTwoAddress(false),
|
||||
isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||
isPending(false), isAvailable(false), isScheduled(false),
|
||||
isScheduleHigh(false), isCloned(false),
|
||||
SchedulingPref(Sched::None),
|
||||
|
|
|
@ -77,6 +77,9 @@ static cl::opt<bool> DisableSchedRegPressure(
|
|||
static cl::opt<bool> DisableSchedLiveUses(
|
||||
"disable-sched-live-uses", cl::Hidden, cl::init(true),
|
||||
cl::desc("Disable live use priority in sched=list-ilp"));
|
||||
static cl::opt<bool> DisableSchedVRegCycle(
|
||||
"disable-sched-vrcycle", cl::Hidden, cl::init(false),
|
||||
cl::desc("Disable virtual register cycle interference checks"));
|
||||
static cl::opt<bool> DisableSchedStalls(
|
||||
"disable-sched-stalls", cl::Hidden, cl::init(true),
|
||||
cl::desc("Disable no-stall priority in sched=list-ilp"));
|
||||
|
@ -2045,6 +2048,44 @@ static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Return true if the virtual register defined by VRCycleSU may interfere with
|
||||
// VRUseSU.
|
||||
//
|
||||
// Note: We may consider two SU's that use the same value live into a loop as
|
||||
// interferng even though the value is not an induction variable. This is an
|
||||
// unfortunate consequence of scheduling on the selection DAG.
|
||||
static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
|
||||
const SUnit *VRUseSU) {
|
||||
for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
|
||||
E = VRCycleSU->Preds.end(); I != E; ++I) {
|
||||
if (I->isCtrl()) continue; // ignore chain preds
|
||||
SDNode *InNode = I->getSUnit()->getNode();
|
||||
if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
|
||||
continue;
|
||||
for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
|
||||
EE = VRUseSU->Preds.end(); II != EE; ++II) {
|
||||
if (II->getSUnit() == I->getSUnit())
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Compare the VRegCycle properties of the nodes.
|
||||
// Return -1 if left has higher priority, 1 if right has higher priority.
|
||||
// Return 0 if priority is equivalent.
|
||||
static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
|
||||
if (left->isVRegCycle && !right->isVRegCycle) {
|
||||
if (checkVRegCycleInterference(left, right))
|
||||
return -1;
|
||||
}
|
||||
else if (!left->isVRegCycle && right->isVRegCycle) {
|
||||
if (checkVRegCycleInterference(right, left))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Check for either a dependence (latency) or resource (hazard) stall.
|
||||
//
|
||||
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
|
||||
|
@ -2232,11 +2273,15 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
|||
<< left->NodeNum << ")\n");
|
||||
return false;
|
||||
}
|
||||
else if (!LHigh && !RHigh) {
|
||||
int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
|
||||
if (result != 0)
|
||||
return result > 0;
|
||||
int result = 0;
|
||||
if (!DisableSchedVRegCycle) {
|
||||
result = BUCompareVRegCycle(left, right);
|
||||
}
|
||||
if (result == 0 && !LHigh && !RHigh) {
|
||||
result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
|
||||
}
|
||||
if (result != 0)
|
||||
return result > 0;
|
||||
return BURRSort(left, right, SPQ);
|
||||
}
|
||||
|
||||
|
@ -2302,6 +2347,12 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
|||
if (RReduce && !LReduce) return true;
|
||||
}
|
||||
|
||||
if (!DisableSchedVRegCycle) {
|
||||
int result = BUCompareVRegCycle(left, right);
|
||||
if (result != 0)
|
||||
return result > 0;
|
||||
}
|
||||
|
||||
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
|
||||
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
|
||||
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
|
||||
|
|
|
@ -81,6 +81,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
|
|||
SUnit *SU = NewSUnit(Old->getNode());
|
||||
SU->OrigNode = Old->OrigNode;
|
||||
SU->Latency = Old->Latency;
|
||||
SU->isVRegCycle = Old->isVRegCycle;
|
||||
SU->isCall = Old->isCall;
|
||||
SU->isTwoAddress = Old->isTwoAddress;
|
||||
SU->isCommutable = Old->isCommutable;
|
||||
|
@ -341,6 +342,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
|
|||
assert(N->getNodeId() == -1 && "Node already inserted!");
|
||||
N->setNodeId(NodeSUnit->NodeNum);
|
||||
|
||||
// Set isVRegCycle if the node operands are live into and value is live out
|
||||
// of a single block loop.
|
||||
InitVRegCycleFlag(NodeSUnit);
|
||||
|
||||
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
|
||||
InitNumRegDefsLeft(NodeSUnit);
|
||||
|
||||
|
@ -507,6 +512,47 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
|
|||
}
|
||||
}
|
||||
|
||||
// Set isVRegCycle if this node's single use is CopyToReg and its only active
|
||||
// data operands are CopyFromReg.
|
||||
//
|
||||
// This is only relevant for single-block loops, in which case the VRegCycle
|
||||
// node is likely an induction variable in which the operand and target virtual
|
||||
// registers should be coalesced (e.g. pre/post increment values). Setting the
|
||||
// isVRegCycle flag helps the scheduler prioritize other uses of the same
|
||||
// CopyFromReg so that this node becomes the virtual register "kill". This
|
||||
// avoids interference between the values live in and out of the block and
|
||||
// eliminates a copy inside the loop.
|
||||
void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) {
|
||||
if (!BB->isSuccessor(BB))
|
||||
return;
|
||||
|
||||
SDNode *N = SU->getNode();
|
||||
if (N->getGluedNode())
|
||||
return;
|
||||
|
||||
if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg)
|
||||
return;
|
||||
|
||||
bool FoundLiveIn = false;
|
||||
for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) {
|
||||
EVT OpVT = OI->getValueType();
|
||||
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
|
||||
|
||||
if (OpVT == MVT::Other)
|
||||
continue; // ignore chain operands
|
||||
|
||||
if (isPassiveNode(OI->getNode()))
|
||||
continue; // ignore constants and such
|
||||
|
||||
if (OI->getNode()->getOpcode() != ISD::CopyFromReg)
|
||||
return;
|
||||
|
||||
FoundLiveIn = true;
|
||||
}
|
||||
if (FoundLiveIn)
|
||||
SU->isVRegCycle = true;
|
||||
}
|
||||
|
||||
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
|
||||
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
|
||||
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
|
||||
|
|
|
@ -80,6 +80,12 @@ namespace llvm {
|
|||
/// flagged together nodes with a single SUnit.
|
||||
virtual void BuildSchedGraph(AliasAnalysis *AA);
|
||||
|
||||
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
|
||||
/// CopyToReg and its only active data operands are CopyFromReg within a
|
||||
/// single block loop.
|
||||
///
|
||||
void InitVRegCycleFlag(SUnit *SU);
|
||||
|
||||
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
|
||||
///
|
||||
void InitNumRegDefsLeft(SUnit *SU);
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
|
||||
; Tests preRAsched support for VRegCycle interference.
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
|
||||
target triple = "thumbv7-apple-darwin10"
|
||||
|
||||
define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize {
|
||||
entry:
|
||||
%src_copy_start6 = bitcast float* %src_copy_start to i8*
|
||||
%0 = icmp eq i32 %src_width, 0
|
||||
br i1 %0, label %return, label %bb
|
||||
|
||||
; Make sure the scheduler schedules all uses of the preincrement
|
||||
; induction variable before defining the postincrement value.
|
||||
; CHECK: t:
|
||||
; CHECK-NOT: mov
|
||||
bb: ; preds = %entry, %bb
|
||||
%j.05 = phi i32 [ %2, %bb ], [ 0, %entry ]
|
||||
%tmp = mul i32 %j.05, %src_copy_start_index
|
||||
%uglygep = getelementptr i8* %src_copy_start6, i32 %tmp
|
||||
%src_copy_start_addr.04 = bitcast i8* %uglygep to float*
|
||||
%dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05
|
||||
%1 = load float* %src_copy_start_addr.04, align 4
|
||||
store float %1, float* %dst_copy_start_addr.03, align 4
|
||||
%2 = add i32 %j.05, 1
|
||||
%exitcond = icmp eq i32 %2, %src_width
|
||||
br i1 %exitcond, label %return, label %bb
|
||||
|
||||
return: ; preds = %bb, %entry
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue