From 9dbbd3e553c7f441807c31e28c425b36d8aefb02 Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Fri, 24 Feb 2012 07:04:55 +0000
Subject: [PATCH] PostRA sched: speed up physreg tracking by not abusing SparseSet.

llvm-svn: 151348
---
 llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 41 +++++++++------
 llvm/lib/CodeGen/ScheduleDAGInstrs.h   | 72 +++++++++++++++++++-------
 2 files changed, 79 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2858904fde3c..8f9d85b695bc 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -148,6 +148,20 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
       LoopRegs.VisitLoop(ML);
 }
 
+/// Initialize the map with the number of registers.
+void ScheduleDAGInstrs::Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+  PhysRegSet.setUniverse(Limit);
+  SUnits.resize(Limit);
+}
+
+/// Clear the map without deallocating storage.
+void ScheduleDAGInstrs::Reg2SUnitsMap::clear() {
+  for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
+    SUnits[*I].clear();
+  }
+  PhysRegSet.clear();
+}
+
 /// AddSchedBarrierDeps - Add dependencies from instructions in the current
 /// list of instructions being scheduled to scheduling barrier by adding
 /// the exit SU to the register defs and use list. This is because we want to
@@ -171,7 +185,7 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
       if (Reg == 0) continue;
 
       if (TRI->isPhysicalRegister(Reg))
-        Uses[Reg].SUnits.push_back(&ExitSU);
+        Uses[Reg].push_back(&ExitSU);
       else
         assert(!IsPostRA && "Virtual register encountered after regalloc.");
     }
@@ -185,7 +199,7 @@
              E = (*SI)->livein_end(); I != E; ++I) {
         unsigned Reg = *I;
         if (Seen.insert(Reg))
-          Uses[Reg].SUnits.push_back(&ExitSU);
+          Uses[Reg].push_back(&ExitSU);
       }
     }
   }
@@ -202,10 +216,9 @@
   unsigned DataLatency = SU->Latency;
   for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
-    Reg2SUnitsMap::iterator UsesI = Uses.find(*Alias);
-    if (UsesI == Uses.end())
+    if (!Uses.contains(*Alias))
       continue;
-    std::vector<SUnit*> &UseList = UsesI->SUnits;
+    std::vector<SUnit*> &UseList = Uses[*Alias];
     for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
       SUnit *UseSU = UseList[i];
       if (UseSU == SU)
         continue;
@@ -256,10 +269,9 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   // there's no cost for reusing registers.
   SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
   for (const unsigned *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
-    Reg2SUnitsMap::iterator DefI = Defs.find(*Alias);
-    if (DefI == Defs.end())
+    if (!Defs.contains(*Alias))
       continue;
-    std::vector<SUnit*> &DefList = DefI->SUnits;
+    std::vector<SUnit*> &DefList = Defs[*Alias];
     for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
       SUnit *DefSU = DefList[i];
       if (DefSU == &ExitSU)
@@ -282,14 +294,14 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's uses.
     // Push this SUnit on the use list.
-    Uses[MO.getReg()].SUnits.push_back(SU);
+    Uses[MO.getReg()].push_back(SU);
   }
   else {
     addPhysRegDataDeps(SU, MO);
 
     // Either insert a new Reg2SUnits entry with an empty SUnits list, or
     // retrieve the existing SUnits list for this register's defs.
-    std::vector<SUnit*> &DefList = Defs[MO.getReg()].SUnits;
+    std::vector<SUnit*> &DefList = Defs[MO.getReg()];
 
     // If a def is going to wrap back around to the top of the loop,
     // backschedule it.
@@ -339,9 +351,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
     }
 
     // clear this register's use list
-    Reg2SUnitsMap::iterator UsesI = Uses.find(MO.getReg());
-    if (UsesI != Uses.end())
-      UsesI->SUnits.clear();
+    if (Uses.contains(MO.getReg()))
+      Uses[MO.getReg()].clear();
 
     if (!MO.isDead())
       DefList.clear();
@@ -495,8 +506,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
   assert(Defs.empty() && Uses.empty() &&
          "Only BuildGraph should update Defs/Uses");
-  Defs.setUniverse(TRI->getNumRegs());
-  Uses.setUniverse(TRI->getNumRegs());
+  Defs.setRegLimit(TRI->getNumRegs());
+  Uses.setRegLimit(TRI->getNumRegs());
 
   assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
 
   // FIXME: Allow SparseSet to reserve space for the creation of virtual
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.h b/llvm/lib/CodeGen/ScheduleDAGInstrs.h
index ea61f2f66179..1a9d1ea57897 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.h
@@ -118,15 +118,57 @@ namespace llvm {
     /// the def-side latency only.
     bool UnitLatencies;
 
-    /// An individual mapping from physical register number to an SUnit vector.
-    struct Reg2SUnits {
-      unsigned PhysReg;
-      std::vector<SUnit*> SUnits;
+    /// Combine a SparseSet with a 1x1 vector to track physical registers.
+    /// The SparseSet allows iterating over the (few) live registers for quickly
+    /// comparing against a regmask or clearing the set.
+    ///
+    /// Storage for the map is allocated once for the pass. The map can be
+    /// cleared between scheduling regions without freeing unused entries.
+    class Reg2SUnitsMap {
+      SparseSet<unsigned> PhysRegSet;
+      std::vector<std::vector<SUnit*> > SUnits;
+    public:
+      typedef SparseSet<unsigned>::const_iterator const_iterator;
 
-      explicit Reg2SUnits(unsigned reg): PhysReg(reg) {}
+      // Allow iteration over register numbers (keys) in the map. If needed, we
+      // can provide an iterator over SUnits (values) as well.
+      const_iterator reg_begin() const { return PhysRegSet.begin(); }
+      const_iterator reg_end() const { return PhysRegSet.end(); }
 
-      unsigned getSparseSetKey() const { return PhysReg; }
+      /// Initialize the map with the number of registers.
+      /// If the map is already large enough, no allocation occurs.
+      /// For simplicity we expect the map to be empty().
+      void setRegLimit(unsigned Limit);
+
+      /// Returns true if the map is empty.
+      bool empty() const { return PhysRegSet.empty(); }
+
+      /// Clear the map without deallocating storage.
+      void clear();
+
+      bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
+
+      /// If this register is mapped, return its existing SUnits vector.
+      /// Otherwise map the register and return an empty SUnits vector.
+      std::vector<SUnit*> &operator[](unsigned Reg) {
+        bool New = PhysRegSet.insert(Reg).second;
+        assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
+        (void)New;
+        return SUnits[Reg];
+      }
+
+      /// Erase an existing element without freeing memory.
+      void erase(unsigned Reg) {
+        PhysRegSet.erase(Reg);
+        SUnits[Reg].clear();
+      }
     };
+
+    /// Defs, Uses - Remember where defs and uses of each register are as we
+    /// iterate upward through the instructions. This is allocated here instead
+    /// of inside BuildSchedGraph to avoid the need for it to be initialized and
+    /// destructed for each block.
+    Reg2SUnitsMap Defs;
+    Reg2SUnitsMap Uses;
 
     /// An individual mapping from virtual register number to SUnit.
     struct VReg2SUnit {
@@ -139,20 +181,12 @@ namespace llvm {
         return TargetRegisterInfo::virtReg2Index(VirtReg);
       }
     };
-    // Use SparseSet as a SparseMap by relying on the fact that it never
-    // compares ValueT's, only unsigned keys. This allows the set to be cleared
-    // between scheduling regions in constant time.
-    typedef SparseSet<Reg2SUnits> Reg2SUnitsMap;
+    /// Use SparseSet as a SparseMap by relying on the fact that it never
+    /// compares ValueT's, only unsigned keys. This allows the set to be cleared
+    /// between scheduling regions in constant time as long as ValueT does not
+    /// require a destructor.
     typedef SparseSet<VReg2SUnit> VReg2SUnitMap;
-
-    /// Defs, Uses - Remember where defs and uses of each register are as we
-    /// iterate upward through the instructions. This is allocated here instead
-    /// of inside BuildSchedGraph to avoid the need for it to be initialized and
-    /// destructed for each block.
-    Reg2SUnitsMap Defs;
-    Reg2SUnitsMap Uses;
-
-    // Track the last instructon in this region defining each virtual register.
+    /// Track the last instruction in this region defining each virtual register.
     VReg2SUnitMap VRegDefs;
 
     /// PendingLoads - Remember where unknown loads are after the most recent
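
Note: the Reg2SUnitsMap introduced above is an instance of a general pattern:
pair a sparse set of live keys with a universe-sized vector of per-key value
lists. Lookup is O(1), clear() touches only the keys that are actually live,
and the per-key vectors keep their allocated capacity across scheduling
regions. The standalone sketch below illustrates the idea; SparseKeyMap and
everything in it are invented names for illustration, and a plain
std::vector<bool> stands in for LLVM's SparseSet, which additionally avoids
initializing its sparse array up front.

#include <cassert>
#include <cstdio>
#include <vector>

// Illustrative stand-in for the SparseSet + vector-of-vectors combination.
class SparseKeyMap {
  std::vector<unsigned> Live;             // dense list of currently mapped keys
  std::vector<bool> InSet;                // membership flag, indexed by key
  std::vector<std::vector<int> > Values;  // one value list per possible key
public:
  /// Allocate storage once for the whole key universe; reused across regions.
  void setUniverse(unsigned Size) {
    InSet.assign(Size, false);
    Values.resize(Size);
  }

  bool contains(unsigned Key) const { return InSet[Key]; }

  /// Map the key if it is not mapped yet, then return its value list.
  std::vector<int> &operator[](unsigned Key) {
    if (!InSet[Key]) {
      InSet[Key] = true;
      Live.push_back(Key);
      assert(Values[Key].empty() && "stale value list");
    }
    return Values[Key];
  }

  /// Clear in time proportional to the number of live keys, not the size of
  /// the universe, and without freeing the per-key vectors' storage.
  void clear() {
    for (unsigned i = 0, e = Live.size(); i != e; ++i) {
      Values[Live[i]].clear();
      InSet[Live[i]] = false;
    }
    Live.clear();
  }
};

int main() {
  SparseKeyMap Map;
  Map.setUniverse(256);   // cf. setRegLimit(TRI->getNumRegs()) in the patch
  Map[42].push_back(7);   // first use of operator[] maps the key
  Map[42].push_back(8);   // later uses append to the same list
  std::printf("key 42 holds %u entries\n", (unsigned)Map[42].size());
  Map.clear();            // proportional to live keys; capacity is retained
  std::printf("contains(42) after clear: %d\n", (int)Map.contains(42));
  return 0;
}

The same trade-off shows up in setRegLimit/setUniverse above: allocation
happens once per pass, so per-region setup and teardown cost scales with the
number of live registers rather than with the size of the register file.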