From 39525a6723854e9920b2560aadaa399cfbbd006a Mon Sep 17 00:00:00 2001 From: jmolloy Date: Mon, 4 Nov 2019 19:25:13 +0000 Subject: [PATCH] [DFAPacketizer] Allow up to 64 functional units Summary: To drive the automaton we used a uint64_t as an action type. This contained the transition's resource requirements as a conjunction: (a OR b) AND (b OR c) We encoded this conjunction as a sequence of four 16-bit bitmasks. This limited the number of addressable functional units to 16, which is quite low and has bitten many people in the past. Instead, the DFAEmitter now generates a lookup table from InstrItinerary class (index of the ItinData inside the ProcItineraries) to an internal action index which is essentially a dense embedding of the conjunctive form. Because we never materialize the conjunctive form, we no longer have the 16 FU restriction. In this patch we limit to 64 functional units due to using a uint64_t bitmask in the DFAEmitter. Now that we've decoupled these representations we can increase this in future. Reviewers: ThomasRaoux, kparzysz, majnemer Reviewed By: ThomasRaoux Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69110 --- llvm/include/llvm/CodeGen/DFAPacketizer.h | 45 +- llvm/lib/CodeGen/DFAPacketizer.cpp | 62 +-- llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 474 ++++++------------- 3 files changed, 156 insertions(+), 425 deletions(-) diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h index 705465b15c4c..9cdaedc9e861 100644 --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -46,43 +46,18 @@ class MCInstrDesc; class SUnit; class TargetInstrInfo; -// -------------------------------------------------------------------- -// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp - -// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. -// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. -// -// e.g. terms x resource bit combinations that fit in uint32_t: -// 4 terms x 8 bits = 32 bits -// 3 terms x 10 bits = 30 bits -// 2 terms x 16 bits = 32 bits -// -// e.g. terms x resource bit combinations that fit in uint64_t: -// 8 terms x 8 bits = 64 bits -// 7 terms x 9 bits = 63 bits -// 6 terms x 10 bits = 60 bits -// 5 terms x 12 bits = 60 bits -// 4 terms x 16 bits = 64 bits <--- current -// 3 terms x 21 bits = 63 bits -// 2 terms x 32 bits = 64 bits -// -#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. -#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. - -using DFAInput = uint64_t; -using DFAStateInput = int64_t; - -#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. -// -------------------------------------------------------------------- - class DFAPacketizer { private: const InstrItineraryData *InstrItins; - Automaton A; + Automaton A; + /// For every itinerary, an "action" to apply to the automaton. This removes + /// the redundancy in actions between itinerary classes. + ArrayRef ItinActions; public: - DFAPacketizer(const InstrItineraryData *InstrItins, Automaton a) : - InstrItins(InstrItins), A(std::move(a)) { + DFAPacketizer(const InstrItineraryData *InstrItins, Automaton a, + ArrayRef ItinActions) + : InstrItins(InstrItins), A(std::move(a)), ItinActions(ItinActions) { // Start off with resource tracking disabled. A.enableTranscription(false); } @@ -99,12 +74,6 @@ public: A.enableTranscription(Track); } - // Return the DFAInput for an instruction class. - DFAInput getInsnInput(unsigned InsnClass); - - // Return the DFAInput for an instruction class input vector. - static DFAInput getInsnInput(const std::vector &InsnClass); - // Check if the resources occupied by a MCInstrDesc are available in // the current state. bool canReserveResources(const MCInstrDesc *MID); diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index a169c3cb16b2..afcf014bca40 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -52,68 +52,22 @@ static cl::opt InstrLimit("dfa-instr-limit", cl::Hidden, static unsigned InstrCount = 0; -// -------------------------------------------------------------------- -// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp - -static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; -} - -/// Return the DFAInput for an instruction class input vector. -/// This function is used in both DFAPacketizer.cpp and in -/// DFAPacketizerEmitter.cpp. -static DFAInput getDFAInsnInput(const std::vector &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; -} - -// -------------------------------------------------------------------- - -// Make sure DFA types are large enough for the number of terms & resources. -static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= - (8 * sizeof(DFAInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); -static_assert( - (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)), - "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); - -// Return the DFAInput for an instruction class. -DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { - // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. - DFAInput InsnInput = 0; - unsigned i = 0; - (void)i; - for (const InstrStage *IS = InstrItins->beginStage(InsnClass), - *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) { - InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); - assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); - } - return InsnInput; -} - -// Return the DFAInput for an instruction class input vector. -DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { - return getDFAInsnInput(InsnClass); -} - // Check if the resources occupied by a MCInstrDesc are available in the // current state. bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - return A.canAdd(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return false; + return A.canAdd(Action); } // Reserve the resources occupied by a MCInstrDesc and change the current // state to reflect that change. void DFAPacketizer::reserveResources(const MCInstrDesc *MID) { - unsigned InsnClass = MID->getSchedClass(); - DFAInput InsnInput = getInsnInput(InsnClass); - A.add(InsnInput); + unsigned Action = ItinActions[MID->getSchedClass()]; + if (MID->getSchedClass() == 0 || Action == 0) + return; + A.add(Action); } // Check if the resources occupied by a machine instruction are available diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index ccb4ef1b9678..018bda1b6090 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -16,15 +16,16 @@ #define DEBUG_TYPE "dfa-emitter" +#include "CodeGenSchedule.h" #include "CodeGenTarget.h" #include "DFAEmitter.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/TableGen/Record.h" -#include "llvm/TableGen/TableGenBackend.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include #include @@ -35,216 +36,101 @@ using namespace llvm; -// -------------------------------------------------------------------- -// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp - -// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. -// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. -// -// e.g. terms x resource bit combinations that fit in uint32_t: -// 4 terms x 8 bits = 32 bits -// 3 terms x 10 bits = 30 bits -// 2 terms x 16 bits = 32 bits -// -// e.g. terms x resource bit combinations that fit in uint64_t: -// 8 terms x 8 bits = 64 bits -// 7 terms x 9 bits = 63 bits -// 6 terms x 10 bits = 60 bits -// 5 terms x 12 bits = 60 bits -// 4 terms x 16 bits = 64 bits <--- current -// 3 terms x 21 bits = 63 bits -// 2 terms x 32 bits = 64 bits -// -#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. -#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. - -typedef uint64_t DFAInput; -typedef int64_t DFAStateInput; -#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. +// We use a uint64_t to represent a resource bitmask. +#define DFA_MAX_RESOURCES 64 namespace { +using ResourceVector = SmallVector; - DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { - return (Inp << DFA_MAX_RESOURCES) | FuncUnits; - } +struct ScheduleClass { + /// The parent itinerary index (processor model ID). + unsigned ItineraryID; - /// Return the DFAInput for an instruction class input vector. - /// This function is used in both DFAPacketizer.cpp and in - /// DFAPacketizerEmitter.cpp. - DFAInput getDFAInsnInput(const std::vector &InsnClass) { - DFAInput InsnInput = 0; - assert((InsnClass.size() <= DFA_MAX_RESTERMS) && - "Exceeded maximum number of DFA terms"); - for (auto U : InsnClass) - InsnInput = addDFAFuncUnits(InsnInput, U); - return InsnInput; - } + /// Index within this itinerary of the schedule class. + unsigned Idx; -} // end anonymous namespace + /// The index within the uniqued set of required resources of Resources. + unsigned ResourcesIdx; -// -------------------------------------------------------------------- - -#ifndef NDEBUG -// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". -// -// dbgsInsnClass - When debugging, print instruction class stages. -// -void dbgsInsnClass(const std::vector &InsnClass); -// -// dbgsStateInfo - When debugging, print the set of state info. -// -void dbgsStateInfo(const std::set &stateInfo); -// -// dbgsIndent - When debugging, indent by the specified amount. -// -void dbgsIndent(unsigned indent); -#endif - -// -// class DFAPacketizerEmitter: class that generates and prints out the DFA -// for resource tracking. -// -namespace { + /// Conjunctive list of resource requirements: + /// {a|b, b|c} => (a OR b) AND (b or c). + /// Resources are unique across all itineraries. + ResourceVector Resources; +}; +// Generates and prints out the DFA for resource tracking. class DFAPacketizerEmitter { private: std::string TargetName; - // - // allInsnClasses is the set of all possible resources consumed by an - // InstrStage. - // - std::vector> allInsnClasses; RecordKeeper &Records; + UniqueVector UniqueResources; + std::vector ScheduleClasses; + std::map FUNameToBitsMap; + std::map ComboBitToBitsMap; + public: DFAPacketizerEmitter(RecordKeeper &R); - // - // collectAllFuncUnits - Construct a map of function unit names to bits. - // - int collectAllFuncUnits(std::vector &ProcItinList, - std::map &FUNameToBitsMap, - int &maxResources, - raw_ostream &OS); + // Construct a map of function unit names to bits. + int collectAllFuncUnits( + ArrayRef ProcModels); - // - // collectAllComboFuncs - Construct a map from a combo function unit bit to - // the bits of all included functional units. - // - int collectAllComboFuncs(std::vector &ComboFuncList, - std::map &FUNameToBitsMap, - std::map &ComboBitToBitsMap, - raw_ostream &OS); + // Construct a map from a combo function unit bit to the bits of all included + // functional units. + int collectAllComboFuncs(ArrayRef ComboFuncList); - // - // collectOneInsnClass - Populate allInsnClasses with one instruction class. - // - int collectOneInsnClass(const std::string &ProcName, - std::vector &ProcItinList, - std::map &FUNameToBitsMap, - Record *ItinData, - raw_ostream &OS); - - // - // collectAllInsnClasses - Populate allInsnClasses which is a set of units - // used in each stage. - // - int collectAllInsnClasses(const std::string &ProcName, - std::vector &ProcItinList, - std::map &FUNameToBitsMap, - std::vector &ItinDataList, - int &maxStages, - raw_ostream &OS); + ResourceVector getResourcesForItinerary(Record *Itinerary); + void createScheduleClasses(unsigned ItineraryIdx, const RecVec &Itineraries); // Emit code for a subset of itineraries. void emitForItineraries(raw_ostream &OS, - std::vector &ProcItinList, + std::vector &ProcItinList, std::string DFAName); void run(raw_ostream &OS); }; } // end anonymous namespace -#ifndef NDEBUG -// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". -// -// dbgsInsnClass - When debugging, print instruction class stages. -// -void dbgsInsnClass(const std::vector &InsnClass) { - LLVM_DEBUG(dbgs() << "InsnClass: "); - for (unsigned i = 0; i < InsnClass.size(); ++i) { - if (i > 0) { - LLVM_DEBUG(dbgs() << ", "); - } - LLVM_DEBUG(dbgs() << "0x" << Twine::utohexstr(InsnClass[i])); - } - DFAInput InsnInput = getDFAInsnInput(InsnClass); - LLVM_DEBUG(dbgs() << " (input: 0x" << Twine::utohexstr(InsnInput) << ")"); -} +DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R) + : TargetName(CodeGenTarget(R).getName()), Records(R) {} -// -// dbgsIndent - When debugging, indent by the specified amount. -// -void dbgsIndent(unsigned indent) { - for (unsigned i = 0; i < indent; ++i) { - LLVM_DEBUG(dbgs() << " "); - } -} -#endif // NDEBUG - -DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): - TargetName(CodeGenTarget(R).getName()), Records(R) {} - -// -// collectAllFuncUnits - Construct a map of function unit names to bits. -// int DFAPacketizerEmitter::collectAllFuncUnits( - std::vector &ProcItinList, - std::map &FUNameToBitsMap, - int &maxFUs, - raw_ostream &OS) { + ArrayRef ProcModels) { LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n"); LLVM_DEBUG(dbgs() << "collectAllFuncUnits"); - LLVM_DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n"); + LLVM_DEBUG(dbgs() << " (" << ProcModels.size() << " itineraries)\n"); + + std::set ProcItinList; + for (const CodeGenProcModel *Model : ProcModels) + ProcItinList.insert(Model->ItinsDef); int totalFUs = 0; // Parse functional units for all the itineraries. - for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) { - Record *Proc = ProcItinList[i]; - std::vector FUs = Proc->getValueAsListOfDefs("FU"); + for (Record *Proc : ProcItinList) { + std::vector FUs = Proc->getValueAsListOfDefs("FU"); - LLVM_DEBUG(dbgs() << " FU:" << i << " (" << FUs.size() << " FUs) " - << Proc->getName()); + LLVM_DEBUG(dbgs() << " FU:" + << " (" << FUs.size() << " FUs) " << Proc->getName()); // Convert macros to bits for each stage. unsigned numFUs = FUs.size(); for (unsigned j = 0; j < numFUs; ++j) { - assert ((j < DFA_MAX_RESOURCES) && - "Exceeded maximum number of representable resources"); - unsigned FuncResources = (unsigned) (1U << j); + assert((j < DFA_MAX_RESOURCES) && + "Exceeded maximum number of representable resources"); + uint64_t FuncResources = 1ULL << j; FUNameToBitsMap[FUs[j]->getName()] = FuncResources; LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x" << Twine::utohexstr(FuncResources)); } - if (((int) numFUs) > maxFUs) { - maxFUs = numFUs; - } totalFUs += numFUs; LLVM_DEBUG(dbgs() << "\n"); } return totalFUs; } -// -// collectAllComboFuncs - Construct a map from a combo function unit bit to -// the bits of all included functional units. -// -int DFAPacketizerEmitter::collectAllComboFuncs( - std::vector &ComboFuncList, - std::map &FUNameToBitsMap, - std::map &ComboBitToBitsMap, - raw_ostream &OS) { +int DFAPacketizerEmitter::collectAllComboFuncs(ArrayRef ComboFuncList) { LLVM_DEBUG(dbgs() << "-------------------------------------------------------" "----------------------\n"); LLVM_DEBUG(dbgs() << "collectAllComboFuncs"); @@ -253,27 +139,27 @@ int DFAPacketizerEmitter::collectAllComboFuncs( int numCombos = 0; for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) { Record *Func = ComboFuncList[i]; - std::vector FUs = Func->getValueAsListOfDefs("CFD"); + std::vector FUs = Func->getValueAsListOfDefs("CFD"); LLVM_DEBUG(dbgs() << " CFD:" << i << " (" << FUs.size() << " combo FUs) " << Func->getName() << "\n"); // Convert macros to bits for each stage. for (unsigned j = 0, N = FUs.size(); j < N; ++j) { - assert ((j < DFA_MAX_RESOURCES) && - "Exceeded maximum number of DFA resources"); + assert((j < DFA_MAX_RESOURCES) && + "Exceeded maximum number of DFA resources"); Record *FuncData = FUs[j]; Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); - const std::vector &FuncList = - FuncData->getValueAsListOfDefs("FuncList"); + const std::vector &FuncList = + FuncData->getValueAsListOfDefs("FuncList"); const std::string &ComboFuncName = ComboFunc->getName(); - unsigned ComboBit = FUNameToBitsMap[ComboFuncName]; - unsigned ComboResources = ComboBit; + uint64_t ComboBit = FUNameToBitsMap[ComboFuncName]; + uint64_t ComboResources = ComboBit; LLVM_DEBUG(dbgs() << " combo: " << ComboFuncName << ":0x" << Twine::utohexstr(ComboResources) << "\n"); for (unsigned k = 0, M = FuncList.size(); k < M; ++k) { std::string FuncName = FuncList[k]->getName(); - unsigned FuncResources = FUNameToBitsMap[FuncName]; + uint64_t FuncResources = FUNameToBitsMap[FuncName]; LLVM_DEBUG(dbgs() << " " << FuncName << ":0x" << Twine::utohexstr(FuncResources) << "\n"); ComboResources |= FuncResources; @@ -288,101 +174,33 @@ int DFAPacketizerEmitter::collectAllComboFuncs( return numCombos; } -// -// collectOneInsnClass - Populate allInsnClasses with one instruction class -// -int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName, - std::vector &ProcItinList, - std::map &FUNameToBitsMap, - Record *ItinData, - raw_ostream &OS) { - const std::vector &StageList = - ItinData->getValueAsListOfDefs("Stages"); - - // The number of stages. - unsigned NStages = StageList.size(); - - LLVM_DEBUG(dbgs() << " " << ItinData->getValueAsDef("TheClass")->getName() - << "\n"); - - std::vector UnitBits; - - // Compute the bitwise or of each unit used in this stage. - for (unsigned i = 0; i < NStages; ++i) { - const Record *Stage = StageList[i]; - - // Get unit list. - const std::vector &UnitList = - Stage->getValueAsListOfDefs("Units"); - - LLVM_DEBUG(dbgs() << " stage:" << i << " [" << UnitList.size() - << " units]:"); - unsigned dbglen = 26; // cursor after stage dbgs - - // Compute the bitwise or of each unit used in this stage. - unsigned UnitBitValue = 0; - for (unsigned j = 0, M = UnitList.size(); j < M; ++j) { - // Conduct bitwise or. - std::string UnitName = UnitList[j]->getName(); - LLVM_DEBUG(dbgs() << " " << j << ":" << UnitName); - dbglen += 3 + UnitName.length(); - assert(FUNameToBitsMap.count(UnitName)); - UnitBitValue |= FUNameToBitsMap[UnitName]; +ResourceVector +DFAPacketizerEmitter::getResourcesForItinerary(Record *Itinerary) { + ResourceVector Resources; + assert(Itinerary); + for (Record *StageDef : Itinerary->getValueAsListOfDefs("Stages")) { + uint64_t StageResources = 0; + for (Record *Unit : StageDef->getValueAsListOfDefs("Units")) { + StageResources |= FUNameToBitsMap[Unit->getName()]; } - - if (UnitBitValue != 0) - UnitBits.push_back(UnitBitValue); - - while (dbglen <= 64) { // line up bits dbgs - dbglen += 8; - LLVM_DEBUG(dbgs() << "\t"); - } - LLVM_DEBUG(dbgs() << " (bits: 0x" << Twine::utohexstr(UnitBitValue) - << ")\n"); + if (StageResources != 0) + Resources.push_back(StageResources); } - - if (!UnitBits.empty()) - allInsnClasses.push_back(UnitBits); - - LLVM_DEBUG({ - dbgs() << " "; - dbgsInsnClass(UnitBits); - dbgs() << "\n"; - }); - - return NStages; + return Resources; } -// -// collectAllInsnClasses - Populate allInsnClasses which is a set of units -// used in each stage. -// -int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName, - std::vector &ProcItinList, - std::map &FUNameToBitsMap, - std::vector &ItinDataList, - int &maxStages, - raw_ostream &OS) { - // Collect all instruction classes. - unsigned M = ItinDataList.size(); - - int numInsnClasses = 0; - LLVM_DEBUG(dbgs() << "-------------------------------------------------------" - "----------------------\n" - << "collectAllInsnClasses " << ProcName << " (" << M - << " classes)\n"); - - // Collect stages for each instruction class for all itinerary data - for (unsigned j = 0; j < M; j++) { - Record *ItinData = ItinDataList[j]; - int NStages = collectOneInsnClass(ProcName, ProcItinList, - FUNameToBitsMap, ItinData, OS); - if (NStages > maxStages) { - maxStages = NStages; +void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx, + const RecVec &Itineraries) { + unsigned Idx = 0; + for (Record *Itinerary : Itineraries) { + if (!Itinerary) { + ScheduleClasses.push_back({ItineraryIdx, Idx++, 0, ResourceVector{}}); + continue; } - numInsnClasses++; + ResourceVector Resources = getResourcesForItinerary(Itinerary); + ScheduleClasses.push_back( + {ItineraryIdx, Idx++, UniqueResources.insert(Resources), Resources}); } - return numInsnClasses; } // @@ -393,19 +211,17 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; OS << "namespace llvm {\n"; - OS << "\n// Input format:\n"; - OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS - << "\t// maximum AND'ed resource terms\n"; - OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES - << "\t// maximum resource bits in one term\n"; + CodeGenTarget CGT(Records); + CodeGenSchedModels CGS(Records, CGT); - // Collect processor iteraries. - std::vector ProcItinList = - Records.getAllDerivedDefinitions("ProcessorItineraries"); - - std::unordered_map> ItinsByNamespace; - for (Record *R : ProcItinList) - ItinsByNamespace[R->getValueAsString("PacketizerNamespace")].push_back(R); + std::unordered_map> + ItinsByNamespace; + for (const CodeGenProcModel &ProcModel : CGS.procModels()) { + if (ProcModel.hasItineraries()) { + auto NS = ProcModel.ItinsDef->getValueAsString("PacketizerNamespace"); + ItinsByNamespace[NS].push_back(&ProcModel); + } + } for (auto &KV : ItinsByNamespace) emitForItineraries(OS, KV.second, KV.first); @@ -413,80 +229,68 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { } void DFAPacketizerEmitter::emitForItineraries( - raw_ostream &OS, std::vector &ProcItinList, + raw_ostream &OS, std::vector &ProcModels, std::string DFAName) { - // - // Collect the Functional units. - // - std::map FUNameToBitsMap; - int maxResources = 0; - collectAllFuncUnits(ProcItinList, - FUNameToBitsMap, maxResources, OS); + OS << "} // end namespace llvm\n\n"; + OS << "namespace {\n"; + collectAllFuncUnits(ProcModels); + collectAllComboFuncs(Records.getAllDerivedDefinitions("ComboFuncUnits")); - // - // Collect the Combo Functional units. - // - std::map ComboBitToBitsMap; - std::vector ComboFuncList = - Records.getAllDerivedDefinitions("ComboFuncUnits"); - collectAllComboFuncs(ComboFuncList, FUNameToBitsMap, ComboBitToBitsMap, OS); - - // // Collect the itineraries. - // - int maxStages = 0; - int numInsnClasses = 0; - for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) { - Record *Proc = ProcItinList[i]; - - // Get processor itinerary name. - const std::string &ProcName = Proc->getName(); - - // Skip default. - if (ProcName == "NoItineraries") - continue; - - // Sanity check for at least one instruction itinerary class. - unsigned NItinClasses = - Records.getAllDerivedDefinitions("InstrItinClass").size(); - if (NItinClasses == 0) - return; - - // Get itinerary data list. - std::vector ItinDataList = Proc->getValueAsListOfDefs("IID"); - - // Collect all instruction classes - numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList, - FUNameToBitsMap, ItinDataList, maxStages, OS); + DenseMap ProcModelStartIdx; + for (const CodeGenProcModel *Model : ProcModels) { + assert(Model->hasItineraries()); + ProcModelStartIdx[Model] = ScheduleClasses.size(); + createScheduleClasses(Model->Index, Model->ItinDefList); } + // Output the mapping from ScheduleClass to ResourcesIdx. + unsigned Idx = 0; + OS << "unsigned " << TargetName << DFAName << "ResourceIndices[] = {"; + for (const ScheduleClass &SC : ScheduleClasses) { + if (Idx++ % 32 == 0) + OS << "\n "; + OS << SC.ResourcesIdx << ", "; + } + OS << "\n};\n\n"; + + // And the mapping from Itinerary index into the previous table. + OS << "unsigned " << TargetName << DFAName + << "ProcResourceIndexStart[] = {\n"; + OS << " 0, // NoSchedModel\n"; + for (const CodeGenProcModel *Model : ProcModels) { + OS << " " << ProcModelStartIdx[Model] << ", // " << Model->ModelName + << "\n"; + } + OS << ScheduleClasses.size() << "\n};\n\n"; + // The type of a state in the nondeterministic automaton we're defining. - using NfaStateTy = unsigned; + using NfaStateTy = uint64_t; // Given a resource state, return all resource states by applying // InsnClass. - auto applyInsnClass = [&](ArrayRef InsnClass, - NfaStateTy State) -> std::deque { - std::deque V(1, State); + auto applyInsnClass = [&](const ResourceVector &InsnClass, + NfaStateTy State) -> std::deque { + std::deque V(1, State); // Apply every stage in the class individually. - for (unsigned Stage : InsnClass) { + for (NfaStateTy Stage : InsnClass) { // Apply this stage to every existing member of V in turn. size_t Sz = V.size(); for (unsigned I = 0; I < Sz; ++I) { - unsigned S = V.front(); + NfaStateTy S = V.front(); V.pop_front(); // For this stage, state combination, try all possible resources. for (unsigned J = 0; J < DFA_MAX_RESOURCES; ++J) { - unsigned ResourceMask = 1U << J; + NfaStateTy ResourceMask = 1ULL << J; if ((ResourceMask & Stage) == 0) // This resource isn't required by this stage. continue; - unsigned Combo = ComboBitToBitsMap[ResourceMask]; + NfaStateTy Combo = ComboBitToBitsMap[ResourceMask]; if (Combo && ((~S & Combo) != Combo)) // This combo units bits are not available. continue; - unsigned ResultingResourceState = S | ResourceMask | Combo; + NfaStateTy ResultingResourceState = S | ResourceMask | Combo; if (ResultingResourceState == S) continue; V.push_back(ResultingResourceState); @@ -499,9 +303,9 @@ void DFAPacketizerEmitter::emitForItineraries( // Given a resource state, return a quick (conservative) guess as to whether // InsnClass can be applied. This is a filter for the more heavyweight // applyInsnClass. - auto canApplyInsnClass = [](ArrayRef InsnClass, + auto canApplyInsnClass = [](const ResourceVector &InsnClass, NfaStateTy State) -> bool { - for (unsigned Resources : InsnClass) { + for (NfaStateTy Resources : InsnClass) { if ((State | Resources) == State) return false; } @@ -515,20 +319,18 @@ void DFAPacketizerEmitter::emitForItineraries( while (!Worklist.empty()) { NfaStateTy State = Worklist.front(); Worklist.pop_front(); - for (unsigned i = 0; i < allInsnClasses.size(); i++) { - const std::vector &InsnClass = allInsnClasses[i]; - if (!canApplyInsnClass(InsnClass, State)) + for (const ResourceVector &Resources : UniqueResources) { + if (!canApplyInsnClass(Resources, State)) continue; - for (unsigned NewState : applyInsnClass(InsnClass, State)) { + unsigned ResourcesID = UniqueResources.idFor(Resources); + for (uint64_t NewState : applyInsnClass(Resources, State)) { if (SeenStates.emplace(NewState).second) Worklist.emplace_back(NewState); - Emitter.addTransition(State, NewState, getDFAInsnInput(InsnClass)); + Emitter.addTransition(State, NewState, ResourcesID); } } } - OS << "} // end namespace llvm\n\n"; - OS << "namespace {\n"; std::string TargetAndDFAName = TargetName + DFAName; Emitter.emit(TargetAndDFAName, OS); OS << "} // end anonymous namespace\n\n"; @@ -541,7 +343,13 @@ void DFAPacketizerEmitter::emitForItineraries( << " static Automaton A(ArrayRef<" << TargetAndDFAName << "Transition>(" << TargetAndDFAName << "Transitions), " << TargetAndDFAName << "TransitionInfo);\n" - << " return new DFAPacketizer(IID, A);\n" + << " unsigned ProcResIdxStart = " << TargetAndDFAName + << "ProcResourceIndexStart[IID->SchedModel.ProcID];\n" + << " unsigned ProcResIdxNum = " << TargetAndDFAName + << "ProcResourceIndexStart[IID->SchedModel.ProcID + 1] - " + "ProcResIdxStart;\n" + << " return new DFAPacketizer(IID, A, {&" << TargetAndDFAName + << "ResourceIndices[ProcResIdxStart], ProcResIdxNum});\n" << "\n}\n\n"; }