From b46557292c140bc8c65ec47048b6a4de726e6d64 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Sat, 21 Nov 2015 20:00:45 +0000 Subject: [PATCH] Hexagon V60/HVX DFA scheduler support Extended DFA tablegen to: - added "-debug-only dfa-emitter" support to llvm-tblgen - defined CVI_PIPE* resources for the V60 vector coprocessor - allow specification of multiple required resources - supports ANDs of ORs - e.g. [SLOT2, SLOT3], [CVI_MPY0, CVI_MPY1] means: (SLOT2 OR SLOT3) AND (CVI_MPY0 OR CVI_MPY1) - added support for combo resources - allows specifying ORs of ANDs - e.g. [CVI_XLSHF, CVI_MPY01] means: (CVI_XLANE AND CVI_SHIFT) OR (CVI_MPY0 AND CVI_MPY1) - increased DFA input size from 32-bit to 64-bit - allows for a maximum of 4 AND'ed terms of 16 resources - supported expressions now include: expression => term [AND term] [AND term] [AND term] term => resource [OR resource]* resource => one_resource | combo_resource combo_resource => (one_resource [AND one_resource]*) Author: Dan Palermo kparzysz: Verified AMDGPU codegen to be unchanged on all llc tests, except those dealing with instruction encodings. Reapply the previous patch, this time without circular dependencies. llvm-svn: 253793 --- llvm/include/llvm/CodeGen/DFAPacketizer.h | 18 +- llvm/include/llvm/CodeGen/DFAPacketizerDefs.h | 63 ++ llvm/include/llvm/Target/TargetItinerary.td | 16 + llvm/lib/CodeGen/DFAPacketizer.cpp | 36 +- llvm/lib/Target/Hexagon/HexagonScheduleV60.td | 9 + llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 648 +++++++++++++++--- 6 files changed, 669 insertions(+), 121 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/DFAPacketizerDefs.h diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h index 791b66e14292..69ea74d3d01b 100644 --- a/llvm/include/llvm/CodeGen/DFAPacketizer.h +++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h @@ -27,6 +27,7 @@ #define LLVM_CODEGEN_DFAPACKETIZER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/DFAPacketizerDefs.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include @@ -42,20 +43,21 @@ class SUnit; class DFAPacketizer { private: - typedef std::pair UnsignPair; + typedef std::pair UnsignPair; + const InstrItineraryData *InstrItins; int CurrentState; - const int (*DFAStateInputTable)[2]; + const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; // CachedTable is a map from to ToState. DenseMap CachedTable; // ReadTable - Read the DFA transition table and update CachedTable. - void ReadTable(unsigned int state); + void ReadTable(unsigned state); public: - DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET); // Reset the current state to make all resources available. @@ -63,6 +65,14 @@ public: CurrentState = 0; } + // getInsnInput - Return the DFAInput for an instruction class. + DFAInput getInsnInput(unsigned InsnClass); + + // getInsnInput - Return the DFAInput for an instruction class input vector. + static DFAInput getInsnInput(const std::vector &InsnClass) { + return getDFAInsnInput(InsnClass); + } + // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. bool canReserveResources(const llvm::MCInstrDesc *MID); diff --git a/llvm/include/llvm/CodeGen/DFAPacketizerDefs.h b/llvm/include/llvm/CodeGen/DFAPacketizerDefs.h new file mode 100644 index 000000000000..483a5beae8d3 --- /dev/null +++ b/llvm/include/llvm/CodeGen/DFAPacketizerDefs.h @@ -0,0 +1,63 @@ +//=- llvm/CodeGen/DFAPacketizerDefs.h - DFA Packetizer for VLIW ---*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Common definitions used by TableGen and the DFAPacketizer in CodeGen. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_DFAPACKETIZERDEFS_H +#define LLVM_CODEGEN_DFAPACKETIZERDEFS_H + +#include + +namespace llvm { + +// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. +// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. +// +// e.g. terms x resource bit combinations that fit in uint32_t: +// 4 terms x 8 bits = 32 bits +// 3 terms x 10 bits = 30 bits +// 2 terms x 16 bits = 32 bits +// +// e.g. terms x resource bit combinations that fit in uint64_t: +// 8 terms x 8 bits = 64 bits +// 7 terms x 9 bits = 63 bits +// 6 terms x 10 bits = 60 bits +// 5 terms x 12 bits = 60 bits +// 4 terms x 16 bits = 64 bits <--- current +// 3 terms x 21 bits = 63 bits +// 2 terms x 32 bits = 64 bits +// +#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. +#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. + +typedef uint64_t DFAInput; +typedef int64_t DFAStateInput; +#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. + +namespace { + DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; + } + + /// Return the DFAInput for an instruction class input vector. + /// This function is used in both DFAPacketizer.cpp and in + /// DFAPacketizerEmitter.cpp. + DFAInput getDFAInsnInput(const std::vector &InsnClass) { + DFAInput InsnInput = 0; + assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; + } +} + +} + +#endif diff --git a/llvm/include/llvm/Target/TargetItinerary.td b/llvm/include/llvm/Target/TargetItinerary.td index cc74006dc9fe..a37bbf2474c5 100644 --- a/llvm/include/llvm/Target/TargetItinerary.td +++ b/llvm/include/llvm/Target/TargetItinerary.td @@ -134,3 +134,19 @@ class ProcessorItineraries fu, list bp, // info. Subtargets using NoItineraries can bypass the scheduler's // expensive HazardRecognizer because no reservation table is needed. def NoItineraries : ProcessorItineraries<[], [], []>; + +//===----------------------------------------------------------------------===// +// Combo Function Unit data - This is a map of combo function unit names to +// the list of functional units that are included in the combination. +// +class ComboFuncData funclist> { + FuncUnit TheComboFunc = ComboFunc; + list FuncList = funclist; +} + +//===----------------------------------------------------------------------===// +// Combo Function Units - This is a list of all combo function unit data. +class ComboFuncUnits cfd> { + list CFD = cfd; +} + diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp index ee50f972aa7b..64b7f481df41 100644 --- a/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -31,10 +31,17 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, + const DFAStateInput (*SIT)[2], const unsigned *SET): InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) {} + DFAStateEntryTable(SET) { + // Make sure DFA types are large enough for the number of terms & resources. + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); +} // @@ -60,26 +67,37 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } +// +// getInsnInput - Return the DFAInput for an instruction class. +// +DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { + // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. + DFAInput InsnInput = 0; + unsigned i = 0; + for (const InstrStage *IS = InstrItins->beginStage(InsnClass), + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); + assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + } + return InsnInput; +} // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); return (CachedTable.count(StateTrans) != 0); } - // reserveResources - Reserve the resources occupied by a MCInstrDesc and // change the current state to reflect that change. void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); assert(CachedTable.count(StateTrans) != 0); CurrentState = CachedTable[StateTrans]; diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV60.td b/llvm/lib/Target/Hexagon/HexagonScheduleV60.td index 7cda4a743199..2ccff8242a47 100644 --- a/llvm/lib/Target/Hexagon/HexagonScheduleV60.td +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV60.td @@ -20,6 +20,15 @@ def CVI_XLSHF : FuncUnit; def CVI_MPY01 : FuncUnit; def CVI_ALL : FuncUnit; +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData, + ComboFuncData, + ComboFuncData + ]>; + // Note: When adding additional vector scheduling classes, add the // corresponding methods to the class HexagonInstrInfo. def CVI_VA : InstrItinClass; diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 5060b6e9ce7c..fc70e97d7e21 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -15,16 +15,36 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "dfa-emitter" + #include "CodeGenTarget.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/DFAPacketizerDefs.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" +#include "llvm/Support/Debug.h" #include #include #include +#include using namespace llvm; +// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". +// +// dbgsInsnClass - When debugging, print instruction class stages. +// +void dbgsInsnClass(const std::vector &InsnClass); +// +// dbgsStateInfo - When debugging, print the set of state info. +// +void dbgsStateInfo(const std::set &stateInfo); +// +// dbgsIndent - When debugging, indent by the specified amount. +// +void dbgsIndent(unsigned indent); + // // class DFAPacketizerEmitter: class that generates and prints out the DFA // for resource tracking. @@ -37,20 +57,48 @@ private: // allInsnClasses is the set of all possible resources consumed by an // InstrStage. // - DenseSet allInsnClasses; + std::vector> allInsnClasses; RecordKeeper &Records; public: DFAPacketizerEmitter(RecordKeeper &R); // - // collectAllInsnClasses: Populate allInsnClasses which is a set of units + // collectAllFuncUnits - Construct a map of function unit names to bits. + // + int collectAllFuncUnits(std::vector &ProcItinList, + std::map &FUNameToBitsMap, + int &maxResources, + raw_ostream &OS); + + // + // collectAllComboFuncs - Construct a map from a combo function unit bit to + // the bits of all included functional units. + // + int collectAllComboFuncs(std::vector &ComboFuncList, + std::map &FUNameToBitsMap, + std::map &ComboBitToBitsMap, + raw_ostream &OS); + + // + // collectOneInsnClass - Populate allInsnClasses with one instruction class. + // + int collectOneInsnClass(const std::string &ProcName, + std::vector &ProcItinList, + std::map &FUNameToBitsMap, + Record *ItinData, + raw_ostream &OS); + + // + // collectAllInsnClasses - Populate allInsnClasses which is a set of units // used in each stage. // - void collectAllInsnClasses(const std::string &Name, - Record *ItinData, - unsigned &NStages, - raw_ostream &OS); + int collectAllInsnClasses(const std::string &ProcName, + std::vector &ProcItinList, + std::map &FUNameToBitsMap, + std::vector &ItinDataList, + int &maxStages, + raw_ostream &OS); void run(raw_ostream &OS); }; @@ -87,7 +135,7 @@ class State { const int stateNum; mutable bool isInitial; mutable std::set stateInfo; - typedef std::map TransitionMap; + typedef std::map, const State *> TransitionMap; mutable TransitionMap Transitions; State(); @@ -97,28 +145,47 @@ class State { } // - // canAddInsnClass - Returns true if an instruction of type InsnClass is a - // valid transition from this state, i.e., can an instruction of type InsnClass - // be added to the packet represented by this state. + // canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass + // may be a valid transition from this state i.e., can an instruction of type + // InsnClass be added to the packet represented by this state. // - // PossibleStates is the set of valid resource states that ensue from valid - // transitions. + // Note that for multiple stages, this quick check does not take into account + // any possible resource competition between the stages themselves. That is + // enforced in AddInsnClassStages which checks the cross product of all + // stages for resource availability (which is a more involved check). // - bool canAddInsnClass(unsigned InsnClass) const; + bool canMaybeAddInsnClass(std::vector &InsnClass, + std::map &ComboBitToBitsMap) const; // // AddInsnClass - Return all combinations of resource reservation // which are possible from this state (PossibleStates). // - void AddInsnClass(unsigned InsnClass, std::set &PossibleStates) const; - // + // PossibleStates is the set of valid resource states that ensue from valid + // transitions. + // + void AddInsnClass(std::vector &InsnClass, + std::map &ComboBitToBitsMap, + std::set &PossibleStates) const; + // + // AddInsnClassStages - Return all combinations of resource reservation + // resulting from the cross product of all stages for this InsnClass + // which are possible from this state (PossibleStates). + // + void AddInsnClassStages(std::vector &InsnClass, + std::map &ComboBitToBitsMap, + unsigned chkstage, unsigned numstages, + unsigned prevState, unsigned origState, + DenseSet &VisitedResourceStates, + std::set &PossibleStates) const; + // // addTransition - Add a transition from this state given the input InsnClass // - void addTransition(unsigned InsnClass, const State *To) const; + void addTransition(std::vector InsnClass, const State *To) const; // // hasTransition - Returns true if there is a transition from this state // given the input InsnClass // - bool hasTransition(unsigned InsnClass) const; + bool hasTransition(std::vector InsnClass) const; }; } // End anonymous namespace. @@ -144,10 +211,52 @@ public: // // writeTable: Print out a table representing the DFA. // - void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName); + void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName, + int numInsnClasses = 0, + int maxResources = 0, int numCombos = 0, int maxStages = 0); }; } // End anonymous namespace. +// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter". +// +// dbgsInsnClass - When debugging, print instruction class stages. +// +void dbgsInsnClass(const std::vector &InsnClass) { + DEBUG(dbgs() << "InsnClass: "); + for (unsigned i = 0; i < InsnClass.size(); ++i) { + if (i > 0) { + DEBUG(dbgs() << ", "); + } + DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i])); + } + DFAInput InsnInput = getDFAInsnInput(InsnClass); + DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")"); +} + +// +// dbgsStateInfo - When debugging, print the set of state info. +// +void dbgsStateInfo(const std::set &stateInfo) { + DEBUG(dbgs() << "StateInfo: "); + unsigned i = 0; + for (std::set::iterator SI = stateInfo.begin(); + SI != stateInfo.end(); ++SI, ++i) { + unsigned thisState = *SI; + if (i > 0) { + DEBUG(dbgs() << ", "); + } + DEBUG(dbgs() << "0x" << utohexstr(thisState)); + } +} + +// +// dbgsIndent - When debugging, indent by the specified amount. +// +void dbgsIndent(unsigned indent) { + for (unsigned i = 0; i < indent; ++i) { + DEBUG(dbgs() << " "); + } +} // // Constructors and destructors for State and DFA @@ -157,10 +266,11 @@ State::State() : DFA::DFA(): currentState(nullptr) {} -// +// // addTransition - Add a transition from this state given the input InsnClass // -void State::addTransition(unsigned InsnClass, const State *To) const { +void State::addTransition(std::vector InsnClass, const State *To) + const { assert(!Transitions.count(InsnClass) && "Cannot have multiple transitions for the same input"); Transitions[InsnClass] = To; @@ -170,7 +280,7 @@ void State::addTransition(unsigned InsnClass, const State *To) const { // hasTransition - Returns true if there is a transition from this state // given the input InsnClass // -bool State::hasTransition(unsigned InsnClass) const { +bool State::hasTransition(std::vector InsnClass) const { return Transitions.count(InsnClass) > 0; } @@ -178,61 +288,167 @@ bool State::hasTransition(unsigned InsnClass) const { // AddInsnClass - Return all combinations of resource reservation // which are possible from this state (PossibleStates). // -void State::AddInsnClass(unsigned InsnClass, - std::set &PossibleStates) const { +// PossibleStates is the set of valid resource states that ensue from valid +// transitions. +// +void State::AddInsnClass(std::vector &InsnClass, + std::map &ComboBitToBitsMap, + std::set &PossibleStates) const { // // Iterate over all resource states in currentState. // + unsigned numstages = InsnClass.size(); + assert((numstages > 0) && "InsnClass has no stages"); for (std::set::iterator SI = stateInfo.begin(); SI != stateInfo.end(); ++SI) { unsigned thisState = *SI; - // - // Iterate over all possible resources used in InsnClass. - // For ex: for InsnClass = 0x11, all resources = {0x01, 0x10}. - // - DenseSet VisitedResourceStates; - for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) { - if ((0x1 << j) & InsnClass) { - // - // For each possible resource used in InsnClass, generate the - // resource state if that resource was used. - // - unsigned ResultingResourceState = thisState | (0x1 << j); + + DEBUG(dbgs() << " thisState: 0x" << utohexstr(thisState) << "\n"); + AddInsnClassStages(InsnClass, ComboBitToBitsMap, + numstages - 1, numstages, + thisState, thisState, + VisitedResourceStates, PossibleStates); + } +} + +void State::AddInsnClassStages(std::vector &InsnClass, + std::map &ComboBitToBitsMap, + unsigned chkstage, unsigned numstages, + unsigned prevState, unsigned origState, + DenseSet &VisitedResourceStates, + std::set &PossibleStates) const { + + assert((chkstage < numstages) && "AddInsnClassStages: stage out of range"); + unsigned thisStage = InsnClass[chkstage]; + + dbgsIndent((1 + numstages - chkstage) << 1); + DEBUG(dbgs() << "AddInsnClassStages " << chkstage + << " (0x" << utohexstr(thisStage) << ") from "); + dbgsInsnClass(InsnClass); + DEBUG(dbgs() << "\n"); + + // + // Iterate over all possible resources used in thisStage. + // For ex: for thisStage = 0x11, all resources = {0x01, 0x10}. + // + for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) { + unsigned resourceMask = (0x1 << j); + if (resourceMask & thisStage) { + unsigned combo = ComboBitToBitsMap[resourceMask]; + if (combo && ((~prevState & combo) != combo)) { + DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState) + << " - combo op 0x" << utohexstr(resourceMask) + << " (0x" << utohexstr(combo) <<") cannot be scheduled\n"); + continue; + } + // + // For each possible resource used in thisStage, generate the + // resource state if that resource was used. + // + unsigned ResultingResourceState = prevState | resourceMask | combo; + dbgsIndent((2 + numstages - chkstage) << 1); + DEBUG(dbgs() << "0x" << utohexstr(prevState) + << " | 0x" << utohexstr(resourceMask)); + if (combo) { + DEBUG(dbgs() << " | 0x" << utohexstr(combo)); + } + DEBUG(dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " "); + + // + // If this is the final stage for this class + // + if (chkstage == 0) { // // Check if the resulting resource state can be accommodated in this // packet. - // We compute ResultingResourceState OR thisState. - // If the result of the OR is different than thisState, it implies + // We compute resource OR prevState (originally started as origState). + // If the result of the OR is different than origState, it implies // that there is at least one resource that can be used to schedule - // InsnClass in the current packet. + // thisStage in the current packet. // Insert ResultingResourceState into PossibleStates only if we haven't // processed ResultingResourceState before. // - if ((ResultingResourceState != thisState) && - (VisitedResourceStates.count(ResultingResourceState) == 0)) { - VisitedResourceStates.insert(ResultingResourceState); - PossibleStates.insert(ResultingResourceState); + if (ResultingResourceState != prevState) { + if (VisitedResourceStates.count(ResultingResourceState) == 0) { + VisitedResourceStates.insert(ResultingResourceState); + PossibleStates.insert(ResultingResourceState); + DEBUG(dbgs() << "\tResultingResourceState: 0x" + << utohexstr(ResultingResourceState) << "\n"); + } else { + DEBUG(dbgs() << "\tSkipped Add - state already seen\n"); + } + } else { + DEBUG(dbgs() << "\tSkipped Add - no final resources available\n"); + } + } else { + // + // If the current resource can be accommodated, check the next + // stage in InsnClass for available resources. + // + if (ResultingResourceState != prevState) { + DEBUG(dbgs() << "\n"); + AddInsnClassStages(InsnClass, ComboBitToBitsMap, + chkstage - 1, numstages, + ResultingResourceState, origState, + VisitedResourceStates, PossibleStates); + } else { + DEBUG(dbgs() << "\tSkipped Add - no resources available\n"); } } } } - } // -// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a -// valid transition from this state i.e., can an instruction of type InsnClass -// be added to the packet represented by this state. +// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass +// may be a valid transition from this state i.e., can an instruction of type +// InsnClass be added to the packet represented by this state. // -bool State::canAddInsnClass(unsigned InsnClass) const { +// Note that this routine is performing conservative checks that can be +// quickly executed acting as a filter before calling AddInsnClassStages. +// Any cases allowed through here will be caught later in AddInsnClassStages +// which performs the more expensive exact check. +// +bool State::canMaybeAddInsnClass(std::vector &InsnClass, + std::map &ComboBitToBitsMap) const { for (std::set::const_iterator SI = stateInfo.begin(); SI != stateInfo.end(); ++SI) { - if (~*SI & InsnClass) + + // Check to see if all required resources are available. + bool available = true; + + // Inspect each stage independently. + // note: This is a conservative check as we aren't checking for + // possible resource competition between the stages themselves + // The full cross product is examined later in AddInsnClass. + for (unsigned i = 0; i < InsnClass.size(); ++i) { + unsigned resources = *SI; + if ((~resources & InsnClass[i]) == 0) { + available = false; + break; + } + // Make sure _all_ resources for a combo function are available. + // note: This is a quick conservative check as it won't catch an + // unscheduleable combo if this stage is an OR expression + // containing a combo. + // These cases are caught later in AddInsnClass. + unsigned combo = ComboBitToBitsMap[InsnClass[i]]; + if (combo && ((~resources & combo) != combo)) { + DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources) + << " - combo op 0x" << utohexstr(InsnClass[i]) + << " (0x" << utohexstr(combo) <<") cannot be scheduled\n"); + available = false; + break; + } + } + + if (available) { return true; + } } return false; } @@ -244,7 +460,6 @@ const State &DFA::newState() { return *IterPair.first; } - int State::currentStateNum = 0; DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): @@ -263,57 +478,100 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R): // the ith state. // // -void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) { - static const std::string SentinelEntry = "{-1, -1}"; - DFA::StateSet::iterator SI = states.begin(); +void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName, + int numInsnClasses, + int maxResources, int numCombos, int maxStages) { + + unsigned numStates = states.size(); + + DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"); + DEBUG(dbgs() << "writeTableAndAPI\n"); + DEBUG(dbgs() << "Total states: " << numStates << "\n"); + + OS << "namespace llvm {\n"; + + OS << "\n// Input format:\n"; + OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS + << "\t// maximum AND'ed resource terms\n"; + OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES + << "\t// maximum resource bits in one term\n"; + + OS << "\n// " << TargetName << "DFAStateInputTable[][2] = " + << "pairs of for all valid\n"; + OS << "// transitions.\n"; + OS << "// " << numStates << "\tstates\n"; + OS << "// " << numInsnClasses << "\tinstruction classes\n"; + OS << "// " << maxResources << "\tresources max\n"; + OS << "// " << numCombos << "\tcombo resources\n"; + OS << "// " << maxStages << "\tstages max\n"; + OS << "const " << DFA_TBLTYPE << " " + << TargetName << "DFAStateInputTable[][2] = {\n"; + // This table provides a map to the beginning of the transitions for State s // in DFAStateInputTable. - std::vector StateEntry(states.size()); - - OS << "namespace llvm {\n\n"; - OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n"; + std::vector StateEntry(numStates+1); + static const std::string SentinelEntry = "{-1, -1}"; // Tracks the total valid transitions encountered so far. It is used // to construct the StateEntry table. int ValidTransitions = 0; - for (unsigned i = 0; i < states.size(); ++i, ++SI) { + DFA::StateSet::iterator SI = states.begin(); + for (unsigned i = 0; i < numStates; ++i, ++SI) { assert ((SI->stateNum == (int) i) && "Mismatch in state numbers"); StateEntry[i] = ValidTransitions; for (State::TransitionMap::iterator II = SI->Transitions.begin(), IE = SI->Transitions.end(); II != IE; ++II) { - OS << "{" << II->first << ", " + OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", " << II->second->stateNum - << "}, "; + << "},\t"; } ValidTransitions += SI->Transitions.size(); // If there are no valid transitions from this stage, we need a sentinel // transition. if (ValidTransitions == StateEntry[i]) { - OS << SentinelEntry << ","; + OS << SentinelEntry << ",\t"; ++ValidTransitions; } + OS << " // state " << i << ": " << StateEntry[i]; + if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition. + OS << "-" << (ValidTransitions-1); + } OS << "\n"; } // Print out a sentinel entry at the end of the StateInputTable. This is // needed to iterate over StateInputTable in DFAPacketizer::ReadTable() - OS << SentinelEntry << "\n"; - + OS << SentinelEntry << "\t"; + OS << " // state " << numStates << ": " << ValidTransitions; + OS << "\n"; + OS << "};\n\n"; + OS << "// " << TargetName << "DFAStateEntryTable[i] = " + << "Index of the first entry in DFAStateInputTable for\n"; + OS << "// " + << "the ith state.\n"; + OS << "// " << numStates << " states\n"; OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n"; // Multiply i by 2 since each entry in DFAStateInputTable is a set of // two numbers. - for (unsigned i = 0; i < states.size(); ++i) + unsigned lastState = 0; + for (unsigned i = 0; i < numStates; ++i) { + if (i && ((i % 10) == 0)) { + lastState = i-1; + OS << " // states " << (i-10) << ":" << lastState << "\n"; + } OS << StateEntry[i] << ", "; + } // Print out the index to the sentinel entry in StateInputTable OS << ValidTransitions << ", "; + OS << " // states " << (lastState+1) << ":" << numStates << "\n"; - OS << "\n};\n"; + OS << "};\n"; OS << "} // namespace\n"; @@ -332,40 +590,123 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) { // -// collectAllInsnClasses - Populate allInsnClasses which is a set of units -// used in each stage. +// collectAllFuncUnits - Construct a map of function unit names to bits. // -void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name, - Record *ItinData, - unsigned &NStages, - raw_ostream &OS) { - // Collect processor itineraries. - std::vector ProcItinList = - Records.getAllDerivedDefinitions("ProcessorItineraries"); - - // If just no itinerary then don't bother. - if (ProcItinList.size() < 2) - return; - std::map NameToBitsMap; +int DFAPacketizerEmitter::collectAllFuncUnits( + std::vector &ProcItinList, + std::map &FUNameToBitsMap, + int &maxFUs, + raw_ostream &OS) { + DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"); + DEBUG(dbgs() << "collectAllFuncUnits"); + DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n"); + int totalFUs = 0; // Parse functional units for all the itineraries. for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) { Record *Proc = ProcItinList[i]; + const std::string &ProcName = Proc->getName(); std::vector FUs = Proc->getValueAsListOfDefs("FU"); + DEBUG(dbgs() << " FU:" << i + << " (" << FUs.size() << " FUs) " + << ProcName); + + // Convert macros to bits for each stage. - for (unsigned i = 0, N = FUs.size(); i < N; ++i) - NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i); + unsigned numFUs = FUs.size(); + for (unsigned j = 0; j < numFUs; ++j) { + assert ((j < DFA_MAX_RESOURCES) && + "Exceeded maximum number of representable resources"); + unsigned FuncResources = (unsigned) (1U << j); + FUNameToBitsMap[FUs[j]->getName()] = FuncResources; + DEBUG(dbgs() << " " << FUs[j]->getName() + << ":0x" << utohexstr(FuncResources)); + } + if (((int) numFUs) > maxFUs) { + maxFUs = numFUs; + } + totalFUs += numFUs; + DEBUG(dbgs() << "\n"); } + return totalFUs; +} + +// +// collectAllComboFuncs - Construct a map from a combo function unit bit to +// the bits of all included functional units. +// +int DFAPacketizerEmitter::collectAllComboFuncs( + std::vector &ComboFuncList, + std::map &FUNameToBitsMap, + std::map &ComboBitToBitsMap, + raw_ostream &OS) { + DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"); + DEBUG(dbgs() << "collectAllComboFuncs"); + DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n"); + + int numCombos = 0; + for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) { + Record *Func = ComboFuncList[i]; + const std::string &ProcName = Func->getName(); + std::vector FUs = Func->getValueAsListOfDefs("CFD"); + + DEBUG(dbgs() << " CFD:" << i + << " (" << FUs.size() << " combo FUs) " + << ProcName << "\n"); + + // Convert macros to bits for each stage. + for (unsigned j = 0, N = FUs.size(); j < N; ++j) { + assert ((j < DFA_MAX_RESOURCES) && + "Exceeded maximum number of DFA resources"); + Record *FuncData = FUs[j]; + Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc"); + const std::vector &FuncList = + FuncData->getValueAsListOfDefs("FuncList"); + std::string ComboFuncName = ComboFunc->getName(); + unsigned ComboBit = FUNameToBitsMap[ComboFuncName]; + unsigned ComboResources = ComboBit; + DEBUG(dbgs() << " combo: " << ComboFuncName + << ":0x" << utohexstr(ComboResources) << "\n"); + for (unsigned k = 0, M = FuncList.size(); k < M; ++k) { + std::string FuncName = FuncList[k]->getName(); + unsigned FuncResources = FUNameToBitsMap[FuncName]; + DEBUG(dbgs() << " " << FuncName + << ":0x" << utohexstr(FuncResources) << "\n"); + ComboResources |= FuncResources; + } + ComboBitToBitsMap[ComboBit] = ComboResources; + numCombos++; + DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x" + << utohexstr(ComboBit) << " = 0x" + << utohexstr(ComboResources) << "\n"); + } + } + return numCombos; +} + + +// +// collectOneInsnClass - Populate allInsnClasses with one instruction class +// +int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName, + std::vector &ProcItinList, + std::map &FUNameToBitsMap, + Record *ItinData, + raw_ostream &OS) { + // Collect instruction classes. + Record *ItinDef = ItinData->getValueAsDef("TheClass"); const std::vector &StageList = ItinData->getValueAsListOfDefs("Stages"); // The number of stages. - NStages = StageList.size(); + unsigned NStages = StageList.size(); - // For each unit. - unsigned UnitBitValue = 0; + DEBUG(dbgs() << " " << ItinDef->getName() + << "\n"); + + std::vector UnitBits; // Compute the bitwise or of each unit used in this stage. for (unsigned i = 0; i < NStages; ++i) { @@ -375,18 +716,72 @@ void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name, const std::vector &UnitList = Stage->getValueAsListOfDefs("Units"); + DEBUG(dbgs() << " stage:" << i + << " [" << UnitList.size() << " units]:"); + unsigned dbglen = 26; // cursor after stage dbgs + + // Compute the bitwise or of each unit used in this stage. + unsigned UnitBitValue = 0; for (unsigned j = 0, M = UnitList.size(); j < M; ++j) { // Conduct bitwise or. std::string UnitName = UnitList[j]->getName(); - assert(NameToBitsMap.count(UnitName)); - UnitBitValue |= NameToBitsMap[UnitName]; + DEBUG(dbgs() << " " << j << ":" << UnitName); + dbglen += 3 + UnitName.length(); + assert(FUNameToBitsMap.count(UnitName)); + UnitBitValue |= FUNameToBitsMap[UnitName]; } if (UnitBitValue != 0) - allInsnClasses.insert(UnitBitValue); + UnitBits.push_back(UnitBitValue); + + while (dbglen <= 64) { // line up bits dbgs + dbglen += 8; + DEBUG(dbgs() << "\t"); + } + DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n"); } + + if (UnitBits.size() > 0) + allInsnClasses.push_back(UnitBits); + + DEBUG(dbgs() << " "); + dbgsInsnClass(UnitBits); + DEBUG(dbgs() << "\n"); + + return NStages; } +// +// collectAllInsnClasses - Populate allInsnClasses which is a set of units +// used in each stage. +// +int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName, + std::vector &ProcItinList, + std::map &FUNameToBitsMap, + std::vector &ItinDataList, + int &maxStages, + raw_ostream &OS) { + // Collect all instruction classes. + unsigned M = ItinDataList.size(); + + int numInsnClasses = 0; + DEBUG(dbgs() << "-----------------------------------------------------------------------------\n" + << "collectAllInsnClasses " + << ProcName + << " (" << M << " classes)\n"); + + // Collect stages for each instruction class for all itinerary data + for (unsigned j = 0; j < M; j++) { + Record *ItinData = ItinDataList[j]; + int NStages = collectOneInsnClass(ProcName, ProcItinList, + FUNameToBitsMap, ItinData, OS); + if (NStages > maxStages) { + maxStages = NStages; + } + numInsnClasses++; + } + return numInsnClasses; +} // // Run the worklist algorithm to generate the DFA. @@ -398,16 +793,35 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { Records.getAllDerivedDefinitions("ProcessorItineraries"); // - // Collect the instruction classes. + // Collect the Functional units. // + std::map FUNameToBitsMap; + int maxResources = 0; + collectAllFuncUnits(ProcItinList, + FUNameToBitsMap, maxResources, OS); + + // + // Collect the Combo Functional units. + // + std::map ComboBitToBitsMap; + std::vector ComboFuncList = + Records.getAllDerivedDefinitions("ComboFuncUnits"); + int numCombos = collectAllComboFuncs(ComboFuncList, + FUNameToBitsMap, ComboBitToBitsMap, OS); + + // + // Collect the itineraries. + // + int maxStages = 0; + int numInsnClasses = 0; for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) { Record *Proc = ProcItinList[i]; // Get processor itinerary name. - const std::string &Name = Proc->getName(); + const std::string &ProcName = Proc->getName(); // Skip default. - if (Name == "NoItineraries") + if (ProcName == "NoItineraries") continue; // Sanity check for at least one instruction itinerary class. @@ -419,15 +833,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { // Get itinerary data list. std::vector ItinDataList = Proc->getValueAsListOfDefs("IID"); - // Collect instruction classes for all itinerary data. - for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) { - Record *ItinData = ItinDataList[j]; - unsigned NStages; - collectAllInsnClasses(Name, ItinData, NStages, OS); - } + // Collect all instruction classes + numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList, + FUNameToBitsMap, ItinDataList, maxStages, OS); } - // // Run a worklist algorithm to generate the DFA. // @@ -436,6 +846,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { Initial->isInitial = true; Initial->stateInfo.insert(0x0); SmallVector WorkList; +// std::queue WorkList; std::map, const State*> Visited; WorkList.push_back(Initial); @@ -459,9 +870,15 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { // while (!WorkList.empty()) { const State *current = WorkList.pop_back_val(); - for (DenseSet::iterator CI = allInsnClasses.begin(), - CE = allInsnClasses.end(); CI != CE; ++CI) { - unsigned InsnClass = *CI; + DEBUG(dbgs() << "---------------------\n"); + DEBUG(dbgs() << "Processing state: " << current->stateNum << " - "); + dbgsStateInfo(current->stateInfo); + DEBUG(dbgs() << "\n"); + for (unsigned i = 0; i < allInsnClasses.size(); i++) { + std::vector InsnClass = allInsnClasses[i]; + DEBUG(dbgs() << i << " "); + dbgsInsnClass(InsnClass); + DEBUG(dbgs() << "\n"); std::set NewStateResources; // @@ -469,32 +886,47 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) { // and the state can accommodate this InsnClass, create a transition. // if (!current->hasTransition(InsnClass) && - current->canAddInsnClass(InsnClass)) { - const State *NewState; - current->AddInsnClass(InsnClass, NewStateResources); - assert(!NewStateResources.empty() && "New states must be generated"); + current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) { + const State *NewState = NULL; + current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources); + if (NewStateResources.size() == 0) { + DEBUG(dbgs() << " Skipped - no new states generated\n"); + continue; + } + + DEBUG(dbgs() << "\t"); + dbgsStateInfo(NewStateResources); + DEBUG(dbgs() << "\n"); // // If we have seen this state before, then do not create a new state. // - // auto VI = Visited.find(NewStateResources); - if (VI != Visited.end()) + if (VI != Visited.end()) { NewState = VI->second; - else { + DEBUG(dbgs() << "\tFound existing state: " + << NewState->stateNum << " - "); + dbgsStateInfo(NewState->stateInfo); + DEBUG(dbgs() << "\n"); + } else { NewState = &D.newState(); NewState->stateInfo = NewStateResources; Visited[NewStateResources] = NewState; WorkList.push_back(NewState); + DEBUG(dbgs() << "\tAccepted new state: " + << NewState->stateNum << " - "); + dbgsStateInfo(NewState->stateInfo); + DEBUG(dbgs() << "\n"); } - + current->addTransition(InsnClass, NewState); } } } // Print out the table. - D.writeTableAndAPI(OS, TargetName); + D.writeTableAndAPI(OS, TargetName, + numInsnClasses, maxResources, numCombos, maxStages); } namespace llvm {