Hexagon V60/HVX DFA scheduler support

Extended DFA tablegen as follows:
  - added "-debug-only dfa-emitter" support to llvm-tblgen

  - defined CVI_PIPE* resources for the V60 vector coprocessor

  - allowed specification of multiple required resources
    - supports ANDs of ORs
    - e.g. [SLOT2, SLOT3], [CVI_MPY0, CVI_MPY1] means:
           (SLOT2 OR SLOT3) AND (CVI_MPY0 OR CVI_MPY1)

  - added support for combo resources
    - allows specifying ORs of ANDs
    - e.g. [CVI_XLSHF, CVI_MPY01] means:
           (CVI_XLANE AND CVI_SHIFT) OR (CVI_MPY0 AND CVI_MPY1)

  - increased DFA input size from 32-bit to 64-bit
    - allows for a maximum of 4 AND'ed terms of 16 resources

  - supported expressions now include:

    expression     => term [AND term] [AND term] [AND term]
    term           => resource [OR resource]*
    resource       => one_resource | combo_resource
    combo_resource => (one_resource [AND one_resource]*)

Author: Dan Palermo <dpalermo@codeaurora.org>

kparzysz: Verified AMDGPU codegen to be unchanged on all llc
tests, except those dealing with instruction encodings.

Reapply the previous patch, this time without circular dependencies.

llvm-svn: 253793
This commit is contained in:
Krzysztof Parzyszek 2015-11-21 20:00:45 +00:00
parent a5ea5289ff
commit b46557292c
6 changed files with 669 additions and 121 deletions

View File

@ -27,6 +27,7 @@
#define LLVM_CODEGEN_DFAPACKETIZER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/DFAPacketizerDefs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include <map>
@ -42,20 +43,21 @@ class SUnit;
class DFAPacketizer {
private:
typedef std::pair<unsigned, unsigned> UnsignPair;
typedef std::pair<unsigned, DFAInput> UnsignPair;
const InstrItineraryData *InstrItins;
int CurrentState;
const int (*DFAStateInputTable)[2];
const DFAStateInput (*DFAStateInputTable)[2];
const unsigned *DFAStateEntryTable;
// CachedTable is a map from <FromState, Input> to ToState.
DenseMap<UnsignPair, unsigned> CachedTable;
// ReadTable - Read the DFA transition table and update CachedTable.
void ReadTable(unsigned int state);
void ReadTable(unsigned state);
public:
DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2],
const unsigned *SET);
// Reset the current state to make all resources available.
@ -63,6 +65,14 @@ public:
CurrentState = 0;
}
// getInsnInput - Return the DFAInput for an instruction class.
DFAInput getInsnInput(unsigned InsnClass);
// getInsnInput - Return the DFAInput for an instruction class input vector.
static DFAInput getInsnInput(const std::vector<unsigned> &InsnClass) {
return getDFAInsnInput(InsnClass);
}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool canReserveResources(const llvm::MCInstrDesc *MID);

View File

@ -0,0 +1,63 @@
//=- llvm/CodeGen/DFAPacketizerDefs.h - DFA Packetizer for VLIW ---*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Common definitions used by TableGen and the DFAPacketizer in CodeGen.
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_DFAPACKETIZERDEFS_H
#define LLVM_CODEGEN_DFAPACKETIZERDEFS_H
#include <cassert>
#include <cstdint>
#include <vector>
namespace llvm {
// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput.
// This is checked at compile time by the static_asserts below.
//
// e.g. terms x resource bit combinations that fit in uint32_t:
//      4 terms x  8 bits = 32 bits
//      3 terms x 10 bits = 30 bits
//      2 terms x 16 bits = 32 bits
//
// e.g. terms x resource bit combinations that fit in uint64_t:
//      8 terms x  8 bits = 64 bits
//      7 terms x  9 bits = 63 bits
//      6 terms x 10 bits = 60 bits
//      5 terms x 12 bits = 60 bits
//      4 terms x 16 bits = 64 bits <--- current
//      3 terms x 21 bits = 63 bits
//      2 terms x 32 bits = 64 bits
//
#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms.
#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term.

typedef uint64_t DFAInput;
typedef int64_t DFAStateInput;
#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.

// Make sure the DFA types are large enough for the number of terms and
// resources, so a bad combination of the two macros fails the build.
static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)),
              "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
                  (8 * sizeof(DFAStateInput)),
              "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for "
              "DFAStateInput");

// The helpers below are 'inline' rather than members of an anonymous
// namespace: this is a header, and an anonymous namespace would give every
// including translation unit its own private copy (and an unused-function
// warning wherever only one of the helpers is called).

/// Append one term to a DFA input: shift the terms accumulated so far up by
/// DFA_MAX_RESOURCES bits and OR the new term's functional-unit mask into the
/// low bits.
inline DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
  return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}

/// Return the DFAInput for an instruction class input vector.
/// This function is used in both DFAPacketizer.cpp and in
/// DFAPacketizerEmitter.cpp.
///
/// The elements of \p InsnClass are folded in order, so the first element
/// ends up in the most significant occupied term and the last element in the
/// low DFA_MAX_RESOURCES bits. An empty vector yields 0.
inline DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
  DFAInput InsnInput = 0;
  assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
         "Exceeded maximum number of DFA terms");
  for (auto U : InsnClass)
    InsnInput = addDFAFuncUnits(InsnInput, U);
  return InsnInput;
}
} // namespace llvm
#endif

View File

@ -134,3 +134,19 @@ class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
// info. Subtargets using NoItineraries can bypass the scheduler's
// expensive HazardRecognizer because no reservation table is needed.
def NoItineraries : ProcessorItineraries<[], [], []>;
//===----------------------------------------------------------------------===//
// Combo Function Unit data - This is a map of combo function unit names to
// the list of functional units that are included in the combination.
//
// One combo function unit description: pairs a combo functional unit with the
// functional units it is made of. The DFA packetizer emitter reads both fields
// by name ("TheComboFunc" and "FuncList").
class ComboFuncData<FuncUnit ComboFunc, list<FuncUnit> funclist> {
// The combo functional unit being described.
FuncUnit TheComboFunc = ComboFunc;
// The functional units that are included in the combination.
list<FuncUnit> FuncList = funclist;
}
//===----------------------------------------------------------------------===//
// Combo Function Units - This is a list of all combo function unit data.
class ComboFuncUnits<list<ComboFuncData> cfd> {
// The combo function unit descriptions; the DFA packetizer emitter reads this
// list through the field name "CFD".
list<ComboFuncData> CFD = cfd;
}

View File

@ -31,10 +31,17 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
const DFAStateInput (*SIT)[2],
const unsigned *SET):
InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
DFAStateEntryTable(SET) {}
DFAStateEntryTable(SET) {
// Make sure DFA types are large enough for the number of terms & resources.
assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput))
&& "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput))
&& "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
}
//
@ -60,26 +67,37 @@ void DFAPacketizer::ReadTable(unsigned int state) {
DFAStateInputTable[i][1];
}
//
// getInsnInput - Return the DFAInput for an instruction class.
//
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
  // Note: this packing must stay in step with getDFAInsnInput in
  // DFAPacketizerDefs.h, which does the same for input vectors.
  const InstrStage *Stage = InstrItins->beginStage(InsnClass);
  const InstrStage *const StageEnd = InstrItins->endStage(InsnClass);

  // Fold the functional units of every stage of the itinerary class, in
  // order, into a single DFA input value.
  DFAInput Packed = 0;
  unsigned StageIdx = 0;
  while (Stage != StageEnd) {
    Packed = addDFAFuncUnits(Packed, Stage->getUnits());
    assert((StageIdx < DFA_MAX_RESTERMS) &&
           "Exceeded maximum number of DFA inputs");
    ++Stage;
    ++StageIdx;
  }
  return Packed;
}
// canReserveResources - Check if the resources occupied by a MCInstrDesc
// are available in the current state.
bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
unsigned FuncUnits = IS->getUnits();
UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
return (CachedTable.count(StateTrans) != 0);
}
// reserveResources - Reserve the resources occupied by a MCInstrDesc and
// change the current state to reflect that change.
void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
unsigned FuncUnits = IS->getUnits();
UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
DFAInput InsnInput = getInsnInput(InsnClass);
UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
ReadTable(CurrentState);
assert(CachedTable.count(StateTrans) != 0);
CurrentState = CachedTable[StateTrans];

View File

@ -20,6 +20,15 @@ def CVI_XLSHF : FuncUnit;
def CVI_MPY01 : FuncUnit;
def CVI_ALL : FuncUnit;
// Combined functional unit data.
// Each entry names a combo functional unit followed by the list of functional
// units that are included in that combination.
def HexagonComboFuncsV60 :
ComboFuncUnits<[
// CVI_XLSHF combines CVI_XLANE and CVI_SHIFT.
ComboFuncData<CVI_XLSHF , [CVI_XLANE, CVI_SHIFT]>,
// CVI_MPY01 combines CVI_MPY0 and CVI_MPY1.
ComboFuncData<CVI_MPY01 , [CVI_MPY0, CVI_MPY1]>,
// CVI_ALL combines all six units listed below.
ComboFuncData<CVI_ALL , [CVI_ST, CVI_XLANE, CVI_SHIFT,
CVI_MPY0, CVI_MPY1, CVI_LD]>
]>;
// Note: When adding additional vector scheduling classes, add the
// corresponding methods to the class HexagonInstrInfo.
def CVI_VA : InstrItinClass;

View File

@ -15,16 +15,36 @@
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dfa-emitter"
#include "CodeGenTarget.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/DFAPacketizerDefs.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/Support/Debug.h"
#include <list>
#include <map>
#include <string>
#include <queue>
using namespace llvm;
// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
//
// dbgsInsnClass - When debugging, print instruction class stages.
//
void dbgsInsnClass(const std::vector<unsigned> &InsnClass);
//
// dbgsStateInfo - When debugging, print the set of state info.
//
void dbgsStateInfo(const std::set<unsigned> &stateInfo);
//
// dbgsIndent - When debugging, indent by the specified amount.
//
void dbgsIndent(unsigned indent);
//
// class DFAPacketizerEmitter: class that generates and prints out the DFA
// for resource tracking.
@ -37,20 +57,48 @@ private:
// allInsnClasses is the set of all possible resources consumed by an
// InstrStage.
//
DenseSet<unsigned> allInsnClasses;
std::vector<std::vector<unsigned>> allInsnClasses;
RecordKeeper &Records;
public:
DFAPacketizerEmitter(RecordKeeper &R);
//
// collectAllInsnClasses: Populate allInsnClasses which is a set of units
// collectAllFuncUnits - Construct a map of function unit names to bits.
//
int collectAllFuncUnits(std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
int &maxResources,
raw_ostream &OS);
//
// collectAllComboFuncs - Construct a map from a combo function unit bit to
// the bits of all included functional units.
//
int collectAllComboFuncs(std::vector<Record*> &ComboFuncList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
raw_ostream &OS);
//
// collectOneInsnClass - Populate allInsnClasses with one instruction class.
//
int collectOneInsnClass(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
Record *ItinData,
raw_ostream &OS);
//
// collectAllInsnClasses - Populate allInsnClasses which is a set of units
// used in each stage.
//
void collectAllInsnClasses(const std::string &Name,
Record *ItinData,
unsigned &NStages,
raw_ostream &OS);
int collectAllInsnClasses(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
std::vector<Record*> &ItinDataList,
int &maxStages,
raw_ostream &OS);
void run(raw_ostream &OS);
};
@ -87,7 +135,7 @@ class State {
const int stateNum;
mutable bool isInitial;
mutable std::set<unsigned> stateInfo;
typedef std::map<unsigned, const State *> TransitionMap;
typedef std::map<std::vector<unsigned>, const State *> TransitionMap;
mutable TransitionMap Transitions;
State();
@ -97,28 +145,47 @@ class State {
}
//
// canAddInsnClass - Returns true if an instruction of type InsnClass is a
// valid transition from this state, i.e., can an instruction of type InsnClass
// be added to the packet represented by this state.
// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
// may be a valid transition from this state i.e., can an instruction of type
// InsnClass be added to the packet represented by this state.
//
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
// Note that for multiple stages, this quick check does not take into account
// any possible resource competition between the stages themselves. That is
// enforced in AddInsnClassStages which checks the cross product of all
// stages for resource availability (which is a more involved check).
//
bool canAddInsnClass(unsigned InsnClass) const;
bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
//
// AddInsnClass - Return all combinations of resource reservation
// which are possible from this state (PossibleStates).
//
void AddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates) const;
//
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
//
void AddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates) const;
//
// AddInsnClassStages - Return all combinations of resource reservation
// resulting from the cross product of all stages for this InsnClass
// which are possible from this state (PossibleStates).
//
void AddInsnClassStages(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
unsigned chkstage, unsigned numstages,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const;
//
// addTransition - Add a transition from this state given the input InsnClass
//
void addTransition(unsigned InsnClass, const State *To) const;
void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
//
// hasTransition - Returns true if there is a transition from this state
// given the input InsnClass
//
bool hasTransition(unsigned InsnClass) const;
bool hasTransition(std::vector<unsigned> InsnClass) const;
};
} // End anonymous namespace.
@ -144,10 +211,52 @@ public:
//
// writeTable: Print out a table representing the DFA.
//
void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName,
int numInsnClasses = 0,
int maxResources = 0, int numCombos = 0, int maxStages = 0);
};
} // End anonymous namespace.
// To enable debugging, run llvm-tblgen with: "-debug-only dfa-emitter".
//
// dbgsInsnClass - When debugging, print instruction class stages.
//
void dbgsInsnClass(const std::vector<unsigned> &InsnClass) {
DEBUG(dbgs() << "InsnClass: ");
for (unsigned i = 0; i < InsnClass.size(); ++i) {
if (i > 0) {
DEBUG(dbgs() << ", ");
}
DEBUG(dbgs() << "0x" << utohexstr(InsnClass[i]));
}
DFAInput InsnInput = getDFAInsnInput(InsnClass);
DEBUG(dbgs() << " (input: 0x" << utohexstr(InsnInput) << ")");
}
//
// dbgsStateInfo - When debugging, print the set of state info.
//
void dbgsStateInfo(const std::set<unsigned> &stateInfo) {
  DEBUG(dbgs() << "StateInfo: ");

  // Print every resource state in the set's iteration order as hex, with a
  // ", " separator in front of every element except the first.
  bool needSeparator = false;
  for (unsigned resourceState : stateInfo) {
    if (needSeparator) {
      DEBUG(dbgs() << ", ");
    }
    DEBUG(dbgs() << "0x" << utohexstr(resourceState));
    needSeparator = true;
  }
}
//
// dbgsIndent - When debugging, indent by the specified amount.
//
void dbgsIndent(unsigned indent) {
  // Emit one " " to the debug stream per requested unit of indentation.
  for (unsigned remaining = indent; remaining != 0; --remaining) {
    DEBUG(dbgs() << " ");
  }
}
//
// Constructors and destructors for State and DFA
@ -157,10 +266,11 @@ State::State() :
DFA::DFA(): currentState(nullptr) {}
//
//
// addTransition - Add a transition from this state given the input InsnClass
//
void State::addTransition(unsigned InsnClass, const State *To) const {
void State::addTransition(std::vector<unsigned> InsnClass, const State *To)
const {
assert(!Transitions.count(InsnClass) &&
"Cannot have multiple transitions for the same input");
Transitions[InsnClass] = To;
@ -170,7 +280,7 @@ void State::addTransition(unsigned InsnClass, const State *To) const {
// hasTransition - Returns true if there is a transition from this state
// given the input InsnClass
//
bool State::hasTransition(unsigned InsnClass) const {
bool State::hasTransition(std::vector<unsigned> InsnClass) const {
return Transitions.count(InsnClass) > 0;
}
@ -178,61 +288,167 @@ bool State::hasTransition(unsigned InsnClass) const {
// AddInsnClass - Return all combinations of resource reservation
// which are possible from this state (PossibleStates).
//
void State::AddInsnClass(unsigned InsnClass,
std::set<unsigned> &PossibleStates) const {
// PossibleStates is the set of valid resource states that ensue from valid
// transitions.
//
void State::AddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates) const {
//
// Iterate over all resource states in currentState.
//
unsigned numstages = InsnClass.size();
assert((numstages > 0) && "InsnClass has no stages");
for (std::set<unsigned>::iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
unsigned thisState = *SI;
//
// Iterate over all possible resources used in InsnClass.
// For ex: for InsnClass = 0x11, all resources = {0x01, 0x10}.
//
DenseSet<unsigned> VisitedResourceStates;
for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
if ((0x1 << j) & InsnClass) {
//
// For each possible resource used in InsnClass, generate the
// resource state if that resource was used.
//
unsigned ResultingResourceState = thisState | (0x1 << j);
DEBUG(dbgs() << " thisState: 0x" << utohexstr(thisState) << "\n");
AddInsnClassStages(InsnClass, ComboBitToBitsMap,
numstages - 1, numstages,
thisState, thisState,
VisitedResourceStates, PossibleStates);
}
}
void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
unsigned chkstage, unsigned numstages,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const {
assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
unsigned thisStage = InsnClass[chkstage];
dbgsIndent((1 + numstages - chkstage) << 1);
DEBUG(dbgs() << "AddInsnClassStages " << chkstage
<< " (0x" << utohexstr(thisStage) << ") from ");
dbgsInsnClass(InsnClass);
DEBUG(dbgs() << "\n");
//
// Iterate over all possible resources used in thisStage.
// For ex: for thisStage = 0x11, all resources = {0x01, 0x10}.
//
for (unsigned int j = 0; j < DFA_MAX_RESOURCES; ++j) {
unsigned resourceMask = (0x1 << j);
if (resourceMask & thisStage) {
unsigned combo = ComboBitToBitsMap[resourceMask];
if (combo && ((~prevState & combo) != combo)) {
DEBUG(dbgs() << "\tSkipped Add 0x" << utohexstr(prevState)
<< " - combo op 0x" << utohexstr(resourceMask)
<< " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
continue;
}
//
// For each possible resource used in thisStage, generate the
// resource state if that resource was used.
//
unsigned ResultingResourceState = prevState | resourceMask | combo;
dbgsIndent((2 + numstages - chkstage) << 1);
DEBUG(dbgs() << "0x" << utohexstr(prevState)
<< " | 0x" << utohexstr(resourceMask));
if (combo) {
DEBUG(dbgs() << " | 0x" << utohexstr(combo));
}
DEBUG(dbgs() << " = 0x" << utohexstr(ResultingResourceState) << " ");
//
// If this is the final stage for this class
//
if (chkstage == 0) {
//
// Check if the resulting resource state can be accommodated in this
// packet.
// We compute ResultingResourceState OR thisState.
// If the result of the OR is different than thisState, it implies
// We compute resource OR prevState (originally started as origState).
// If the result of the OR is different than origState, it implies
// that there is at least one resource that can be used to schedule
// InsnClass in the current packet.
// thisStage in the current packet.
// Insert ResultingResourceState into PossibleStates only if we haven't
// processed ResultingResourceState before.
//
if ((ResultingResourceState != thisState) &&
(VisitedResourceStates.count(ResultingResourceState) == 0)) {
VisitedResourceStates.insert(ResultingResourceState);
PossibleStates.insert(ResultingResourceState);
if (ResultingResourceState != prevState) {
if (VisitedResourceStates.count(ResultingResourceState) == 0) {
VisitedResourceStates.insert(ResultingResourceState);
PossibleStates.insert(ResultingResourceState);
DEBUG(dbgs() << "\tResultingResourceState: 0x"
<< utohexstr(ResultingResourceState) << "\n");
} else {
DEBUG(dbgs() << "\tSkipped Add - state already seen\n");
}
} else {
DEBUG(dbgs() << "\tSkipped Add - no final resources available\n");
}
} else {
//
// If the current resource can be accommodated, check the next
// stage in InsnClass for available resources.
//
if (ResultingResourceState != prevState) {
DEBUG(dbgs() << "\n");
AddInsnClassStages(InsnClass, ComboBitToBitsMap,
chkstage - 1, numstages,
ResultingResourceState, origState,
VisitedResourceStates, PossibleStates);
} else {
DEBUG(dbgs() << "\tSkipped Add - no resources available\n");
}
}
}
}
}
//
// canAddInsnClass - Quickly verifies if an instruction of type InsnClass is a
// valid transition from this state i.e., can an instruction of type InsnClass
// be added to the packet represented by this state.
// canMaybeAddInsnClass - Quickly verifies if an instruction of type InsnClass
// may be a valid transition from this state i.e., can an instruction of type
// InsnClass be added to the packet represented by this state.
//
bool State::canAddInsnClass(unsigned InsnClass) const {
// Note that this routine is performing conservative checks that can be
// quickly executed acting as a filter before calling AddInsnClassStages.
// Any cases allowed through here will be caught later in AddInsnClassStages
// which performs the more expensive exact check.
//
bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
if (~*SI & InsnClass)
// Check to see if all required resources are available.
bool available = true;
// Inspect each stage independently.
// note: This is a conservative check as we aren't checking for
// possible resource competition between the stages themselves
// The full cross product is examined later in AddInsnClass.
for (unsigned i = 0; i < InsnClass.size(); ++i) {
unsigned resources = *SI;
if ((~resources & InsnClass[i]) == 0) {
available = false;
break;
}
// Make sure _all_ resources for a combo function are available.
// note: This is a quick conservative check as it won't catch an
// unscheduleable combo if this stage is an OR expression
// containing a combo.
// These cases are caught later in AddInsnClass.
unsigned combo = ComboBitToBitsMap[InsnClass[i]];
if (combo && ((~resources & combo) != combo)) {
DEBUG(dbgs() << "\tSkipped canMaybeAdd 0x" << utohexstr(resources)
<< " - combo op 0x" << utohexstr(InsnClass[i])
<< " (0x" << utohexstr(combo) <<") cannot be scheduled\n");
available = false;
break;
}
}
if (available) {
return true;
}
}
return false;
}
@ -244,7 +460,6 @@ const State &DFA::newState() {
return *IterPair.first;
}
int State::currentStateNum = 0;
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
@ -263,57 +478,100 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
// the ith state.
//
//
void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
static const std::string SentinelEntry = "{-1, -1}";
DFA::StateSet::iterator SI = states.begin();
void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
int numInsnClasses,
int maxResources, int numCombos, int maxStages) {
unsigned numStates = states.size();
DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
DEBUG(dbgs() << "writeTableAndAPI\n");
DEBUG(dbgs() << "Total states: " << numStates << "\n");
OS << "namespace llvm {\n";
OS << "\n// Input format:\n";
OS << "#define DFA_MAX_RESTERMS " << DFA_MAX_RESTERMS
<< "\t// maximum AND'ed resource terms\n";
OS << "#define DFA_MAX_RESOURCES " << DFA_MAX_RESOURCES
<< "\t// maximum resource bits in one term\n";
OS << "\n// " << TargetName << "DFAStateInputTable[][2] = "
<< "pairs of <Input, NextState> for all valid\n";
OS << "// transitions.\n";
OS << "// " << numStates << "\tstates\n";
OS << "// " << numInsnClasses << "\tinstruction classes\n";
OS << "// " << maxResources << "\tresources max\n";
OS << "// " << numCombos << "\tcombo resources\n";
OS << "// " << maxStages << "\tstages max\n";
OS << "const " << DFA_TBLTYPE << " "
<< TargetName << "DFAStateInputTable[][2] = {\n";
// This table provides a map to the beginning of the transitions for State s
// in DFAStateInputTable.
std::vector<int> StateEntry(states.size());
OS << "namespace llvm {\n\n";
OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
std::vector<int> StateEntry(numStates+1);
static const std::string SentinelEntry = "{-1, -1}";
// Tracks the total valid transitions encountered so far. It is used
// to construct the StateEntry table.
int ValidTransitions = 0;
for (unsigned i = 0; i < states.size(); ++i, ++SI) {
DFA::StateSet::iterator SI = states.begin();
for (unsigned i = 0; i < numStates; ++i, ++SI) {
assert ((SI->stateNum == (int) i) && "Mismatch in state numbers");
StateEntry[i] = ValidTransitions;
for (State::TransitionMap::iterator
II = SI->Transitions.begin(), IE = SI->Transitions.end();
II != IE; ++II) {
OS << "{" << II->first << ", "
OS << "{0x" << utohexstr(getDFAInsnInput(II->first)) << ", "
<< II->second->stateNum
<< "}, ";
<< "},\t";
}
ValidTransitions += SI->Transitions.size();
// If there are no valid transitions from this stage, we need a sentinel
// transition.
if (ValidTransitions == StateEntry[i]) {
OS << SentinelEntry << ",";
OS << SentinelEntry << ",\t";
++ValidTransitions;
}
OS << " // state " << i << ": " << StateEntry[i];
if (StateEntry[i] != (ValidTransitions-1)) { // More than one transition.
OS << "-" << (ValidTransitions-1);
}
OS << "\n";
}
// Print out a sentinel entry at the end of the StateInputTable. This is
// needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
OS << SentinelEntry << "\n";
OS << SentinelEntry << "\t";
OS << " // state " << numStates << ": " << ValidTransitions;
OS << "\n";
OS << "};\n\n";
OS << "// " << TargetName << "DFAStateEntryTable[i] = "
<< "Index of the first entry in DFAStateInputTable for\n";
OS << "// "
<< "the ith state.\n";
OS << "// " << numStates << " states\n";
OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
// Multiply i by 2 since each entry in DFAStateInputTable is a set of
// two numbers.
for (unsigned i = 0; i < states.size(); ++i)
unsigned lastState = 0;
for (unsigned i = 0; i < numStates; ++i) {
if (i && ((i % 10) == 0)) {
lastState = i-1;
OS << " // states " << (i-10) << ":" << lastState << "\n";
}
OS << StateEntry[i] << ", ";
}
// Print out the index to the sentinel entry in StateInputTable
OS << ValidTransitions << ", ";
OS << " // states " << (lastState+1) << ":" << numStates << "\n";
OS << "\n};\n";
OS << "};\n";
OS << "} // namespace\n";
@ -332,40 +590,123 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
//
// collectAllInsnClasses - Populate allInsnClasses which is a set of units
// used in each stage.
// collectAllFuncUnits - Construct a map of function unit names to bits.
//
void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
Record *ItinData,
unsigned &NStages,
raw_ostream &OS) {
// Collect processor itineraries.
std::vector<Record*> ProcItinList =
Records.getAllDerivedDefinitions("ProcessorItineraries");
// If just no itinerary then don't bother.
if (ProcItinList.size() < 2)
return;
std::map<std::string, unsigned> NameToBitsMap;
int DFAPacketizerEmitter::collectAllFuncUnits(
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
int &maxFUs,
raw_ostream &OS) {
DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
DEBUG(dbgs() << "collectAllFuncUnits");
DEBUG(dbgs() << " (" << ProcItinList.size() << " itineraries)\n");
int totalFUs = 0;
// Parse functional units for all the itineraries.
for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
Record *Proc = ProcItinList[i];
const std::string &ProcName = Proc->getName();
std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
DEBUG(dbgs() << " FU:" << i
<< " (" << FUs.size() << " FUs) "
<< ProcName);
// Convert macros to bits for each stage.
for (unsigned i = 0, N = FUs.size(); i < N; ++i)
NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
unsigned numFUs = FUs.size();
for (unsigned j = 0; j < numFUs; ++j) {
assert ((j < DFA_MAX_RESOURCES) &&
"Exceeded maximum number of representable resources");
unsigned FuncResources = (unsigned) (1U << j);
FUNameToBitsMap[FUs[j]->getName()] = FuncResources;
DEBUG(dbgs() << " " << FUs[j]->getName()
<< ":0x" << utohexstr(FuncResources));
}
if (((int) numFUs) > maxFUs) {
maxFUs = numFUs;
}
totalFUs += numFUs;
DEBUG(dbgs() << "\n");
}
return totalFUs;
}
//
// collectAllComboFuncs - Construct a map from a combo function unit bit to
// the bits of all included functional units.
//
// Walks every record in ComboFuncList, reads its "CFD" list, and for each
// combo entry stores in ComboBitToBitsMap the combo unit's own bit OR'ed with
// the bits of every unit in its "FuncList". All bits are looked up by name in
// FUNameToBitsMap. Returns the number of combo entries processed. OS is not
// used here.
int DFAPacketizerEmitter::collectAllComboFuncs(
    std::vector<Record*> &ComboFuncList,
    std::map<std::string, unsigned> &FUNameToBitsMap,
    std::map<unsigned, unsigned> &ComboBitToBitsMap,
    raw_ostream &OS) {
  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
  DEBUG(dbgs() << "collectAllComboFuncs");
  DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");

  int numCombos = 0;
  for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
    Record *Func = ComboFuncList[i];
    const std::string &ComboSetName = Func->getName();
    std::vector<Record*> FUs = Func->getValueAsListOfDefs("CFD");

    DEBUG(dbgs() << " CFD:" << i
                 << " (" << FUs.size() << " combo FUs) "
                 << ComboSetName << "\n");

    // Convert macros to bits for each stage.
    for (unsigned j = 0, NumComboFUs = FUs.size(); j < NumComboFUs; ++j) {
      assert ((j < DFA_MAX_RESOURCES) &&
              "Exceeded maximum number of DFA resources");
      Record *FuncData = FUs[j];
      Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
      const std::vector<Record*> &FuncList =
          FuncData->getValueAsListOfDefs("FuncList");
      std::string ComboFuncName = ComboFunc->getName();

      // operator[] would silently insert a zero entry for an unknown name and
      // produce a bogus combo bit of 0, so check the name is present first
      // (collectOneInsnClass performs the same check on its lookups).
      assert(FUNameToBitsMap.count(ComboFuncName) &&
             "Combo functional unit is not a known functional unit");
      unsigned ComboBit = FUNameToBitsMap[ComboFuncName];
      unsigned ComboResources = ComboBit;
      DEBUG(dbgs() << " combo: " << ComboFuncName
                   << ":0x" << utohexstr(ComboResources) << "\n");

      // Accumulate the bits of every functional unit in the combination.
      for (unsigned k = 0, M = FuncList.size(); k < M; ++k) {
        std::string FuncName = FuncList[k]->getName();
        assert(FUNameToBitsMap.count(FuncName) &&
               "Combo member is not a known functional unit");
        unsigned FuncResources = FUNameToBitsMap[FuncName];
        DEBUG(dbgs() << " " << FuncName
                     << ":0x" << utohexstr(FuncResources) << "\n");
        ComboResources |= FuncResources;
      }

      ComboBitToBitsMap[ComboBit] = ComboResources;
      numCombos++;
      DEBUG(dbgs() << " => combo bits: " << ComboFuncName << ":0x"
                   << utohexstr(ComboBit) << " = 0x"
                   << utohexstr(ComboResources) << "\n");
    }
  }
  return numCombos;
}
//
// collectOneInsnClass - Populate allInsnClasses with one instruction class
//
// Reads the "TheClass" def and the "Stages" list of ItinData. For every stage
// it ORs together the FUNameToBitsMap bits of each unit named in that stage's
// "Units" list and appends the result to a per-class vector (UnitBits). A
// non-empty UnitBits is recorded in allInsnClasses. Returns the stage count.
//
// ProcName, ProcItinList and OS are not referenced in the visible body.
//
// NOTE(review): this span reads like a diff with the +/- markers stripped: it
// contains a hunk header and what appear to be both the removed and the added
// form of several statements (flagged below). Confirm against the real file.
//
int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
std::vector<Record*> &ProcItinList,
std::map<std::string, unsigned> &FUNameToBitsMap,
Record *ItinData,
raw_ostream &OS) {
// Collect instruction classes.
// ItinDef is only used below to print the class name in debug output.
Record *ItinDef = ItinData->getValueAsDef("TheClass");
const std::vector<Record*> &StageList =
ItinData->getValueAsListOfDefs("Stages");
// The number of stages.
// NOTE(review): the next two lines look like the old and the new form of the
// same statement; presumably only the `unsigned NStages` declaration is live.
NStages = StageList.size();
unsigned NStages = StageList.size();
// For each unit.
// NOTE(review): UnitBitValue is declared again inside the stage loop below;
// this outer declaration is presumably a leftover pre-change line.
unsigned UnitBitValue = 0;
DEBUG(dbgs() << " " << ItinDef->getName()
<< "\n");
// One entry per stage: the OR of the bits of all units usable in that stage.
std::vector<unsigned> UnitBits;
// Compute the bitwise or of each unit used in this stage.
for (unsigned i = 0; i < NStages; ++i) {
// NOTE(review): diff hunk header. Lines are elided here; `Stage`, used just
// below, is not declared anywhere in the visible text.
@ -375,18 +716,72 @@ void DFAPacketizerEmitter::collectAllInsnClasses(const std::string &Name,
const std::vector<Record*> &UnitList =
Stage->getValueAsListOfDefs("Units");
DEBUG(dbgs() << " stage:" << i
<< " [" << UnitList.size() << " units]:");
// dbglen tracks an estimated output column so the "(bits: ...)" text printed
// after the unit names can be tab-aligned.
unsigned dbglen = 26; // cursor after stage dbgs
// Compute the bitwise or of each unit used in this stage.
unsigned UnitBitValue = 0;
for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
// Conduct bitwise or.
std::string UnitName = UnitList[j]->getName();
// NOTE(review): the NameToBitsMap pair below and the FUNameToBitsMap pair a
// few lines further down look like the old and new form of the same lookup;
// only FUNameToBitsMap is a parameter of this function.
assert(NameToBitsMap.count(UnitName));
UnitBitValue |= NameToBitsMap[UnitName];
DEBUG(dbgs() << " " << j << ":" << UnitName);
dbglen += 3 + UnitName.length();
assert(FUNameToBitsMap.count(UnitName));
UnitBitValue |= FUNameToBitsMap[UnitName];
}
// NOTE(review): `allInsnClasses.insert(UnitBitValue)` treats allInsnClasses as
// a set of unsigned, while the push_back of UnitBits after the loop treats it
// as a container of vectors; the insert is presumably the removed form.
if (UnitBitValue != 0)
allInsnClasses.insert(UnitBitValue);
UnitBits.push_back(UnitBitValue);
// Emit tabs, counting 8 columns each, until dbglen exceeds 64. The loop
// itself runs unconditionally; only the tab output is inside DEBUG.
while (dbglen <= 64) { // line up bits dbgs
dbglen += 8;
DEBUG(dbgs() << "\t");
}
DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
}
// An itinerary with no stages contributes no entry to allInsnClasses.
if (UnitBits.size() > 0)
allInsnClasses.push_back(UnitBits);
DEBUG(dbgs() << " ");
dbgsInsnClass(UnitBits);
DEBUG(dbgs() << "\n");
return NStages;
}
//
// collectAllInsnClasses - Feed every itinerary record in ItinDataList to
// collectOneInsnClass.
//
// maxStages is an in/out value: it is raised whenever a record reports more
// stages than the current value and is otherwise left alone. The return value
// is the number of itinerary records that were visited.
//
int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
                            std::vector<Record*> &ProcItinList,
                            std::map<std::string, unsigned> &FUNameToBitsMap,
                            std::vector<Record*> &ItinDataList,
                            int &maxStages,
                            raw_ostream &OS) {
  const unsigned ItinCount = ItinDataList.size();

  DEBUG(dbgs() << "-----------------------------------------------------------------------------\n"
               << "collectAllInsnClasses "
               << ProcName
               << " (" << ItinCount << " classes)\n");

  // Visit each itinerary record once, tracking the deepest pipeline seen.
  int Visited = 0;
  for (Record *Itin : ItinDataList) {
    const int StageCount = collectOneInsnClass(ProcName, ProcItinList,
                                               FUNameToBitsMap, Itin, OS);
    if (maxStages < StageCount)
      maxStages = StageCount;
    ++Visited;
  }
  return Visited;
}
//
// Run the worklist algorithm to generate the DFA.
// NOTE(review): this span reads like a diff with the +/- markers stripped. The
// `@ ... @` lines are hunk headers (file lines are elided at each one), and
// several statements appear in both a removed and an added form. The function's
// own definition line is only visible inside those headers.
@ -398,16 +793,35 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
Records.getAllDerivedDefinitions("ProcessorItineraries");
//
// Collect the instruction classes.
// Collect the Functional units.
//
// collectAllFuncUnits receives FUNameToBitsMap and maxResources by name here;
// both are consumed later in this function.
std::map<std::string, unsigned> FUNameToBitsMap;
int maxResources = 0;
collectAllFuncUnits(ProcItinList,
FUNameToBitsMap, maxResources, OS);
//
// Collect the Combo Functional units.
//
// ComboBitToBitsMap is handed to collectAllComboFuncs and later passed to
// canMaybeAddInsnClass / AddInsnClass in the worklist loop.
std::map<unsigned, unsigned> ComboBitToBitsMap;
std::vector<Record*> ComboFuncList =
Records.getAllDerivedDefinitions("ComboFuncUnits");
int numCombos = collectAllComboFuncs(ComboFuncList,
FUNameToBitsMap, ComboBitToBitsMap, OS);
//
// Collect the itineraries.
//
// maxStages and numInsnClasses are accumulated across all processors and
// forwarded to writeTableAndAPI at the end.
int maxStages = 0;
int numInsnClasses = 0;
for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
Record *Proc = ProcItinList[i];
// Get processor itinerary name.
// NOTE(review): `Name` / `ProcName` below look like the old and new spelling
// of the same variable; the added call further down uses ProcName.
const std::string &Name = Proc->getName();
const std::string &ProcName = Proc->getName();
// Skip default.
if (Name == "NoItineraries")
if (ProcName == "NoItineraries")
continue;
// Sanity check for at least one instruction itinerary class.
@ -419,15 +833,11 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
// Get itinerary data list.
std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
// Collect instruction classes for all itinerary data.
// NOTE(review): this per-record loop calls collectAllInsnClasses with a
// four-argument form that matches the old signature shown in an earlier hunk
// header; it is presumably the removed code, replaced by the single call below.
for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
Record *ItinData = ItinDataList[j];
unsigned NStages;
collectAllInsnClasses(Name, ItinData, NStages, OS);
}
// Collect all instruction classes
numInsnClasses += collectAllInsnClasses(ProcName, ProcItinList,
FUNameToBitsMap, ItinDataList, maxStages, OS);
}
//
// Run a worklist algorithm to generate the DFA.
//
@ -436,6 +846,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
// The initial state's stateInfo holds the single value 0x0.
Initial->isInitial = true;
Initial->stateInfo.insert(0x0);
// States are pushed and popped at the back of WorkList, so they are processed
// in last-in, first-out order.
SmallVector<const State*, 32> WorkList;
// std::queue<State*> WorkList;
// Visited maps a stateInfo set to the State already created for it.
std::map<std::set<unsigned>, const State*> Visited;
WorkList.push_back(Initial);
@ -459,9 +870,15 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
//
while (!WorkList.empty()) {
const State *current = WorkList.pop_back_val();
// NOTE(review): the DenseSet iterator loop header and the `unsigned InsnClass`
// line look like the removed form; the index loop over allInsnClasses with a
// vector-valued InsnClass looks like the added form.
for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
CE = allInsnClasses.end(); CI != CE; ++CI) {
unsigned InsnClass = *CI;
DEBUG(dbgs() << "---------------------\n");
DEBUG(dbgs() << "Processing state: " << current->stateNum << " - ");
dbgsStateInfo(current->stateInfo);
DEBUG(dbgs() << "\n");
for (unsigned i = 0; i < allInsnClasses.size(); i++) {
std::vector<unsigned> InsnClass = allInsnClasses[i];
DEBUG(dbgs() << i << " ");
dbgsInsnClass(InsnClass);
DEBUG(dbgs() << "\n");
// Filled by AddInsnClass below; used as the lookup key in Visited.
std::set<unsigned> NewStateResources;
//
@ -469,32 +886,47 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
// and the state can accommodate this InsnClass, create a transition.
//
// NOTE(review): the canAddInsnClass / two-argument AddInsnClass / assert lines
// look like the removed form; the canMaybeAddInsnClass and three-argument
// AddInsnClass lines that take ComboBitToBitsMap look like the added form.
if (!current->hasTransition(InsnClass) &&
current->canAddInsnClass(InsnClass)) {
const State *NewState;
current->AddInsnClass(InsnClass, NewStateResources);
assert(!NewStateResources.empty() && "New states must be generated");
current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
const State *NewState = NULL;
current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
// An empty result is skipped here rather than asserted on: no state is
// created and no transition is added for this class.
if (NewStateResources.size() == 0) {
DEBUG(dbgs() << " Skipped - no new states generated\n");
continue;
}
DEBUG(dbgs() << "\t");
dbgsStateInfo(NewStateResources);
DEBUG(dbgs() << "\n");
//
// If we have seen this state before, then do not create a new state.
//
//
auto VI = Visited.find(NewStateResources);
// NOTE(review): the unbraced `if` / `else {` lines look like the removed form
// of the braced `if (...) {` / `} else {` lines that follow them.
if (VI != Visited.end())
if (VI != Visited.end()) {
NewState = VI->second;
else {
DEBUG(dbgs() << "\tFound existing state: "
<< NewState->stateNum << " - ");
dbgsStateInfo(NewState->stateInfo);
DEBUG(dbgs() << "\n");
} else {
// First time this resource set is seen: create the state, remember it in
// Visited, and queue it so its own transitions get explored.
NewState = &D.newState();
NewState->stateInfo = NewStateResources;
Visited[NewStateResources] = NewState;
WorkList.push_back(NewState);
DEBUG(dbgs() << "\tAccepted new state: "
<< NewState->stateNum << " - ");
dbgsStateInfo(NewState->stateInfo);
DEBUG(dbgs() << "\n");
}
// The transition is added for both reused and newly created target states.
current->addTransition(InsnClass, NewState);
}
}
}
// Print out the table.
// NOTE(review): the two-argument writeTableAndAPI call looks like the removed
// form; the following call also passes the counts gathered above.
D.writeTableAndAPI(OS, TargetName);
D.writeTableAndAPI(OS, TargetName,
numInsnClasses, maxResources, numCombos, maxStages);
}
namespace llvm {