Model Cortex-a9 load to SUB, RSB, ADD, ADC, SBC, RSC, CMN, MVN, or CMP

pipeline forwarding path.

llvm-svn: 115098
This commit is contained in:
Evan Cheng 2010-09-29 22:42:35 +00:00
parent 2016f0eaac
commit 4a010fd1ea
8 changed files with 157 additions and 77 deletions

View File

@ -111,14 +111,16 @@ class InstrItineraryData {
public:
const InstrStage *Stages; ///< Array of stages selected
const unsigned *OperandCycles; ///< Array of operand cycles selected
const unsigned *Forwardings; ///< Array of pipeline forwarding pathes
const InstrItinerary *Itineraries; ///< Array of itineraries selected
/// Ctors.
///
InstrItineraryData() : Stages(0), OperandCycles(0), Itineraries(0) {}
InstrItineraryData() : Stages(0), OperandCycles(0), Forwardings(0),
Itineraries(0) {}
InstrItineraryData(const InstrStage *S, const unsigned *OS,
const InstrItinerary *I)
: Stages(S), OperandCycles(OS), Itineraries(I) {}
const unsigned *F, const InstrItinerary *I)
: Stages(S), OperandCycles(OS), Forwardings(F), Itineraries(I) {}
/// isEmpty - Returns true if there are no itineraries.
///
@ -182,6 +184,53 @@ public:
return (int)OperandCycles[FirstIdx + OperandIdx];
}
/// hasPipelineForwarding - Return true if there is a pipeline forwarding
/// between instructions of itinerary classes DefClass and UseClasses so that
/// value produced by an instruction of itinerary class DefClass, operand
/// index DefIdx can be bypassed when it's read by an instruction of
/// itinerary class UseClass, operand index UseIdx.
bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx,
unsigned UseClass, unsigned UseIdx) const {
unsigned FirstDefIdx = Itineraries[DefClass].FirstOperandCycle;
unsigned LastDefIdx = Itineraries[DefClass].LastOperandCycle;
if ((FirstDefIdx + DefIdx) >= LastDefIdx)
return false;
if (Forwardings[FirstDefIdx + DefIdx] == 0)
return false;
unsigned FirstUseIdx = Itineraries[UseClass].FirstOperandCycle;
unsigned LastUseIdx = Itineraries[UseClass].LastOperandCycle;
if ((FirstUseIdx + UseIdx) >= LastUseIdx)
return false;
return Forwardings[FirstDefIdx + DefIdx] ==
Forwardings[FirstUseIdx + UseIdx];
}
/// getOperandLatency - Compute and return the use operand latency of a given
/// itinerary class and operand index if the value is produced by an
/// instruction of the specified itinerary class and def operand index.
int getOperandLatency(unsigned DefClass, unsigned DefIdx,
unsigned UseClass, unsigned UseIdx) const {
if (isEmpty())
return -1;
int DefCycle = getOperandCycle(DefClass, DefIdx);
if (DefCycle == -1)
return -1;
int UseCycle = getOperandCycle(UseClass, UseIdx);
if (UseCycle == -1)
return -1;
UseCycle = DefCycle - UseCycle + 1;
if (UseCycle > 0 &&
hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx))
// FIXME: This assumes one cycle benefit for every pipeline forwarding.
--UseCycle;
return UseCycle;
}
/// isMicroCoded - Return true if the instructions in the given class decode
/// to more than one micro-ops.
bool isMicroCoded(unsigned ItinClassIndx) const {

View File

@ -527,26 +527,23 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
int DefCycle = InstrItins->getOperandCycle(DefMI->getDesc().getSchedClass(),
DefIdx);
if (DefCycle >= 0) {
MachineInstr *UseMI = Use->getInstr();
const unsigned UseClass = UseMI->getDesc().getSchedClass();
unsigned DefClass = DefMI->getDesc().getSchedClass();
MachineInstr *UseMI = Use->getInstr();
unsigned UseClass = UseMI->getDesc().getSchedClass();
// For all uses of the register, calculate the maxmimum latency
int Latency = -1;
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = UseMI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
unsigned MOReg = MO.getReg();
if (MOReg != Reg)
continue;
// For all uses of the register, calculate the maxmimum latency
int Latency = -1;
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = UseMI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
unsigned MOReg = MO.getReg();
if (MOReg != Reg)
continue;
int UseCycle = InstrItins->getOperandCycle(UseClass, i);
if (UseCycle >= 0)
Latency = std::max(Latency, DefCycle - UseCycle + 1);
}
int UseCycle = InstrItins->getOperandLatency(DefClass, DefIdx,
UseClass, i);
Latency = std::max(Latency, UseCycle);
// If we found a latency, then replace the existing dependence latency.
if (Latency >= 0)

View File

@ -457,24 +457,24 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
if (Def->isMachineOpcode()) {
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (DefIdx >= II.getNumDefs())
return;
int DefCycle = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
if (DefCycle < 0)
return;
int UseCycle = 1;
if (Use->isMachineOpcode()) {
const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
UseCycle = InstrItins->getOperandCycle(UseClass, OpIdx);
}
if (UseCycle >= 0) {
int Latency = DefCycle - UseCycle + 1;
if (Latency >= 0)
dep.setLatency(Latency);
}
if (!Def->isMachineOpcode())
return;
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (DefIdx >= II.getNumDefs())
return;
int Latency = 0;
if (!Use->isMachineOpcode()) {
Latency = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
} else {
unsigned DefClass = II.getSchedClass();
unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
Latency = InstrItins->getOperandLatency(DefClass, DefIdx, UseClass, OpIdx);
}
if (Latency >= 0)
dep.setLatency(Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {

View File

@ -285,7 +285,7 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{5-4} = 0b00; // type
}
// shifted register
def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsir,
opc, "\t$dst, $rhs, $lhs",
[(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
@ -1698,7 +1698,7 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
UnOpFrag<(not node:$Src)>, 1, 1>;

View File

@ -14,6 +14,7 @@ def IIC_iALUx : InstrItinClass;
def IIC_iALUi : InstrItinClass;
def IIC_iALUr : InstrItinClass;
def IIC_iALUsi : InstrItinClass;
def IIC_iALUsir : InstrItinClass;
def IIC_iALUsr : InstrItinClass;
def IIC_iBITi : InstrItinClass;
def IIC_iBITr : InstrItinClass;

View File

@ -36,6 +36,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Bitwise Instructions that produce a result

View File

@ -23,10 +23,14 @@ def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Bypasses
def A9_LdBypass : Bypass;
// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
//
def CortexA9Itineraries : ProcessorItineraries<
[A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [], [
[A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1],
[A9_LdBypass], [
// Two fully-pipelined integer ALU pipelines
//
@ -39,19 +43,30 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
//
// MVN instructions
InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
InstrItinData<IIC_iMVNsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[1]>,
InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[1, 1], [NoBypass, A9_LdBypass]>,
InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[1, 1]>,
InstrItinData<IIC_iMVNsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
[2, 2, 1]>,
//
// No operand cycles
InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
//
// Binary Instructions that produce a result
InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2, 2], [NoBypass, A9_LdBypass]>,
InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2, 2, 2], [NoBypass, A9_LdBypass, A9_LdBypass]>,
InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
[2, 2, 1], [NoBypass, A9_LdBypass, NoBypass]>,
InstrItinData<IIC_iALUsir,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
[2, 1, 2], [NoBypass, NoBypass, A9_LdBypass]>,
InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
[2, 2, 1, 1],
[NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
//
// Bitwise Instructions that produce a result
InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
@ -69,10 +84,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iEXTAsr,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
//
// Compare instructions
InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2], [A9_LdBypass]>,
InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2, 2], [A9_LdBypass, A9_LdBypass]>,
InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
[2, 1], [A9_LdBypass, NoBypass]>,
InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
[2, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
//
// Test instructions
InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
@ -105,31 +124,38 @@ def CortexA9Itineraries : ProcessorItineraries<
//
// Immediate offset
InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>], [3, 1]>,
InstrStage<1, [A9_LSPipe]>],
[3, 1], [A9_LdBypass]>,
//
// Register offset
InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
InstrStage<1, [A9_LSPipe]>],
[3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset
InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
InstrStage<2, [A9_LSPipe]>],
[4, 1, 1], [A9_LdBypass]>,
//
// Immediate offset with update
InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
InstrStage<2, [A9_LSPipe]>],
[3, 2, 1], [A9_LdBypass]>,
//
// Register offset with update
InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
InstrStage<2, [A9_LSPipe]>],
[3, 2, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset with update
InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
InstrStage<2, [A9_LSPipe]>],
[4, 3, 1, 1], [A9_LdBypass]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>]>,
InstrStage<1, [A9_LSPipe]>],
[3], [A9_LdBypass]>,
//
// Load multiple plus branch
@ -141,7 +167,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [4, 1]>,
InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
[2, 1]>,
// Integer store pipeline
///

View File

@ -269,14 +269,14 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
const std::vector<Record*> &BypassList =
ItinData->getValueAsListOfDefs("Bypasses");
unsigned N = BypassList.size();
for (unsigned i = 0; i < N;) {
unsigned i = 0;
for (; i < N;) {
ItinString += Name + "Bypass::" + BypassList[i]->getName();
if (++i < N) ItinString += ", ";
if (++i < NOperandCycles) ItinString += ", ";
}
for (; N < NOperandCycles;) {
for (; i < NOperandCycles;) {
ItinString += " 0";
if (++N < NOperandCycles) ItinString += ", ";
if (++i < NOperandCycles) ItinString += ", ";
}
}
@ -316,14 +316,17 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
OS << "}\n";
std::vector<Record*> BPs = Proc->getValueAsListOfDefs("BP");
OS << "\n// Pipeline bypasses for itineraries \"" << Name << "\"\n"
<< "namespace " << Name << "Bypass {\n";
if (BPs.size()) {
OS << "\n// Pipeline forwarding pathes for itineraries \"" << Name
<< "\"\n" << "namespace " << Name << "Bypass {\n";
for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
OS << " const unsigned " << BPs[j]->getName()
<< " = 1 << " << j << ";\n";
OS << " const unsigned NoBypass = 0;\n";
for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
OS << " const unsigned " << BPs[j]->getName()
<< " = 1 << " << j << ";\n";
OS << "}\n";
OS << "}\n";
}
}
// Begin stages table
@ -335,12 +338,12 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
OperandCycleTable += " 0, // No itinerary\n";
// Begin pipeline bypass table
std::string BypassTable = "static const unsigned Bypasses[] = {\n";
std::string BypassTable = "static const unsigned ForwardingPathes[] = {\n";
BypassTable += " 0, // No itinerary\n";
unsigned StageCount = 1, OperandCycleCount = 1;
unsigned ItinStageEnum = 1, ItinOperandCycleEnum = 1;
std::map<std::string, unsigned> ItinStageMap, ItinOperandCycleMap;
std::map<std::string, unsigned> ItinStageMap, ItinOperandMap;
for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
// Next record
Record *Proc = ProcItinList[i];
@ -395,13 +398,14 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
// Check to see if operand cycle already exists and create if it doesn't
unsigned FindOperandCycle = 0;
if (NOperandCycles > 0) {
FindOperandCycle = ItinOperandCycleMap[ItinOperandCycleString];
std::string ItinOperandString = ItinOperandCycleString+ItinBypassString;
FindOperandCycle = ItinOperandMap[ItinOperandString];
if (FindOperandCycle == 0) {
// Emit as cycle, // index
OperandCycleTable += ItinOperandCycleString + ", // " +
itostr(ItinOperandCycleEnum) + "\n";
// Record Itin class number.
ItinOperandCycleMap[ItinOperandCycleString] =
ItinOperandMap[ItinOperandCycleString] =
FindOperandCycle = OperandCycleCount;
// Emit as bypass, // index
@ -622,7 +626,8 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
OS << "\n"
<< " InstrItinerary *Itinerary = (InstrItinerary *)"
<< "Features.getInfo(ProcItinKV, ProcItinKVSize);\n"
<< " InstrItins = InstrItineraryData(Stages, OperandCycles, Itinerary);\n";
<< " InstrItins = InstrItineraryData(Stages, OperandCycles, "
<< "ForwardingPathes, Itinerary);\n";
}
OS << " return Features.getCPU();\n"