llvm-project/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td

266 lines
15 KiB
TableGen

//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the Freescale e500mc 32-bit
// Power processor.
//
// All information is derived from the "e500mc Core Reference Manual",
// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
//
//===----------------------------------------------------------------------===//
// Relevant functional units in the Freescale e500mc core:
//
// * Decode & Dispatch
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
def DIS0 : FuncUnit; // Dispatch stage - insn 1
def DIS1 : FuncUnit; // Dispatch stage - insn 2
// * Execute
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
// Some instructions can only execute in SFX0 but not SFX1.
// The CFX has a bypass path, allowing non-divide instructions to execute
// while a divide instruction is executed.
def SFX0 : FuncUnit; // Simple unit 0
def SFX1 : FuncUnit; // Simple unit 1
def BU : FuncUnit; // Branch unit
def CFX_DivBypass
: FuncUnit; // CFX divide bypass path
def CFX_0 : FuncUnit; // CFX pipeline
def LSU_0 : FuncUnit; // LSU pipeline
def FPU_0 : FuncUnit; // FPU pipeline
def PPCE500mcItineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1, 1], // Latency = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1, 1], // Latency = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[5, 1, 1], // Latency = 1 or 2
[CR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [CFX_0], 0>,
InstrStage<14, [CFX_DivBypass]>],
[17, 1, 1], // Latency=4..35, Repeat= 4..35
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<8, [FPU_0]>],
[11], // Latency = 8
[FPR_Bypass]>,
InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<8, [FPU_0]>],
[11, 1, 1], // Latency = 8
[NoBypass, NoBypass, NoBypass]>,
InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [CFX_0]>],
[7, 1, 1], // Latency = 4, Repeat rate = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [CFX_0]>],
[7, 1, 1], // Latency = 4, Repeat rate = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [CFX_0]>],
[7, 1, 1], // Latency = 4, Repeat rate = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1, 1], // Latency = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1, 1], // Latency = 1
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<2, [SFX0]>],
[5, 1], // Latency = 2, Repeat rate = 2
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [BU]>],
[4, 1], // Latency = 1
[NoBypass, GPR_Bypass]>,
InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [BU]>],
[4, 1, 1], // Latency = 1
[CR_Bypass, CR_Bypass, CR_Bypass]>,
InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [BU]>],
[4, 1], // Latency = 1
[CR_Bypass, CR_Bypass]>,
InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1, 1], // Latency = 1
[CR_Bypass, GPR_Bypass]>,
InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3, Repeat rate = 1
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass],
2>, // 2 micro-ops
InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, GPR_Bypass],
2>, // 2 micro-ops
InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass, GPR_Bypass],
2>, // 2 micro-ops
InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[7, 1, 1], // Latency = 4
[FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1], 0>,
InstrStage<1, [LSU_0]>],
[7, 1, 1], // Latency = 4
[FPR_Bypass, GPR_Bypass, GPR_Bypass],
2>, // 2 micro-ops
InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[7, 1], // Latency = r+3
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<3, [LSU_0]>],
[6, 1, 1], // Latency = 3, Repeat rate = 3
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>],
[6, 1], // Latency = 3
[NoBypass, GPR_Bypass]>,
InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0]>]>,
InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<4, [SFX0]>],
[7, 1],
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<2, [SFX0, SFX1]>],
[5, 1], // Latency = 2, Repeat rate = 4
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0]>],
[5, 1],
[NoBypass, GPR_Bypass]>,
InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [LSU_0], 0>]>,
InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<5, [SFX0]>],
[8, 1],
[GPR_Bypass, CR_Bypass]>,
InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<4, [SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
[GPR_Bypass, GPR_Bypass]>,
InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1], // Latency = 1, Repeat rate = 1
[GPR_Bypass, CR_Bypass]>,
InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<4, [SFX0]>],
[7, 1], // Latency = 4, Repeat rate = 4
[NoBypass, GPR_Bypass]>,
InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0, SFX1]>],
[4, 1], // Latency = 1, Repeat rate = 1
[CR_Bypass, GPR_Bypass]>,
InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<1, [SFX0]>],
[4, 1],
[NoBypass, GPR_Bypass]>,
InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<2, [FPU_0]>],
[11, 1, 1], // Latency = 8, Repeat rate = 2
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<4, [FPU_0]>],
[13, 1, 1], // Latency = 10, Repeat rate = 4
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<2, [FPU_0]>],
[11, 1, 1], // Latency = 8, Repeat rate = 2
[CR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<68, [FPU_0]>],
[71, 1, 1], // Latency = 68, Repeat rate = 68
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<38, [FPU_0]>],
[41, 1, 1], // Latency = 38, Repeat rate = 38
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<4, [FPU_0]>],
[13, 1, 1, 1], // Latency = 10, Repeat rate = 4
[FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
InstrStage<38, [FPU_0]>],
[41, 1], // Latency = 38, Repeat rate = 38
[FPR_Bypass, FPR_Bypass]>
]>;
// ===---------------------------------------------------------------------===//
// e500mc machine model for scheduling and other instruction cost heuristics.
def PPCE500mcModel : SchedMachineModel {
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
let LoadLatency = 5; // Optimistic load latency assuming bypass.
// This is overriden by OperandCycles if the
// Itineraries are queried instead.
let Itineraries = PPCE500mcItineraries;
}