forked from OSchip/llvm-project
Add PPC Freescale e500mc and e5500 subtargets.
Add subtargets for Freescale e500mc (32-bit) and e5500 (64-bit) to the PowerPC backend. Patch by Tobias von Koch. llvm-svn: 162764
This commit is contained in:
parent
f4ad232921
commit
742b535e40
|
@ -35,6 +35,10 @@ def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
|
|||
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
|
||||
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
|
||||
def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
|
||||
def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
|
||||
"PPC::DIR_E500mc", "">;
|
||||
def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
|
||||
"PPC::DIR_E5500", "">;
|
||||
def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
|
||||
def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
|
||||
|
||||
|
@ -94,6 +98,12 @@ def : Processor<"g5", G5Itineraries,
|
|||
[Directive970, FeatureAltivec,
|
||||
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
|
||||
Feature64Bit /*, Feature64BitRegs */]>;
|
||||
def : ProcessorModel<"e500mc", PPCE500mcModel,
|
||||
[DirectiveE500mc, FeatureMFOCRF,
|
||||
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
|
||||
def : ProcessorModel<"e5500", PPCE5500Model,
|
||||
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
|
||||
FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
|
||||
def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
|
||||
FeatureMFOCRF, FeatureFSqrt,
|
||||
FeatureSTFIWX, FeatureISEL,
|
||||
|
|
|
@ -462,6 +462,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
|
|||
"ppc750",
|
||||
"ppc970",
|
||||
"ppcA2",
|
||||
"ppce500mc",
|
||||
"ppce5500",
|
||||
"power6",
|
||||
"power7",
|
||||
"ppc64"
|
||||
|
|
|
@ -449,6 +449,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
|
|||
setSchedulingPreference(Sched::Hybrid);
|
||||
|
||||
computeRegisterProperties();
|
||||
|
||||
// The Freescale cores do better with aggressive inlining of memcpy and
|
||||
// friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
|
||||
if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
|
||||
Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
|
||||
maxStoresPerMemset = 32;
|
||||
maxStoresPerMemsetOptSize = 16;
|
||||
maxStoresPerMemcpy = 32;
|
||||
maxStoresPerMemcpyOptSize = 8;
|
||||
maxStoresPerMemmove = 32;
|
||||
maxStoresPerMemmoveOptSize = 8;
|
||||
|
||||
setPrefFunctionAlignment(4);
|
||||
benefitFromCodePlacementOpt = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
|
||||
|
|
|
@ -54,7 +54,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
|
|||
const TargetMachine *TM,
|
||||
const ScheduleDAG *DAG) const {
|
||||
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
|
||||
if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
|
||||
Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
|
||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||
return new PPCScoreboardHazardRecognizer(II, DAG);
|
||||
}
|
||||
|
@ -70,7 +71,8 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
|
|||
unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
|
||||
|
||||
// Most subtargets use a PPC970 recognizer.
|
||||
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
|
||||
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
|
||||
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
|
||||
const TargetInstrInfo *TII = TM.getInstrInfo();
|
||||
assert(TII && "No InstrInfo?");
|
||||
|
||||
|
|
|
@ -118,6 +118,8 @@ include "PPCScheduleG4.td"
|
|||
include "PPCScheduleG4Plus.td"
|
||||
include "PPCScheduleG5.td"
|
||||
include "PPCScheduleA2.td"
|
||||
include "PPCScheduleE500mc.td"
|
||||
include "PPCScheduleE5500.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction to itinerary class map - When add new opcodes to the supported
|
||||
|
|
|
@ -0,0 +1,265 @@
|
|||
//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the itinerary class data for the Freescale e500mc 32-bit
|
||||
// Power processor.
|
||||
//
|
||||
// All information is derived from the "e500mc Core Reference Manual",
|
||||
// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Relevant functional units in the Freescale e500mc core:
|
||||
//
|
||||
// * Decode & Dispatch
|
||||
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
|
||||
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
|
||||
def DIS0 : FuncUnit; // Dispatch stage - insn 1
|
||||
def DIS1 : FuncUnit; // Dispatch stage - insn 2
|
||||
|
||||
// * Execute
|
||||
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
|
||||
// Some instructions can only execute in SFX0 but not SFX1.
|
||||
// The CFX has a bypass path, allowing non-divide instructions to execute
|
||||
// while a divide instruction is executed.
|
||||
def SFX0 : FuncUnit; // Simple unit 0
|
||||
def SFX1 : FuncUnit; // Simple unit 1
|
||||
def BU : FuncUnit; // Branch unit
|
||||
def CFX_DivBypass
|
||||
: FuncUnit; // CFX divide bypass path
|
||||
def CFX_0 : FuncUnit; // CFX pipeline
|
||||
def LSU_0 : FuncUnit; // LSU pipeline
|
||||
def FPU_0 : FuncUnit; // FPU pipeline
|
||||
|
||||
def PPCE500mcItineraries : ProcessorItineraries<
|
||||
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
|
||||
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
|
||||
InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5, 1, 1], // Latency = 1 or 2
|
||||
[CR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<14, [CFX_DivBypass]>],
|
||||
[17, 1, 1], // Latency=4..35, Repeat= 4..35
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<8, [FPU_0]>],
|
||||
[11], // Latency = 8
|
||||
[FPR_Bypass]>,
|
||||
InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<8, [FPU_0]>],
|
||||
[11, 1, 1], // Latency = 8
|
||||
[NoBypass, NoBypass, NoBypass]>,
|
||||
InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0]>],
|
||||
[7, 1, 1], // Latency = 4, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0]>],
|
||||
[7, 1, 1], // Latency = 4, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0]>],
|
||||
[7, 1, 1], // Latency = 4, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [SFX0]>],
|
||||
[5, 1], // Latency = 2, Repeat rate = 2
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[4, 1], // Latency = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[CR_Bypass, CR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[4, 1], // Latency = 1
|
||||
[CR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1, 1], // Latency = 1
|
||||
[CR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[NoBypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 1, 1], // Latency = 4
|
||||
[FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 1, 1], // Latency = 4
|
||||
[FPR_Bypass, GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 1], // Latency = r+3
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<3, [LSU_0]>],
|
||||
[6, 1, 1], // Latency = 3, Repeat rate = 3
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[6, 1], // Latency = 3
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>]>,
|
||||
InstrItinData<SprMFSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [SFX0]>],
|
||||
[7, 1],
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [SFX0, SFX1]>],
|
||||
[5, 1], // Latency = 2, Repeat rate = 4
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMTSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0]>],
|
||||
[5, 1],
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0], 0>]>,
|
||||
InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<5, [SFX0]>],
|
||||
[8, 1],
|
||||
[GPR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [SFX0]>],
|
||||
[7, 1], // Latency = 4, Repeat rate = 4
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1], // Latency = 1, Repeat rate = 1
|
||||
[GPR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [SFX0]>],
|
||||
[7, 1], // Latency = 4, Repeat rate = 4
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[4, 1], // Latency = 1, Repeat rate = 1
|
||||
[CR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMTSRIN , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0]>],
|
||||
[4, 1],
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [FPU_0]>],
|
||||
[11, 1, 1], // Latency = 8, Repeat rate = 2
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [FPU_0]>],
|
||||
[13, 1, 1], // Latency = 10, Repeat rate = 4
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [FPU_0]>],
|
||||
[11, 1, 1], // Latency = 8, Repeat rate = 2
|
||||
[CR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<68, [FPU_0]>],
|
||||
[71, 1, 1], // Latency = 68, Repeat rate = 68
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<38, [FPU_0]>],
|
||||
[41, 1, 1], // Latency = 38, Repeat rate = 38
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [FPU_0]>],
|
||||
[13, 1, 1, 1], // Latency = 10, Repeat rate = 4
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<38, [FPU_0]>],
|
||||
[41, 1], // Latency = 38, Repeat rate = 38
|
||||
[FPR_Bypass, FPR_Bypass]>
|
||||
]>;
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// e500mc machine model for scheduling and other instruction cost heuristics.
|
||||
|
||||
def PPCE500mcModel : SchedMachineModel {
|
||||
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
|
||||
let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
|
||||
let LoadLatency = 5; // Optimistic load latency assuming bypass.
|
||||
// This is overridden by OperandCycles if the
|
||||
// Itineraries are queried instead.
|
||||
|
||||
let Itineraries = PPCE500mcItineraries;
|
||||
}
|
|
@ -0,0 +1,309 @@
|
|||
//===-- PPCScheduleE5500.td - e5500 Scheduling Defs --------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the itinerary class data for the Freescale e5500 64-bit
|
||||
// Power processor.
|
||||
//
|
||||
// All information is derived from the "e5500 Core Reference Manual",
|
||||
// Freescale Document Number e5500RM, Rev. 1, 03/2012.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Relevant functional units in the Freescale e5500 core
|
||||
// (These are the same as for the e500mc)
|
||||
//
|
||||
// * Decode & Dispatch
|
||||
// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
|
||||
// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
|
||||
// def DIS0 : FuncUnit;
|
||||
// def DIS1 : FuncUnit;
|
||||
|
||||
// * Execute
|
||||
// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
|
||||
// The CFX has a bypass path, allowing non-divide instructions to execute
|
||||
// while a divide instruction is being executed.
|
||||
// def SFX0 : FuncUnit; // Simple unit 0
|
||||
// def SFX1 : FuncUnit; // Simple unit 1
|
||||
// def BU : FuncUnit; // Branch unit
|
||||
// def CFX_DivBypass
|
||||
// : FuncUnit; // CFX divide bypass path
|
||||
// def CFX_0 : FuncUnit; // CFX pipeline stage 0
|
||||
|
||||
def CFX_1 : FuncUnit; // CFX pipeline stage 1
|
||||
|
||||
// def LSU_0 : FuncUnit; // LSU pipeline
|
||||
// def FPU_0 : FuncUnit; // FPU pipeline
|
||||
|
||||
|
||||
def PPCE5500Itineraries : ProcessorItineraries<
|
||||
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
|
||||
LSU_0, FPU_0],
|
||||
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
|
||||
InstrItinData<IntSimple , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5, 2, 2], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5, 2, 2], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntCompare , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[6, 2, 2], // Latency = 1 or 2
|
||||
[CR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntDivD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<26, [CFX_DivBypass]>],
|
||||
[30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntDivW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<16, [CFX_DivBypass]>],
|
||||
[20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMFFS , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [FPU_0]>],
|
||||
[11], // Latency = 7, Repeat rate = 1
|
||||
[FPR_Bypass]>,
|
||||
InstrItinData<IntMTFSB0 , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<7, [FPU_0]>],
|
||||
[11, 2, 2], // Latency = 7, Repeat rate = 7
|
||||
[NoBypass, NoBypass, NoBypass]>,
|
||||
InstrItinData<IntMulHD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<2, [CFX_1]>],
|
||||
[9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMulHW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<1, [CFX_1]>],
|
||||
[8, 2, 2], // Latency = 4, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMulHWU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<1, [CFX_1]>],
|
||||
[8, 2, 2], // Latency = 4, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntMulLI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0], 0>,
|
||||
InstrStage<2, [CFX_1]>],
|
||||
[8, 2, 2], // Latency = 4 or 5, Repeat = 2
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntRotate , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5, 2, 2], // Latency = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntRotateD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [SFX0, SFX1]>],
|
||||
[6, 2, 2], // Latency = 2, Repeat rate = 2
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntRotateDI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5, 2, 2], // Latency = 1, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntShift , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [SFX0, SFX1]>],
|
||||
[6, 2, 2], // Latency = 2, Repeat rate = 2
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<IntTrapW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [SFX0]>],
|
||||
[6, 2], // Latency = 2, Repeat rate = 2
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<BrB , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[5, 2], // Latency = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<BrCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[5, 2, 2], // Latency = 1
|
||||
[CR_Bypass, CR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<BrMCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [BU]>],
|
||||
[5, 2], // Latency = 1
|
||||
[CR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<BrMCRX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0]>],
|
||||
[5, 2, 2], // Latency = 1
|
||||
[CR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBA , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBF , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStDCBI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLoad , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLoadUpd , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLDARX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<3, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStStore , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStStoreUpd, [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStICBI , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTFD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTFDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLFD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[8, 2, 2], // Latency = 4, Repeat rate = 1
|
||||
[FPR_Bypass, GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLFDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[8, 2, 2], // Latency = 4, Repeat rate = 1
|
||||
[FPR_Bypass, GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLHA , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLHAU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[GPR_Bypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStLMW , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [LSU_0]>],
|
||||
[8, 2], // Latency = r+3, Repeat rate = r+3
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStLWARX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<3, [LSU_0]>],
|
||||
[7, 2, 2], // Latency = 3, Repeat rate = 3
|
||||
[GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTDCX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSTDU , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass],
|
||||
2>, // 2 micro-ops
|
||||
InstrItinData<LdStSTWCX , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>],
|
||||
[7, 2], // Latency = 3, Repeat rate = 1
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<LdStSync , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0]>]>,
|
||||
InstrItinData<SprMTMSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [CFX_0]>],
|
||||
[6, 2], // Latency = 2, Repeat rate = 4
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprTLBSYNC , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [LSU_0], 0>]>,
|
||||
InstrItinData<SprMFCR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<5, [CFX_0]>],
|
||||
[9, 2], // Latency = 5, Repeat rate = 5
|
||||
[GPR_Bypass, CR_Bypass]>,
|
||||
InstrItinData<SprMFMSR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [SFX0]>],
|
||||
[8, 2], // Latency = 4, Repeat rate = 4
|
||||
[GPR_Bypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMFSPR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [CFX_0]>],
|
||||
[5], // Latency = 1, Repeat rate = 1
|
||||
[GPR_Bypass]>,
|
||||
InstrItinData<SprMFTB , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<4, [CFX_0]>],
|
||||
[8, 2], // Latency = 4, Repeat rate = 4
|
||||
[NoBypass, GPR_Bypass]>,
|
||||
InstrItinData<SprMTSPR , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [SFX0, SFX1]>],
|
||||
[5], // Latency = 1, Repeat rate = 1
|
||||
[GPR_Bypass]>,
|
||||
InstrItinData<FPGeneral , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [FPU_0]>],
|
||||
[11, 2, 2], // Latency = 7, Repeat rate = 1
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPAddSub , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [FPU_0]>],
|
||||
[11, 2, 2], // Latency = 7, Repeat rate = 1
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPCompare , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [FPU_0]>],
|
||||
[11, 2, 2], // Latency = 7, Repeat rate = 1
|
||||
[CR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPDivD , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<31, [FPU_0]>],
|
||||
[39, 2, 2], // Latency = 35, Repeat rate = 31
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPDivS , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<16, [FPU_0]>],
|
||||
[24, 2, 2], // Latency = 20, Repeat rate = 16
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPFused , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<1, [FPU_0]>],
|
||||
[11, 2, 2, 2], // Latency = 7, Repeat rate = 1
|
||||
[FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
|
||||
InstrItinData<FPRes , [InstrStage<1, [DIS0, DIS1], 0>,
|
||||
InstrStage<2, [FPU_0]>],
|
||||
[12, 2], // Latency = 8, Repeat rate = 2
|
||||
[FPR_Bypass, FPR_Bypass]>
|
||||
]>;
|
||||
|
||||
// ===---------------------------------------------------------------------===//
|
||||
// e5500 machine model for scheduling and other instruction cost heuristics.
|
||||
|
||||
def PPCE5500Model : SchedMachineModel {
|
||||
let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
|
||||
let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
|
||||
let LoadLatency = 6; // Optimistic load latency assuming bypass.
|
||||
// This is overridden by OperandCycles if the
|
||||
// Itineraries are queried instead.
|
||||
|
||||
let Itineraries = PPCE5500Itineraries;
|
||||
}
|
|
@ -41,6 +41,8 @@ namespace PPC {
|
|||
DIR_750,
|
||||
DIR_970,
|
||||
DIR_A2,
|
||||
DIR_E500mc,
|
||||
DIR_E5500,
|
||||
DIR_PWR6,
|
||||
DIR_PWR7,
|
||||
DIR_64
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
;
|
||||
; Test support for Freescale e500mc and its higher memcpy inlining thresholds.
|
||||
;
|
||||
; RUN: llc -mcpu=e500mc < %s 2>&1 | FileCheck %s
|
||||
; CHECK-NOT: not a recognized processor for this target
|
||||
|
||||
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
|
||||
target triple = "powerpc-fsl-linux"
|
||||
|
||||
%struct.teststruct = type { [12 x i32], i32 }
|
||||
|
||||
define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
|
||||
entry:
|
||||
; CHECK: @copy
|
||||
; CHECK-NOT: bl memcpy
|
||||
%0 = bitcast %struct.teststruct* %agg.result to i8*
|
||||
%1 = bitcast %struct.teststruct* %in to i8*
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 52, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
|
|
@ -0,0 +1,22 @@
|
|||
;
|
||||
; Test support for Freescale e5500 and its higher memcpy inlining thresholds.
|
||||
;
|
||||
; RUN: llc -mcpu=e5500 < %s 2>&1 | FileCheck %s
|
||||
; CHECK-NOT: not a recognized processor for this target
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-fsl-linux"
|
||||
|
||||
%struct.teststruct = type { [24 x i32], i32 }
|
||||
|
||||
define void @copy(%struct.teststruct* noalias nocapture sret %agg.result, %struct.teststruct* nocapture %in) nounwind {
|
||||
entry:
|
||||
; CHECK: @copy
|
||||
; CHECK-NOT: bl memcpy
|
||||
%0 = bitcast %struct.teststruct* %agg.result to i8*
|
||||
%1 = bitcast %struct.teststruct* %in to i8*
|
||||
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 100, i32 4, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
|
Loading…
Reference in New Issue