From baeb210be780995bee7e9bb00208bfc40027fe25 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Wed, 7 Apr 2010 18:19:40 +0000 Subject: [PATCH] Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks to model domain cross stalls precisly. llvm-svn: 100646 --- llvm/include/llvm/Target/TargetSchedule.td | 15 ++-- llvm/lib/Target/ARM/ARMSchedule.td | 2 + llvm/lib/Target/ARM/ARMScheduleV7.td | 97 +++++++++++++++++----- 3 files changed, 87 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td index bbf43dec58ac..bd6791f92ea7 100644 --- a/llvm/include/llvm/Target/TargetSchedule.td +++ b/llvm/include/llvm/Target/TargetSchedule.td @@ -23,7 +23,7 @@ class FuncUnit; class ReservationKind val> { - bits<1> Value = val; + int Value = val; } def Required : ReservationKind<0>; @@ -43,14 +43,19 @@ def Reserved : ReservationKind<1>; // InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles // InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit // -class InstrStage units, - int timeinc = -1, ReservationKind kind = Required> { + +class InstrStage2 units, + int timeinc, ReservationKind kind> { int Cycles = cycles; // length of stage in machine cycles list Units = units; // choice of functional units int TimeInc = timeinc; // cycles till start of next stage int Kind = kind.Value; // kind of FU reservation } +class InstrStage units, + int timeinc = -1> + : InstrStage2; + //===----------------------------------------------------------------------===// // Instruction itinerary - An itinerary represents a sequential series of steps // required to complete an instruction. Itineraries are represented as lists of @@ -71,10 +76,10 @@ def NoItinerary : InstrItinClass; // Instruction itinerary data - These values provide a runtime map of an // instruction itinerary class (name) to its itinerary data. 
// -class InstrItinData stages, +class InstrItinData stages, list operandcycles = []> { InstrItinClass TheClass = Class; - list Stages = stages; + list Stages = stages; list OperandCycles = operandcycles; } diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td index fc4c5f5830b0..db15a85e40f1 100644 --- a/llvm/lib/Target/ARM/ARMSchedule.td +++ b/llvm/lib/Target/ARM/ARMSchedule.td @@ -17,6 +17,8 @@ def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe def FU_NLSPipe : FuncUnit; // NEON LS pipe +def FU_DRegsVFP: FuncUnit; // FP register set, VFP side +def FU_DRegsN : FuncUnit; // FP register set, NEON side //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/llvm/lib/Target/ARM/ARMScheduleV7.td b/llvm/lib/Target/ARM/ARMScheduleV7.td index b121045dd5b0..0d7a5539c1db 100644 --- a/llvm/lib/Target/ARM/ARMScheduleV7.td +++ b/llvm/lib/Target/ARM/ARMScheduleV7.td @@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 // def CortexA9Itineraries : ProcessorItineraries<[ + // VFP and NEON share the same register file. This means that every VFP + // instruction should wait for full completion of the consecutive NEON + // instruction and vice-versa. We model this behavior with two artificial FUs: + // DRegsVFP and DRegsN. + // + // Every VFP instruction: + // - Acquires DRegsVFP resource for 1 cycle + // - Reserves DRegsN resource for the whole duration. + // Every NEON instruction does the same but with FUs swapped. + // + // Since the reserved FU cannot be acquired this models precisely "cross-domain" + // stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. 
- // // FP Special Register to Integer Register File Move - InstrItinData, + InstrItinData, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>]>, // // Single-precision FP Unary - InstrItinData, + InstrItinData, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Unary - InstrItinData, + InstrItinData, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single-precision FP Compare - InstrItinData, + InstrItinData, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData, + InstrItinData, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single to Double FP Convert - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double to Single FP Convert - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData, + 
InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-precision FP ALU - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU - InstrItinData, + InstrItinData, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply - InstrItinData, + InstrItinData, + InstrStage2<6, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply - InstrItinData, + InstrItinData, + InstrStage2<7, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC - InstrItinData, + InstrItinData, + InstrStage2<9, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC - InstrItinData, + InstrItinData, + InstrStage2<10, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV - InstrItinData, + InstrItinData, + InstrStage2<16, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<10, [FU_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV - InstrItinData, + InstrItinData, + InstrStage2<26, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<20, [FU_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT - InstrItinData, + InstrItinData, + InstrStage2<18, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<13, [FU_NPipe]>], [17, 1]>, // // Double-precision FP SQRT - InstrItinData, + InstrItinData, + InstrStage2<33, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, 
InstrStage<28, [FU_NPipe]>], [32, 1]> ]>;