forked from OSchip/llvm-project
[ARM] Use ProcResGroup in Cortex-M7 scheduling model
Used to model structural hazards on FP issue, where some instructions take up two issue slots and others one, as well as similar structural hazards on load issue, where some instructions take up two load lanes and others one. Differential Revision: https://reviews.llvm.org/D98977
This commit is contained in:
parent
3c54762226
commit
78a871abf7
|
@ -19,6 +19,8 @@ def CortexM7Model : SchedMachineModel {
|
|||
let CompleteModel = 0;
|
||||
}
|
||||
|
||||
let SchedModel = CortexM7Model in {
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
|
||||
// pipe. The stages relevant to scheduling are as follows:
|
||||
|
@ -33,7 +35,9 @@ def CortexM7Model : SchedMachineModel {
|
|||
// for scheduling, so simple ALU operations executing in EX2 will have
|
||||
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
|
||||
|
||||
def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
|
||||
def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; }
|
||||
def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitALU : ProcResource<2>;
|
||||
def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
|
||||
|
@ -41,14 +45,14 @@ def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
|
|||
def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
|
||||
def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; }
|
||||
def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; }
|
||||
def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Subtarget-specific SchedWrite types which map ProcResources and set latency.
|
||||
|
||||
let SchedModel = CortexM7Model in {
|
||||
|
||||
def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
|
||||
|
||||
// Basic ALU with shifts.
|
||||
|
@ -105,39 +109,42 @@ def : WriteRes<WriteNoop, []> { let Latency = 0; }
|
|||
// Floating point conversions.
|
||||
def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
|
||||
def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 3;
|
||||
}
|
||||
|
||||
// The FP pipeline has a latency of 3 cycles.
|
||||
// ALU operations (32/64-bit). These go down the FP pipeline.
|
||||
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
|
||||
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 4;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
|
||||
// Multiplication
|
||||
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
|
||||
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
|
||||
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 7;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
|
||||
// Multiply-accumulate. FPMAC goes down the FP Pipeline.
|
||||
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
|
||||
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
|
||||
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 11;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
|
||||
// Division. Effective scheduling latency is 3, though real latency is larger
|
||||
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
|
||||
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
|
||||
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 30;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
|
||||
// Square-root. Effective scheduling latency is 3; real latency is larger
|
||||
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
|
||||
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
|
||||
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 30;
|
||||
let BeginGroup = 1;
|
||||
}
|
||||
|
@ -283,12 +290,12 @@ def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
|
|||
// VFP loads and stores
|
||||
|
||||
def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
|
||||
def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
|
||||
def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let Latency = 2;
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
|
||||
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
|
||||
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> {
|
||||
let SingleIssue = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s
|
||||
---
|
||||
name: test_groups
|
||||
alignment: 2
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '$d0' }
|
||||
- { reg: '$r0' }
|
||||
- { reg: '$r1' }
|
||||
- { reg: '$r2' }
|
||||
- { reg: '$r3' }
|
||||
- { reg: '$r4' }
|
||||
frameInfo:
|
||||
maxAlignment: 1
|
||||
maxCallFrameSize: 0
|
||||
machineFunctionInfo: {}
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $d0, $r0, $r1, $r2, $r3, $r4
|
||||
|
||||
; CHECK-LABEL: name: test_groups
|
||||
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
|
||||
; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
|
||||
; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
|
||||
; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
|
||||
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
|
||||
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
|
||||
renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
|
||||
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
|
||||
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
|
||||
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
|
||||
renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
|
||||
tBX_RET 14 /* CC::al */, $noreg, implicit $d0
|
||||
|
||||
...
|
|
@ -253,23 +253,23 @@ vstr.f32 s0, [r0]
|
|||
# CHECK-NEXT: [0.0] - M7UnitALU
|
||||
# CHECK-NEXT: [0.1] - M7UnitALU
|
||||
# CHECK-NEXT: [1] - M7UnitBranch
|
||||
# CHECK-NEXT: [2.0] - M7UnitLoad
|
||||
# CHECK-NEXT: [2.1] - M7UnitLoad
|
||||
# CHECK-NEXT: [3] - M7UnitMAC
|
||||
# CHECK-NEXT: [4] - M7UnitSIMD
|
||||
# CHECK-NEXT: [5] - M7UnitShift1
|
||||
# CHECK-NEXT: [6] - M7UnitShift2
|
||||
# CHECK-NEXT: [7] - M7UnitStore
|
||||
# CHECK-NEXT: [8] - M7UnitVFP
|
||||
# CHECK-NEXT: [9.0] - M7UnitVPort
|
||||
# CHECK-NEXT: [9.1] - M7UnitVPort
|
||||
# CHECK-NEXT: [2] - M7UnitLoadH
|
||||
# CHECK-NEXT: [3] - M7UnitLoadL
|
||||
# CHECK-NEXT: [4] - M7UnitMAC
|
||||
# CHECK-NEXT: [5] - M7UnitSIMD
|
||||
# CHECK-NEXT: [6] - M7UnitShift1
|
||||
# CHECK-NEXT: [7] - M7UnitShift2
|
||||
# CHECK-NEXT: [8] - M7UnitStore
|
||||
# CHECK-NEXT: [9] - M7UnitVFP
|
||||
# CHECK-NEXT: [10] - M7UnitVPortH
|
||||
# CHECK-NEXT: [11] - M7UnitVPortL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
|
||||
# CHECK-NEXT: - - - 1.00 1.00 - - - - 2.00 104.00 81.00 81.00
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
# CHECK-NEXT: - - - 1.50 1.50 - - - - 2.00 104.00 81.00 81.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vabs.f32 s0, s2
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vabs.f64 d0, d2
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vadd.f32 s0, s2, s1
|
||||
|
@ -384,7 +384,7 @@ vstr.f32 s0, [r0]
|
|||
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsqrt.f64 d0, d2
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vsub.f32 s0, s2, s1
|
||||
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsub.f64 d0, d2, d1
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 1.00 1.00 vldr d0, [r0]
|
||||
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r0]
|
||||
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.50 vldr s0, [r0]
|
||||
# CHECK-NEXT: - - - - - - - - - 1.00 - 1.00 1.00 vstr d0, [r0]
|
||||
# CHECK-NEXT: - - - - - - - - - 1.00 - 0.50 0.50 vstr s0, [r0]
|
||||
|
|
|
@ -862,23 +862,23 @@ yield
|
|||
# CHECK-NEXT: [0.0] - M7UnitALU
|
||||
# CHECK-NEXT: [0.1] - M7UnitALU
|
||||
# CHECK-NEXT: [1] - M7UnitBranch
|
||||
# CHECK-NEXT: [2.0] - M7UnitLoad
|
||||
# CHECK-NEXT: [2.1] - M7UnitLoad
|
||||
# CHECK-NEXT: [3] - M7UnitMAC
|
||||
# CHECK-NEXT: [4] - M7UnitSIMD
|
||||
# CHECK-NEXT: [5] - M7UnitShift1
|
||||
# CHECK-NEXT: [6] - M7UnitShift2
|
||||
# CHECK-NEXT: [7] - M7UnitStore
|
||||
# CHECK-NEXT: [8] - M7UnitVFP
|
||||
# CHECK-NEXT: [9.0] - M7UnitVPort
|
||||
# CHECK-NEXT: [9.1] - M7UnitVPort
|
||||
# CHECK-NEXT: [2] - M7UnitLoadH
|
||||
# CHECK-NEXT: [3] - M7UnitLoadL
|
||||
# CHECK-NEXT: [4] - M7UnitMAC
|
||||
# CHECK-NEXT: [5] - M7UnitSIMD
|
||||
# CHECK-NEXT: [6] - M7UnitShift1
|
||||
# CHECK-NEXT: [7] - M7UnitShift2
|
||||
# CHECK-NEXT: [8] - M7UnitStore
|
||||
# CHECK-NEXT: [9] - M7UnitVFP
|
||||
# CHECK-NEXT: [10] - M7UnitVPortH
|
||||
# CHECK-NEXT: [11] - M7UnitVPortL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
# CHECK-NEXT: 125.00 125.00 - 35.00 35.00 43.00 90.00 88.00 2.00 45.00 - - -
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adc r0, r1, #0
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1, #0
|
||||
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1
|
||||
|
|
|
@ -34,26 +34,26 @@ vldr d0, [r1]
|
|||
# CHECK-NEXT: [0.0] - M7UnitALU
|
||||
# CHECK-NEXT: [0.1] - M7UnitALU
|
||||
# CHECK-NEXT: [1] - M7UnitBranch
|
||||
# CHECK-NEXT: [2.0] - M7UnitLoad
|
||||
# CHECK-NEXT: [2.1] - M7UnitLoad
|
||||
# CHECK-NEXT: [3] - M7UnitMAC
|
||||
# CHECK-NEXT: [4] - M7UnitSIMD
|
||||
# CHECK-NEXT: [5] - M7UnitShift1
|
||||
# CHECK-NEXT: [6] - M7UnitShift2
|
||||
# CHECK-NEXT: [7] - M7UnitStore
|
||||
# CHECK-NEXT: [8] - M7UnitVFP
|
||||
# CHECK-NEXT: [9.0] - M7UnitVPort
|
||||
# CHECK-NEXT: [9.1] - M7UnitVPort
|
||||
# CHECK-NEXT: [2] - M7UnitLoadH
|
||||
# CHECK-NEXT: [3] - M7UnitLoadL
|
||||
# CHECK-NEXT: [4] - M7UnitMAC
|
||||
# CHECK-NEXT: [5] - M7UnitSIMD
|
||||
# CHECK-NEXT: [6] - M7UnitShift1
|
||||
# CHECK-NEXT: [7] - M7UnitShift2
|
||||
# CHECK-NEXT: [8] - M7UnitStore
|
||||
# CHECK-NEXT: [9] - M7UnitVFP
|
||||
# CHECK-NEXT: [10] - M7UnitVPortH
|
||||
# CHECK-NEXT: [11] - M7UnitVPortL
|
||||
|
||||
# CHECK: Resource pressure per iteration:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
|
||||
# CHECK-NEXT: 1.00 1.00 - - 1.00 - - - - - - - 2.00
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
|
||||
# CHECK-NEXT: 1.00 1.00 - 1.00 1.00 - - - - - - 1.00 1.00
|
||||
|
||||
# CHECK: Resource pressure by instruction:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
|
||||
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
|
||||
# CHECK-NEXT: - 1.00 - - - - - - - - - - - add.w r1, r1, #1
|
||||
# CHECK-NEXT: 1.00 - - - - - - - - - - - - add.w r1, r1, #2
|
||||
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
|
||||
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r1]
|
||||
|
||||
# CHECK: Timeline view:
|
||||
# CHECK-NEXT: Index 012345
|
||||
|
|
Loading…
Reference in New Issue