[ARM] Use ProcResGroup in Cortex-M7 scheduling model

Used to model structural hazards on FP issue, where some
instructions take up two issue slots and others one, as well
as similar structural hazards on load issue, where some
instructions take up two load lanes and others one.

Differential Revision: https://reviews.llvm.org/D98977
This commit is contained in:
David Penry 2021-04-19 21:23:05 +01:00 committed by David Green
parent 3c54762226
commit 78a871abf7
5 changed files with 97 additions and 51 deletions

View File

@ -19,6 +19,8 @@ def CortexM7Model : SchedMachineModel {
let CompleteModel = 0;
}
let SchedModel = CortexM7Model in {
//===--------------------------------------------------------------------===//
// The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP
// pipe. The stages relevant to scheduling are as follows:
@ -33,7 +35,9 @@ def CortexM7Model : SchedMachineModel {
// for scheduling, so simple ALU operations executing in EX2 will have
// ReadAdvance<0> (the default) for their source operands and Latency = 1.
def M7UnitLoad : ProcResource<2> { let BufferSize = 0; }
// The two load lanes are declared as separate single-unit resources so that a
// write can list each lane by name (M7LoadDP lists both M7UnitLoadL and
// M7UnitLoadH). M7UnitLoad groups the two lanes for writes that list the group
// rather than a specific lane (e.g. M7LoadSP).
def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; }
def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; }
def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; }
def M7UnitStore : ProcResource<1> { let BufferSize = 0; }
def M7UnitALU : ProcResource<2>;
def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; }
@ -41,14 +45,14 @@ def M7UnitShift2 : ProcResource<1> { let BufferSize = 0; }
def M7UnitMAC : ProcResource<1> { let BufferSize = 0; }
def M7UnitBranch : ProcResource<1> { let BufferSize = 0; }
def M7UnitVFP : ProcResource<1> { let BufferSize = 0; }
def M7UnitVPort : ProcResource<2> { let BufferSize = 0; }
// The two VFP ports are declared as separate single-unit resources so that a
// write can list both by name (e.g. WriteFPALU64 lists M7UnitVPortL and
// M7UnitVPortH). M7UnitVPort groups the two ports for writes that list the
// group rather than a specific port (e.g. WriteFPALU32).
def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; }
def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; }
def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; }
def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; }
//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.
let SchedModel = CortexM7Model in {
def : WriteRes<WriteALU, [M7UnitALU]> { let Latency = 1; }
// Basic ALU with shifts.
@ -105,39 +109,42 @@ def : WriteRes<WriteNoop, []> { let Latency = 0; }
// Floating point conversions.
def : WriteRes<WriteFPCVT, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPMOV, [M7UnitVPort]> { let Latency = 3; }
// FP move write that lists both VPort resources (M7UnitVPortL and
// M7UnitVPortH) explicitly, unlike WriteFPMOV which lists only the M7UnitVPort
// group. The latency is the same (3).
// NOTE(review): presumably meant for 64-bit moves, going by the name -- the
// InstRW that uses this write is not visible here; confirm.
def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> {
let Latency = 3;
}
// The FP pipeline has a latency of 3 cycles.
// ALU operations (32/64-bit). These go down the FP pipeline.
def : WriteRes<WriteFPALU32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
def : WriteRes<WriteFPALU64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 4;
let BeginGroup = 1;
}
// Multiplication
def : WriteRes<WriteFPMUL32, [M7UnitVFP, M7UnitVPort]> { let Latency = 3; }
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
def : WriteRes<WriteFPMUL64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 7;
let BeginGroup = 1;
}
// Multiply-accumulate. FPMAC goes down the FP Pipeline.
def : WriteRes<WriteFPMAC32, [M7UnitVFP, M7UnitVPort]> { let Latency = 6; }
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
def : WriteRes<WriteFPMAC64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 11;
let BeginGroup = 1;
}
// Division. Effective scheduling latency is 3, though real latency is larger
def : WriteRes<WriteFPDIV32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
def : WriteRes<WriteFPDIV64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 30;
let BeginGroup = 1;
}
// Square-root. Effective scheduling latency is 3; real latency is larger
def : WriteRes<WriteFPSQRT32, [M7UnitVFP, M7UnitVPort]> { let Latency = 16; }
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPort, M7UnitVPort]> {
def : WriteRes<WriteFPSQRT64, [M7UnitVFP, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 30;
let BeginGroup = 1;
}
@ -283,12 +290,12 @@ def : InstRW<[M7TableLoad, M7Read_ISS, M7Read_ISS], (instregex "t2TB")>;
// VFP loads and stores
def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; }
def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> {
def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> {
let Latency = 2;
let SingleIssue = 1;
}
def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>;
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> {
def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> {
let SingleIssue = 1;
}

View File

@ -0,0 +1,39 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s
# The CHECK lines expect postmisched to move VADDD ahead of VLDRS; all other
# instructions are expected to keep their input order.
---
name: test_groups
alignment: 2
tracksRegLiveness: true
liveins:
- { reg: '$d0' }
- { reg: '$r0' }
- { reg: '$r1' }
- { reg: '$r2' }
- { reg: '$r3' }
- { reg: '$r4' }
frameInfo:
maxAlignment: 1
maxCallFrameSize: 0
machineFunctionInfo: {}
body: |
bb.0:
liveins: $d0, $r0, $r1, $r2, $r3, $r4
; CHECK-LABEL: name: test_groups
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
tBX_RET 14 /* CC::al */, $noreg, implicit $d0
...

View File

@ -253,23 +253,23 @@ vstr.f32 s0, [r0]
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
# CHECK-NEXT: [2.0] - M7UnitLoad
# CHECK-NEXT: [2.1] - M7UnitLoad
# CHECK-NEXT: [3] - M7UnitMAC
# CHECK-NEXT: [4] - M7UnitSIMD
# CHECK-NEXT: [5] - M7UnitShift1
# CHECK-NEXT: [6] - M7UnitShift2
# CHECK-NEXT: [7] - M7UnitStore
# CHECK-NEXT: [8] - M7UnitVFP
# CHECK-NEXT: [9.0] - M7UnitVPort
# CHECK-NEXT: [9.1] - M7UnitVPort
# CHECK-NEXT: [2] - M7UnitLoadH
# CHECK-NEXT: [3] - M7UnitLoadL
# CHECK-NEXT: [4] - M7UnitMAC
# CHECK-NEXT: [5] - M7UnitSIMD
# CHECK-NEXT: [6] - M7UnitShift1
# CHECK-NEXT: [7] - M7UnitShift2
# CHECK-NEXT: [8] - M7UnitStore
# CHECK-NEXT: [9] - M7UnitVFP
# CHECK-NEXT: [10] - M7UnitVPortH
# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
# CHECK-NEXT: - - - 1.00 1.00 - - - - 2.00 104.00 81.00 81.00
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: - - - 1.50 1.50 - - - - 2.00 104.00 81.00 81.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vabs.f32 s0, s2
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vabs.f64 d0, d2
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vadd.f32 s0, s2, s1
@ -384,7 +384,7 @@ vstr.f32 s0, [r0]
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsqrt.f64 d0, d2
# CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vsub.f32 s0, s2, s1
# CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsub.f64 d0, d2, d1
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 1.00 1.00 vldr d0, [r0]
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r0]
# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.50 vldr s0, [r0]
# CHECK-NEXT: - - - - - - - - - 1.00 - 1.00 1.00 vstr d0, [r0]
# CHECK-NEXT: - - - - - - - - - 1.00 - 0.50 0.50 vstr s0, [r0]

View File

@ -862,23 +862,23 @@ yield
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
# CHECK-NEXT: [2.0] - M7UnitLoad
# CHECK-NEXT: [2.1] - M7UnitLoad
# CHECK-NEXT: [3] - M7UnitMAC
# CHECK-NEXT: [4] - M7UnitSIMD
# CHECK-NEXT: [5] - M7UnitShift1
# CHECK-NEXT: [6] - M7UnitShift2
# CHECK-NEXT: [7] - M7UnitStore
# CHECK-NEXT: [8] - M7UnitVFP
# CHECK-NEXT: [9.0] - M7UnitVPort
# CHECK-NEXT: [9.1] - M7UnitVPort
# CHECK-NEXT: [2] - M7UnitLoadH
# CHECK-NEXT: [3] - M7UnitLoadL
# CHECK-NEXT: [4] - M7UnitMAC
# CHECK-NEXT: [5] - M7UnitSIMD
# CHECK-NEXT: [6] - M7UnitShift1
# CHECK-NEXT: [7] - M7UnitShift2
# CHECK-NEXT: [8] - M7UnitStore
# CHECK-NEXT: [9] - M7UnitVFP
# CHECK-NEXT: [10] - M7UnitVPortH
# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 125.00 125.00 - 35.00 35.00 43.00 90.00 88.00 2.00 45.00 - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adc r0, r1, #0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1, #0
# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1

View File

@ -34,26 +34,26 @@ vldr d0, [r1]
# CHECK-NEXT: [0.0] - M7UnitALU
# CHECK-NEXT: [0.1] - M7UnitALU
# CHECK-NEXT: [1] - M7UnitBranch
# CHECK-NEXT: [2.0] - M7UnitLoad
# CHECK-NEXT: [2.1] - M7UnitLoad
# CHECK-NEXT: [3] - M7UnitMAC
# CHECK-NEXT: [4] - M7UnitSIMD
# CHECK-NEXT: [5] - M7UnitShift1
# CHECK-NEXT: [6] - M7UnitShift2
# CHECK-NEXT: [7] - M7UnitStore
# CHECK-NEXT: [8] - M7UnitVFP
# CHECK-NEXT: [9.0] - M7UnitVPort
# CHECK-NEXT: [9.1] - M7UnitVPort
# CHECK-NEXT: [2] - M7UnitLoadH
# CHECK-NEXT: [3] - M7UnitLoadL
# CHECK-NEXT: [4] - M7UnitMAC
# CHECK-NEXT: [5] - M7UnitSIMD
# CHECK-NEXT: [6] - M7UnitShift1
# CHECK-NEXT: [7] - M7UnitShift2
# CHECK-NEXT: [8] - M7UnitStore
# CHECK-NEXT: [9] - M7UnitVFP
# CHECK-NEXT: [10] - M7UnitVPortH
# CHECK-NEXT: [11] - M7UnitVPortL
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1]
# CHECK-NEXT: 1.00 1.00 - - 1.00 - - - - - - - 2.00
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11]
# CHECK-NEXT: 1.00 1.00 - 1.00 1.00 - - - - - - 1.00 1.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions:
# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions:
# CHECK-NEXT: - 1.00 - - - - - - - - - - - add.w r1, r1, #1
# CHECK-NEXT: 1.00 - - - - - - - - - - - - add.w r1, r1, #2
# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1]
# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r1]
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345