forked from OSchip/llvm-project
120 lines
3.7 KiB
TableGen
120 lines
3.7 KiB
TableGen
//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Machine model record for the Cortex-M4 scheduling description.
def CortexM4Model : SchedMachineModel {
  // Only IT can be dual-issued, so the core is treated as single-issue.
  let IssueWidth = 1;

  // In-order.
  let MicroOpBufferSize = 0;

  // Latency when the load is not pipelined and not pc-relative.
  let LoadLatency = 2;

  // Best-case cost of a taken branch.
  let MispredictPenalty = 2;

  let PostRAScheduler = 1;

  // The model is not flagged as complete.
  let CompleteModel = 0;
}
|
|
|
|
|
|
// The whole CPU is modelled as one pipeline. Cortex-M4 is in-order, so the
// resource is declared with BufferSize = 0.
def M4Unit : ProcResource<1> {
  let BufferSize = 0;
}
|
|
|
|
|
|
let SchedModel = CortexM4Model in {

//===----------------------------------------------------------------------===//
// Latency helpers. Every helper uses M4Unit as its only resource; they differ
// solely in the Latency value they assign.
//===----------------------------------------------------------------------===//

// WriteRes wrappers: attach an existing SchedWrite to M4Unit with a fixed
// latency (the number in the class name).
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}

// Stand-alone write resources on M4Unit, referenced by the InstRW wrappers
// below.
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 1;
}
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 2;
}

// InstRW wrappers: give the instructions selected by `instr` the 1- or
// 2-cycle write resource defined above.
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;

//===----------------------------------------------------------------------===//
// Integer side.
//===----------------------------------------------------------------------===//

// Loads, multiply-accumulates and divides are given the higher latency of 2.
foreach w = [WriteLd, WriteMAC32, WriteMAC64Hi, WriteMAC64Lo, WriteMAC16,
             WriteDIV] in {
  def : M4UnitL2<w>;
}

def : M4UnitL2I<(instregex "(t|t2)LDM")>;

// Stores have no outputs, so they are given a latency of 1.
def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;

// Every remaining integer write gets a latency of 1.
foreach w = [WriteALU, WriteALUsi, WriteALUsr, WriteALUSsr,
             WriteBr, WriteBrL, WriteBrTbl,
             WriteCMPsi, WriteCMPsr, WriteCMP,
             WriteMUL32, WriteMUL64Hi, WriteMUL64Lo, WriteMUL16,
             WriteNoop, WritePreLd] in {
  def : M4UnitL1<w>;
}

// Per-instruction single-cycle overrides.
def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT")>;
def : M4UnitL1I<(instregex
        "t2SEL",
        "t2USAD8",
        "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)",
        "t2USADA8",
        "(t|t2)REV")>;

// Integer reads are declared with a ReadAdvance of 0 cycles.
foreach r = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in {
  def : ReadAdvance<r, 0>;
}

//===----------------------------------------------------------------------===//
// Floating point.
//
// Most FP writes are single-cycle; the exceptions are MACs (3), divides and
// square roots (14). Instructions matching "VLD" keep the 2-cycle load
// latency.
//===----------------------------------------------------------------------===//

foreach w = [WriteFPCVT, WriteFPMOV,
             WriteFPALU32, WriteFPALU64,
             WriteFPMUL32, WriteFPMUL64] in {
  def : M4UnitL1<w>;
}

def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;

foreach w = [WriteFPMAC32, WriteFPMAC64] in {
  def : M4UnitL3<w>;
}

foreach w = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in {
  def : M4UnitL14<w>;
}

// NOTE(review): the WriteVLD*/WriteVST* classes are bound to latency 1 here,
// whereas instructions matching "VLD" are given 2 by the InstRW above --
// confirm this split is intended.
foreach w = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in {
  def : M4UnitL1<w>;
}

// FP reads are declared with a ReadAdvance of 0 cycles.
foreach r = [ReadFPMUL, ReadFPMAC] in {
  def : ReadAdvance<r, 0>;
}

}
|