From cf0e5e1c62f05e4f891026ed091495a5f73cd9b3 Mon Sep 17 00:00:00 2001 From: Balaram Makam Date: Sat, 25 Mar 2017 04:02:39 +0000 Subject: [PATCH] [AArch64] Refine Falkor Machine Model - Part1 llvm-svn: 298768 --- llvm/lib/Target/AArch64/AArch64SchedFalkor.td | 91 +----- .../AArch64/AArch64SchedFalkorDetails.td | 151 ++++++++++ .../AArch64/AArch64SchedFalkorWriteRes.td | 268 ++++++++++++++++++ 3 files changed, 422 insertions(+), 88 deletions(-) create mode 100644 llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td create mode 100644 llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index ddb09acd011e..caf84aab4b84 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -95,7 +95,7 @@ def : WriteRes { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 def : WriteRes { let Latency = 6; } def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } +def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } @@ -116,93 +116,8 @@ def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; -//===----------------------------------------------------------------------===// -// Specialize the coarse model by associating instruction groups with the -// subtarget-defined types. As the modeled is refined, this will override most -// of the above SchedWriteRes and SchedAlias mappings. - +// Detailed Refinements // ----------------------------------------------------------------------------- -// Miscellaneous -// ----------------------------------------------------------------------------- - -def : InstRW<[WriteI], (instrs COPY)>; - -// ----------------------------------------------------------------------------- -// Vector Loads -// ----------------------------------------------------------------------------- -def : InstRW<[WriteVLD], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVLD], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[WriteVLD], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVLD], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; - -def : InstRW<[WriteVLD], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[WriteVLD], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[WriteVLD], (instregex "LD3Threev(2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; - -def : InstRW<[WriteVLD], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[WriteVLD], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[WriteVLD], (instregex "LD4Fourv(2d)$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; - -// ----------------------------------------------------------------------------- -// Vector Stores -// ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVST], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[WriteVST], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[WriteVST], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; - -def : InstRW<[WriteVST], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[WriteVST], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[WriteVST], (instregex "ST3Threev(2d)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; - -def : InstRW<[WriteVST], (instregex "ST4i(8|16|32|64)$")>; -def : InstRW<[WriteVST], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; -def : InstRW<[WriteVST], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; +include "AArch64SchedFalkorDetails.td" } diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td new file mode 100644 index 000000000000..4e1ae37ce5aa --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -0,0 +1,151 @@ +//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the uop and latency details for the machine model for the +// Qualcomm Falkor subtarget. +// +//===----------------------------------------------------------------------===// + +include "AArch64SchedFalkorWriteRes.td" + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the modeled is refined, this will override most +// of the earlier mappings. + +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + +// SIMD Floating-point Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>; + +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>; + +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>; + +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; + +def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FML(A|S)|FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>; + +def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FML(A|S)|FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>; + +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>; + +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>; + +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>; + +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>; + +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; + +def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FML(A|S)|FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; + +def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FML(A|S)|FMUL|FMULX)v2i64_indexed$")>; + +def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; + +def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v8i16)$")>; + +def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>; + +// SIMD Load Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[WriteVLD], (instregex "LD1i(8|16|32|64)$")>; +def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_3LD_3cyc], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_4LD_3cyc], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVLD], (instregex "LD2i(8|16|32|64)$")>; +def : InstRW<[WriteVLD], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteVLD], (instregex "LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>; + +def : InstRW<[WriteVLD], (instregex "LD3i(8|16|32|64)$")>; +def : InstRW<[WriteVLD], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[WriteVLD], (instregex "LD3Threev(2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[WriteVLD], (instregex "LD4i(8|16|32|64)$")>; +def : InstRW<[WriteVLD], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVLD], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[WriteVLD], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[WriteVLD, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// SIMD Store Instructions +// ----------------------------------------------------------------------------- +def : InstRW<[WriteVST], (instregex "ST1i(8|16|32|64)$")>; +def : InstRW<[WriteVST], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST2i(8|16|32|64)$")>; +def : InstRW<[WriteVST], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[WriteVST], (instregex "ST2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST3i(8|16|32|64)$")>; +def : InstRW<[WriteVST], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[WriteVST], (instregex "ST3Threev(2d)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[WriteVST], (instregex "ST4i(8|16|32|64)$")>; +def : InstRW<[WriteVST], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>; +def : InstRW<[WriteVST], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td new file mode 100644 index 000000000000..7ef3c67355a8 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td @@ -0,0 +1,268 @@ +//=- AArch64SchedFalkorWrRes.td - Falkor Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_1cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 1; } +def FalkorWr_1Z_2cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 2; } +def FalkorWr_1ZB_1cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 1; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } + +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_1cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 1; } +def FalkorWr_1ST_1cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + + +def FalkorWr_1VY_1VX_3cyc : SchedWriteRes<[FalkorUnitVY, FalkorUnitVX]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_1VY_1VX_5cyc : SchedWriteRes<[FalkorUnitVY, FalkorUnitVX]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} +def FalkorWr_1LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_1ST_1SD_1LD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} + +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +}