From 235fb927b0b7b280c766287b70d023baf597aa26 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 8 Mar 2018 16:24:33 +0000 Subject: [PATCH] [Power9] Add more missing instructions to the Power 9 scheduler With this patch we should be able to mark the Power 9 model as complete. llvm-svn: 327021 --- llvm/lib/Target/PowerPC/P9InstrResources.td | 283 +++++++++++++++++--- llvm/lib/Target/PowerPC/PPCScheduleP9.td | 33 +++ 2 files changed, 276 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td index 9a6f4b5dd429..286382a96ddd 100644 --- a/llvm/lib/Target/PowerPC/P9InstrResources.td +++ b/llvm/lib/Target/PowerPC/P9InstrResources.td @@ -39,6 +39,8 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, (instregex "VADDU(B|H|W|D)M$"), (instregex "VAND(C)?$"), (instregex "VEXTS(B|H|W)2(D|W)(s)?$"), + (instregex "V_SET0(B|H)?$"), + MTVSRDD, VEQV, VRLB, VRLD, @@ -76,9 +78,6 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, VSUBUHM, VSUBUWM, VXOR, - V_SET0B, - V_SET0H, - V_SET0, XVABSDP, XVABSSP, XVCPSGNDP, @@ -100,6 +99,9 @@ def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, XXLORf, XXLORC, XXLXOR, + XXLXORdpz, + XXLXORspz, + XXLXORz, XXSEL, XSABSQP, XSCPSGNQP, @@ -121,6 +123,9 @@ def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], FTDIV, FTSQRT, CMPEQB, + (instregex "TABORT(D|W)C(I)?$"), + (instregex "MTFSB(0|1)$"), + (instregex "MFFSC(D)?RN(I)?$"), (instregex "CMPRB(8)?$"), (instregex "TD(I)?$"), (instregex "TW(I)?$") @@ -158,10 +163,16 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], (instregex "S(L|R)D$"), (instregex "SRAD(I)?$"), (instregex "EXTSWSLI$"), + (instregex "MFV(S)?RD$"), + (instregex "MTVSRD$"), + (instregex "MTVSRW(A|Z)$"), + MFVSRWZ, SRADI_32, RLDIC, RFEBB, LA, + TBEGIN, + TRECHKPT, (instregex "CMP(WI|LWI|W|LW)(8)?$"), (instregex "CMP(L)?D(I)?$"), (instregex "SUBF(I)?C(8)?$"), @@ -170,17 +181,17 @@ def : 
InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], (instregex "ADDIC(8)?(o)?$"), (instregex "ADD(8|4)(o)?$"), (instregex "ADD(E|ME|ZE)(8)?(o)?$"), - (instregex "SUBF(E|ME|ZE)?(8)?$"), - (instregex "NEG(8)?$"), + (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"), + (instregex "NEG(8)?(o)?$"), (instregex "POPCNTB$"), (instregex "ADD(I|IS)?(8)?$"), (instregex "LI(S)?(8)?$"), - (instregex "(X)?OR(I|IS)?(8)?$"), + (instregex "(X)?OR(I|IS)?(8)?(o)?$"), NOP, - (instregex "NAND(8)?$"), + (instregex "NAND(8)?(o)?$"), (instregex "AND(C)?(8)?(o)?$"), - (instregex "NOR(8)?$"), - (instregex "OR(C)?(8)?$"), + (instregex "NOR(8)?(o)?$"), + (instregex "OR(C)?(8)?(o)?$"), (instregex "EQV(8)?(o)?$"), (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"), (instregex "ADD(4|8)(TLS)?(_)?$"), @@ -205,6 +216,10 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], FMR, CREQV, CRXOR, + TRECLAIM, + TSR, + TABORT, + (instregex "MFOCRF(8)?$"), (instregex "CR(6)?(UN)?SET$"), (instregex "CR(N)?(OR|AND)(C)?$"), (instregex "S(L|R)W(8)?$"), @@ -222,6 +237,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs + (instregex "M(T|F)VSCR$"), (instregex "VCMPNEZ(B|H|W)$"), VCMPEQUB, VCMPEQUD, @@ -457,6 +473,15 @@ def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, VSUMSWS )>; + +// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three +// dispatch units for the superslice. +def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MADD(HD|HDU|LD)$"), + (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$") +)>; + // 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three // dispatch units for the superslice. 
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], @@ -510,6 +535,13 @@ def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, (instregex "FSEL(D|S)o$") )>; +// 5 Cycle Restricted DP operation and one 2 cycle ALU operation. +def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MUL(H|L)(D|W)(U)?o$") +)>; + // 7 cycle Restricted DP operation and one 3 cycle ALU operation. // These operations must be done sequentially. // The DP is restricted so we need a full 5 dispatches. @@ -543,6 +575,8 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS, + XSCVDPSXWSs, + XSCVDPUXWSs, XSCVHPDP, XSCVSPDP, XSCVSXDDP, @@ -556,7 +590,6 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], XSRDPIZ, XSREDP, XSRESP, - //XSRSP, XSRSQRTEDP, XSRSQRTESP, XSSUBDP, @@ -569,13 +602,17 @@ def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C], // dispatches. def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], (instrs + (instregex "LVS(L|R)$"), + (instregex "VSPLTIS(W|H|B)$"), + (instregex "VSPLT(W|H|B)(s)?$"), + (instregex "V_SETALLONES(B|H)?$"), + (instregex "VEXTRACTU(B|H|W)$"), + MFVSRLD, + MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD, - VEXTRACTUB, - VEXTRACTUH, - VEXTRACTUW, VEXTUBLX, VEXTUBRX, VEXTUHLX, @@ -614,14 +651,6 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C, DISP_1C], VSLDOI, VSLO, VSLV, - VSPLTB, - VSPLTBs, - VSPLTH, - VSPLTHs, - VSPLTISB, - VSPLTISH, - VSPLTISW, - VSPLTW, VSR, VSRO, VSRV, @@ -696,6 +725,7 @@ def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], XSCVSDQP, XSCVUDQP, XSRQPI, + XSRQPIX, XSRQPXP, XSSUBQP, XSSUBQPO @@ -752,10 +782,20 @@ def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], XSSQRTQPO )>; +// 6 Cycle load uses a single slice. 
+def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LXVL(L)?") +)>; + // 5 Cycle load uses a single slice. def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C], (instrs + (instregex "LVE(B|H|W)X$"), + (instregex "LVX(L)?"), + (instregex "LXSI(B|H)ZX$"), LXSDX, + LXVB16X, LXVD2X, LXVWSX, LXSIWZX, @@ -775,6 +815,9 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], DARN, EnforceIEIO, ISYNC, + MSGSYNC, + TLBSYNC, + SYNC, (instregex "DCB(F|T|ST)(EP)?$"), (instregex "DCBZ(L)?(EP)?$"), (instregex "DCBTST(EP)?$"), @@ -784,7 +827,18 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C], (instregex "ICBT(LS)?$"), (instregex "LBARX(L)?$"), (instregex "LBZ(CIX|8|X|X8)?$"), - (instregex "LD(ARX|ARXL|BRX|CIX|X)?$") + (instregex "LD(ARX|ARXL|BRX|CIX|X)?$"), + (instregex "LH(A|B)RX(L)?(8)?$"), + (instregex "LWARX(L)?$"), + (instregex "LWBRX(8)?$"), + (instregex "LWZ(8|CIX|X|X8)?$"), + LHZ, + LHZ8, + LHZCIX, + LHZX, + LHZX8, + LMW, + LSWI )>; // 4 Cycle Restricted load uses a single slice but the dispatch for the whole @@ -796,14 +850,45 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], LFD )>; +// Cracked load instructions. +// Load instruction that can be done in parallel. +def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + SLBIA, + SLBIE, + SLBMFEE, + SLBMFEV, + SLBMTE, + TLBIEL +)>; + +// Cracked Load instruction. +// Requires a 4 cycle Load piece and a 2 cycle ALU piece. The Load and ALU +// operations can be run in parallel so their latencies are not added. +def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "L(W|H)ZU(X)?(8)?$"), + TEND +)>; + +def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "ST(B|H|W|D)CX$") +)>; + // Cracked Load instruction. // Requires consecutive Load and ALU pieces totaling 6 cycles.
The Load and ALU // operations cannot be done at the same time and so their latencies are added. def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - (instregex "LHA(8)?$"), - (instregex "CP_PASTE(8)?o$") + (instregex "LHA(X)?(8)?$"), + (instregex "CP_PASTE(8)?o$"), + (instregex "LWA(X)?(_32)?$"), + TCHECK )>; // Cracked Restricted Load instruction. @@ -852,6 +937,15 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DFLOADf32 )>; +// Cracked 3-Way Load Instruction +// Load with two ALU operations that depend on each other +def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "LHAU(X)?(8)?$"), + LWAUX +)>; + // Cracked Load that requires the PM resource. // Since the Load and the PM cannot be done at the same time the latencies are // added. Requires 8 cycles. @@ -861,6 +955,7 @@ def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs + LXVH8X, LXVDSX, LXVW4X )>; @@ -870,27 +965,45 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C, def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "STF(S|D|IWX|SX|DX)$"), - (instregex "STXS(DX|SPX|IWX)$"), + (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"), + (instregex "STW(8)?$"), DFSTOREf32, DFSTOREf64, XFSTOREf32, XFSTOREf64, - STIWX + STIWX, + SLBIEG, + STMW, + STSWI, + TLBIE, + (instregex "ST(W|H|D)BRX$"), + (instregex "ST(B|H|D)(8)?$"), + (instregex "ST(B|W|H|D)(CI)?X(8)?$") )>; // Store operation that requires the whole superslice. 
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - STXVD2X, - STXVW4X + (instregex "STVE(B|H|W)X$"), + (instregex "STVX(L)?$"), + (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$") )>; // Cracked instruction made up up two restriced stores. -def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, - IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], +//def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, +// IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], +// (instrs +// STFDEPX +//)>; + +// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole +// superslice. That includes both exec pipelines (EXECO, EXECE) and all three +// dispatches. +def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - STFDEPX + (instregex "MTCTR(8)?(loop)?$"), + (instregex "MTLR(8)?$") )>; // 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole @@ -899,8 +1012,11 @@ def : InstRW<[P9_LS_1C, P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C, DISP_1C], (instrs (instregex "M(T|F)VRSAVE(v)?$"), + (instregex "M(T|F)PMR$"), + (instregex "M(T|F)TB(8)?$"), (instregex "MF(SPR|CTR|LR)(8)?$"), - MFDCR + (instregex "M(T|F)MSR(D)?$"), + (instregex "MTSPR(8)?$") )>; // 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole @@ -994,7 +1110,8 @@ def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - (instregex "ADDC(8)?o$") + (instregex "ADDC(8)?o$"), + (instregex "SUBFC(8)?o$") )>; // Cracked ALU operations. @@ -1022,13 +1139,47 @@ def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, MCRFS )>; +// Cracked Restricted ALU operations. 
+// Here the two ALU ops can actually be done in parallel and therefore the +// latencies are not added together. Otherwise this is like having two +// instructions running together on two pipelines and 6 dispatches. +// ALU ops are 3 cycles each. +def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MTFSF(b|o)?$"), + (instregex "MTFSFI(o)?$") +)>; + // Cracked instruction made of two ALU ops. // The two ops cannot be done in parallel. -// One of the the ALU ops is restricted and takes 3 dispatches. +// One of the ALU ops is restricted and takes 3 dispatches. def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - RLWINMo + (instregex "RLD(I)?C(R|L)o$"), + (instregex "RLW(IMI|INM|NM)(8)?o$"), + (instregex "SLW(8)?o$"), + (instregex "SRAW(I)?o$"), + (instregex "SRW(8)?o$"), + RLDICL_32o, + RLDIMIo +)>; + +// Cracked instruction made of two ALU ops. +// The two ops cannot be done in parallel. +// Both of the ALU ops are restricted and take 3 dispatches. +def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], + (instrs + (instregex "MFFS(L|CE|o)?$") +)>; + +def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C, + DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, + DISP_1C, DISP_1C], + (instrs + (instregex "MFCR(8)?$") )>; // Cracked instruction made of two ALU ops. @@ -1036,7 +1187,11 @@ def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - (instregex "EXTSWSLIo$") + (instregex "EXTSWSLIo$"), + (instregex "SRAD(I)?o$"), + SLDo, + SRDo, + RLDICo )>; // FP Div instructions in IIC_FPDivD and IIC_FPDivS. 
@@ -1054,12 +1209,32 @@ def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C, FDIVo )>; +// 36 Cycle DP Instruction. +def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + XSSQRTDP +)>; + // 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs FSQRT )>; +// 36 Cycle DP Vector Instruction. +def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C, DISP_1C], + (instrs + XVSQRTDP +)>; + +// 27 Cycle DP Vector Instruction. +def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C, + DISP_1C, DISP_1C, DISP_1C], + (instrs + XVSQRTSP +)>; + // 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU. def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], @@ -1067,6 +1242,12 @@ def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C, FSQRTo )>; +// 26 Cycle DP Instruction. +def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C], + (instrs + XSSQRTSP +)>; + // 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches. def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C], (instrs @@ -1145,7 +1326,8 @@ def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C, def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - (instregex "STF(SU|SUX|DU|DUX)$") + (instregex "STF(S|D)U(X)?$"), + (instregex "ST(B|H|W|D)U(X)?(8)?$") )>; // Cracked instruction made up of a Load and an ALU. 
The ALU does not depend on @@ -1230,7 +1412,15 @@ def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C], (instrs - LDAT + (instregex "L(D|W)AT$") +)>; + +// Atomic Store +def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, + IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, + DISP_1C], + (instrs + (instregex "ST(D|W)AT$") )>; // Signal Processing Engine (SPE) Instructions @@ -1286,10 +1476,24 @@ def : InstRW<[], (instregex "DSS(ALL)?$"), (instregex "DST(ST)?(T)?(64)?$"), (instregex "ICBL(C|Q)$"), + (instregex "L(W|H|B)EPX$"), + (instregex "ST(W|H|B)EPX$"), + (instregex "(L|ST)FDEPX$"), + (instregex "M(T|F)SR(IN)?$"), + (instregex "M(T|F)DCR$"), + (instregex "NOP_GT_PWR(6|7)$"), + (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"), + (instregex "WRTEE(I)?$"), ATTN, CLRBHRB, MFBHRBE, + MBAR, + MSYNC, + SLBSYNC, NAP, + STOP, + TRAP, + LDMX, RFCI, RFDI, RFMCI, @@ -1298,6 +1502,5 @@ def : InstRW<[], DCBA, DCBI, DCCCI, - ICCCI, - LBEPX + ICCCI )> { let Unsupported = 1; } diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td index 2cc978c64736..3ebf1574d933 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td @@ -151,6 +151,10 @@ let SchedModel = P9Model in { let Latency = 6; } + def P9_DIV_5C : SchedWriteRes<[DIV]> { + let Latency = 5; + } + def P9_DIV_12C : SchedWriteRes<[DIV]> { let Latency = 12; } @@ -220,6 +224,16 @@ let SchedModel = P9Model in { let Latency = 27; } + def P9_DPE_27C_10 : SchedWriteRes<[DP]> { + let ResourceCycles = [10]; + let Latency = 27; + } + + def P9_DPO_27C_10 : SchedWriteRes<[DP]> { + let ResourceCycles = [10]; + let Latency = 27; + } + def P9_DP_33C_8 : SchedWriteRes<[DP]> { let ResourceCycles = [8]; let Latency = 33; @@ -240,6 +254,16 @@ let SchedModel = P9Model in { let Latency = 36; } + def P9_DPE_36C_10 : 
SchedWriteRes<[DP]> { + let ResourceCycles = [10]; + let Latency = 36; + } + + def P9_DPO_36C_10 : SchedWriteRes<[DP]> { + let ResourceCycles = [10]; + let Latency = 36; + } + def P9_PM_3C : SchedWriteRes<[PM]> { let Latency = 3; } @@ -260,6 +284,10 @@ let SchedModel = P9Model in { let Latency = 5; } + def P9_LS_6C : SchedWriteRes<[LS]> { + let Latency = 6; + } + def P9_DFU_12C : SchedWriteRes<[DFU]> { let Latency = 12; } @@ -312,8 +340,13 @@ let SchedModel = P9Model in { def P9_IntDivAndALUOp_18C_8 : WriteSequence<[P9_DIV_16C_8, P9_ALU_2C]>; def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>; def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>; + def P9_StoreAndALUOp_3C : WriteSequence<[P9_LS_1C, P9_ALU_2C]>; def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>; def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>; + def P9_ALU2OpAndALU2Op_6C : WriteSequence<[P9_ALU_3C, P9_ALU_3C]>; + def P9_ALUOpAndALUOpAndALUOp_6C : + WriteSequence<[P9_ALU_2C, P9_ALU_2C, P9_ALU_2C]>; + def P9_DPOpAndALUOp_7C : WriteSequence<[P9_DP_5C, P9_ALU_2C]>; def P9_DPOpAndALUOp_9C : WriteSequence<[P9_DP_7C, P9_ALU_2C]>; def P9_DPOpAndALU2Op_10C : WriteSequence<[P9_DP_7C, P9_ALU_3C]>; def P9_DPOpAndALUOp_24C_5 : WriteSequence<[P9_DP_22C_5, P9_ALU_2C]>;