From 0625aed2fcfb55b245d2160bb07a68878832991a Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Fri, 25 Feb 2022 11:16:04 -0600 Subject: [PATCH] [PowerPC][NFC] Split out the MMA instructions from the P10 instructions. Currently all of the MMA instructions as well as the MMA related register info is bundled with the Power 10 instructions. This patch just splits them out. Reviewed By: lei Differential Revision: https://reviews.llvm.org/D120515 --- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 1 + llvm/lib/Target/PowerPC/PPCInstrMMA.td | 638 ++++++++++++++++++ llvm/lib/Target/PowerPC/PPCInstrP10.td | 622 ----------------- llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 88 +-- llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td | 106 +++ 5 files changed, 747 insertions(+), 708 deletions(-) create mode 100644 llvm/lib/Target/PowerPC/PPCInstrMMA.td create mode 100644 llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 54ba48f8c129..4cbcf8acbd59 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3749,6 +3749,7 @@ def : Pat<(not i1:$in), // Prefixed instructions may require access to the above defs at a later // time so we include this after the def. include "PPCInstrP10.td" +include "PPCInstrMMA.td" // Patterns for arithmetic i1 operations. def : Pat<(add i1:$a, i1:$b), diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td new file mode 100644 index 000000000000..a268f3c48604 --- /dev/null +++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td @@ -0,0 +1,638 @@ + +// Mask immediates for MMA instructions (2, 4 and 8 bits). +def Msk2Imm : ImmLeaf(Imm); }]>; +def Msk4Imm : ImmLeaf(Imm); }]>; +def Msk8Imm : ImmLeaf(Imm); }]>; + +def MMA : Predicate<"Subtarget->hasMMA()">; + + +// Multiclass definitions for MMA accumulator instructions. +// ---------------------------------------------------------------------------- + +// Defines 2 unmasked instructions where the xo field for acc/non-acc version +// is even/odd. +multiclass ACC_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M844_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P8_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. 
+multiclass ACC_UM_M444_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XYP4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XYP4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// The XO field for acc/non-acc version is even/odd. +multiclass ACC_UM_M244_XOEO opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. +// Upper nibble of XO field for acc/non-acc version is 0x4/0x6. +multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, + string asmstr> { + let Predicates = [MMA] in { + def NAME : + XX3Form_AT3_XAB6, + RegConstraint<"@earlyclobber $AT">; + def PP : + XX3Form_AT3_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, xo, (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x20), (outs acc:$AT), + !con((ins acc:$ATi), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 +// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. 
+multiclass ACC_NEG_UM_M244_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_M244_XOEO; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), + !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME#PN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4P2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 5 instructions, unmasked, operand negating. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_UM_XOEO; + let Predicates = [MMA] in { + def PN : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NP : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def NN : XX3Form_AT3_XAB6, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. 
+multiclass ACC_NEG_UM_M44_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_XY4_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_XY4_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. +// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. +multiclass ACC_NEG_UM_M42_XOM84C opcode, bits<8> xo, dag IOL, + string asmbase, string asmstr> { + defm NAME : ACC_NEG_UM_XOM84C; + let Predicates = [MMA, PrefixInstrs] in { + def PM#NAME : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x01), (outs acc:$AT), + !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), + !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"@earlyclobber $AT">; + def PM#NAME#PP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, xo, (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#PN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x80), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NP : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0x40), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def PM#NAME#NN : + MMIRR_XX3Form_X4Y2_XAB6< + opcode, !or(xo, 0xC0), (outs acc:$AT), + !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), + !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), + IIC_VecFP, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + } +} + +// End of class definitions. 
+//----------------------------------------------------------------------------- + +let Predicates = [MMA] in { + def XXMFACC : + XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", + IIC_VecGeneral, + [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, + RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; + def XXMTACC : + XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", + IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), + "#KILL_PAIR", []>, + RegConstraint<"$XTp = $XSp">; + def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), + "#BUILD_UACC $AT, $AS", []>; + // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in + // the backend. We avoid CSE here because it generates a copy of the acc + // register and this copy is more expensive than calling the intrinsic again. + let isAsCheapAsAMove = 1, isReMaterializable = 1 in { + def XXSETACCZ : + XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, + [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; + } + def XVI8GER4SPP : + XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), + "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; + let mayStore = 1 in { + def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), + "#SPILL_ACC", []>; + def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), + "#SPILL_UACC", []>; + } + let mayLoad = 1, hasSideEffects = 0 in { + def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), + "#RESTORE_ACC", []>; + def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), + "#RESTORE_UACC", []>; + } +} + +let Predicates = [MMA, PrefixInstrs] in { + def PMXVI8GER4SPP : + MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), + (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, + u4imm:$YMSK, u4imm:$PMSK), + "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", + IIC_VecGeneral, []>, + RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; +} + +// MMA accumulating/non-accumulating instructions. 
+//------------------------------------------------------------------------------ + +// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN +// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN +defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB), + "xvbf16ger2", "$AT, $XA, $XB">; + +// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP +defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB), + "xvi4ger8", "$AT, $XA, $XB">; + +// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP +defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB), + "xvi8ger4", "$AT, $XA, $XB">; + +// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP +defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2", "$AT, $XA, $XB">; + +// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP +defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB), + "xvi16ger2s", "$AT, $XA, $XB">; + +// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN +// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN +defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB), + "xvf16ger2", "$AT, $XA, $XB">; + +// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP +// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP +defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB), + "xvf32ger", "$AT, $XA, $XB">; + +// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN +// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN +defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB), + "xvf64ger", "$AT, $XA, $XB">; +//------------------------------------------------------------------------------ + +// MMA Intrinsics +let Predicates = [MMA] in { + def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)), + (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)), + (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)), + (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)), + (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)), + (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERPN 
$ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), + (XVF64GER $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), + (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; + + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), + (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), + (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; + def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), + (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; +} + +// MMA Intrinsics +let Predicates = [MMA, PrefixInstrs] in { + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)), + (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk8Imm:$PMSK)), + (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)), + (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk4Imm:$PMSK)), + (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 
(int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)), + (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK)), + (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)), + (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk2Imm:$YMSK)), + (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk2Imm:$YMSK)>; + + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp 
v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)), + (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; + def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, + Msk4Imm:$XMSK, Msk4Imm:$YMSK, + Msk2Imm:$PMSK)), + (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, + Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; +} + +def ConcatsMMA { + dag VecsToVecPair0 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1), + $vs1, sub_vsx0)); + dag VecsToVecPair1 = + (v256i1 (INSERT_SUBREG + (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), + $vs3, sub_vsx0)); + dag VecsToVecQuad = + (BUILD_UACC (INSERT_SUBREG + (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), + (KILL_PAIR VecsToVecPair0), sub_pair0), + (KILL_PAIR VecsToVecPair1), sub_pair1)); +} + +def Extracts { + dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); + dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); + dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); + dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); + dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); + dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); +} + +let Predicates = [MMA] in { + def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), + (XXMTACC ConcatsMMA.VecsToVecQuad)>; + def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, + v16i8:$vs3, v16i8:$vs2)), + (XXMTACC ConcatsMMA.VecsToVecQuad)>; + def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), + Extracts.Vec0>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)), + Extracts.Vec1>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)), + Extracts.Vec2>; + def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)), + Extracts.Vec3>; +} + + diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 81b2d835cbf2..6f1819085c19 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -54,10 +54,6 @@ //-------------------------- Predicate definitions ---------------------------// def IsPPC32 : Predicate<"!Subtarget->isPPC64()">; -// Mask immediates for MMA instructions (2, 4 and 8 bits). 
-def Msk2Imm : ImmLeaf(Imm); }]>; -def Msk4Imm : ImmLeaf(Imm); }]>; -def Msk8Imm : ImmLeaf(Imm); }]>; //===----------------------------------------------------------------------===// // PowerPC ISA 3.1 specific type constraints. @@ -906,7 +902,6 @@ class MMIRR_XX3Form_XYP4_XAB6 opcode, bits<8> xo, dag OOL, dag IOL, def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">; def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">; def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; -def MMA : Predicate<"Subtarget->hasMMA()">; def RCCp { dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC); @@ -1058,592 +1053,6 @@ let Predicates = [PrefixInstrs] in { } } -// Multiclass definitions for MMA accumulator instructions. -// ---------------------------------------------------------------------------- - -// Defines 2 unmasked instructions where the xo field for acc/non-acc version -// is even/odd. -multiclass ACC_UM_XOEO opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - let Predicates = [MMA] in { - def NAME : - XX3Form_AT3_XAB6, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. -multiclass ACC_UM_M844_XOEO opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P8_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P8_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. -multiclass ACC_UM_M444_XOEO opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XYP4_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XYP4_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. -// The XO field for acc/non-acc version is even/odd. 
-multiclass ACC_UM_M244_XOEO opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - defm NAME : ACC_UM_XOEO; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits. -// Upper nibble of XO field for acc/non-acc version is 0x4/0x6. -multiclass ACC_UM_M244_XO46 opcode, bits<8> xo, dag IOL, string asmbase, - string asmstr> { - let Predicates = [MMA] in { - def NAME : - XX3Form_AT3_XAB6, - RegConstraint<"@earlyclobber $AT">; - def PP : - XX3Form_AT3_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, xo, (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x20), (outs acc:$AT), - !con((ins acc:$ATi), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4 -// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. 
-multiclass ACC_NEG_UM_M244_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_M244_XOEO; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL), - !strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME#PN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4P2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 5 instructions, unmasked, operand negating. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_UM_XOEO; - let Predicates = [MMA] in { - def PN : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NP : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def NN : XX3Form_AT3_XAB6, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. 
-multiclass ACC_NEG_UM_M44_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_XY4_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_XY4_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits. -// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands. -multiclass ACC_NEG_UM_M42_XOM84C opcode, bits<8> xo, dag IOL, - string asmbase, string asmstr> { - defm NAME : ACC_NEG_UM_XOM84C; - let Predicates = [MMA, PrefixInstrs] in { - def PM#NAME : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x01), (outs acc:$AT), - !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)), - !strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"@earlyclobber $AT">; - def PM#NAME#PP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, xo, (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#PN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x80), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NP : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0x40), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def PM#NAME#NN : - MMIRR_XX3Form_X4Y2_XAB6< - opcode, !or(xo, 0xC0), (outs acc:$AT), - !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))), - !strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"), - IIC_VecFP, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - } -} - -// End of class definitions. 
-//----------------------------------------------------------------------------- - -let Predicates = [MMA] in { - def XXMFACC : - XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS", - IIC_VecGeneral, - [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>, - RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">; - def XXMTACC : - XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT", - IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp), - "#KILL_PAIR", []>, - RegConstraint<"$XTp = $XSp">; - def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS), - "#BUILD_UACC $AT, $AS", []>; - // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in - // the backend. We avoid CSE here because it generates a copy of the acc - // register and this copy is more expensive than calling the intrinsic again. - let isAsCheapAsAMove = 1, isReMaterializable = 1 in { - def XXSETACCZ : - XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral, - [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>; - } - def XVI8GER4SPP : - XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB), - "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; - let mayStore = 1 in { - def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst), - "#SPILL_ACC", []>; - def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst), - "#SPILL_UACC", []>; - } - let mayLoad = 1, hasSideEffects = 0 in { - def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src), - "#RESTORE_ACC", []>; - def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src), - "#RESTORE_UACC", []>; - } -} - -let Predicates = [MMA, PrefixInstrs] in { - def PMXVI8GER4SPP : - MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT), - (ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK, - u4imm:$YMSK, u4imm:$PMSK), - "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK", - IIC_VecGeneral, []>, - RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">; -} - -// MMA accumulating/non-accumulating instructions. 
-//------------------------------------------------------------------------------ - -// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN -// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN -defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB), - "xvbf16ger2", "$AT, $XA, $XB">; - -// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP -defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB), - "xvi4ger8", "$AT, $XA, $XB">; - -// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP -defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB), - "xvi8ger4", "$AT, $XA, $XB">; - -// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP -defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB), - "xvi16ger2", "$AT, $XA, $XB">; - -// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP -defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB), - "xvi16ger2s", "$AT, $XA, $XB">; - -// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN -// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN -defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB), - "xvf16ger2", "$AT, $XA, $XB">; - -// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP -// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP -defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB), - "xvf32ger", "$AT, $XA, $XB">; - -// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN -// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN -defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB), - "xvf64ger", "$AT, $XA, $XB">; -//------------------------------------------------------------------------------ - -// MMA Intrinsics -let Predicates = [MMA] in { - def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)), - (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)), - (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)), - (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)), - (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)), - (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERPN 
$ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)), - (XVF64GER $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)), - (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>; - - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)), - (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)), - (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; - def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)), - (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>; -} - -// MMA Intrinsics -let Predicates = [MMA, PrefixInstrs] in { - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)), - (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk8Imm:$PMSK)), - (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk8Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)), - (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk4Imm:$PMSK)), - (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk4Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 
(int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)), - (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK)), - (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)), - (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk2Imm:$YMSK)), - (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk2Imm:$YMSK)>; - - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp 
v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)), - (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; - def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB, - Msk4Imm:$XMSK, Msk4Imm:$YMSK, - Msk2Imm:$PMSK)), - (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK, - Msk4Imm:$YMSK, Msk2Imm:$PMSK)>; -} - def Concats { dag VecsToVecPair0 = (v256i1 (INSERT_SUBREG @@ -1653,37 +1062,6 @@ def Concats { (v256i1 (INSERT_SUBREG (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), $vs3, sub_vsx0)); - dag VecsToVecQuad = - (BUILD_UACC (INSERT_SUBREG - (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)), - (KILL_PAIR VecsToVecPair0), sub_pair0), - (KILL_PAIR VecsToVecPair1), sub_pair1)); -} - -def Extracts { - dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0)); - dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1)); - dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0)); - dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1)); - dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0)); - dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1)); -} - -let Predicates = [MMA] in { - def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0, - v16i8:$vs3, v16i8:$vs2)), - (XXMTACC Concats.VecsToVecQuad)>; - def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), - Extracts.Vec0>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)), - Extracts.Vec1>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)), - Extracts.Vec2>; - def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)), - Extracts.Vec3>; } let Predicates = [PairedVectorMemops] in { diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td index 044035e0ef29..f50dd10ab7ca 100644 --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -18,8 +18,6 @@ def sub_32 : SubRegIndex<32>; def sub_64 : SubRegIndex<64>; def sub_vsx0 : SubRegIndex<128>; def sub_vsx1 : SubRegIndex<128, 128>; -def sub_pair0 : SubRegIndex<256>; -def sub_pair1 : SubRegIndex<256, 256>; def sub_gp8_x0 : SubRegIndex<64>; def sub_gp8_x1 : SubRegIndex<64, 64>; } @@ -100,21 +98,6 @@ class CRBIT num, string n> : PPCReg { let 
HWEncoding{4-0} = num; } -// ACC - One of the 8 512-bit VSX accumulators. -class ACC num, string n, list subregs> : PPCReg { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - -// UACC - One of the 8 512-bit VSX accumulators prior to being primed. -// Without using this register class, the register allocator has no way to -// differentiate a primed accumulator from an unprimed accumulator. -// This may result in invalid copies between primed and unprimed accumulators. -class UACC num, string n, list subregs> : PPCReg { - let HWEncoding{2-0} = num; - let SubRegs = subregs; -} - // VSR Pairs - One of the 32 paired even-odd consecutive VSRs. class VSRPair num, string n, list subregs> : PPCReg { let HWEncoding{4-0} = num; @@ -272,9 +255,6 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>; def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; // SPE extra registers -// SPE Accumulator for multiply-accumulate SPE operations. Never directly -// accessed, so there's no real encoding for it. -def SPEACC: DwarfRegNum<[99, 111]>; def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>; def XER: SPR<1, "xer">, DwarfRegNum<[76]>; @@ -448,72 +428,6 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> { let CopyCost = -1; } -let SubRegIndices = [sub_pair0, sub_pair1] in { - def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3, - ACC4, ACC5, ACC6, ACC7)> { - // The AllocationPriority is in the range [0, 63]. Assigned the ACC registers - // the highest possible priority in this range to force the register allocator - // to assign these registers first. This is done because the ACC registers - // must represent 4 advacent vector registers. For example ACC1 must be - // VS4 - VS7. The value here must be at least 32 as we want to allocate - // these registers even before we allocate global ranges. - let AllocationPriority = 63; - let Size = 512; -} - -let SubRegIndices = [sub_pair0, sub_pair1] in { - def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>; - def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>; - def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>; - def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>; - def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>; - def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>; - def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>; - def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>; -} -def UACCRC : RegisterClass<"PPC", [v512i1], 128, - (add UACC0, UACC1, UACC2, UACC3, - UACC4, UACC5, UACC6, UACC7)> { - // The AllocationPriority for the UACC registers is still high and must be at - // least 32 as we want to allocate these registers before we allocate other - // global ranges. The value must be less than the AllocationPriority of the - // ACC registers. 
-  let AllocationPriority = 36;
-  let Size = 512;
-}
-
-// FIXME: This allocation order may increase stack frame size when allocating
-// non-volatile registers.
-//
-// Placing Altivec registers first and allocate the rest as underlying VSX
-// ones, to reduce interference with accumulator registers (lower 32 VSRs).
-// This reduces copies when loading for accumulators, which is common use for
-// paired VSX registers.
-def VSRpRC :
-  RegisterClass<"PPC", [v256i1], 128,
-                (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
-                     VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
-                     VSRp29, VSRp28, VSRp27, VSRp26,
-                     (sequence "VSRp%u", 0, 6),
-                     (sequence "VSRp%u", 15, 7))> {
-  // Give the VSRp registers a non-zero AllocationPriority. The value is less
-  // than 32 as these registers should not always be allocated before global
-  // ranges and the value should be less than the AllocationPriority - 32 for
-  // the UACC registers. Even global VSRp registers should be allocated after
-  // the UACC registers have been chosen.
-  let AllocationPriority = 2;
-  let Size = 256;
-}
-
 // Make AllocationOrder as similar as G8RC's to avoid potential spilling.
 // Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated
 // at last.
@@ -528,3 +442,5 @@ def G8pRC :
   }];
   let Size = 128;
 }
+
+include "PPCRegisterInfoMMA.td"
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
new file mode 100644
index 000000000000..0b6305f95a0a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfoMMA.td
@@ -0,0 +1,106 @@
+//===-- PPCRegisterInfoMMA.td - The PowerPC Register File --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Register info for registers related to MMA. These are the ACC and UACC
+// registers.
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_pair0 : SubRegIndex<256>;
+def sub_pair1 : SubRegIndex<256, 256>;
+}
+
+// ACC - One of the 8 512-bit VSX accumulators.
+class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+  let HWEncoding{2-0} = num;
+  let SubRegs = subregs;
+}
+
+// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
+// Without using this register class, the register allocator has no way to
+// differentiate a primed accumulator from an unprimed accumulator.
+// This may result in invalid copies between primed and unprimed accumulators.
+class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+  let HWEncoding{2-0} = num;
+  let SubRegs = subregs;
+}
+
+// SPE Accumulator for multiply-accumulate SPE operations. Never directly
+// accessed, so there's no real encoding for it.
+def SPEACC: DwarfRegNum<[99, 111]>;
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+  def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+  def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+  def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+  def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+  def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+  def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+  def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+  def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
+                                                 ACC4, ACC5, ACC6, ACC7)> {
+  // The AllocationPriority is in the range [0, 63]. Assign the ACC registers
+  // the highest possible priority in this range to force the register allocator
+  // to assign these registers first. This is done because the ACC registers
+  // must represent 4 adjacent vector registers. For example ACC1 must be
+  // VS4 - VS7. The value here must be at least 32 as we want to allocate
+  // these registers even before we allocate global ranges.
+  let AllocationPriority = 63;
+  let Size = 512;
+}
+
+let SubRegIndices = [sub_pair0, sub_pair1] in {
+  def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+  def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+  def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+  def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+  def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+  def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+  def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+  def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
+}
+def UACCRC : RegisterClass<"PPC", [v512i1], 128,
+                           (add UACC0, UACC1, UACC2, UACC3,
+                                UACC4, UACC5, UACC6, UACC7)> {
+  // The AllocationPriority for the UACC registers is still high and must be at
+  // least 32 as we want to allocate these registers before we allocate other
+  // global ranges. The value must be less than the AllocationPriority of the
+  // ACC registers.
+  let AllocationPriority = 36;
+  let Size = 512;
+}
+
+// FIXME: This allocation order may increase stack frame size when allocating
+// non-volatile registers.
+//
+// Place Altivec registers first and allocate the rest as underlying VSX
+// ones, to reduce interference with accumulator registers (lower 32 VSRs).
+// This reduces copies when loading for accumulators, which is a common use for
+// paired VSX registers.
+def VSRpRC :
+  RegisterClass<"PPC", [v256i1], 128,
+                (add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
+                     VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
+                     VSRp29, VSRp28, VSRp27, VSRp26,
+                     (sequence "VSRp%u", 0, 6),
+                     (sequence "VSRp%u", 15, 7))> {
+  // Give the VSRp registers a non-zero AllocationPriority. The value is less
+  // than 32 as these registers should not always be allocated before global
+  // ranges and the value should be less than the AllocationPriority - 32 for
+  // the UACC registers. Even global VSRp registers should be allocated after
+  // the UACC registers have been chosen.
+  let AllocationPriority = 2;
+  let Size = 256;
+}
+
+
+
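For reference only (not part of the patch): a minimal C sketch of the kind of source that ends up in these register classes. The builtins and types (__vector_quad, __builtin_mma_xxsetaccz, __builtin_mma_xvf32gerpp, __builtin_mma_disassemble_acc) are the existing PowerPC MMA builtins; the function name, shapes, and loop bounds are illustrative. A __vector_quad that is live across the ger builtins is allocated from ACCRC, which is why an ACC register must cover 4 adjacent VSRs; the unprimed form handled by assemble/disassemble corresponds to UACCRC. Assumes clang or gcc targeting Power10 (-mcpu=power10, which implies MMA and VSX).

// Accumulate a 4x4 f32 tile with one rank-1 (outer-product) update per iteration.
// c should be 16-byte aligned and have room for 4 vectors (16 floats).
void ger_4x4(float *c, const __vector unsigned char *a,
             const __vector unsigned char *b, int k) {
  __vector_quad acc;                            // 512-bit accumulator: one ACC, i.e. 4 adjacent VSRs
  __builtin_mma_xxsetaccz(&acc);                // prime the accumulator with zeros
  for (int i = 0; i < k; ++i)
    __builtin_mma_xvf32gerpp(&acc, a[i], b[i]); // acc += a[i] (x) b[i]
  __builtin_mma_disassemble_acc(c, &acc);       // unprime and store the 4 result rows
}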