[PowerPC][NFC] Split out the MMA instructions from the P10 instructions.

Currently all of the MMA instructions as well as the MMA related register info
is bundled with the Power 10 instructions. This patch just splits them out.

Reviewed By: lei

Differential Revision: https://reviews.llvm.org/D120515
This commit is contained in:
Stefan Pintilie 2022-02-25 11:16:04 -06:00
parent bf60a1c546
commit 0625aed2fc
5 changed files with 747 additions and 708 deletions

View File

@ -3749,6 +3749,7 @@ def : Pat<(not i1:$in),
// Prefixed instructions may require access to the above defs at a later // Prefixed instructions may require access to the above defs at a later
// time so we include this after the def. // time so we include this after the def.
include "PPCInstrP10.td" include "PPCInstrP10.td"
include "PPCInstrMMA.td"
// Patterns for arithmetic i1 operations. // Patterns for arithmetic i1 operations.
def : Pat<(add i1:$a, i1:$b), def : Pat<(add i1:$a, i1:$b),

View File

@ -0,0 +1,638 @@
// Mask immediates for MMA instructions (2, 4 and 8 bits).
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
def MMA : Predicate<"Subtarget->hasMMA()">;
// Multiclass definitions for MMA accumulator instructions.
// ----------------------------------------------------------------------------
// Defines 2 unmasked instructions where the xo field for acc/non-acc version
// is even/odd.
multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
let Predicates = [MMA] in {
def NAME :
XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PP :
XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P8_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P8_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XYP4_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XYP4_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
// Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
let Predicates = [MMA] in {
def NAME :
XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PP :
XX3Form_AT3_XAB6<
opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, xo, (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x20), (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA] in {
def PN : XX3Form_AT3_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NP : XX3Form_AT3_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NN : XX3Form_AT3_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME#PN :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 5 instructions, unmasked, operand negating.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA] in {
def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#PN :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#PN :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// End of class definitions.
//-----------------------------------------------------------------------------
let Predicates = [MMA] in {
def XXMFACC :
XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
IIC_VecGeneral,
[(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
def XXMTACC :
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
IIC_VecGeneral,
[(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
"#KILL_PAIR", []>,
RegConstraint<"$XTp = $XSp">;
def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
"#BUILD_UACC $AT, $AS", []>;
// We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
// the backend. We avoid CSE here because it generates a copy of the acc
// register and this copy is more expensive than calling the intrinsic again.
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def XXSETACCZ :
XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
[(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
}
def XVI8GER4SPP :
XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
"xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
let mayStore = 1 in {
def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
"#SPILL_ACC", []>;
def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
"#SPILL_UACC", []>;
}
let mayLoad = 1, hasSideEffects = 0 in {
def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
"#RESTORE_ACC", []>;
def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
"#RESTORE_UACC", []>;
}
}
let Predicates = [MMA, PrefixInstrs] in {
def PMXVI8GER4SPP :
MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
(ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
u4imm:$YMSK, u4imm:$PMSK),
"pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
// MMA accumulating/non-accumulating instructions.
//------------------------------------------------------------------------------
// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
"xvbf16ger2", "$AT, $XA, $XB">;
// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
"xvi4ger8", "$AT, $XA, $XB">;
// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
"xvi8ger4", "$AT, $XA, $XB">;
// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
"xvi16ger2", "$AT, $XA, $XB">;
// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
"xvi16ger2s", "$AT, $XA, $XB">;
// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
"xvf16ger2", "$AT, $XA, $XB">;
// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP
// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP
defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
"xvf32ger", "$AT, $XA, $XB">;
// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
"xvf64ger", "$AT, $XA, $XB">;
//------------------------------------------------------------------------------
// MMA Intrinsics
let Predicates = [MMA] in {
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
(XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
(XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
(XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
(XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
(XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
(XVF64GER $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
(XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
(XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
}
// MMA Intrinsics
let Predicates = [MMA, PrefixInstrs] in {
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
(PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk8Imm:$PMSK)),
(PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
(PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk4Imm:$PMSK)),
(PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)),
(PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)),
(PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}
def ConcatsMMA {
dag VecsToVecPair0 =
(v256i1 (INSERT_SUBREG
(INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
$vs1, sub_vsx0));
dag VecsToVecPair1 =
(v256i1 (INSERT_SUBREG
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
$vs3, sub_vsx0));
dag VecsToVecQuad =
(BUILD_UACC (INSERT_SUBREG
(INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
(KILL_PAIR VecsToVecPair0), sub_pair0),
(KILL_PAIR VecsToVecPair1), sub_pair1));
}
def Extracts {
dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
}
let Predicates = [MMA] in {
def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
(XXMTACC ConcatsMMA.VecsToVecQuad)>;
def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
v16i8:$vs3, v16i8:$vs2)),
(XXMTACC ConcatsMMA.VecsToVecQuad)>;
def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)),
Extracts.Vec0>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)),
Extracts.Vec1>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)),
Extracts.Vec2>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)),
Extracts.Vec3>;
}

View File

@ -54,10 +54,6 @@
//-------------------------- Predicate definitions ---------------------------// //-------------------------- Predicate definitions ---------------------------//
def IsPPC32 : Predicate<"!Subtarget->isPPC64()">; def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
// Mask immediates for MMA instructions (2, 4 and 8 bits).
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// PowerPC ISA 3.1 specific type constraints. // PowerPC ISA 3.1 specific type constraints.
@ -906,7 +902,6 @@ class MMIRR_XX3Form_XYP4_XAB6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">; def PrefixInstrs : Predicate<"Subtarget->hasPrefixInstrs()">;
def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">; def IsISA3_1 : Predicate<"Subtarget->isISA3_1()">;
def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">; def PairedVectorMemops : Predicate<"Subtarget->pairedVectorMemops()">;
def MMA : Predicate<"Subtarget->hasMMA()">;
def RCCp { def RCCp {
dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC); dag AToVSRC = (COPY_TO_REGCLASS $XA, VSRC);
@ -1058,592 +1053,6 @@ let Predicates = [PrefixInstrs] in {
} }
} }
// Multiclass definitions for MMA accumulator instructions.
// ----------------------------------------------------------------------------
// Defines 2 unmasked instructions where the xo field for acc/non-acc version
// is even/odd.
multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
let Predicates = [MMA] in {
def NAME :
XX3Form_AT3_XAB6<opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PP :
XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 8, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P8_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P8_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 4, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XYP4_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XYP4_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
// The XO field for acc/non-acc version is even/odd.
multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 4 instructions, masked/unmasked with masks 2, 4, 4 bits.
// Upper nibble of XO field for acc/non-acc version is 0x4/0x6.
multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
string asmstr> {
let Predicates = [MMA] in {
def NAME :
XX3Form_AT3_XAB6<opcode, xo, (outs acc:$AT), IOL,
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PP :
XX3Form_AT3_XAB6<
opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, xo, (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x20), (outs acc:$AT),
!con((ins acc:$ATi),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 2, 4, 4
// bits. Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA] in {
def PN : XX3Form_AT3_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NP : XX3Form_AT3_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NN : XX3Form_AT3_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME#PN :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_XY4P2_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 5 instructions, unmasked, operand negating.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA] in {
def PN : XX3Form_AT3_XAB6<opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NP : XX3Form_AT3_XAB6<opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def NN : XX3Form_AT3_XAB6<opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), IOL),
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 4, 4 bits.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_XY4_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#PN :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_XY4_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// Defines 10 instructions, operand negating, unmasked, masked with 4, 2 bits.
// Upper nibble are masked with 0x8, 0x4, 0xC for negating operands.
multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
string asmbase, string asmstr> {
defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
let Predicates = [MMA, PrefixInstrs] in {
def PM#NAME :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x01), (outs acc:$AT),
!con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"@earlyclobber $AT">;
def PM#NAME#PP :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, xo, (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#PN :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x80), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NP :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0x40), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def PM#NAME#NN :
MMIRR_XX3Form_X4Y2_XAB6<
opcode, !or(xo, 0xC0), (outs acc:$AT),
!con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK"),
IIC_VecFP, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
}
// End of class definitions.
//-----------------------------------------------------------------------------
let Predicates = [MMA] in {
def XXMFACC :
XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
IIC_VecGeneral,
[(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
def XXMTACC :
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
IIC_VecGeneral,
[(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
"#KILL_PAIR", []>,
RegConstraint<"$XTp = $XSp">;
def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
"#BUILD_UACC $AT, $AS", []>;
// We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
// the backend. We avoid CSE here because it generates a copy of the acc
// register and this copy is more expensive than calling the intrinsic again.
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
def XXSETACCZ :
XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
[(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
}
def XVI8GER4SPP :
XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
"xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
let mayStore = 1 in {
def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
"#SPILL_ACC", []>;
def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
"#SPILL_UACC", []>;
}
let mayLoad = 1, hasSideEffects = 0 in {
def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
"#RESTORE_ACC", []>;
def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
"#RESTORE_UACC", []>;
}
}
let Predicates = [MMA, PrefixInstrs] in {
def PMXVI8GER4SPP :
MMIRR_XX3Form_XYP4_XAB6<59, 99, (outs acc:$AT),
(ins acc:$ATi, vsrc:$XA,vsrc:$XB, u4imm:$XMSK,
u4imm:$YMSK, u4imm:$PMSK),
"pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
IIC_VecGeneral, []>,
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}
// MMA accumulating/non-accumulating instructions.
//------------------------------------------------------------------------------
// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
"xvbf16ger2", "$AT, $XA, $XB">;
// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
"xvi4ger8", "$AT, $XA, $XB">;
// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
"xvi8ger4", "$AT, $XA, $XB">;
// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
"xvi16ger2", "$AT, $XA, $XB">;
// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
"xvi16ger2s", "$AT, $XA, $XB">;
// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
"xvf16ger2", "$AT, $XA, $XB">;
// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERPP
// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERPP
defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
"xvf32ger", "$AT, $XA, $XB">;
// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
"xvf64ger", "$AT, $XA, $XB">;
//------------------------------------------------------------------------------
// MMA Intrinsics
let Predicates = [MMA] in {
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
(XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
(XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
(XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
(XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
(XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
(XVF64GER $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
(XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
(XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
(XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
(XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
}
// MMA Intrinsics
let Predicates = [MMA, PrefixInstrs] in {
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
(PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk8Imm:$PMSK)),
(PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
(PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk4Imm:$PMSK)),
(PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)),
(PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
(PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)),
(PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
(PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk2Imm:$YMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
(PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
Msk4Imm:$XMSK, Msk4Imm:$YMSK,
Msk2Imm:$PMSK)),
(PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}
def Concats { def Concats {
dag VecsToVecPair0 = dag VecsToVecPair0 =
(v256i1 (INSERT_SUBREG (v256i1 (INSERT_SUBREG
@ -1653,37 +1062,6 @@ def Concats {
(v256i1 (INSERT_SUBREG (v256i1 (INSERT_SUBREG
(INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1), (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
$vs3, sub_vsx0)); $vs3, sub_vsx0));
dag VecsToVecQuad =
(BUILD_UACC (INSERT_SUBREG
(INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
(KILL_PAIR VecsToVecPair0), sub_pair0),
(KILL_PAIR VecsToVecPair1), sub_pair1));
}
def Extracts {
dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
}
let Predicates = [MMA] in {
def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0, v4i32:$vs3, v4i32:$vs2)),
(XXMTACC Concats.VecsToVecQuad)>;
def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
v16i8:$vs3, v16i8:$vs2)),
(XXMTACC Concats.VecsToVecQuad)>;
def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)),
Extracts.Vec0>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)),
Extracts.Vec1>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)),
Extracts.Vec2>;
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)),
Extracts.Vec3>;
} }
let Predicates = [PairedVectorMemops] in { let Predicates = [PairedVectorMemops] in {

View File

@ -18,8 +18,6 @@ def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>; def sub_64 : SubRegIndex<64>;
def sub_vsx0 : SubRegIndex<128>; def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>; def sub_vsx1 : SubRegIndex<128, 128>;
def sub_pair0 : SubRegIndex<256>;
def sub_pair1 : SubRegIndex<256, 256>;
def sub_gp8_x0 : SubRegIndex<64>; def sub_gp8_x0 : SubRegIndex<64>;
def sub_gp8_x1 : SubRegIndex<64, 64>; def sub_gp8_x1 : SubRegIndex<64, 64>;
} }
@ -100,21 +98,6 @@ class CRBIT<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num; let HWEncoding{4-0} = num;
} }
// ACC - One of the 8 512-bit VSX accumulators.
class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
// Without using this register class, the register allocator has no way to
// differentiate a primed accumulator from an unprimed accumulator.
// This may result in invalid copies between primed and unprimed accumulators.
class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// VSR Pairs - One of the 32 paired even-odd consecutive VSRs. // VSR Pairs - One of the 32 paired even-odd consecutive VSRs.
class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> { class VSRPair<bits<5> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{4-0} = num; let HWEncoding{4-0} = num;
@ -272,9 +255,6 @@ def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>; def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
// SPE extra registers // SPE extra registers
// SPE Accumulator for multiply-accumulate SPE operations. Never directly
// accessed, so there's no real encoding for it.
def SPEACC: DwarfRegNum<[99, 111]>;
def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>; def SPEFSCR: SPR<512, "spefscr">, DwarfRegNum<[612, 112]>;
def XER: SPR<1, "xer">, DwarfRegNum<[76]>; def XER: SPR<1, "xer">, DwarfRegNum<[76]>;
@ -448,72 +428,6 @@ def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1; let CopyCost = -1;
} }
let SubRegIndices = [sub_pair0, sub_pair1] in {
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
ACC4, ACC5, ACC6, ACC7)> {
// The AllocationPriority is in the range [0, 63]. Assigned the ACC registers
// the highest possible priority in this range to force the register allocator
// to assign these registers first. This is done because the ACC registers
// must represent 4 advacent vector registers. For example ACC1 must be
// VS4 - VS7. The value here must be at least 32 as we want to allocate
// these registers even before we allocate global ranges.
let AllocationPriority = 63;
let Size = 512;
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def UACCRC : RegisterClass<"PPC", [v512i1], 128,
(add UACC0, UACC1, UACC2, UACC3,
UACC4, UACC5, UACC6, UACC7)> {
// The AllocationPriority for the UACC registers is still high and must be at
// least 32 as we want to allocate these registers before we allocate other
// global ranges. The value must be less than the AllocationPriority of the
// ACC registers.
let AllocationPriority = 36;
let Size = 512;
}
// FIXME: This allocation order may increase stack frame size when allocating
// non-volatile registers.
//
// Placing Altivec registers first and allocate the rest as underlying VSX
// ones, to reduce interference with accumulator registers (lower 32 VSRs).
// This reduces copies when loading for accumulators, which is common use for
// paired VSX registers.
def VSRpRC :
RegisterClass<"PPC", [v256i1], 128,
(add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
VSRp29, VSRp28, VSRp27, VSRp26,
(sequence "VSRp%u", 0, 6),
(sequence "VSRp%u", 15, 7))> {
// Give the VSRp registers a non-zero AllocationPriority. The value is less
// than 32 as these registers should not always be allocated before global
// ranges and the value should be less than the AllocationPriority - 32 for
// the UACC registers. Even global VSRp registers should be allocated after
// the UACC registers have been chosen.
let AllocationPriority = 2;
let Size = 256;
}
// Make AllocationOrder as similar as G8RC's to avoid potential spilling. // Make AllocationOrder as similar as G8RC's to avoid potential spilling.
// Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated // Similarly, we have an AltOrder for 64-bit ELF ABI which r2 is allocated
// at last. // at last.
@ -528,3 +442,5 @@ def G8pRC :
}]; }];
let Size = 128; let Size = 128;
} }
include "PPCRegisterInfoMMA.td"

View File

@ -0,0 +1,106 @@
//===-- PPCRegisterInfoMMA.td - The PowerPC Register File --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Register info for registers related to MMA. These are the ACC and UACC
// registers.
//
//===----------------------------------------------------------------------===//
let Namespace = "PPC" in {
def sub_pair0 : SubRegIndex<256>;
def sub_pair1 : SubRegIndex<256, 256>;
}
// ACC - One of the 8 512-bit VSX accumulators.
class ACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// UACC - One of the 8 512-bit VSX accumulators prior to being primed.
// Without using this register class, the register allocator has no way to
// differentiate a primed accumulator from an unprimed accumulator.
// This may result in invalid copies between primed and unprimed accumulators.
class UACC<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
let SubRegs = subregs;
}
// SPE Accumulator for multiply-accumulate SPE operations. Never directly
// accessed, so there's no real encoding for it.
def SPEACC: DwarfRegNum<[99, 111]>;
let SubRegIndices = [sub_pair0, sub_pair1] in {
def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
ACC4, ACC5, ACC6, ACC7)> {
// The AllocationPriority is in the range [0, 63]. Assigned the ACC registers
// the highest possible priority in this range to force the register allocator
// to assign these registers first. This is done because the ACC registers
// must represent 4 advacent vector registers. For example ACC1 must be
// VS4 - VS7. The value here must be at least 32 as we want to allocate
// these registers even before we allocate global ranges.
let AllocationPriority = 63;
let Size = 512;
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def UACCRC : RegisterClass<"PPC", [v512i1], 128,
(add UACC0, UACC1, UACC2, UACC3,
UACC4, UACC5, UACC6, UACC7)> {
// The AllocationPriority for the UACC registers is still high and must be at
// least 32 as we want to allocate these registers before we allocate other
// global ranges. The value must be less than the AllocationPriority of the
// ACC registers.
let AllocationPriority = 36;
let Size = 512;
}
// FIXME: This allocation order may increase stack frame size when allocating
// non-volatile registers.
//
// Placing Altivec registers first and allocate the rest as underlying VSX
// ones, to reduce interference with accumulator registers (lower 32 VSRs).
// This reduces copies when loading for accumulators, which is common use for
// paired VSX registers.
def VSRpRC :
RegisterClass<"PPC", [v256i1], 128,
(add VSRp17, VSRp18, VSRp16, VSRp19, VSRp20, VSRp21,
VSRp22, VSRp23, VSRp24, VSRp25, VSRp31, VSRp30,
VSRp29, VSRp28, VSRp27, VSRp26,
(sequence "VSRp%u", 0, 6),
(sequence "VSRp%u", 15, 7))> {
// Give the VSRp registers a non-zero AllocationPriority. The value is less
// than 32 as these registers should not always be allocated before global
// ranges and the value should be less than the AllocationPriority - 32 for
// the UACC registers. Even global VSRp registers should be allocated after
// the UACC registers have been chosen.
let AllocationPriority = 2;
let Size = 256;
}