forked from OSchip/llvm-project
[PowerPC] Add assemble disassemble intrinsics for MMA
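This patch adds support for the MMA assemble and disassemble intrinsics (for accumulators and vector pairs), together with intrinsics for xxmtacc, xxmfacc and xxsetaccz. Reviewed By: bsaleil, #powerpc Differential Revision: https://reviews.llvm.org/D88739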
This commit is contained in:
parent
bfd9cef70e
commit
f3202b30b8
|
@ -260,6 +260,7 @@ def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
|
|||
def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
|
||||
def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
|
||||
def llvm_v128i1_ty : LLVMType<v128i1>; // 128 x i1
|
||||
def llvm_v256i1_ty : LLVMType<v256i1>; // 256 x i1
|
||||
def llvm_v512i1_ty : LLVMType<v512i1>; // 512 x i1
|
||||
def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
|
||||
|
||||
|
|
|
@ -1364,3 +1364,32 @@ def int_ppc_setrnd : GCCBuiltin<"__builtin_setrnd">,
|
|||
Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
|
||||
|
||||
}
|
||||
|
||||
let TargetPrefix = "ppc" in {
|
||||
def int_ppc_mma_assemble_acc :
|
||||
Intrinsic<[llvm_v512i1_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_disassemble_acc :
|
||||
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[llvm_v512i1_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_assemble_pair :
|
||||
Intrinsic<[llvm_v256i1_ty],
|
||||
[llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_disassemble_pair :
|
||||
Intrinsic<[llvm_v16i8_ty, llvm_v16i8_ty],
|
||||
[llvm_v256i1_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_xxmtacc :
|
||||
Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_xxmfacc :
|
||||
Intrinsic<[llvm_v512i1_ty], [llvm_v512i1_ty], [IntrNoMem]>;
|
||||
|
||||
def int_ppc_mma_xxsetaccz :
|
||||
Intrinsic<[llvm_v512i1_ty], [], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
|
|
|
@ -834,7 +834,8 @@ enum IIT_Info {
|
|||
IIT_VEC_OF_BITCASTS_TO_INT = 46,
|
||||
IIT_V128 = 47,
|
||||
IIT_BF16 = 48,
|
||||
IIT_STRUCT9 = 49
|
||||
IIT_STRUCT9 = 49,
|
||||
IIT_V256 = 50
|
||||
};
|
||||
|
||||
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
|
||||
|
@ -928,6 +929,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
|
|||
OutputTable.push_back(IITDescriptor::getVector(128, IsScalableVector));
|
||||
DecodeIITType(NextElt, Infos, Info, OutputTable);
|
||||
return;
|
||||
case IIT_V256:
|
||||
OutputTable.push_back(IITDescriptor::getVector(256, IsScalableVector));
|
||||
DecodeIITType(NextElt, Infos, Info, OutputTable);
|
||||
return;
|
||||
case IIT_V512:
|
||||
OutputTable.push_back(IITDescriptor::getVector(512, IsScalableVector));
|
||||
DecodeIITType(NextElt, Infos, Info, OutputTable);
|
||||
|
|
|
@ -10416,11 +10416,32 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||
|
||||
SDLoc dl(Op);
|
||||
|
||||
if (IntrinsicID == Intrinsic::thread_pointer) {
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::thread_pointer:
|
||||
// Reads the thread pointer register, used for __builtin_thread_pointer.
|
||||
if (Subtarget.isPPC64())
|
||||
return DAG.getRegister(PPC::X13, MVT::i64);
|
||||
return DAG.getRegister(PPC::R2, MVT::i32);
|
||||
|
||||
case Intrinsic::ppc_mma_disassemble_acc:
|
||||
case Intrinsic::ppc_mma_disassemble_pair: {
|
||||
int NumVecs = 2;
|
||||
SDValue WideVec = Op.getOperand(1);
|
||||
if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
|
||||
NumVecs = 4;
|
||||
WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
|
||||
}
|
||||
SmallVector<SDValue, 4> RetOps;
|
||||
for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
|
||||
SDValue Extract = DAG.getNode(
|
||||
PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
|
||||
DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
|
||||
: VecNo,
|
||||
dl, MVT::i64));
|
||||
RetOps.push_back(Extract);
|
||||
}
|
||||
return DAG.getMergeValues(RetOps, dl);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is a lowered altivec predicate compare, CompareOpc is set to the
|
||||
|
|
|
@ -672,6 +672,7 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
|
|||
case PPC::V_SETALLONES:
|
||||
case PPC::CRSET:
|
||||
case PPC::CRUNSET:
|
||||
case PPC::XXSETACCZ:
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -1340,6 +1341,22 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||
|
||||
PPC::VSSRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;
|
||||
else if (Subtarget.pairedVectorMemops() &&
|
||||
PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {
|
||||
if (SrcReg > PPC::VSRp15)
|
||||
SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;
|
||||
else
|
||||
SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
|
||||
if (DestReg > PPC::VSRp15)
|
||||
DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;
|
||||
else
|
||||
DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;
|
||||
BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).
|
||||
addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
|
||||
BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).
|
||||
addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));
|
||||
return;
|
||||
}
|
||||
else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
|
||||
Opc = PPC::CROR;
|
||||
else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))
|
||||
|
|
|
@ -1264,12 +1264,14 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
|
|||
let Predicates = [MMA] in {
|
||||
def XXMFACC :
|
||||
XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
|
||||
IIC_VecGeneral, []>, RegConstraint<"$ASo = $AS">,
|
||||
NoEncode<"$ASo">;
|
||||
IIC_VecGeneral,
|
||||
[(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
|
||||
RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
|
||||
def XXMTACC :
|
||||
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
|
||||
IIC_VecGeneral, []>, RegConstraint<"$ATi = $AT">,
|
||||
NoEncode<"$ATi">;
|
||||
IIC_VecGeneral,
|
||||
[(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
|
||||
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
|
||||
def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
|
||||
"#KILL_PAIR", []>,
|
||||
RegConstraint<"$XTp = $XSp">;
|
||||
|
@ -1280,8 +1282,8 @@ let Predicates = [MMA] in {
|
|||
// register and this copy is more expensive than calling the intrinsic again.
|
||||
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
|
||||
def XXSETACCZ :
|
||||
XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT",
|
||||
IIC_VecGeneral, []>;
|
||||
XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
|
||||
[(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
|
||||
}
|
||||
def XVI8GER4SPP :
|
||||
XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
|
||||
|
@ -1369,6 +1371,11 @@ let Predicates = [MMA] in {
|
|||
(XXMTACC Concats.VecsToVecQuad)>;
|
||||
def : Pat<(v256i1 (PPCPairBuild v4i32:$vs1, v4i32:$vs0)),
|
||||
Concats.VecsToVecPair0>;
|
||||
def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
|
||||
v16i8:$vs3, v16i8:$vs2)),
|
||||
(XXMTACC Concats.VecsToVecQuad)>;
|
||||
def : Pat<(v256i1 (int_ppc_mma_assemble_pair v16i8:$vs1, v16i8:$vs0)),
|
||||
Concats.VecsToVecPair0>;
|
||||
def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)), (XXMFACC acc:$AS)>;
|
||||
def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, (i64 0))),
|
||||
Extracts.Vec0>;
|
||||
|
|
|
@ -0,0 +1,250 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
|
||||
|
||||
; assemble_acc
|
||||
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
|
||||
define void @ass_acc(<512 x i1>* %ptr, <16 x i8> %vc) {
|
||||
; CHECK-LABEL: ass_acc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r3)
|
||||
; CHECK-NEXT: stxv vs1, 32(r3)
|
||||
; CHECK-NEXT: stxv vs2, 16(r3)
|
||||
; CHECK-NEXT: stxv vs3, 0(r3)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: ass_acc:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: vmr v3, v2
|
||||
; CHECK-BE-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-BE-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
; CHECK-O0-LABEL: ass_acc:
|
||||
; CHECK-O0: # %bb.0: # %entry
|
||||
; CHECK-BE-O0-LABEL: ass_acc:
|
||||
; CHECK-BE-O0: # %bb.0: # %entry
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
|
||||
store <512 x i1> %0, <512 x i1>* %ptr, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; assemble_pair
|
||||
declare <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8>, <16 x i8>)
|
||||
define void @ass_pair(<256 x i1>* %ptr, <16 x i8> %vc) {
|
||||
; CHECK-LABEL: ass_pair:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: stxv v2, 16(r3)
|
||||
; CHECK-NEXT: stxv v3, 0(r3)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: ass_pair:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: vmr v3, v2
|
||||
; CHECK-BE-NEXT: stxv v2, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv v2, 0(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <256 x i1> @llvm.ppc.mma.assemble.pair(<16 x i8> %vc, <16 x i8> %vc)
|
||||
store <256 x i1> %0, <256 x i1>* %ptr, align 32
|
||||
ret void
|
||||
}
|
||||
|
||||
; xxmtacc
|
||||
declare <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1>)
|
||||
define void @int_xxmtacc(<512 x i1>* %ptr, <16 x i8> %vc) {
|
||||
; CHECK-LABEL: int_xxmtacc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r3)
|
||||
; CHECK-NEXT: stxv vs1, 32(r3)
|
||||
; CHECK-NEXT: stxv vs2, 16(r3)
|
||||
; CHECK-NEXT: stxv vs3, 0(r3)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: int_xxmtacc:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: vmr v3, v2
|
||||
; CHECK-BE-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-BE-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
; One xxmtacc is generated from the call to assemble.acc then one xxmtacc is
|
||||
; generated from the call to xxmtacc then one xxmfacc is generated for the store
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%1 = tail call <512 x i1> @llvm.ppc.mma.xxmtacc(<512 x i1> %0)
|
||||
store <512 x i1> %1, <512 x i1>* %ptr, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; xxmfacc
|
||||
declare <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1>)
|
||||
define void @int_xxmfacc(<512 x i1>* %ptr, <16 x i8> %vc) {
|
||||
; CHECK-LABEL: int_xxmfacc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vmr v3, v2
|
||||
; CHECK-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-NEXT: xxmtacc acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r3)
|
||||
; CHECK-NEXT: stxv vs1, 32(r3)
|
||||
; CHECK-NEXT: stxv vs2, 16(r3)
|
||||
; CHECK-NEXT: stxv vs3, 0(r3)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: int_xxmfacc:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: vmr v3, v2
|
||||
; CHECK-BE-NEXT: xxlor vs0, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs1, v3, v3
|
||||
; CHECK-BE-NEXT: xxlor vs2, v2, v2
|
||||
; CHECK-BE-NEXT: xxlor vs3, v3, v3
|
||||
; CHECK-BE-NEXT: xxmtacc acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
; One xxmtacc is generated from the call to assemble.acc then one xxmfacc is
|
||||
; generated from the call to xxmfacc then one xxmfacc is generated for the store
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc, <16 x i8> %vc)
|
||||
%1 = tail call <512 x i1> @llvm.ppc.mma.xxmfacc(<512 x i1> %0)
|
||||
store <512 x i1> %1, <512 x i1>* %ptr, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; xxsetaccz
|
||||
declare <512 x i1> @llvm.ppc.mma.xxsetaccz()
|
||||
define void @int_xxsetaccz(<512 x i1>* %ptr) {
|
||||
; CHECK-LABEL: int_xxsetaccz:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxsetaccz acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs0, 48(r3)
|
||||
; CHECK-NEXT: stxv vs1, 32(r3)
|
||||
; CHECK-NEXT: stxv vs2, 16(r3)
|
||||
; CHECK-NEXT: stxv vs3, 0(r3)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: int_xxsetaccz:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xxsetaccz acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs3, 48(r3)
|
||||
; CHECK-BE-NEXT: stxv vs2, 32(r3)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
|
||||
store <512 x i1> %0, <512 x i1>* %ptr, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
; disassemble_acc
|
||||
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1>)
|
||||
define void @disass_acc(<16 x i8>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3, <16 x i8>* %ptr4) {
|
||||
; CHECK-LABEL: disass_acc:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxsetaccz acc0
|
||||
; CHECK-NEXT: xxmfacc acc0
|
||||
; CHECK-NEXT: stxv vs3, 0(r3)
|
||||
; CHECK-NEXT: stxv vs2, 0(r4)
|
||||
; CHECK-NEXT: stxv vs1, 0(r5)
|
||||
; CHECK-NEXT: stxv vs0, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: disass_acc:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: xxsetaccz acc0
|
||||
; CHECK-BE-NEXT: xxmfacc acc0
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs1, 0(r4)
|
||||
; CHECK-BE-NEXT: stxv vs2, 0(r5)
|
||||
; CHECK-BE-NEXT: stxv vs3, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = tail call <512 x i1> @llvm.ppc.mma.xxsetaccz()
|
||||
%1 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.acc(<512 x i1> %0)
|
||||
%2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 0
|
||||
%3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 1
|
||||
%4 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 2
|
||||
%5 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %1, 3
|
||||
store <16 x i8> %2, <16 x i8>* %ptr1, align 16
|
||||
store <16 x i8> %3, <16 x i8>* %ptr2, align 16
|
||||
store <16 x i8> %4, <16 x i8>* %ptr3, align 16
|
||||
store <16 x i8> %5, <16 x i8>* %ptr4, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; disassemble_pair
|
||||
declare { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1>)
|
||||
define void @disass_pair(<256 x i1>* %ptr1, <16 x i8>* %ptr2, <16 x i8>* %ptr3) {
|
||||
; CHECK-LABEL: disass_pair:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv vs1, 0(r3)
|
||||
; CHECK-NEXT: lxv vs0, 16(r3)
|
||||
; CHECK-NEXT: stxv vs1, 0(r4)
|
||||
; CHECK-NEXT: stxv vs0, 0(r5)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: disass_pair:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv vs1, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r3)
|
||||
; CHECK-BE-NEXT: stxv vs0, 0(r4)
|
||||
; CHECK-BE-NEXT: stxv vs1, 0(r5)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%0 = load <256 x i1>, <256 x i1>* %ptr1, align 32
|
||||
%1 = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.mma.disassemble.pair(<256 x i1> %0)
|
||||
%2 = extractvalue { <16 x i8>, <16 x i8> } %1, 0
|
||||
%3 = extractvalue { <16 x i8>, <16 x i8> } %1, 1
|
||||
store <16 x i8> %2, <16 x i8>* %ptr2, align 16
|
||||
store <16 x i8> %3, <16 x i8>* %ptr3, align 16
|
||||
ret void
|
||||
}
|
||||
|
|
@ -247,7 +247,8 @@ enum IIT_Info {
|
|||
IIT_VEC_OF_BITCASTS_TO_INT = 46,
|
||||
IIT_V128 = 47,
|
||||
IIT_BF16 = 48,
|
||||
IIT_STRUCT9 = 49
|
||||
IIT_STRUCT9 = 49,
|
||||
IIT_V256 = 50
|
||||
};
|
||||
|
||||
static void EncodeFixedValueType(MVT::SimpleValueType VT,
|
||||
|
@ -385,6 +386,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
|
|||
case 32: Sig.push_back(IIT_V32); break;
|
||||
case 64: Sig.push_back(IIT_V64); break;
|
||||
case 128: Sig.push_back(IIT_V128); break;
|
||||
case 256: Sig.push_back(IIT_V256); break;
|
||||
case 512: Sig.push_back(IIT_V512); break;
|
||||
case 1024: Sig.push_back(IIT_V1024); break;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue