forked from OSchip/llvm-project
[AMDGPU] Allow v_accvgpr_write to use SGPR on gfx90a
This is undocumented, but it should work.

Differential Revision: https://reviews.llvm.org/D122252
This commit is contained in:
parent
7abefc4222
commit
72c1a0d9c2
|
@ -3688,7 +3688,7 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
|
|||
|
||||
auto Reg = mc2PseudoReg(Src0.getReg());
|
||||
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
|
||||
if (!isGFX940() && isSGPR(Reg, TRI)) {
|
||||
if (!isGFX90A() && isSGPR(Reg, TRI)) {
|
||||
Error(getRegLoc(Reg, Operands),
|
||||
"source operand must be either a VGPR or an inline constant");
|
||||
return false;
|
||||
|
|
|
@ -822,7 +822,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
|
||||
if (RC == &AMDGPU::AGPR_32RegClass) {
|
||||
if (AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
|
||||
(ST.hasGFX940Insts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
|
||||
(ST.hasGFX90AInsts() && AMDGPU::SReg_32RegClass.contains(SrcReg))) {
|
||||
BuildMI(MBB, MI, DL, get(AMDGPU::V_ACCVGPR_WRITE_B32_e64), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
return;
|
||||
|
@ -949,7 +949,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
if (ST.hasGFX90AInsts() && RI.isAGPRClass(SrcRC))
|
||||
Opcode = AMDGPU::V_ACCVGPR_MOV_B32;
|
||||
else if (RI.hasVGPRs(SrcRC) ||
|
||||
(ST.hasGFX940Insts() && RI.isSGPRClass(SrcRC)))
|
||||
(ST.hasGFX90AInsts() && RI.isSGPRClass(SrcRC)))
|
||||
Opcode = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
|
||||
else
|
||||
Opcode = AMDGPU::INSTRUCTION_LIST_END;
|
||||
|
@ -4647,7 +4647,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
|
|||
}
|
||||
|
||||
if (MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
|
||||
!ST.hasGFX940Insts()) {
|
||||
!ST.hasGFX90AInsts()) {
|
||||
const MachineOperand *Src = getNamedOperand(MI, AMDGPU::OpName::src0);
|
||||
if (Src->isReg() && RI.isSGPRReg(MRI, Src->getReg())) {
|
||||
ErrInfo = "Invalid register class: "
|
||||
|
@ -5032,7 +5032,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
|
|||
RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
|
||||
return false;
|
||||
}
|
||||
if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX940Insts() &&
|
||||
if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
|
||||
(int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
|
||||
RI.isSGPRReg(MRI, MO->getReg()))
|
||||
return false;
|
||||
|
|
|
@ -22,70 +22,38 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1
|
|||
; GCN-NEXT: s_load_dwordx16 s[0:15], s[34:35], 0x0
|
||||
; GCN-NEXT: s_load_dwordx16 s[16:31], s[34:35], 0x40
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s15
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s16
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a16, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s17
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a17, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s18
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a18, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s19
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a19, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s20
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a20, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s21
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a21, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s22
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a22, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s23
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a23, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s24
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a24, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s25
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a25, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s26
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a26, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s27
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a27, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s28
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a28, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s29
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a29, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s30
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a30, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s31
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a31, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, s15
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a16, s16
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a17, s17
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a18, s18
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a19, s19
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a20, s20
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a21, s21
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a22, s22
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a23, s23
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a24, s24
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a25, s25
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a26, s26
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a27, s27
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a28, s28
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a29, s29
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a30, s30
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a31, s31
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[0:31] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -121,38 +89,22 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s15
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, s15
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -183,14 +135,10 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)*
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f32_4x4x4bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -217,38 +165,22 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s15
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a8, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a9, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a10, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a11, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a12, s12
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a13, s13
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a14, s14
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a15, s15
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -280,14 +212,10 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1
|
|||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f32_16x16x16bf16_1k a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -337,22 +265,14 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %
|
|||
; GCN-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
|
||||
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[12:13], s[12:13] op_sel:[0,1]
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s0
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s1
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s2
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s3
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s7
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, 0
|
||||
|
@ -399,28 +319,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1
|
|||
; GCN: ; %bb.0: ; %bb
|
||||
; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24
|
||||
; GCN-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
|
||||
; GCN-NEXT: s_mov_b64 s[10:11], 1.0
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s11
|
||||
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
|
||||
|
@ -442,29 +354,21 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrs
|
|||
; GCN: ; %bb.0: ; %bb
|
||||
; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24
|
||||
; GCN-NEXT: s_mov_b32 s4, 0
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
|
||||
; GCN-NEXT: s_mov_b32 s5, 0x405ec000
|
||||
; GCN-NEXT: s_mov_b64 s[6:7], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s6
|
||||
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s7
|
||||
; GCN-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GCN-NEXT: v_pk_mov_b32 v[0:1], s[14:15], s[14:15] op_sel:[0,1]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, v4
|
||||
; GCN-NEXT: v_mov_b32_e32 v4, s11
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, v4
|
||||
; GCN-NEXT: s_mov_b64 s[8:9], s[4:5]
|
||||
; GCN-NEXT: s_mov_b64 s[10:11], s[4:5]
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a0, s4
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a1, s5
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a2, s6
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a3, s7
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a4, s8
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a5, s9
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a6, s10
|
||||
; GCN-NEXT: v_accvgpr_write_b32 a7, s11
|
||||
; GCN-NEXT: v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; GCN-NEXT: s_nop 1
|
||||
; GCN-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[0:7]
|
||||
|
|
|
@ -531,8 +531,7 @@ body: |
|
|||
; GFX90A-LABEL: name: s_to_a
|
||||
; GFX90A: liveins: $sgpr0
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $sgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0
|
||||
; GFX940-LABEL: name: s_to_a
|
||||
; GFX940: liveins: $sgpr0
|
||||
|
@ -560,10 +559,8 @@ body: |
|
|||
; GFX90A-LABEL: name: s2_to_a2
|
||||
; GFX90A: liveins: $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit killed $sgpr0_sgpr1, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1
|
||||
; GFX940-LABEL: name: s2_to_a2
|
||||
; GFX940: liveins: $sgpr0_sgpr1
|
||||
|
@ -594,12 +591,9 @@ body: |
|
|||
; GFX90A-LABEL: name: s3_to_a3
|
||||
; GFX90A: liveins: $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2
|
||||
; GFX940-LABEL: name: s3_to_a3
|
||||
; GFX940: liveins: $sgpr0_sgpr1_sgpr2
|
||||
|
@ -633,14 +627,10 @@ body: |
|
|||
; GFX90A-LABEL: name: s4_to_a4
|
||||
; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3
|
||||
; GFX940-LABEL: name: s4_to_a4
|
||||
; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
|
@ -679,18 +669,12 @@ body: |
|
|||
; GFX90A-LABEL: name: s6_to_a6
|
||||
; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
|
||||
; GFX940-LABEL: name: s6_to_a6
|
||||
; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
|
||||
|
@ -735,22 +719,14 @@ body: |
|
|||
; GFX90A-LABEL: name: s8_to_a8
|
||||
; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
|
||||
; GFX940-LABEL: name: s8_to_a8
|
||||
; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
|
||||
|
@ -813,38 +789,22 @@ body: |
|
|||
; GFX90A-LABEL: name: s16_to_a16
|
||||
; GFX90A: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr4, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr5, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr6, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr7, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 $sgpr10, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 $sgpr11, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 $sgpr12, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 $sgpr13, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 $sgpr14, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GFX90A-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 $sgpr15, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
|
||||
; GFX940-LABEL: name: s16_to_a16
|
||||
; GFX940: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
|
@ -1261,14 +1221,10 @@ body: |
|
|||
; GFX90A: liveins: $agpr0, $sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple
|
||||
; GFX940: liveins: $agpr0, $sgpr2_sgpr3
|
||||
|
@ -1307,14 +1263,10 @@ body: |
|
|||
; GFX90A: liveins: $agpr0, $sgpr2_sgpr3
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: S_NOP 0, implicit-def dead $sgpr0_sgpr1
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX90A-NEXT: $vgpr0 = V_MOV_B32_e32 killed $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit $exec
|
||||
; GFX90A-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 $sgpr3, implicit $exec, implicit-def $agpr4_agpr5_agpr6_agpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 $sgpr2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 $sgpr1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; GFX90A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 $sgpr0, implicit $exec, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, implicit $agpr4_agpr5_agpr6_agpr7
|
||||
; GFX940-LABEL: name: copy_sgpr_to_agpr_tuple_kill
|
||||
; GFX940: liveins: $agpr0, $sgpr2_sgpr3
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX908 %s
|
||||
# RUN: not --crash llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK: LLVM ERROR: Error while trying to spill VGPR0 from class VGPR_32: Cannot scavenge register without an emergency spill slot!
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postrapseudos -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX90A %s
|
||||
|
||||
---
|
||||
name: no_free_vgprs_for_copy_s32_to_a32
|
||||
|
@ -17,6 +15,11 @@ body: |
|
|||
; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr8, implicit $exec
|
||||
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
|
||||
; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
|
||||
; GFX90A-LABEL: name: no_free_vgprs_for_copy_s32_to_a32
|
||||
; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec
|
||||
; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
|
||||
$agpr1 = COPY $sgpr8
|
||||
S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr1
|
||||
...
|
||||
|
@ -36,6 +39,12 @@ body: |
|
|||
; GFX908-NEXT: $vgpr32 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9
|
||||
; GFX908-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
|
||||
; GFX908-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
|
||||
; GFX90A-LABEL: name: no_free_vgprs_for_copy_s64_to_a64
|
||||
; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8_sgpr9
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $sgpr8, implicit $exec, implicit-def $agpr2_agpr3, implicit $sgpr8_sgpr9
|
||||
; GFX90A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9
|
||||
; GFX90A-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
|
||||
$agpr2_agpr3 = COPY $sgpr8_sgpr9
|
||||
S_ENDPGM 0, csr_amdgpu_allvgprs, implicit $agpr2_agpr3
|
||||
...
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
|
||||
|
||||
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32)
|
||||
|
@ -14,38 +14,39 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
|||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_f32_32x32x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-32: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
@ -65,7 +66,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx16
|
||||
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_f32_16x16x2bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
@ -85,7 +87,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx4
|
||||
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_f32_4x4x2bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
@ -105,7 +108,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx16
|
||||
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_f32_32x32x4bf16 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
@ -125,7 +129,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx4
|
||||
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_f32_16x16x8bf16 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
|
|
@ -13,43 +13,9 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
|||
; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16_1k:
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
|
@ -65,15 +31,14 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_16x16x4bf16_1k:
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
|
||||
|
@ -85,15 +50,14 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_4x4x4bf16_1k:
|
||||
; GCN-DAG: s_load_dwordx4
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
; GCN-DAG: s_load_dwordx4
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
|
||||
|
@ -105,15 +69,14 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_32x32x8bf16_1k:
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
|
||||
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
|
||||
|
@ -125,15 +88,14 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_16x16x16bf16_1k:
|
||||
; GCN-DAG: s_load_dwordx4
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
; GCN-DAG: s_load_dwordx4
|
||||
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
|
||||
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9]+}}], v{{\[}}[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GCN-NOT: v_accvgpr_read_b32
|
||||
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
|
||||
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
|
||||
bb:
|
||||
%in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
|
||||
|
@ -200,13 +162,12 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit:
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
|
||||
; GFX940-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000
|
||||
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
|
||||
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
; GCN-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000
|
||||
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
|
||||
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
|
||||
; GCN: global_store_dwordx4
|
||||
; GCN: global_store_dwordx4
|
||||
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {
|
||||
bb:
|
||||
%mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> <double 123.0, double 123.0, double 123.0, double 123.0>, i32 0, i32 0, i32 0)
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
|
||||
|
||||
declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32)
|
||||
|
@ -9,7 +9,8 @@ declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx16
|
||||
; GCN-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_i32_32x32x8i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
@ -27,7 +28,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx4
|
||||
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GCN: v_mfma_i32_16x16x16i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
; GFX908: global_store_dwordx4
|
||||
|
|
|
@ -23,39 +23,39 @@ declare i32 @llvm.amdgcn.workitem.id.x()
|
|||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -80,8 +80,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
|
||||
; GCN: s_load_dwordx16
|
||||
; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
|
@ -100,8 +100,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
|
||||
; GCN: s_load_dwordx4
|
||||
; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_4x4x1f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_4x4x1_16b_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -120,8 +120,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
|
||||
; GCN: s_load_dwordx16
|
||||
; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_32x32x2f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x2_f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
|
@ -140,8 +140,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
|
||||
; GCN: s_load_dwordx4
|
||||
; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_16x16x4f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x4_f32 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -159,8 +159,8 @@ bb:
|
|||
; GCN-LABEL: {{^}}test_mfma_f32_32x32x4f16:
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GFX908_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-32:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_32x32x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x4_2b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-32: v_accvgpr_read_b32
|
||||
|
@ -180,8 +180,8 @@ bb:
|
|||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_16x16x4f16:
|
||||
; GCN: s_load_dwordx16
|
||||
; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_16x16x4f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x4_4b_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
|
@ -202,8 +202,8 @@ bb:
|
|||
; GCN-LABEL: {{^}}test_mfma_f32_4x4x4f16:
|
||||
; GCN: s_load_dwordx4
|
||||
; GCN: s_load_dwordx4
|
||||
; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_4x4x4f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_4x4x4_16b_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -225,8 +225,8 @@ bb:
|
|||
; GCN: s_load_dwordx16
|
||||
; GCN: s_waitcnt lgkmcnt(0)
|
||||
; GFX908_A: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_32x32x8f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_32x32x8_f16 a[{{[0-9]+:[0-9]+}}], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
|
@ -247,8 +247,8 @@ bb:
|
|||
; GCN-LABEL: {{^}}test_mfma_f32_16x16x16f16:
|
||||
; GCN: s_load_dwordx4
|
||||
; GCN: s_load_dwordx4
|
||||
; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_f32_16x16x16f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_f32_16x16x16_f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -271,39 +271,39 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GCN-DAG: s_load_dwordx16
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908_A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_i32_32x32x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_i32_32x32x4_2b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-32: v_accvgpr_read_b32
|
||||
|
@ -322,8 +322,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx16
|
||||
; GFX908_A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-16:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_i32_16x16x4i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_i32_16x16x4_4b_i8 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-16: v_accvgpr_read_b32
|
||||
|
@ -342,8 +342,8 @@ bb:
|
|||
; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
|
||||
; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
|
||||
; GCN: s_load_dwordx4
|
||||
; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
|
||||
; GFX90A_40-COUNT-4:v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX908_A: v_mfma_i32_4x4x4i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX940: v_mfma_i32_4x4x4_16b_i8 [[RES:a\[[0-9]+:[0-9]+\]]], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
|
||||
; GFX908-COUNT-4: v_accvgpr_read_b32
|
||||
|
@ -594,8 +594,8 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat:
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
|
||||
; GFX940: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
|
||||
; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
|
||||
; GFX90A_40: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
|
||||
; GCN: v_accvgpr_write_b32 [[TTMPA:a[0-9]+]], [[TMP]]
|
||||
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
|
@ -620,8 +620,8 @@ bb:
|
|||
}
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32_lit_splat_bad_code:
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000
|
||||
; GFX940: s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000
|
||||
; GFX908: v_mov_b32_e32 [[TMP0:v[0-9]+]], 0x42f60000
|
||||
; GFX90A_40:s_mov_b32 [[TMP0:s[0-9]+]], 0x42f60000
|
||||
; GCN: v_accvgpr_write_b32 [[AGPR:a[0-9]+]], [[TMP0]]
|
||||
; GFX90A_40-COUNT-3: v_accvgpr_mov_b32 a{{[0-9]+}}, [[AGPR]]
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP0]]
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908,GFX908_A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,GFX908_A,GFX940_A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX940,GFX940_A %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
|
||||
|
||||
|
@ -47,8 +47,8 @@ exit:
|
|||
; 3 vgprs are needed to avoid wait states between writes.
|
||||
; Check that we do not use 32 temp sgprs as well.
|
||||
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
|
||||
; GFX940: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
|
||||
; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x42f60000
|
||||
; GFX940_A: s_mov_b32 [[TMP:s[0-9]+]], 0x42f60000
|
||||
; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
@ -188,73 +188,8 @@ exit:
|
|||
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP2]]
|
||||
; GFX908: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP3]]
|
||||
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_mov_b32_e32 [[TMP:v[0-9]+]], 0x4{{[0-9a-f]+}}
|
||||
; GFX90A: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
|
||||
; GFX940-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}}
|
||||
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
; GFX940_A-COUNT-32: s_mov_b32 s{{[0-9]+}}, 0x4{{[0-9a-f]+}}
|
||||
; GFX940_A-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
|
||||
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
|
@ -355,10 +290,9 @@ exit:
|
|||
|
||||
; GCN-LABEL: {{^}}test_mfma_loop_sgpr_init:
|
||||
|
||||
; GFX908_A: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX908: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX908-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX90A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], [[TMP]]
|
||||
; GFX940: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}}
|
||||
; GFX940_A: v_accvgpr_write_b32 [[LEAD:a[0-9]+]], s{{[0-9]+}}
|
||||
; GFX90A-COUNT-31: v_accvgpr_mov_b32 a{{[0-9]+}}, [[LEAD]]
|
||||
|
||||
; GCN: [[LOOP:.LBB[0-9_]+]]:
|
||||
|
@ -426,8 +360,8 @@ exit:
|
|||
; GCN-LABEL: {{^}}test_mfma_loop_mixed_init:
|
||||
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v0
|
||||
; GFX908_A-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX940-DAG: s_load_dword [[TMP:s[0-9]+]],
|
||||
; GFX908-DAG: v_mov_b32_e32 [[TMP:v[0-9]+]], s{{[0-9]+}}
|
||||
; GFX940_A-DAG: s_load_dword [[TMP:s[0-9]+]],
|
||||
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, [[TMP]]
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
; GFX908-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
|
||||
|
|
|
@ -27,6 +27,9 @@ v_accvgpr_write a2, v255
|
|||
v_accvgpr_mov_b32 a1, a2
|
||||
// GFX90A: v_accvgpr_mov_b32 a1, a2 ; encoding: [0x02,0xa5,0x02,0x7e]
|
||||
|
||||
v_accvgpr_write_b32 a10, s20
|
||||
// GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
|
||||
|
||||
v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65]
|
||||
// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x04]
|
||||
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
v_accvgpr_write_b32 a10, s20
|
||||
// GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
|
||||
// GFX90A: error: source operand must be either a VGPR or an inline constant
|
||||
|
||||
v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3]
|
||||
// GFX940: v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] ; encoding: [0x00,0x80,0xef,0xd3,0x00,0x05,0x0a,0x14]
|
||||
|
|
|
@ -21,6 +21,9 @@
|
|||
# GFX90A: v_accvgpr_mov_b32 a1, a2 ; encoding: [0x02,0xa5,0x02,0x7e]
|
||||
0x02,0xa5,0x02,0x7e
|
||||
|
||||
# GFX940: v_accvgpr_write_b32 a10, s20 ; encoding: [0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18]
|
||||
0x0a,0x40,0xd9,0xd3,0x14,0x00,0x00,0x18
|
||||
|
||||
# GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04]
|
||||
0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04
|
||||
|
||||
|
|
Loading…
Reference in New Issue