[AMDGPU] Minimize number of s_mov generated by copyPhysReg

Generate the minimal set of s_mov instructions required when
expanding an SGPR copy operation in copyPhysReg.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D89187
This commit is contained in:
Carl Ritson 2020-10-15 18:40:46 +09:00
parent 145e44bb18
commit b70cb50204
2 changed files with 86 additions and 55 deletions

View File

@ -637,6 +637,54 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
DefBuilder.addReg(ImpDefSuperReg, RegState::Define | RegState::Implicit);
}
/// Expand a copy between two SGPR tuples into a sequence of scalar moves,
/// using S_MOV_B64 wherever a pair of adjacent pieces can be moved together
/// and S_MOV_B32 for the rest.
///
/// \param TII      Instruction info; supplies the opcode descriptions and the
///                 register info.
/// \param MBB      Block the moves are inserted into.
/// \param MI       Insertion point for the expanded sequence.
/// \param DL       Debug location attached to every emitted move.
/// \param DestReg  Destination register tuple.
/// \param SrcReg   Source register tuple.
/// \param KillSrc  If true, SrcReg is marked killed on the move that ends up
///                 last in the emitted sequence.
/// \param RC       Register class used to split the copy into pieces.
/// \param Forward  If true, moves are emitted in ascending sub-register
///                 order; otherwise in descending order.
///                 NOTE(review): presumably the caller picks the direction so
///                 that overlapping source/destination ranges are not
///                 clobbered -- confirm against the call site.
static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
MCRegister DestReg, MCRegister SrcReg, bool KillSrc,
const TargetRegisterClass *RC, bool Forward) {
const SIRegisterInfo &RI = TII.getRegisterInfo();
// Sub-register indices of RC when split with an element size of 4
// (NOTE(review): presumably one index per 32-bit piece -- confirm).
ArrayRef<int16_t> BaseIndices = RI.getRegSplitParts(RC, 4);
MachineBasicBlock::iterator I = MI;
// Track the first and last instruction *built*; they are mapped to program
// order after the loop.
MachineInstr *FirstMI = nullptr, *LastMI = nullptr;
for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) {
int16_t SubIdx = BaseIndices[Idx];
Register Reg = RI.getSubReg(DestReg, SubIdx);
// Default to a single 32-bit move for this piece.
unsigned Opcode = AMDGPU::S_MOV_B32;
// Is SGPR aligned? If so try to combine with next.
// "Aligned" here means an even offset from AMDGPU::SGPR0 in the register
// enumeration.
// NOTE(review): this assumes both sub-registers are plain SGPRs numbered
// consecutively from SGPR0 -- confirm for other scalar registers.
Register Src = RI.getSubReg(SrcReg, SubIdx);
bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0;
bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0;
// Both sides must be aligned and a following piece must exist; a trailing
// odd piece therefore always stays a 32-bit move.
if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) {
// Can use SGPR64 copy
// Replace the index with the one returned for (Channel, 2)
// (presumably the two-register sub-register starting at this channel --
// TODO confirm), and skip the next piece since this move covers it.
unsigned Channel = RI.getChannelFromSubReg(SubIdx);
SubIdx = RI.getSubRegFromChannel(Channel, 2);
Opcode = AMDGPU::S_MOV_B64;
Idx++;
}
// Emit the move of the selected sub-register, with an extra implicit use of
// the full source tuple on every instruction.
LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx))
.addReg(RI.getSubReg(SrcReg, SubIdx))
.addReg(SrcReg, RegState::Implicit);
if (!FirstMI)
FirstMI = LastMI;
// For a backward copy, step the insertion point back onto the instruction
// just built so the next move lands ahead of it; the emitted sequence then
// runs from the highest piece down to the lowest.
if (!Forward)
I--;
}
assert(FirstMI && LastMI);
// In the backward case the build order is the reverse of program order, so
// swap to make FirstMI/LastMI refer to program order.
if (!Forward)
std::swap(FirstMI, LastMI);
// The first move in program order carries an implicit def of the whole
// destination tuple. It is appended here, after the implicit source use
// added when the instruction was built.
FirstMI->addOperand(
MachineOperand::CreateReg(DestReg, true /*IsDef*/, true /*IsImp*/));
// The kill of the full source tuple goes on the last move in program order.
if (KillSrc)
LastMI->addRegisterKilled(SrcReg, &RI);
}
void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
@ -842,23 +890,18 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
const bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
if (RI.isSGPRClass(RC)) {
// TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
if (!(RI.getRegSizeInBits(*RC) % 64)) {
Opcode = AMDGPU::S_MOV_B64;
EltSize = 8;
} else {
Opcode = AMDGPU::S_MOV_B32;
EltSize = 4;
}
if (!RI.isSGPRClass(RI.getPhysRegClass(SrcReg))) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc);
return;
}
} else if (RI.hasAGPRs(RC)) {
expandSGPRCopy(*this, MBB, MI, DL, DestReg, SrcReg, KillSrc, RC, Forward);
return;
}
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.hasAGPRs(RC)) {
Opcode = RI.hasVGPRs(RI.getPhysRegClass(SrcReg)) ?
AMDGPU::V_ACCVGPR_WRITE_B32 : AMDGPU::INSTRUCTION_LIST_END;
} else if (RI.hasVGPRs(RC) && RI.hasAGPRs(RI.getPhysRegClass(SrcReg))) {
@ -866,8 +909,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// For the cases where we need an intermediate instruction/temporary register
// (the result is an SGPR, and the source is either an SGPR or AGPR), we need
// a scavenger.
// (destination is an AGPR), we need a scavenger.
//
// FIXME: The pass should maintain this for us so we don't have to re-scan the
// whole block for every handled copy.
@ -875,8 +917,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (Opcode == AMDGPU::INSTRUCTION_LIST_END)
RS.reset(new RegScavenger());
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, 4);
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
unsigned SubIdx;
@ -885,7 +926,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else
SubIdx = SubIndices[SubIndices.size() - Idx - 1];
bool UseKill = KillSrc && Idx == SubIndices.size() - 1;
if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {

View File

@ -47,9 +47,8 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2
; GFX9-LABEL: name: sgpr96_aligned_src_dst
; GFX9: $sgpr8 = S_MOV_B32 $sgpr2, implicit-def $sgpr6_sgpr7_sgpr8, implicit $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr7 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr6 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr8 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr6_sgpr7_sgpr8
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2
$sgpr6_sgpr7_sgpr8 = COPY $sgpr0_sgpr1_sgpr2
...
@ -59,7 +58,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2
; GFX9-LABEL: name: sgpr96_aligned_src
; GFX9: $sgpr5 = S_MOV_B32 $sgpr2, implicit-def $sgpr3_sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr5 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2, implicit-def $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr4 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr3 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2
$sgpr3_sgpr4_sgpr5 = COPY $sgpr0_sgpr1_sgpr2
@ -71,7 +70,7 @@ body: |
bb.0:
liveins: $sgpr3_sgpr4_sgpr5
; GFX9-LABEL: name: sgpr96_aligned_dst
; GFX9: $sgpr0 = S_MOV_B32 $sgpr3, implicit-def $sgpr0_sgpr1_sgpr2, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr0 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2
; GFX9: $sgpr1 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr2 = S_MOV_B32 $sgpr5, implicit $sgpr3_sgpr4_sgpr5
$sgpr0_sgpr1_sgpr2 = COPY $sgpr3_sgpr4_sgpr5
@ -83,8 +82,7 @@ body: |
bb.0:
liveins: $sgpr3_sgpr4_sgpr5
; GFX9-LABEL: name: sgpr96_unaligned_src_dst
; GFX9: $sgpr11 = S_MOV_B32 $sgpr5, implicit-def $sgpr9_sgpr10_sgpr11, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr10 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
; GFX9: $sgpr9 = S_MOV_B32 $sgpr3, implicit $sgpr3_sgpr4_sgpr5
$sgpr9_sgpr10_sgpr11 = COPY $sgpr3_sgpr4_sgpr5
...
@ -95,8 +93,7 @@ body: |
bb.0:
liveins: $sgpr3_sgpr4_sgpr5
; GFX9-LABEL: name: sgpr96_killed
; GFX9: $sgpr11 = S_MOV_B32 $sgpr5, implicit-def $sgpr9_sgpr10_sgpr11, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr10 = S_MOV_B32 $sgpr4, implicit $sgpr3_sgpr4_sgpr5
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr3_sgpr4_sgpr5, implicit-def $sgpr9_sgpr10_sgpr11
; GFX9: $sgpr9 = S_MOV_B32 $sgpr3, implicit killed $sgpr3_sgpr4_sgpr5
$sgpr9_sgpr10_sgpr11 = COPY killed $sgpr3_sgpr4_sgpr5
...
@ -107,7 +104,7 @@ body: |
bb.0:
liveins: $sgpr4_sgpr5_sgpr6_sgpr7
; GFX9-LABEL: name: sgpr128_forward
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr4_sgpr5_sgpr6_sgpr7
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY $sgpr4_sgpr5_sgpr6_sgpr7
...
@ -118,7 +115,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9-LABEL: name: sgpr128_backward
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
$sgpr4_sgpr5_sgpr6_sgpr7 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
...
@ -129,7 +126,7 @@ body: |
bb.0:
liveins: $sgpr4_sgpr5_sgpr6_sgpr7
; GFX9-LABEL: name: sgpr128_killed
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr6_sgpr7, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY killed $sgpr4_sgpr5_sgpr6_sgpr7
...
@ -140,10 +137,8 @@ body: |
bb.0:
liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9-LABEL: name: sgpr160_forward
; GFX9: $sgpr0 = S_MOV_B32 $sgpr8, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr1 = S_MOV_B32 $sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr2 = S_MOV_B32 $sgpr10, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr3 = S_MOV_B32 $sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr4 = S_MOV_B32 $sgpr12, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
...
@ -154,11 +149,9 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9-LABEL: name: sgpr160_backward
; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr11 = S_MOV_B32 $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr10 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr9 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr8 = S_MOV_B32 $sgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
$sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
...
@ -168,11 +161,9 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9-LABEL: name: sgpr160_killed
; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr11 = S_MOV_B32 $sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr10 = S_MOV_B32 $sgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr9 = S_MOV_B32 $sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr8 = S_MOV_B32 $sgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr12 = S_MOV_B32 $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
$sgpr8_sgpr9_sgpr10_sgpr11_sgpr12 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4
...
@ -183,7 +174,7 @@ body: |
bb.0:
liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX9-LABEL: name: sgpr192_forward
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5 = COPY $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
@ -195,7 +186,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9-LABEL: name: sgpr192_backward
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
$sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
@ -207,7 +198,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9-LABEL: name: sgpr192_killed
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
$sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13 = COPY killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
@ -219,7 +210,7 @@ body: |
bb.0:
liveins: $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9-LABEL: name: sgpr256_forward
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@ -232,7 +223,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9-LABEL: name: sgpr256_backward
; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@ -245,7 +236,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9-LABEL: name: sgpr256_killed
; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr14_sgpr15 = S_MOV_B64 $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr12_sgpr13 = S_MOV_B64 $sgpr4_sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr10_sgpr11 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX9: $sgpr8_sgpr9 = S_MOV_B64 $sgpr0_sgpr1, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
@ -258,7 +249,7 @@ body: |
bb.0:
liveins: $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9-LABEL: name: sgpr512_forward
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr16_sgpr17, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr16_sgpr17, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr18_sgpr19, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr20_sgpr21, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr22_sgpr23, implicit $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
@ -275,7 +266,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9-LABEL: name: sgpr512_backward
; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr28_sgpr29 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr26_sgpr27 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr24_sgpr25 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@ -292,7 +283,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9-LABEL: name: sgpr512_killed
; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr30_sgpr31 = S_MOV_B64 $sgpr14_sgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit-def $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr28_sgpr29 = S_MOV_B64 $sgpr12_sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr26_sgpr27 = S_MOV_B64 $sgpr10_sgpr11, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; GFX9: $sgpr24_sgpr25 = S_MOV_B64 $sgpr8_sgpr9, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
@ -309,7 +300,7 @@ body: |
bb.0:
liveins: $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9-LABEL: name: sgpr1024_forward
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr32_sgpr33, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9: $sgpr0_sgpr1 = S_MOV_B64 $sgpr32_sgpr33, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr2_sgpr3 = S_MOV_B64 $sgpr34_sgpr35, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9: $sgpr4_sgpr5 = S_MOV_B64 $sgpr36_sgpr37, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9: $sgpr6_sgpr7 = S_MOV_B64 $sgpr38_sgpr39, implicit $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
@ -334,7 +325,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9-LABEL: name: sgpr1024_backward
; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9: $sgpr60_sgpr61 = S_MOV_B64 $sgpr28_sgpr29, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr58_sgpr59 = S_MOV_B64 $sgpr26_sgpr27, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr56_sgpr57 = S_MOV_B64 $sgpr24_sgpr25, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
@ -359,7 +350,7 @@ body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9-LABEL: name: sgpr1024_killed
; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr62_sgpr63 = S_MOV_B64 $sgpr30_sgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63
; GFX9: $sgpr60_sgpr61 = S_MOV_B64 $sgpr28_sgpr29, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr58_sgpr59 = S_MOV_B64 $sgpr26_sgpr27, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
; GFX9: $sgpr56_sgpr57 = S_MOV_B64 $sgpr24_sgpr25, implicit $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31