[AMDGPU] Use multi-dword flat scratch for spilling

Differential Revision: https://reviews.llvm.org/D93067
This commit is contained in:
Stanislav Mekhanoshin 2020-11-25 15:27:49 -08:00
parent 19d57b5c42
commit cf5845d6c4
6 changed files with 628 additions and 292 deletions

View File

@ -745,6 +745,41 @@ static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
return true;
}
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
unsigned LoadStoreOp,
unsigned EltSize) {
bool IsStore = TII->get(LoadStoreOp).mayStore();
bool UseST =
AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::saddr) < 0;
switch (EltSize) {
case 4:
LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
break;
case 8:
LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
: AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
break;
case 12:
LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
: AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
break;
case 16:
LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
: AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
break;
default:
llvm_unreachable("Unexpected spill load/store size!");
}
if (UseST)
LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
return LoadStoreOp;
}
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
int Index,
@ -768,18 +803,31 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
bool Scavenged = false;
MCRegister SOffset = ScratchOffsetReg;
const unsigned EltSize = 4;
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / (EltSize * CHAR_BIT);
const bool IsAGPR = hasAGPRs(RC);
const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
// a temporary VGPR.
unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
unsigned NumSubRegs = RegWidth / EltSize;
unsigned Size = NumSubRegs * EltSize;
unsigned RemSize = RegWidth - Size;
unsigned NumRemSubRegs = RemSize ? 1 : 0;
int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
int64_t MaxOffset = Offset + Size - EltSize;
int64_t MaxOffset = Offset + Size + RemSize - EltSize;
int64_t ScratchOffsetRegDelta = 0;
if (IsFlat && EltSize > 4) {
LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
Desc = &TII->get(LoadStoreOp);
}
Align Alignment = MFI.getObjectAlign(Index);
const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
assert((Offset % EltSize) == 0 && "unexpected VGPR spill offset");
assert((IsFlat || ((Offset % EltSize) == 0)) &&
"unexpected VGPR spill offset");
bool IsOffsetLegal = IsFlat
? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS, true)
@ -840,12 +888,19 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
Register TmpReg;
// FIXME: Flat scratch does not have to be limited to a dword per store.
for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
Register SubReg =
NumSubRegs == 1
for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
++i, RegOffset += EltSize) {
if (i == NumSubRegs) {
EltSize = RemSize;
LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
}
Desc = &TII->get(LoadStoreOp);
unsigned NumRegs = EltSize / 4;
Register SubReg = e == 1
? ValueReg
: Register(getSubReg(ValueReg, getSubRegFromChannel(i)));
: Register(getSubReg(ValueReg,
getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@ -857,75 +912,110 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
// Make sure the whole register is defined if there are undef components by
// adding an implicit def of the super-reg on the first instruction.
const bool NeedSuperRegDef = NumSubRegs > 1 && IsStore && i == 0;
bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
bool NeedSuperRegImpOperand = e > 1;
auto MIB = spillVGPRtoAGPR(ST, MI, Index, i, SubReg, IsKill);
if (!MIB.getInstr()) {
unsigned FinalReg = SubReg;
const bool IsAGPR = hasAGPRs(RC);
if (IsAGPR) {
if (!TmpReg) {
assert(RS && "Needs to have RegScavenger to spill an AGPR!");
// FIXME: change to scavengeRegisterBackwards()
TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
RS->setRegUsed(TmpReg);
}
if (IsStore) {
auto AccRead = BuildMI(*MBB, MI, DL,
TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
.addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
SubReg = TmpReg;
}
MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
MachineMemOperand *NewMMO =
MF->getMachineMemOperand(PInfo, MMO->getFlags(), EltSize,
commonAlignment(Alignment, EltSize * i));
MIB = BuildMI(*MBB, MI, DL, *Desc)
.addReg(SubReg,
getDefRegState(!IsStore) | getKillRegState(IsKill));
if (!IsFlat)
MIB.addReg(FuncInfo->getScratchRSrcReg());
if (SOffset == AMDGPU::NoRegister) {
if (!IsFlat)
MIB.addImm(0);
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
MIB.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0); // tfe for MUBUF or dlc for FLAT
if (!IsFlat)
MIB.addImm(0) // dlc
.addImm(0); // swz
MIB.addMemOperand(NewMMO);
if (!IsAGPR && NeedSuperRegDef)
unsigned Lane = RegOffset / 4;
unsigned LaneE = (RegOffset + EltSize) / 4;
for ( ; Lane != LaneE; ++Lane) {
bool IsSubReg = e > 1 || EltSize > 4;
Register Sub = IsSubReg
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
: ValueReg;
auto MIB = spillVGPRtoAGPR(ST, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
MIB.addReg(ValueReg, RegState::ImplicitDefine);
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
FinalReg)
.addReg(TmpReg, RegState::Kill);
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
NeedSuperRegDef = false;
}
if (IsSubReg || NeedSuperRegImpOperand) {
NeedSuperRegImpOperand = true;
unsigned State = SrcDstRegState;
if (Lane + 1 != LaneE)
State &= ~RegState::Kill;
MIB.addReg(ValueReg, RegState::Implicit | State);
}
}
if (Lane == LaneE) // Fully spilled into AGPRs.
continue;
// Offset in bytes from the beginning of the ValueReg to its portion we
// still need to spill. It may differ from RegOffset if a portion of
// current SubReg has been already spilled into AGPRs by the loop above.
unsigned RemRegOffset = Lane * 4;
unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
if (RemEltSize != EltSize) { // Partially spilled to AGPRs
assert(IsFlat && EltSize > 4);
unsigned NumRegs = RemEltSize / 4;
SubReg = Register(getSubReg(ValueReg,
getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
Desc = &TII->get(Opc);
}
unsigned FinalReg = SubReg;
if (IsAGPR) {
assert(EltSize == 4);
if (!TmpReg) {
assert(RS && "Needs to have RegScavenger to spill an AGPR!");
// FIXME: change to scavengeRegisterBackwards()
TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
RS->setRegUsed(TmpReg);
}
if (IsStore) {
auto AccRead = BuildMI(*MBB, MI, DL,
TII->get(AMDGPU::V_ACCVGPR_READ_B32), TmpReg)
.addReg(SubReg, getKillRegState(IsKill));
if (NeedSuperRegDef)
AccRead.addReg(ValueReg, RegState::ImplicitDefine);
AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
SubReg = TmpReg;
}
MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
MachineMemOperand *NewMMO =
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
commonAlignment(Alignment, RemRegOffset));
auto MIB = BuildMI(*MBB, MI, DL, *Desc)
.addReg(SubReg,
getDefRegState(!IsStore) | getKillRegState(IsKill));
if (!IsFlat)
MIB.addReg(FuncInfo->getScratchRSrcReg());
if (SOffset == AMDGPU::NoRegister) {
if (!IsFlat)
MIB.addImm(0);
} else {
if (NeedSuperRegDef)
MIB.addReg(ValueReg, RegState::ImplicitDefine);
MIB.addReg(SOffset, SOffsetRegState);
}
MIB.addImm(Offset + RemRegOffset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0); // tfe for MUBUF or dlc for FLAT
if (!IsFlat)
MIB.addImm(0) // dlc
.addImm(0); // swz
MIB.addMemOperand(NewMMO);
if (!IsAGPR && NeedSuperRegDef)
MIB.addReg(ValueReg, RegState::ImplicitDefine);
if (!IsStore && TmpReg != AMDGPU::NoRegister) {
MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32),
FinalReg)
.addReg(TmpReg, RegState::Kill);
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
}
if (NumSubRegs > 1) {
if (NeedSuperRegImpOperand)
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
}
}
if (ScratchOffsetRegDelta != 0) {

View File

@ -4,14 +4,14 @@
; GCN-LABEL: spill_v2i32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:16 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v2i32() {
entry:
@ -32,14 +32,14 @@ entry:
; GCN-LABEL: spill_v2f32:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:16 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:16 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:20 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v{{.*}} offset:16 ; 8-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:16 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:20 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx2 v{{.*}} offset:16 ; 8-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v2f32() {
entry:
@ -61,17 +61,15 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:32 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:36 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v3i32() {
entry:
@ -93,17 +91,15 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:32 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:32 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:36 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx3 off, v{{.*}} offset:32 ; 12-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx3 v{{.*}} offset:32 ; 12-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v3f32() {
entry:
@ -126,20 +122,16 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:32 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:36 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v4i32() {
entry:
@ -162,20 +154,16 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:36 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:40 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:32 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:36 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:40 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:44 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx4 off, v{{.*}} offset:32 ; 16-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:32 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:36 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:40 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:44 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v{{.*}} offset:32 ; 16-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v4f32() {
entry:
@ -198,20 +186,18 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:64 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:68 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:72 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:76 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v5i32() {
entry:
%alloca = alloca <5 x i32>, i32 2, align 4, addrspace(5)
@ -233,20 +219,18 @@ entry:
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:68 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:72 ; 4-byte Folded Spill
; MUBUF-DAG: buffer_store_dword v{{.*}} offset:76 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:64 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:68 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:72 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:76 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dwordx4 off, v{{.*}} offset:64 ; 16-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v{{.*}} offset:80 ; 4-byte Folded Spill
; FLATSCR-NOT: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; MUBUF-DAG: buffer_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:64 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:68 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:72 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:76 ; 4-byte Folded Reload
; FLATSCR-DAG: scratch_load_dwordx4 v{{.*}} offset:64 ; 16-byte Folded Reload
; FLATSCR-DAG: scratch_load_dword v{{.*}} offset:80 ; 4-byte Folded Reload
; FLATSCR-NOT: scratch_load_dword
define void @spill_v5f32() {
entry:
%alloca = alloca <5 x i32>, i32 2, align 4, addrspace(5)

View File

@ -0,0 +1,379 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck -check-prefix=MUBUF-V2A %s
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-enable-flat-scratch -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck -check-prefix=FLATSCR-V2A %s
--- |
define void @test_spill_v2_partial_agpr() #1 {
entry:
unreachable
}
define void @test_spill_v3_partial_agpr() #1 {
entry:
unreachable
}
define void @test_spill_v4_partial_agpr() #2 {
entry:
unreachable
}
define void @test_spill_v5_partial_agpr() #2 {
entry:
unreachable
}
define void @test_spill_v6_partial_agpr() #4 {
entry:
unreachable
}
define void @test_spill_v8_partial_agpr() #3 {
entry:
unreachable
}
define void @test_spill_v16_partial_agpr() #4 {
entry:
unreachable
}
attributes #1 = { nounwind "amdgpu-num-vgpr"="1" }
attributes #2 = { nounwind "amdgpu-num-vgpr"="3" }
attributes #3 = { nounwind "amdgpu-num-vgpr"="4" }
attributes #4 = { nounwind "amdgpu-num-vgpr"="5" }
---
name: test_spill_v2_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v2_partial_agpr
; MUBUF-V2A: liveins: $agpr0
; MUBUF-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v2_partial_agpr
; FLATSCR-V2A: liveins: $agpr0
; FLATSCR-V2A: $vgpr0_vgpr1 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
; FLATSCR-V2A: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1 = IMPLICIT_DEF
SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v3_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 12, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v3_partial_agpr
; MUBUF-V2A: liveins: $agpr0
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
; MUBUF-V2A: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
; MUBUF-V2A: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v3_partial_agpr
; FLATSCR-V2A: liveins: $agpr0
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
; FLATSCR-V2A: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr1_vgpr2, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 8 into %stack.0 + 4, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
; FLATSCR-V2A: $vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 8 from %stack.0 + 4, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store 12 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2 = SI_SPILL_V96_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v4_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 16, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v4_partial_agpr
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v4_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store 16 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v5_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 20, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v5_partial_agpr
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; MUBUF-V2A: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v5_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
; FLATSCR-V2A: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR-V2A: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
SI_SPILL_V160_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, %stack.0, $sgpr32, 0, implicit $exec :: (store 20 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = SI_SPILL_V160_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v6_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 24, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v6_partial_agpr
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v6_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; FLATSCR-V2A: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
SI_SPILL_V192_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, %stack.0, $sgpr32, 0, implicit $exec :: (store 24 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v8_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 32, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v8_partial_agpr
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; MUBUF-V2A: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v8_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; FLATSCR-V2A: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
SI_SPILL_V256_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, %stack.0, $sgpr32, 0, implicit $exec :: (store 32 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = SI_SPILL_V256_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: test_spill_v16_partial_agpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 64, alignment: 4 }
machineFunctionInfo:
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: '$sgpr32'
hasSpilledVGPRs: true
body: |
bb.0.entry:
; MUBUF-V2A-LABEL: name: test_spill_v16_partial_agpr
; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
; MUBUF-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
; MUBUF-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
; MUBUF-V2A: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
; MUBUF-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr4 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; MUBUF-V2A: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
; MUBUF-V2A: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
; MUBUF-V2A: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
; MUBUF-V2A: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
; MUBUF-V2A: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
; MUBUF-V2A: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
; MUBUF-V2A: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
; MUBUF-V2A: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
; MUBUF-V2A: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
; MUBUF-V2A: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
; MUBUF-V2A: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v16_partial_agpr
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
; FLATSCR-V2A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
; FLATSCR-V2A: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr1 = V_ACCVGPR_WRITE_B32 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr2 = V_ACCVGPR_WRITE_B32 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr3 = V_ACCVGPR_WRITE_B32 killed $vgpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $agpr4 = V_ACCVGPR_WRITE_B32 killed $vgpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr5_vgpr6_vgpr7, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 12 into %stack.0 + 20, align 4, addrspace 5)
; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
; FLATSCR-V2A: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr0 = V_ACCVGPR_READ_B32 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr1 = V_ACCVGPR_READ_B32 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr2 = V_ACCVGPR_READ_B32 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr3 = V_ACCVGPR_READ_B32 $agpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr4 = V_ACCVGPR_READ_B32 $agpr4, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; FLATSCR-V2A: $vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 12 from %stack.0 + 20, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
; FLATSCR-V2A: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR-V2A: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.0, $sgpr32, 0, implicit $exec :: (store 64 into %stack.0, align 4, addrspace 5)
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...

View File

@ -72,10 +72,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v2
; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 8 into %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 8 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v2
; FLATSCR-V2A: liveins: $agpr0, $agpr1
@ -124,12 +122,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v3
; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 12 into %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 12 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v3
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2
@ -184,14 +178,8 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v4
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v4
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
@ -252,15 +240,9 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v5
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v5
@ -328,18 +310,10 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v6
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store 8 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load 8 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v6
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5
@ -416,22 +390,10 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v8
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 4 into %stack.0 + 28, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr6 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr7 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v8
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7
@ -544,38 +506,14 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v16
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 28, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 32, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 36, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr10, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 40, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr11, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 44, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr12, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 48, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr13, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 52, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr14, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 56, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr15, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 4 into %stack.0 + 60, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr6 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr7 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 32, addrspace 5)
; FLATSCR: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 36, addrspace 5)
; FLATSCR: $vgpr10 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 40, addrspace 5)
; FLATSCR: $vgpr11 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 44, addrspace 5)
; FLATSCR: $vgpr12 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 48, addrspace 5)
; FLATSCR: $vgpr13 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 52, addrspace 5)
; FLATSCR: $vgpr14 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 56, addrspace 5)
; FLATSCR: $vgpr15 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 4 from %stack.0 + 60, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v16
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15
@ -768,70 +706,22 @@ body: |
; MUBUF-V2A: S_ENDPGM 0
; FLATSCR-LABEL: name: test_spill_v32
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 8, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 12, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 16, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 20, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr6, $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 24, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr7, $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 28, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr8, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 32, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr9, $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 36, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr10, $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 40, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr11, $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 44, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr12, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 48, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr13, $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 52, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr14, $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 56, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr15, $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 60, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr16, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 64, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr17, $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 68, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr18, $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 72, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr19, $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 76, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr20, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 80, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr21, $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 84, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr22, $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 88, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr23, $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 92, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr24, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 96, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr25, $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 100, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr26, $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 104, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr27, $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 108, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr28, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 112, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr29, $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 116, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr30, $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 120, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr31, $sgpr32, 124, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 4 into %stack.0 + 124, addrspace 5)
; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0, addrspace 5)
; FLATSCR: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 4, addrspace 5)
; FLATSCR: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 8, addrspace 5)
; FLATSCR: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 12, addrspace 5)
; FLATSCR: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 16, addrspace 5)
; FLATSCR: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 20, addrspace 5)
; FLATSCR: $vgpr6 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 24, addrspace 5)
; FLATSCR: $vgpr7 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 28, addrspace 5)
; FLATSCR: $vgpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 32, addrspace 5)
; FLATSCR: $vgpr9 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 36, addrspace 5)
; FLATSCR: $vgpr10 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 40, addrspace 5)
; FLATSCR: $vgpr11 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 44, addrspace 5)
; FLATSCR: $vgpr12 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 48, addrspace 5)
; FLATSCR: $vgpr13 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 52, addrspace 5)
; FLATSCR: $vgpr14 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 56, addrspace 5)
; FLATSCR: $vgpr15 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 60, addrspace 5)
; FLATSCR: $vgpr16 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 64, addrspace 5)
; FLATSCR: $vgpr17 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 68, addrspace 5)
; FLATSCR: $vgpr18 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 72, addrspace 5)
; FLATSCR: $vgpr19 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 76, addrspace 5)
; FLATSCR: $vgpr20 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 80, addrspace 5)
; FLATSCR: $vgpr21 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 84, addrspace 5)
; FLATSCR: $vgpr22 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 88, addrspace 5)
; FLATSCR: $vgpr23 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 92, addrspace 5)
; FLATSCR: $vgpr24 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 96, addrspace 5)
; FLATSCR: $vgpr25 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 100, addrspace 5)
; FLATSCR: $vgpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 104, addrspace 5)
; FLATSCR: $vgpr27 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 108, addrspace 5)
; FLATSCR: $vgpr28 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 112, addrspace 5)
; FLATSCR: $vgpr29 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 116, addrspace 5)
; FLATSCR: $vgpr30 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 120, addrspace 5)
; FLATSCR: $vgpr31 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 4 from %stack.0 + 124, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 32, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 48, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 64, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 80, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 96, align 4, addrspace 5)
; FLATSCR: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store 16 into %stack.0 + 112, align 4, addrspace 5)
; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0, align 4, addrspace 5)
; FLATSCR: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 16, align 4, addrspace 5)
; FLATSCR: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 32, align 4, addrspace 5)
; FLATSCR: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 48, align 4, addrspace 5)
; FLATSCR: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 64, align 4, addrspace 5)
; FLATSCR: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 80, align 4, addrspace 5)
; FLATSCR: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 96, align 4, addrspace 5)
; FLATSCR: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load 16 from %stack.0 + 112, align 4, addrspace 5)
; FLATSCR: S_ENDPGM 0
; FLATSCR-V2A-LABEL: name: test_spill_v32
; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4, $agpr5, $agpr6, $agpr7, $agpr8, $agpr9, $agpr10, $agpr11, $agpr12, $agpr13, $agpr14, $agpr15, $agpr16, $agpr17, $agpr18, $agpr19, $agpr20, $agpr21, $agpr22, $agpr23, $agpr24, $agpr25, $agpr26, $agpr27, $agpr28, $agpr29, $agpr30, $agpr31

View File

@ -121,8 +121,7 @@ entry:
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4088 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill
; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xff8
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] offset:4 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]] ; 8-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
@ -154,8 +153,7 @@ entry:
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s6 offset:4 ; 4-byte Folded Spill
; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xffc
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] offset:4 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]] ; 8-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
@ -231,8 +229,7 @@ entry:
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s32 offset:4088 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s32 offset:4092 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4088 ; 8-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
@ -263,9 +260,7 @@ entry:
; MUBUF: s_add_u32 s4, s32, 0x3ff00
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s32, 0xffc
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] offset:4 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr

View File

@ -14,15 +14,13 @@
; Just test that it compiles successfully.
; CHECK-LABEL: test
; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
; GFX9-FLATSCR-DAG: scratch_store_dword off, v{{[0-9]+}}, [[SOFF1]] ; 4-byte Folded Spill
; GFX9-FLATSCR-DAG: scratch_store_dword off, v{{[0-9]+}}, [[SOFF1]] offset:{{[0-9]+}} ; 4-byte Folded Spill
; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x{{[0-9a-f]+}}{{$}}
; GFX9-FLATSCR-DAG: scratch_load_dword v{{[0-9]+}}, off, [[SOFF2]] ; 4-byte Folded Reload
; GFX9-FLATSCR-DAG: scratch_load_dword v{{[0-9]+}}, off, [[SOFF2]] offset:{{[0-9]+}} ; 4-byte Folded Reload
; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}}
; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x{{[0-9a-f]+}}{{$}}
; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
; GFX10-FLATSCR: scratch_store_dword off, v{{[0-9]+}}, off offset:{{[0-9]+}} ; 4-byte Folded Spill
; GFX10-FLATSCR: scratch_load_dword v{{[0-9]+}}, off, off offset:{{[0-9]+}} ; 4-byte Folded Reload
; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill
; GFX10-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, off offset:{{[0-9]+}} ; 16-byte Folded Reload
define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) {
entry:
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
@ -55,9 +53,9 @@ entry:
; FLATSCR: s_movk_i32 [[SOFF1:s[0-9]+]], 0x
; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: scratch_store_dword off, v{{[0-9]+}}, [[SOFF1]] ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill
; FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF2]] ; 4-byte Folded Reload
; FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload
define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 {
entry:
%lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)