forked from OSchip/llvm-project
[AMDGPU] Unify spill code
Instead of reimplementing spilling in prolog and epilog, reuse buildSpillLoadStore.

Reviewed By: scott.linder

Differential Revision: https://reviews.llvm.org/D99269
This commit is contained in:
parent
f9a8c6a0e5
commit
32bc9a9bc3
|
@ -115,189 +115,43 @@ static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF,
|
|||
// We need to specially emit stack operations here because a different frame
|
||||
// register is used than in the rest of the function, as getFrameRegister would
|
||||
// use.
|
||||
static void buildPrologSpill(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
const SIInstrInfo *TII, Register SpillReg,
|
||||
Register ScratchRsrcReg, Register SPReg, int FI) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
|
||||
const SIMachineFunctionInfo &FuncInfo,
|
||||
LivePhysRegs &LiveRegs, MachineFunction &MF,
|
||||
MachineBasicBlock::iterator I, Register SpillReg,
|
||||
int FI) {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
|
||||
|
||||
int64_t Offset = MFI.getObjectOffset(FI);
|
||||
|
||||
MachineMemOperand *MMO = MF->getMachineMemOperand(
|
||||
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, 4,
|
||||
MFI.getObjectAlign(FI));
|
||||
|
||||
if (ST.enableFlatScratch()) {
|
||||
if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS,
|
||||
SIInstrFlags::FlatScratch)) {
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
|
||||
.addReg(SpillReg, RegState::Kill)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset)
|
||||
.addImm(0) // cpol
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
} else if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
|
||||
.addReg(SpillReg, RegState::Kill)
|
||||
.addReg(ScratchRsrcReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset)
|
||||
.addImm(0) // cpol
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
|
||||
// Don't clobber the TmpVGPR if we also need a scratch reg for the stack
|
||||
// offset in the spill.
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
|
||||
FrameInfo.getObjectAlign(FI));
|
||||
LiveRegs.addReg(SpillReg);
|
||||
|
||||
if (ST.enableFlatScratch()) {
|
||||
MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
|
||||
MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);
|
||||
|
||||
bool HasOffsetReg = OffsetReg;
|
||||
if (!HasOffsetReg) {
|
||||
// No free register, use stack pointer and restore afterwards.
|
||||
OffsetReg = SPReg;
|
||||
}
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset);
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_STORE_DWORD_SADDR))
|
||||
.addReg(SpillReg, RegState::Kill)
|
||||
.addReg(OffsetReg, HasOffsetReg ? RegState::Kill : 0)
|
||||
.addImm(0) // offset
|
||||
.addImm(0) // cpol
|
||||
.addMemOperand(MMO);
|
||||
|
||||
if (!HasOffsetReg) {
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SUB_U32), OffsetReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset);
|
||||
}
|
||||
} else {
|
||||
MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
|
||||
MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
|
||||
if (OffsetReg) {
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
|
||||
.addImm(Offset);
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFEN))
|
||||
.addReg(SpillReg, RegState::Kill)
|
||||
.addReg(OffsetReg, RegState::Kill)
|
||||
.addReg(ScratchRsrcReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(0) // offset
|
||||
.addImm(0) // cpol
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
} else {
|
||||
// No free register, use stack pointer and restore afterwards.
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset);
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::BUFFER_STORE_DWORD_OFFSET))
|
||||
.addReg(SpillReg, RegState::Kill)
|
||||
.addReg(ScratchRsrcReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(0) // offset
|
||||
.addImm(0) // cpol
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_SUB_U32), SPReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset);
|
||||
}
|
||||
}
|
||||
|
||||
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, true,
|
||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
|
||||
&LiveRegs);
|
||||
LiveRegs.removeReg(SpillReg);
|
||||
}
|
||||
|
||||
static void buildEpilogReload(const GCNSubtarget &ST, LivePhysRegs &LiveRegs,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
const SIInstrInfo *TII, Register SpillReg,
|
||||
Register ScratchRsrcReg, Register SPReg, int FI) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
int64_t Offset = MFI.getObjectOffset(FI);
|
||||
static void buildEpilogRestore(const GCNSubtarget &ST,
|
||||
const SIRegisterInfo &TRI,
|
||||
const SIMachineFunctionInfo &FuncInfo,
|
||||
LivePhysRegs &LiveRegs, MachineFunction &MF,
|
||||
MachineBasicBlock::iterator I, Register SpillReg,
|
||||
int FI) {
|
||||
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
|
||||
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
|
||||
|
||||
MachineMemOperand *MMO = MF->getMachineMemOperand(
|
||||
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, 4,
|
||||
MFI.getObjectAlign(FI));
|
||||
|
||||
if (ST.enableFlatScratch()) {
|
||||
if (TII->isLegalFLATOffset(Offset, AMDGPUAS::PRIVATE_ADDRESS,
|
||||
SIInstrFlags::FlatScratch)) {
|
||||
BuildMI(MBB, I, DebugLoc(),
|
||||
TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR), SpillReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset)
|
||||
.addImm(0) // cpol
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
|
||||
MF->getRegInfo(), LiveRegs, AMDGPU::SReg_32_XM0RegClass);
|
||||
if (!OffsetReg)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset);
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::SCRATCH_LOAD_DWORD_SADDR),
|
||||
SpillReg)
|
||||
.addReg(OffsetReg, RegState::Kill)
|
||||
.addImm(0)
|
||||
.addImm(0) // cpol
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
|
||||
if (SIInstrInfo::isLegalMUBUFImmOffset(Offset)) {
|
||||
BuildMI(MBB, I, DebugLoc(),
|
||||
TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFSET), SpillReg)
|
||||
.addReg(ScratchRsrcReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(Offset)
|
||||
.addImm(0) // cpol
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
|
||||
MCPhysReg OffsetReg = findScratchNonCalleeSaveRegister(
|
||||
MF->getRegInfo(), LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
if (!OffsetReg)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), OffsetReg)
|
||||
.addImm(Offset);
|
||||
|
||||
BuildMI(MBB, I, DebugLoc(),
|
||||
TII->get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), SpillReg)
|
||||
.addReg(OffsetReg, RegState::Kill)
|
||||
.addReg(ScratchRsrcReg)
|
||||
.addReg(SPReg)
|
||||
.addImm(0)
|
||||
.addImm(0) // cpol
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
|
||||
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
|
||||
MachineMemOperand *MMO = MF.getMachineMemOperand(
|
||||
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
|
||||
FrameInfo.getObjectAlign(FI));
|
||||
TRI.buildSpillLoadStore(I, Opc, FI, SpillReg, false,
|
||||
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
|
||||
&LiveRegs);
|
||||
}
|
||||
|
||||
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
|
@ -880,12 +734,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
continue;
|
||||
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
|
||||
/*IsProlog*/ true);
|
||||
|
||||
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
|
||||
FuncInfo->getScratchRSrcReg(),
|
||||
StackPtrReg,
|
||||
Reg.FI.getValue());
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR, *Reg.FI);
|
||||
}
|
||||
|
||||
if (FPSaveIndex && spilledToMemory(MF, *FPSaveIndex)) {
|
||||
|
@ -893,7 +745,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
assert(!MFI.isDeadObjectIndex(FramePtrFI));
|
||||
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
||||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
|
||||
|
||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
|
@ -903,8 +756,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
||||
.addReg(FramePtrReg);
|
||||
|
||||
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
|
||||
FuncInfo->getScratchRSrcReg(), StackPtrReg, FramePtrFI);
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
FramePtrFI);
|
||||
}
|
||||
|
||||
if (BPSaveIndex && spilledToMemory(MF, *BPSaveIndex)) {
|
||||
|
@ -912,7 +765,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
assert(!MFI.isDeadObjectIndex(BasePtrFI));
|
||||
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, true);
|
||||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
|
||||
|
||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
|
@ -922,8 +776,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
|
|||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
|
||||
.addReg(BasePtrReg);
|
||||
|
||||
buildPrologSpill(ST, LiveRegs, MBB, MBBI, TII, TmpVGPR,
|
||||
FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
|
||||
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
BasePtrFI);
|
||||
}
|
||||
|
||||
if (ScratchExecCopy) {
|
||||
|
@ -1120,16 +974,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
assert(!MFI.isDeadObjectIndex(FramePtrFI));
|
||||
if (spilledToMemory(MF, FramePtrFI)) {
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
||||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
|
||||
|
||||
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
|
||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
if (!TempVGPR)
|
||||
if (!TmpVGPR)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
|
||||
FuncInfo->getScratchRSrcReg(), StackPtrReg, FramePtrFI);
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
FramePtrFI);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
|
||||
.addReg(TempVGPR, RegState::Kill);
|
||||
.addReg(TmpVGPR, RegState::Kill);
|
||||
} else {
|
||||
// Reload from VGPR spill.
|
||||
assert(MFI.getStackID(FramePtrFI) == TargetStackID::SGPRSpill);
|
||||
|
@ -1147,16 +1002,17 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
assert(!MFI.isDeadObjectIndex(BasePtrFI));
|
||||
if (spilledToMemory(MF, BasePtrFI)) {
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
||||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
|
||||
|
||||
MCPhysReg TempVGPR = findScratchNonCalleeSaveRegister(
|
||||
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
|
||||
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
|
||||
if (!TempVGPR)
|
||||
if (!TmpVGPR)
|
||||
report_fatal_error("failed to find free scratch register");
|
||||
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, TempVGPR,
|
||||
FuncInfo->getScratchRSrcReg(), StackPtrReg, BasePtrFI);
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, TmpVGPR,
|
||||
BasePtrFI);
|
||||
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
|
||||
.addReg(TempVGPR, RegState::Kill);
|
||||
.addReg(TmpVGPR, RegState::Kill);
|
||||
} else {
|
||||
// Reload from VGPR spill.
|
||||
assert(MFI.getStackID(BasePtrFI) == TargetStackID::SGPRSpill);
|
||||
|
@ -1175,11 +1031,11 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
|
|||
continue;
|
||||
|
||||
if (!ScratchExecCopy)
|
||||
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, false);
|
||||
ScratchExecCopy =
|
||||
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
|
||||
|
||||
buildEpilogReload(ST, LiveRegs, MBB, MBBI, TII, Reg.VGPR,
|
||||
FuncInfo->getScratchRSrcReg(), StackPtrReg,
|
||||
Reg.FI.getValue());
|
||||
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBBI, Reg.VGPR,
|
||||
*Reg.FI);
|
||||
}
|
||||
|
||||
if (ScratchExecCopy) {
|
||||
|
|
|
@ -1018,15 +1018,13 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
|
|||
return LoadStoreOp;
|
||||
}
|
||||
|
||||
void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
int Index,
|
||||
Register ValueReg,
|
||||
bool IsKill,
|
||||
MCRegister ScratchOffsetReg,
|
||||
int64_t InstOffset,
|
||||
MachineMemOperand *MMO,
|
||||
RegScavenger *RS) const {
|
||||
void SIRegisterInfo::buildSpillLoadStore(
|
||||
MachineBasicBlock::iterator MI, unsigned LoadStoreOp, int Index,
|
||||
Register ValueReg, bool IsKill, MCRegister ScratchOffsetReg,
|
||||
int64_t InstOffset, MachineMemOperand *MMO, RegScavenger *RS,
|
||||
LivePhysRegs *LiveRegs) const {
|
||||
assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
|
||||
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
|
@ -1082,9 +1080,17 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
|||
Offset *= ST.getWavefrontSize();
|
||||
|
||||
// We don't have access to the register scavenger if this function is called
|
||||
// during PEI::scavengeFrameVirtualRegs().
|
||||
if (RS)
|
||||
// during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
|
||||
if (RS) {
|
||||
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false);
|
||||
} else if (LiveRegs) {
|
||||
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
|
||||
if (LiveRegs->available(MF->getRegInfo(), Reg)) {
|
||||
SOffset = Reg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!SOffset) {
|
||||
// There are no free SGPRs, and since we are in the process of spilling
|
||||
|
@ -1613,6 +1619,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
|||
}
|
||||
|
||||
default: {
|
||||
// Other access to frame index
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
|
||||
int64_t Offset = FrameInfo.getObjectOffset(Index);
|
||||
|
|
|
@ -21,6 +21,7 @@ namespace llvm {
|
|||
|
||||
class GCNSubtarget;
|
||||
class LiveIntervals;
|
||||
class LivePhysRegs;
|
||||
class RegisterBank;
|
||||
struct SGPRSpillBuilder;
|
||||
class SIMachineFunctionInfo;
|
||||
|
@ -344,16 +345,15 @@ public:
|
|||
/// of the subtarget.
|
||||
ArrayRef<MCPhysReg> getAllSGPR32(const MachineFunction &MF) const;
|
||||
|
||||
private:
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
||||
unsigned LoadStoreOp,
|
||||
int Index,
|
||||
Register ValueReg,
|
||||
bool ValueIsKill,
|
||||
MCRegister ScratchOffsetReg,
|
||||
int64_t InstrOffset,
|
||||
MachineMemOperand *MMO,
|
||||
RegScavenger *RS) const;
|
||||
// Insert spill or restore instructions.
|
||||
// When lowering spill pseudos, the RegScavenger should be set.
|
||||
// For creating spill instructions during frame lowering, where no scavenger
|
||||
// is available, LiveRegs can be used.
|
||||
void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
|
||||
int Index, Register ValueReg, bool ValueIsKill,
|
||||
MCRegister ScratchOffsetReg, int64_t InstrOffset,
|
||||
MachineMemOperand *MMO, RegScavenger *RS,
|
||||
LivePhysRegs *LiveRegs = nullptr) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
|
|
@ -483,8 +483,8 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
|
|||
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
|
||||
; GCN: s_waitcnt
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Spill
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
|
||||
|
@ -506,8 +506,8 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
|
|||
; FLATSCR-NEXT: s_sub_u32 s32, s32, 0x100c{{$}}
|
||||
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
|
||||
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
|
||||
; MUBUF-NEXT: v_mov_b32_e32 [[SCRATCH_VGPR:v[0-9]+]], 0x1008
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], [[SCRATCH_VGPR]], s[0:3], s32 offen ; 4-byte Folded Reload
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
|
||||
; FLATSCR-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
|
||||
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
|
||||
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
|
||||
|
@ -670,12 +670,12 @@ define void @callee_need_to_spill_fp_to_reg() #1 {
|
|||
; scratch VGPR to hold the offset.
|
||||
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; MUBUF: s_or_saveexec_b64 s[4:5], -1
|
||||
; MUBUF: v_mov_b32_e32 v0, 0x1008
|
||||
; MUBUF-NEXT: buffer_store_dword v39, v0, s[0:3], s32 offen ; 4-byte Folded Spill
|
||||
; MUBUF: v_mov_b32_e32 v0, s33
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
|
||||
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
|
||||
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x100c
|
||||
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
|
||||
; MUBUF-NEXT: s_add_u32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
|
||||
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
|
||||
; FLATSCR: s_add_u32 [[SOFF:s[0-9]+]], s33, 0x1004
|
||||
; FLATSCR: v_mov_b32_e32 v0, 0
|
||||
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
|
||||
|
|
|
@ -26,17 +26,16 @@ body: |
|
|||
; GFX8-LABEL: name: pei_scavenge_vgpr_spill
|
||||
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
|
||||
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX8: $sgpr32 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX8: $sgpr32 = S_SUB_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX8: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; GFX8: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX8: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; GFX8: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX8: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX8: $sgpr6 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX8: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX8: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX8: $vcc_lo = S_MOV_B32 8192
|
||||
; GFX8: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
|
||||
|
@ -44,8 +43,8 @@ body: |
|
|||
; GFX8: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX8: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; GFX8: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX8: $vgpr0 = V_MOV_B32_e32 8196, implicit $exec
|
||||
; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX8: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX8: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX8: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX8: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX8: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
|
||||
|
@ -53,25 +52,24 @@ body: |
|
|||
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
|
||||
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
|
||||
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9: $sgpr32 = S_ADD_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX9: $sgpr32 = S_SUB_U32 $sgpr32, 8196, implicit-def $scc
|
||||
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
|
||||
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
|
||||
; GFX9: $sgpr33 = frame-setup S_ADD_U32 $sgpr32, 524224, implicit-def $scc
|
||||
; GFX9: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def $scc
|
||||
; GFX9: $sgpr32 = frame-setup S_ADD_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX9: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX9: $sgpr6 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX9: $sgpr7 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
|
||||
; GFX9: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
|
||||
; GFX9: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
|
||||
; GFX9: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
|
||||
; GFX9: $sgpr32 = frame-destroy S_SUB_U32 $sgpr32, 1572864, implicit-def $scc
|
||||
; GFX9: $sgpr33 = V_READLANE_B32 $vgpr2, 0
|
||||
; GFX9: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; GFX9: $vgpr0 = V_MOV_B32_e32 8196, implicit $exec
|
||||
; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX9: $sgpr6 = S_ADD_U32 $sgpr32, 524544, implicit-def $scc
|
||||
; GFX9: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
|
||||
; GFX9: $exec = S_MOV_B64 killed $sgpr4_sgpr5
|
||||
; GFX9: $sgpr4 = S_ADD_U32 $sgpr33, 524800, implicit-def $scc
|
||||
; GFX9: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
|
||||
|
|
|
@ -292,14 +292,16 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i
|
|||
|
||||
; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
|
||||
; GCN: s_or_saveexec_b64 s[4:5], -1
|
||||
; GCN: v_mov_b32_e32 v0, s33
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42100
|
||||
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s33
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x1088
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
; GCN: v_mov_b32_e32 v0, s34
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42200
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
; GCN-NEXT: v_mov_b32_e32 v0, s34
|
||||
; GCN-NOT: v_mov_b32_e32 v0, 0x108c
|
||||
; GCN-NEXT: v_mov_b32_e32 v1, 0x108c
|
||||
; GCN-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen
|
||||
; GCN-NEXT: s_add_u32 s6, s32, 0x42300
|
||||
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
|
||||
%local_val = alloca i32, align 128, addrspace(5)
|
||||
store volatile i32 %b, i32 addrspace(5)* %local_val, align 128
|
||||
|
||||
|
|
Loading…
Reference in New Issue