forked from OSchip/llvm-project
Revert "AMDGPU: Implement SGPR spilling with scalar stores"
This reverts commit 4404d0d6e354e80dd7f8f0a0e12d8ad809cf007e. llvm-svn: 287936
This commit is contained in:
parent
dad553a5cf
commit
e3895bfb47
|
@ -532,7 +532,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
TRI = &TII->getRegisterInfo();
|
||||
MRI = &MF.getRegInfo();
|
||||
IV = getIsaVersion(ST->getFeatureBits());
|
||||
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||||
|
||||
HardwareLimits.Named.VM = getVmcntBitMask(IV);
|
||||
HardwareLimits.Named.EXP = getExpcntBitMask(IV);
|
||||
|
@ -544,27 +543,20 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
LastOpcodeType = OTHER;
|
||||
LastInstWritesM0 = false;
|
||||
IsFlatOutstanding = false;
|
||||
ReturnsVoid = MFI->returnsVoid();
|
||||
ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
|
||||
|
||||
memset(&UsedRegs, 0, sizeof(UsedRegs));
|
||||
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
|
||||
|
||||
SmallVector<MachineInstr *, 4> RemoveMI;
|
||||
SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
|
||||
|
||||
bool HaveScalarStores = false;
|
||||
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; ++BI) {
|
||||
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
|
||||
I != E; ++I) {
|
||||
|
||||
if (!HaveScalarStores && TII->isScalarStore(*I))
|
||||
HaveScalarStores = true;
|
||||
|
||||
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
|
||||
// There is a hardware bug on CI/SI where an SMRD instruction may corrupt the
|
||||
// vccz bit, so when we detect that an instruction may read from a
|
||||
|
@ -633,45 +625,12 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
pushInstruction(MBB, I, Increment);
|
||||
handleSendMsg(MBB, I);
|
||||
|
||||
if (I->getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
I->getOpcode() == AMDGPU::SI_RETURN)
|
||||
EndPgmBlocks.push_back(&MBB);
|
||||
}
|
||||
|
||||
// Wait for everything at the end of the MBB
|
||||
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
|
||||
}
|
||||
|
||||
if (HaveScalarStores) {
|
||||
// If scalar writes are used, the cache must be flushed or else the next
|
||||
// wave to reuse the same scratch memory can be clobbered.
|
||||
//
|
||||
// Insert s_dcache_wb at wave termination points if there were any scalar
|
||||
// stores, and only if the cache hasn't already been flushed. This could be
|
||||
// improved by looking across blocks for flushes in postdominating blocks
|
||||
// from the stores but an explicitly requested flush is probably very rare.
|
||||
for (MachineBasicBlock *MBB : EndPgmBlocks) {
|
||||
bool SeenDCacheWB = false;
|
||||
|
||||
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
|
||||
I != E; ++I) {
|
||||
|
||||
if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
|
||||
SeenDCacheWB = true;
|
||||
else if (TII->isScalarStore(*I))
|
||||
SeenDCacheWB = false;
|
||||
|
||||
// FIXME: It would be better to insert this before a waitcnt if any.
|
||||
if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
|
||||
I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
|
||||
Changes = true;
|
||||
BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (MachineInstr *I : RemoveMI)
|
||||
I->eraseFromParent();
|
||||
|
||||
|
|
|
@ -544,7 +544,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|||
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
}
|
||||
|
||||
MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
|
||||
BuildMI(MBB, MI, DL, OpDesc)
|
||||
.addReg(SrcReg, getKillRegState(isKill)) // data
|
||||
.addFrameIndex(FrameIndex) // addr
|
||||
.addMemOperand(MMO)
|
||||
|
@ -554,11 +554,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
|
|||
// needing them, and need to ensure that the reserved registers are
|
||||
// correctly handled.
|
||||
|
||||
if (ST.hasScalarStores()) {
|
||||
// m0 is used for offset to scalar stores if used to spill.
|
||||
Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -648,17 +643,12 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
|||
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
|
||||
}
|
||||
|
||||
MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
|
||||
BuildMI(MBB, MI, DL, OpDesc, DestReg)
|
||||
.addFrameIndex(FrameIndex) // addr
|
||||
.addMemOperand(MMO)
|
||||
.addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
|
||||
.addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
|
||||
|
||||
if (ST.hasScalarStores()) {
|
||||
// m0 is used for offset to scalar stores if used to spill.
|
||||
Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,12 +24,6 @@
|
|||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> EnableSpillSGPRToSMEM(
|
||||
"amdgpu-spill-sgpr-to-smem",
|
||||
cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
|
||||
cl::init(true));
|
||||
|
||||
|
||||
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
|
||||
for (unsigned i = 0; PSets[i] != -1; ++i) {
|
||||
if (PSets[i] == (int)PSetID)
|
||||
|
@ -491,21 +485,18 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
|||
void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
||||
int Index,
|
||||
RegScavenger *RS) const {
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
MachineFunction *MF = MBB->getParent();
|
||||
MachineFunction *MF = MI->getParent()->getParent();
|
||||
MachineRegisterInfo &MRI = MF->getRegInfo();
|
||||
MachineBasicBlock *MBB = MI->getParent();
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
|
||||
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
|
||||
unsigned SuperReg = MI->getOperand(0).getReg();
|
||||
bool IsKill = MI->getOperand(0).isKill();
|
||||
const DebugLoc &DL = MI->getDebugLoc();
|
||||
|
||||
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
|
||||
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
|
||||
|
||||
bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
|
||||
|
||||
// SubReg carries the "Kill" flag when SubReg == SuperReg.
|
||||
unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
|
||||
|
@ -513,55 +504,6 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
|||
unsigned SubReg = NumSubRegs == 1 ?
|
||||
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
|
||||
|
||||
if (SpillToSMEM) {
|
||||
if (SuperReg == AMDGPU::M0) {
|
||||
assert(NumSubRegs == 1);
|
||||
unsigned CopyM0
|
||||
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), CopyM0)
|
||||
.addReg(AMDGPU::M0, getKillRegState(IsKill));
|
||||
|
||||
// The real spill now kills the temp copy.
|
||||
SubReg = SuperReg = CopyM0;
|
||||
IsKill = true;
|
||||
}
|
||||
|
||||
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
|
||||
unsigned Size = FrameInfo.getObjectSize(Index);
|
||||
unsigned Align = FrameInfo.getObjectAlignment(Index);
|
||||
MachinePointerInfo PtrInfo
|
||||
= MachinePointerInfo::getFixedStack(*MF, Index);
|
||||
MachineMemOperand *MMO
|
||||
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
|
||||
Size, Align);
|
||||
|
||||
unsigned OffsetReg = AMDGPU::M0;
|
||||
// Add i * 4 wave offset.
|
||||
//
|
||||
// SMEM instructions only support a single offset, so increment the wave
|
||||
// offset.
|
||||
|
||||
int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
|
||||
if (Offset != 0) {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
||||
.addReg(MFI->getScratchWaveOffsetReg())
|
||||
.addImm(Offset);
|
||||
} else {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
|
||||
.addReg(MFI->getScratchWaveOffsetReg());
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_STORE_DWORD_SGPR))
|
||||
.addReg(SubReg, getKillRegState(IsKill)) // sdata
|
||||
.addReg(MFI->getScratchRSrcReg()) // sbase
|
||||
.addReg(OffsetReg) // soff
|
||||
.addImm(0) // glc
|
||||
.addMemOperand(MMO);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
struct SIMachineFunctionInfo::SpilledReg Spill =
|
||||
MFI->getSpilledReg(MF, Index, i);
|
||||
if (Spill.hasReg()) {
|
||||
|
@ -588,9 +530,10 @@ void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
|
|||
// it are fixed.
|
||||
} else {
|
||||
// Spill SGPR to a frame index.
|
||||
// FIXME we should use S_STORE_DWORD here for VI.
|
||||
|
||||
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
|
||||
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
|
||||
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
|
||||
|
||||
MachineInstrBuilder Mov
|
||||
= BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
|
||||
|
@ -642,7 +585,6 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
|||
|
||||
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
|
||||
unsigned SuperReg = MI->getOperand(0).getReg();
|
||||
bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
|
||||
|
||||
// m0 is not allowed with readlane/writelane, so a temporary SGPR and
|
||||
// extra copy is needed.
|
||||
|
@ -652,44 +594,10 @@ void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
|
|||
SuperReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||
}
|
||||
|
||||
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
|
||||
|
||||
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
|
||||
unsigned SubReg = NumSubRegs == 1 ?
|
||||
SuperReg : getSubReg(SuperReg, getSubRegFromChannel(i));
|
||||
|
||||
if (SpillToSMEM) {
|
||||
unsigned Size = FrameInfo.getObjectSize(Index);
|
||||
unsigned Align = FrameInfo.getObjectAlignment(Index);
|
||||
MachinePointerInfo PtrInfo
|
||||
= MachinePointerInfo::getFixedStack(*MF, Index);
|
||||
MachineMemOperand *MMO
|
||||
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
|
||||
Size, Align);
|
||||
|
||||
unsigned OffsetReg = AMDGPU::M0;
|
||||
|
||||
// Add i * 4 offset
|
||||
int64_t Offset = ST.getWavefrontSize() * (FrOffset + 4 * i);
|
||||
if (Offset != 0) {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
|
||||
.addReg(MFI->getScratchWaveOffsetReg())
|
||||
.addImm(Offset);
|
||||
} else {
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
|
||||
.addReg(MFI->getScratchWaveOffsetReg());
|
||||
}
|
||||
|
||||
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BUFFER_LOAD_DWORD_SGPR), SubReg)
|
||||
.addReg(MFI->getScratchRSrcReg()) // sbase
|
||||
.addReg(OffsetReg) // soff
|
||||
.addImm(0) // glc
|
||||
.addMemOperand(MMO)
|
||||
.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
SIMachineFunctionInfo::SpilledReg Spill
|
||||
= MFI->getSpilledReg(MF, Index, i);
|
||||
|
||||
|
|
|
@ -1,20 +1,16 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=ALL %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; If spilling to smem, additional registers are used for the resource
|
||||
; descriptor.
|
||||
|
||||
; ALL-LABEL: {{^}}max_14_sgprs:
|
||||
; CHECK-LABEL: {{^}}max_14_sgprs:
|
||||
|
||||
; FIXME: Should be able to skip this copying of the private segment
|
||||
; buffer because all the SGPR spills are to VGPRs.
|
||||
|
||||
; ALL: s_mov_b64 s[6:7], s[2:3]
|
||||
; ALL: s_mov_b64 s[4:5], s[0:1]
|
||||
; ALL: SGPRBlocks: 1
|
||||
; ALL: NumSGPRsForWavesPerEU: 14
|
||||
define void @max_14_sgprs(i32 addrspace(1)* %out1,
|
||||
; CHECK: s_mov_b64 s[6:7], s[2:3]
|
||||
; CHECK: s_mov_b64 s[4:5], s[0:1]
|
||||
|
||||
; CHECK: SGPRBlocks: 1
|
||||
; CHECK: NumSGPRsForWavesPerEU: 14
|
||||
define void @max_14_sgprs(i32 addrspace(1)* %out1,
|
||||
i32 addrspace(1)* %out2,
|
||||
i32 addrspace(1)* %out3,
|
||||
i32 addrspace(1)* %out4,
|
||||
|
@ -35,7 +31,7 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1,
|
|||
; ---------------------
|
||||
; total: 14
|
||||
|
||||
; + reserved vcc, xnack, flat_scratch = 20
|
||||
; + reserved vcc, flat_scratch = 18
|
||||
|
||||
; Because we can't handle re-using the last few input registers as the
|
||||
; special vcc etc. registers (as well as decide to not use the unused
|
||||
|
@ -44,14 +40,14 @@ define void @max_14_sgprs(i32 addrspace(1)* %out1,
|
|||
|
||||
; ALL-LABEL: {{^}}max_12_sgprs_14_input_sgprs:
|
||||
; TOSGPR: SGPRBlocks: 2
|
||||
; TOSGPR: NumSGPRsForWavesPerEU: 20
|
||||
; TOSGPR: NumSGPRsForWavesPerEU: 18
|
||||
|
||||
; TOSMEM: s_mov_b64 s[6:7], s[2:3]
|
||||
; TOSMEM: s_mov_b32 s9, s13
|
||||
; TOSMEM: s_mov_b64 s[4:5], s[0:1]
|
||||
; TOSMEM: s_mov_b32 s3, s13
|
||||
|
||||
; TOSMEM: SGPRBlocks: 2
|
||||
; TOSMEM: NumSGPRsForWavesPerEU: 20
|
||||
; TOSMEM: NumSGPRsForWavesPerEU: 18
|
||||
define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
|
||||
i32 addrspace(1)* %out2,
|
||||
i32 addrspace(1)* %out3,
|
||||
|
@ -83,12 +79,12 @@ define void @max_12_sgprs_14_input_sgprs(i32 addrspace(1)* %out1,
|
|||
; ; swapping the order the registers are copied from what normally
|
||||
; ; happens.
|
||||
|
||||
; TOSMEM: s_mov_b32 s5, s11
|
||||
; TOSMEM: s_add_u32 m0, s5,
|
||||
; TOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
|
||||
; TOSMEM: s_mov_b64 s[6:7], s[2:3]
|
||||
; TOSMEM: s_mov_b64 s[4:5], s[0:1]
|
||||
; TOSMEM: s_mov_b32 s3, s11
|
||||
|
||||
; ALL: SGPRBlocks: 2
|
||||
; ALL: NumSGPRsForWavesPerEU: 18
|
||||
; ALL: SGPRBlocks: 1
|
||||
; ALL: NumSGPRsForWavesPerEU: 16
|
||||
define void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,
|
||||
i32 addrspace(1)* %out2,
|
||||
i32 addrspace(1)* %out3,
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNNOOPT -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCNOPT -check-prefix=GCN %s
|
||||
|
||||
|
|
|
@ -1,44 +1,14 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; Make sure this doesn't crash.
|
||||
; ALL-LABEL: {{^}}test:
|
||||
; ALL: s_mov_b32 s92, SCRATCH_RSRC_DWORD0
|
||||
; ALL: s_mov_b32 s91, s3
|
||||
|
||||
; CHECK: {{^}}test:
|
||||
; Make sure we are handling hazards correctly.
|
||||
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
|
||||
; SGPR-NEXT: s_waitcnt vmcnt(0)
|
||||
; SGPR-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]]
|
||||
; SGPR-NEXT: s_nop 4
|
||||
; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
|
||||
|
||||
|
||||
; Make sure scratch wave offset register is correctly incremented and
|
||||
; then restored.
|
||||
; SMEM: s_mov_b32 m0, s91{{$}}
|
||||
; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_add_u32 m0, s91, 0x100{{$}}
|
||||
; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_add_u32 m0, s91, 0x200{{$}}
|
||||
; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
|
||||
; SMEM: s_add_u32 m0, s91, 0x300{{$}}
|
||||
; SMEM: s_buffer_store_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Spill
|
||||
|
||||
|
||||
; SMEM: s_mov_b32 m0, s91{{$}}
|
||||
; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_add_u32 m0, s91, 0x100{{$}}
|
||||
; SMEM: s_waitcnt lgkmcnt(0)
|
||||
; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_add_u32 m0, s91, 0x200{{$}}
|
||||
; SMEM: s_waitcnt lgkmcnt(0)
|
||||
; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
|
||||
; SMEM: s_add_u32 m0, s91, 0x300{{$}}
|
||||
; SMEM: s_waitcnt lgkmcnt(0)
|
||||
; SMEM: s_buffer_load_dword s{{[0-9]+}}, s[92:95], m0 ; 16-byte Folded Reload
|
||||
|
||||
; ALL: s_endpgm
|
||||
; CHECK: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:12
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_readfirstlane_b32 s[[HI:[0-9]+]], [[VHI]]
|
||||
; CHECK-NEXT: s_nop 4
|
||||
; CHECK-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
|
||||
; CHECK: s_endpgm
|
||||
define void @test(i32 addrspace(1)* %out, i32 %in) {
|
||||
call void asm sideeffect "", "~{SGPR0_SGPR1_SGPR2_SGPR3_SGPR4_SGPR5_SGPR6_SGPR7}" ()
|
||||
call void asm sideeffect "", "~{SGPR8_SGPR9_SGPR10_SGPR11_SGPR12_SGPR13_SGPR14_SGPR15}" ()
|
||||
|
|
|
@ -1,13 +1,12 @@
|
|||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=0 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -amdgpu-spill-sgpr-to-smem=1 -march=amdgcn -mcpu=tonga -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=TOSMEM -check-prefix=GCN %s
|
||||
; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mattr=+vgpr-spilling -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s
|
||||
|
||||
; XXX - Why does it like to use vcc?
|
||||
|
||||
; GCN-LABEL: {{^}}spill_m0:
|
||||
; TOSMEM: s_mov_b32 s84, SCRATCH_RSRC_DWORD0
|
||||
; TOSMEM: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
|
||||
|
||||
; GCN: s_cmp_lg_u32
|
||||
|
||||
|
@ -17,13 +16,6 @@
|
|||
; TOVMEM: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], m0
|
||||
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill
|
||||
; TOVMEM: s_waitcnt vmcnt(0)
|
||||
|
||||
; TOSMEM: s_mov_b32 vcc_hi, m0
|
||||
; TOSMEM: s_mov_b32 m0, s3{{$}}
|
||||
; TOSMEM-NOT: vcc_hi
|
||||
; TOSMEM: s_buffer_store_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Spill
|
||||
; TOSMEM: s_waitcnt lgkmcnt(0)
|
||||
|
||||
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; GCN: [[ENDIF]]:
|
||||
|
@ -35,11 +27,6 @@
|
|||
; TOVMEM: v_readfirstlane_b32 vcc_hi, [[RELOAD_VREG]]
|
||||
; TOVMEM: s_mov_b32 m0, vcc_hi
|
||||
|
||||
; TOSMEM: s_mov_b32 m0, s3{{$}}
|
||||
; TOSMEM: s_buffer_load_dword vcc_hi, s[84:87], m0 ; 4-byte Folded Reload
|
||||
; TOSMEM-NOT: vcc_hi
|
||||
; TOSMEM: s_mov_b32 m0, vcc_hi
|
||||
|
||||
; GCN: s_add_i32 m0, m0, 1
|
||||
define void @spill_m0(i32 %cond, i32 addrspace(1)* %out) #0 {
|
||||
entry:
|
||||
|
@ -61,8 +48,6 @@ endif:
|
|||
|
||||
; GCN-LABEL: {{^}}spill_m0_lds:
|
||||
; GCN-NOT: v_readlane_b32 m0
|
||||
; GCN-NOT: s_buffer_store_dword m0
|
||||
; GCN-NOT: s_buffer_load_dword m0
|
||||
define amdgpu_ps void @spill_m0_lds(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) #0 {
|
||||
main_body:
|
||||
%4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
|
||||
|
|
|
@ -1,173 +0,0 @@
|
|||
# RUN: llc -march=amdgcn -run-pass si-insert-waits %s -o - | FileCheck %s
|
||||
|
||||
--- |
|
||||
define void @basic_insert_dcache_wb() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @explicit_flush_after() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @explicit_flush_before() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @no_scalar_store() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @multi_block_store() {
|
||||
bb0:
|
||||
br i1 undef, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
ret void
|
||||
|
||||
bb2:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @one_block_store() {
|
||||
bb0:
|
||||
br i1 undef, label %bb1, label %bb2
|
||||
|
||||
bb1:
|
||||
ret void
|
||||
|
||||
bb2:
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps float @si_return() {
|
||||
ret float undef
|
||||
}
|
||||
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name: basic_insert_dcache_wb
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
name: basic_insert_dcache_wb
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# Already has an explicitly requested flush after the last store.
|
||||
# CHECK-LABEL: name: explicit_flush_after
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
name: explicit_flush_after
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0
|
||||
S_DCACHE_WB
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# Already has an explicitly requested flush before the last store.
|
||||
# CHECK-LABEL: name: explicit_flush_before
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
name: explicit_flush_before
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_DCACHE_WB
|
||||
S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: no_scalar_store
|
||||
# CHECK: bb.0
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
name: no_scalar_store
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_ENDPGM
|
||||
...
|
||||
|
||||
# CHECK-LABEL: name: multi_block_store
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
# CHECK: bb.1:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
name: multi_block_store
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0
|
||||
S_ENDPGM
|
||||
|
||||
bb.1:
|
||||
S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0
|
||||
S_ENDPGM
|
||||
...
|
||||
...
|
||||
|
||||
# This one should be able to omit the flush in the storeless block but
|
||||
# this isn't handled now.
|
||||
|
||||
# CHECK-LABEL: name: one_block_store
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
# CHECK: bb.1:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: S_ENDPGM
|
||||
|
||||
name: one_block_store
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_ENDPGM
|
||||
|
||||
bb.1:
|
||||
S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0
|
||||
S_ENDPGM
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name: si_return
|
||||
# CHECK: bb.0:
|
||||
# CHECK-NEXT: S_STORE_DWORD
|
||||
# CHECK-NEXT: S_WAITCNT
|
||||
# CHECK-NEXT: S_DCACHE_WB
|
||||
# CHECK-NEXT: SI_RETURN
|
||||
|
||||
name: si_return
|
||||
tracksRegLiveness: false
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0
|
||||
SI_RETURN undef %vgpr0
|
||||
...
|
Loading…
Reference in New Issue