[AMDGPU] Mark waterfall loops as SI_WATERFALL_LOOP

This way, they can be detected later, e.g. by the
SIOptimizeVGPRLiveRange pass.

Differential Revision: https://reviews.llvm.org/D105467
This commit is contained in:
Sebastian Neubauer 2021-07-06 11:23:06 +02:00
parent 78463ebde2
commit 9d72c0ad43
4 changed files with 22 additions and 9 deletions

View File

@ -5278,7 +5278,7 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
.addReg(Exec)
.addReg(SaveExec);
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(&LoopBB);
BuildMI(LoopBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
}
// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register

View File

@ -318,6 +318,14 @@ def SI_ELSE : CFPseudoInstSI <
let hasSideEffects = 1;
}
// Control-flow pseudo used as the back-edge branch of a waterfall loop.
// It is emitted where a plain S_CBRANCH_EXECNZ would otherwise be placed and
// is rewritten to S_CBRANCH_EXECNZ by SILowerControlFlow, so passes that run
// in between (e.g. SIOptimizeVGPRLiveRange) can recognize waterfall loops.
//
// Operands: no outputs; the single input is the branch target (the loop
// block to jump back to). No selection pattern is attached ([]).
def SI_WATERFALL_LOOP : CFPseudoInstSI <
(outs),
(ins brtarget:$target), [], 1> {
// NOTE(review): presumably the maximum encoded size in bytes after
// lowering -- confirm against the other CFPseudoInstSI definitions.
let Size = 8;
// Flag the pseudo as a branch instruction.
let isBranch = 1;
// Explicitly empty implicit-def list.
// NOTE(review): looks like this overrides defaults inherited from
// CFPseudoInstSI -- confirm.
let Defs = [];
}
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_1:$saved, brtarget:$target),
[(AMDGPUloop i1:$saved, bb:$target)], 1, 1> {

View File

@ -600,6 +600,10 @@ MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
emitLoop(MI);
break;
case AMDGPU::SI_WATERFALL_LOOP:
MI.setDesc(TII->get(AMDGPU::S_CBRANCH_EXECNZ));
break;
case AMDGPU::SI_END_CF:
SplitBB = emitEndCf(MI);
break;
@ -840,6 +844,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SI_IF:
case AMDGPU::SI_ELSE:
case AMDGPU::SI_IF_BREAK:
case AMDGPU::SI_WATERFALL_LOOP:
case AMDGPU::SI_LOOP:
case AMDGPU::SI_END_CF:
SplitMBB = process(MI);

View File

@ -30,7 +30,7 @@
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@ -55,7 +55,7 @@
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@ -103,7 +103,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@ -128,7 +128,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@ -176,7 +176,7 @@ body: |
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-LABEL: bb.2:
# W64: $exec = S_MOV_B64 [[SAVEEXEC]]
@ -201,7 +201,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]
---
@ -286,7 +286,7 @@ body: |
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W64-NO-ADDR64: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W64-NO-ADDR64-LABEL: bb.2:
# W64-NO-ADDR64: $exec = S_MOV_B64 [[SAVEEXEC]]
@ -309,7 +309,7 @@ body: |
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, implicit $exec
# TODO: S_XOR_B32_term should be `implicit-def $scc`
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
# W32: SI_WATERFALL_LOOP %bb.1, implicit $exec
# W32-LABEL: bb.2:
# W32: $exec_lo = S_MOV_B32 [[SAVEEXEC]]