AMDGPU: Don't sometimes allow instructions before lowered si_end_cf

Since 6524a7a2b9, this would sometimes
not emit the or to exec at the beginning of the block, where it really
has to be. If there is an instruction that defines one of the source
operands, split the block and turn the si_end_cf into a terminator.

This avoids regressions when regalloc fast is switched to inserting
reloads at the beginning of the block, instead of spills at the end of
the block.

In a future change, this should always split the block.
This commit is contained in:
Matt Arsenault 2020-09-10 15:49:09 -04:00
parent 615695de27
commit 0576f436e5
6 changed files with 434 additions and 28 deletions

View File

@ -1659,7 +1659,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// register allocation.
MI.setDesc(get(AMDGPU::S_XOR_B32));
break;
case AMDGPU::S_OR_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
MI.setDesc(get(AMDGPU::S_OR_B64));
break;
case AMDGPU::S_OR_B32_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@ -2236,6 +2240,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::SI_MASK_BRANCH:
case AMDGPU::S_MOV_B64_term:
case AMDGPU::S_XOR_B64_term:
case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
case AMDGPU::S_MOV_B32_term:
case AMDGPU::S_XOR_B32_term:

View File

@ -264,6 +264,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
let WaveSizePredicate = isWave64 in {
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
}

View File

@ -99,6 +99,7 @@ private:
unsigned MovTermOpc;
unsigned Andn2TermOpc;
unsigned XorTermrOpc;
unsigned OrTermrOpc;
unsigned OrSaveExecOpc;
unsigned Exec;
@ -106,7 +107,10 @@ private:
void emitElse(MachineInstr &MI);
void emitIfBreak(MachineInstr &MI);
void emitLoop(MachineInstr &MI);
void emitEndCf(MachineInstr &MI);
MachineBasicBlock *splitBlock(MachineInstr &MI, MachineBasicBlock *BB,
LiveIntervals *LIS);
MachineBasicBlock *emitEndCf(MachineInstr &MI);
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
SmallVectorImpl<MachineOperand> &Src) const;
@ -115,7 +119,7 @@ private:
bool removeMBBifRedundant(MachineBasicBlock &MBB);
void process(MachineInstr &MI);
MachineBasicBlock *process(MachineInstr &MI);
// Skip to the next instruction, ignoring debug instructions, and trivial
// block boundaries (blocks that have one (typically fallthrough) successor,
@ -489,19 +493,73 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
} while (true);
}
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
// Split \p BB immediately after \p MI, moving every instruction that follows
// MI into a freshly created block which inherits BB's successors. Returns BB
// itself when MI is already the last instruction (nothing to split);
// otherwise returns the new block. Physreg live-ins of the new block and
// LiveIntervals (if provided) are kept up to date.
MachineBasicBlock *SILowerControlFlow::splitBlock(MachineInstr &MI,
MachineBasicBlock *BB,
LiveIntervals *LIS) {
MachineBasicBlock::iterator SplitPoint(&MI);
++SplitPoint;
if (SplitPoint == BB->end()) {
// Don't bother with a new block.
return BB;
}
// Make sure we add any physregs we define in the block as liveins to the new
// block.
// Compute what is live at SplitPoint by starting from BB's live-outs and
// stepping backward over the instructions that will remain after the split.
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*BB);
for (auto I = BB->rbegin(), E = SplitPoint.getReverse(); I != E; ++I)
LiveRegs.stepBackward(*I);
MachineFunction *MF = BB->getParent();
MachineBasicBlock *SplitBB
= MF->CreateMachineBasicBlock(BB->getBasicBlock());
// Insert the new block right after BB, move the tail instructions over, and
// transfer BB's successors (fixing PHIs) before making SplitBB BB's sole
// fallthrough successor.
MF->insert(++MachineFunction::iterator(BB), SplitBB);
SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
SplitBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(SplitBB);
addLiveIns(*SplitBB, LiveRegs);
// Keep the slot indexes / live interval analysis consistent with the new
// CFG when the caller is preserving it.
if (LIS)
LIS->insertMBBInMaps(SplitBB, &MI);
return SplitBB;
}
MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
Register CFMask = MI.getOperand(0).getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsPt =
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
: MBB.begin();
MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
MachineBasicBlock::iterator InsPt = MBB.begin();
// If we have instructions that aren't prolog instructions, split the block
// and emit a terminator instruction. This ensures correct spill placement.
// FIXME: We should unconditionally split the block here.
bool NeedBlockSplit = false;
Register DataReg = MI.getOperand(0).getReg();
for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
I != E; ++I) {
if (I->modifiesRegister(DataReg, TRI)) {
NeedBlockSplit = true;
break;
}
}
unsigned Opcode = OrOpc;
MachineBasicBlock *SplitBB = &MBB;
if (NeedBlockSplit) {
SplitBB = splitBlock(MI, &MBB, LIS);
Opcode = OrTermrOpc;
InsPt = MI;
}
MachineInstr *NewMI =
BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
LoweredEndCf.insert(NewMI);
@ -522,6 +580,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
if (LIS)
LIS->handleMove(*NewMI);
return SplitBB;
}
// Returns replace operands for a logical operation, either single result
@ -608,11 +667,13 @@ void SILowerControlFlow::optimizeEndCf() {
}
}
void SILowerControlFlow::process(MachineInstr &MI) {
MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
MachineBasicBlock::iterator I(MI);
MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
MachineBasicBlock *SplitBB = &MBB;
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
emitIf(MI);
@ -631,7 +692,7 @@ void SILowerControlFlow::process(MachineInstr &MI) {
break;
case AMDGPU::SI_END_CF:
emitEndCf(MI);
SplitBB = emitEndCf(MI);
break;
default:
@ -656,6 +717,8 @@ void SILowerControlFlow::process(MachineInstr &MI) {
break;
}
}
return SplitBB;
}
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
@ -718,6 +781,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
MovTermOpc = AMDGPU::S_MOV_B32_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
XorTermrOpc = AMDGPU::S_XOR_B32_term;
OrTermrOpc = AMDGPU::S_OR_B32_term;
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
Exec = AMDGPU::EXEC_LO;
} else {
@ -727,6 +791,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
MovTermOpc = AMDGPU::S_MOV_B64_term;
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
XorTermrOpc = AMDGPU::S_XOR_B64_term;
OrTermrOpc = AMDGPU::S_OR_B64_term;
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
Exec = AMDGPU::EXEC;
}
@ -734,19 +799,21 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 32> Worklist;
MachineFunction::iterator NextBB;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
for (MachineFunction::iterator BI = MF.begin();
BI != MF.end(); BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
MachineBasicBlock *MBB = &*BI;
MachineBasicBlock::iterator I, Next;
for (I = MBB.begin(); I != MBB.end(); I = Next) {
MachineBasicBlock::iterator I, E, Next;
E = MBB->end();
for (I = MBB->begin(); I != E; I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
MachineBasicBlock *SplitMBB = MBB;
switch (MI.getOpcode()) {
case AMDGPU::SI_IF:
process(MI);
SplitMBB = process(MI);
break;
case AMDGPU::SI_ELSE:
@ -757,12 +824,17 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
if (InsertKillCleanups)
Worklist.push_back(&MI);
else
process(MI);
SplitMBB = process(MI);
break;
default:
break;
}
if (SplitMBB != MBB) {
MBB = Next->getParent();
E = MBB->end();
}
}
}

View File

@ -196,6 +196,12 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
return true;
}
case AMDGPU::S_OR_B64_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.
MI.setDesc(TII.get(AMDGPU::S_OR_B64));
return true;
}
case AMDGPU::S_OR_B32_term: {
// This is only a terminator to get the correct spill code placement during
// register allocation.

View File

@ -205,9 +205,11 @@ body: |
; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
; CHECK: bb.2:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: successors: %bb.3(0x80000000)
; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
; CHECK: $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc
; CHECK: bb.3:
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; CHECK: S_SLEEP 1
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc

View File

@ -1,18 +1,17 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=liveintervals,si-lower-control-flow,si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
# Check that assert is not triggered
...
---
name: si-lower-control-flow
body: |
bb.0:
; GCN-LABEL: name: si-lower-control-flow
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0
; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc
; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
; GCN: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
; GCN: S_ENDPGM 0
%0:sgpr_64 = COPY $sgpr4_sgpr5
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
@ -51,3 +50,324 @@ body: |
S_ENDPGM 0
...
# We need to split the block for SI_END_CF, but SI_END_CF is the last
# instruction in the block, so no new block actually needs to be created.
---
name: end_cf_split_block_end
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_end
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
%6:sreg_64_xexec = COPY %5
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
bb.2:
S_ENDPGM 0
...
---
name: end_cf_split_block_physreg_livein
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_physreg_livein
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.3(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
; GCN: S_NOP 0
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: S_SLEEP 3
; GCN: S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
liveins: $vgpr0, $sgpr4_sgpr5
%6:sreg_64_xexec = COPY %5
S_NOP 0
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
S_SLEEP 3
S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
bb.2:
S_ENDPGM 0
...
---
name: end_cf_split_block_physreg_livein_liveout
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_physreg_livein_liveout
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.3(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9
; GCN: S_SLEEP 3
; GCN: S_NOP 0
; GCN: bb.2:
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
%6:sreg_64_xexec = COPY %5
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
S_SLEEP 3
S_NOP 0
bb.2:
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
...
---
name: end_cf_split_block_physreg_liveout
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_physreg_liveout
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.3(0x80000000)
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
; GCN: liveins: $vgpr3
; GCN: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
; GCN: $sgpr4_sgpr5 = S_MOV_B64 32
; GCN: bb.2:
; GCN: liveins: $vgpr3, $sgpr4_sgpr5
; GCN: S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
%6:sreg_64_xexec = COPY %5
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
$sgpr4_sgpr5 = S_MOV_B64 32
bb.2:
liveins: $vgpr3, $sgpr4_sgpr5
S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
...
---
name: end_cf_split_block_physreg_live_across_split
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_physreg_live_across_split
; GCN: bb.0:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.3(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
; GCN: $sgpr4_sgpr5 = S_MOV_B64 32
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: S_SLEEP 3, implicit $sgpr4_sgpr5
; GCN: S_NOP 0
; GCN: bb.2:
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
S_BRANCH %bb.2
bb.1:
successors: %bb.2
liveins: $vgpr0, $sgpr4_sgpr5
%6:sreg_64_xexec = COPY %5
$sgpr4_sgpr5 = S_MOV_B64 32
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
S_SLEEP 3, implicit $sgpr4_sgpr5
S_NOP 0
bb.2:
liveins: $vgpr0, $sgpr4_sgpr5
S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
...
---
name: end_cf_split_block_process_next_inst
tracksRegLiveness: true
body: |
; GCN-LABEL: name: end_cf_split_block_process_next_inst
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
; GCN: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec
; GCN: dead %5:sreg_64_xexec = S_MOV_B64 0
; GCN: bb.1:
; GCN: successors: %bb.3(0x80000000)
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]]
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
; GCN: bb.3:
; GCN: successors: %bb.2(0x80000000)
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
; GCN: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
; GCN: bb.2:
; GCN: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
%0:vgpr_32 = COPY killed $vgpr0
%1:vgpr_32 = COPY killed $vgpr1
%2:vgpr_32 = COPY killed $vgpr2
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, killed %1, implicit $exec
%4:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %2, implicit $exec
%5:sreg_64_xexec = S_MOV_B64 0
bb.1:
successors: %bb.2
%6:sreg_64_xexec = COPY %3
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
%7:sreg_64_xexec = SI_IF %4, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec
%8:sreg_64_xexec = S_MOV_B64_term %7, implicit $exec
bb.2:
S_ENDPGM 0
...