forked from OSchip/llvm-project
AMDGPU: Don't sometimes allow instructions before lowered si_end_cf
Since commit 6524a7a2b9, this would sometimes
not emit the `or` to exec at the beginning of the block, where it really
has to be. If there is an instruction that defines one of the source
operands, split the block and turn the si_end_cf into a terminator.
This avoids regressions when regalloc fast is switched to inserting
reloads at the beginning of the block, instead of spills at the end of
the block.
In a future change, this should always split the block.
This commit is contained in:
parent
615695de27
commit
0576f436e5
|
@ -1659,7 +1659,11 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
|
|||
// register allocation.
|
||||
MI.setDesc(get(AMDGPU::S_XOR_B32));
|
||||
break;
|
||||
|
||||
case AMDGPU::S_OR_B64_term:
|
||||
// This is only a terminator to get the correct spill code placement during
|
||||
// register allocation.
|
||||
MI.setDesc(get(AMDGPU::S_OR_B64));
|
||||
break;
|
||||
case AMDGPU::S_OR_B32_term:
|
||||
// This is only a terminator to get the correct spill code placement during
|
||||
// register allocation.
|
||||
|
@ -2236,6 +2240,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
|||
case AMDGPU::SI_MASK_BRANCH:
|
||||
case AMDGPU::S_MOV_B64_term:
|
||||
case AMDGPU::S_XOR_B64_term:
|
||||
case AMDGPU::S_OR_B64_term:
|
||||
case AMDGPU::S_ANDN2_B64_term:
|
||||
case AMDGPU::S_MOV_B32_term:
|
||||
case AMDGPU::S_XOR_B32_term:
|
||||
|
|
|
@ -264,6 +264,7 @@ class WrapTerminatorInst<SOP_Pseudo base_inst> : SPseudoInstSI<
|
|||
let WaveSizePredicate = isWave64 in {
|
||||
def S_MOV_B64_term : WrapTerminatorInst<S_MOV_B64>;
|
||||
def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
|
||||
def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
|
||||
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
|
||||
}
|
||||
|
||||
|
|
|
@ -99,6 +99,7 @@ private:
|
|||
unsigned MovTermOpc;
|
||||
unsigned Andn2TermOpc;
|
||||
unsigned XorTermrOpc;
|
||||
unsigned OrTermrOpc;
|
||||
unsigned OrSaveExecOpc;
|
||||
unsigned Exec;
|
||||
|
||||
|
@ -106,7 +107,10 @@ private:
|
|||
void emitElse(MachineInstr &MI);
|
||||
void emitIfBreak(MachineInstr &MI);
|
||||
void emitLoop(MachineInstr &MI);
|
||||
void emitEndCf(MachineInstr &MI);
|
||||
|
||||
MachineBasicBlock *splitBlock(MachineInstr &MI, MachineBasicBlock *BB,
|
||||
LiveIntervals *LIS);
|
||||
MachineBasicBlock *emitEndCf(MachineInstr &MI);
|
||||
|
||||
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MachineOperand> &Src) const;
|
||||
|
@ -115,7 +119,7 @@ private:
|
|||
|
||||
bool removeMBBifRedundant(MachineBasicBlock &MBB);
|
||||
|
||||
void process(MachineInstr &MI);
|
||||
MachineBasicBlock *process(MachineInstr &MI);
|
||||
|
||||
// Skip to the next instruction, ignoring debug instructions, and trivial
|
||||
// block boundaries (blocks that have one (typically fallthrough) successor,
|
||||
|
@ -489,19 +493,73 @@ SILowerControlFlow::skipIgnoreExecInstsTrivialSucc(
|
|||
} while (true);
|
||||
}
|
||||
|
||||
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
|
||||
MachineBasicBlock *SILowerControlFlow::splitBlock(MachineInstr &MI,
|
||||
MachineBasicBlock *BB,
|
||||
LiveIntervals *LIS) {
|
||||
MachineBasicBlock::iterator SplitPoint(&MI);
|
||||
++SplitPoint;
|
||||
|
||||
if (SplitPoint == BB->end()) {
|
||||
// Don't bother with a new block.
|
||||
return BB;
|
||||
}
|
||||
|
||||
// Make sure we add any physregs we define in the block as liveins to the new
|
||||
// block.
|
||||
LivePhysRegs LiveRegs(*TRI);
|
||||
LiveRegs.addLiveOuts(*BB);
|
||||
for (auto I = BB->rbegin(), E = SplitPoint.getReverse(); I != E; ++I)
|
||||
LiveRegs.stepBackward(*I);
|
||||
|
||||
MachineFunction *MF = BB->getParent();
|
||||
MachineBasicBlock *SplitBB
|
||||
= MF->CreateMachineBasicBlock(BB->getBasicBlock());
|
||||
|
||||
MF->insert(++MachineFunction::iterator(BB), SplitBB);
|
||||
SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
|
||||
|
||||
SplitBB->transferSuccessorsAndUpdatePHIs(BB);
|
||||
BB->addSuccessor(SplitBB);
|
||||
|
||||
addLiveIns(*SplitBB, LiveRegs);
|
||||
|
||||
if (LIS)
|
||||
LIS->insertMBBInMaps(SplitBB, &MI);
|
||||
|
||||
return SplitBB;
|
||||
}
|
||||
|
||||
MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
Register CFMask = MI.getOperand(0).getReg();
|
||||
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
MachineBasicBlock::iterator InsPt =
|
||||
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
|
||||
: MBB.begin();
|
||||
MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
|
||||
.addReg(Exec)
|
||||
.add(MI.getOperand(0));
|
||||
MachineBasicBlock::iterator InsPt = MBB.begin();
|
||||
|
||||
// If we have instructions that aren't prolog instructions, split the block
|
||||
// and emit a terminator instruction. This ensures correct spill placement.
|
||||
// FIXME: We should unconditionally split the block here.
|
||||
bool NeedBlockSplit = false;
|
||||
Register DataReg = MI.getOperand(0).getReg();
|
||||
for (MachineBasicBlock::iterator I = InsPt, E = MI.getIterator();
|
||||
I != E; ++I) {
|
||||
if (I->modifiesRegister(DataReg, TRI)) {
|
||||
NeedBlockSplit = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned Opcode = OrOpc;
|
||||
MachineBasicBlock *SplitBB = &MBB;
|
||||
if (NeedBlockSplit) {
|
||||
SplitBB = splitBlock(MI, &MBB, LIS);
|
||||
Opcode = OrTermrOpc;
|
||||
InsPt = MI;
|
||||
}
|
||||
|
||||
MachineInstr *NewMI =
|
||||
BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
|
||||
.addReg(Exec)
|
||||
.add(MI.getOperand(0));
|
||||
|
||||
LoweredEndCf.insert(NewMI);
|
||||
|
||||
|
@ -522,6 +580,7 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
|
|||
|
||||
if (LIS)
|
||||
LIS->handleMove(*NewMI);
|
||||
return SplitBB;
|
||||
}
|
||||
|
||||
// Returns replace operands for a logical operation, either single result
|
||||
|
@ -608,11 +667,13 @@ void SILowerControlFlow::optimizeEndCf() {
|
|||
}
|
||||
}
|
||||
|
||||
void SILowerControlFlow::process(MachineInstr &MI) {
|
||||
MachineBasicBlock *SILowerControlFlow::process(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineBasicBlock::iterator I(MI);
|
||||
MachineInstr *Prev = (I != MBB.begin()) ? &*(std::prev(I)) : nullptr;
|
||||
|
||||
MachineBasicBlock *SplitBB = &MBB;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::SI_IF:
|
||||
emitIf(MI);
|
||||
|
@ -631,7 +692,7 @@ void SILowerControlFlow::process(MachineInstr &MI) {
|
|||
break;
|
||||
|
||||
case AMDGPU::SI_END_CF:
|
||||
emitEndCf(MI);
|
||||
SplitBB = emitEndCf(MI);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -656,6 +717,8 @@ void SILowerControlFlow::process(MachineInstr &MI) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return SplitBB;
|
||||
}
|
||||
|
||||
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
|
||||
|
@ -718,6 +781,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
|||
MovTermOpc = AMDGPU::S_MOV_B32_term;
|
||||
Andn2TermOpc = AMDGPU::S_ANDN2_B32_term;
|
||||
XorTermrOpc = AMDGPU::S_XOR_B32_term;
|
||||
OrTermrOpc = AMDGPU::S_OR_B32_term;
|
||||
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
|
||||
Exec = AMDGPU::EXEC_LO;
|
||||
} else {
|
||||
|
@ -727,6 +791,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
|||
MovTermOpc = AMDGPU::S_MOV_B64_term;
|
||||
Andn2TermOpc = AMDGPU::S_ANDN2_B64_term;
|
||||
XorTermrOpc = AMDGPU::S_XOR_B64_term;
|
||||
OrTermrOpc = AMDGPU::S_OR_B64_term;
|
||||
OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
|
||||
Exec = AMDGPU::EXEC;
|
||||
}
|
||||
|
@ -734,19 +799,21 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
|||
SmallVector<MachineInstr *, 32> Worklist;
|
||||
|
||||
MachineFunction::iterator NextBB;
|
||||
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
|
||||
BI != BE; BI = NextBB) {
|
||||
for (MachineFunction::iterator BI = MF.begin();
|
||||
BI != MF.end(); BI = NextBB) {
|
||||
NextBB = std::next(BI);
|
||||
MachineBasicBlock &MBB = *BI;
|
||||
MachineBasicBlock *MBB = &*BI;
|
||||
|
||||
MachineBasicBlock::iterator I, Next;
|
||||
for (I = MBB.begin(); I != MBB.end(); I = Next) {
|
||||
MachineBasicBlock::iterator I, E, Next;
|
||||
E = MBB->end();
|
||||
for (I = MBB->begin(); I != E; I = Next) {
|
||||
Next = std::next(I);
|
||||
MachineInstr &MI = *I;
|
||||
MachineBasicBlock *SplitMBB = MBB;
|
||||
|
||||
switch (MI.getOpcode()) {
|
||||
case AMDGPU::SI_IF:
|
||||
process(MI);
|
||||
SplitMBB = process(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_ELSE:
|
||||
|
@ -757,12 +824,17 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (InsertKillCleanups)
|
||||
Worklist.push_back(&MI);
|
||||
else
|
||||
process(MI);
|
||||
SplitMBB = process(MI);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (SplitMBB != MBB) {
|
||||
MBB = Next->getParent();
|
||||
E = MBB->end();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -196,6 +196,12 @@ static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
|
|||
MI.setDesc(TII.get(AMDGPU::S_XOR_B32));
|
||||
return true;
|
||||
}
|
||||
case AMDGPU::S_OR_B64_term: {
|
||||
// This is only a terminator to get the correct spill code placement during
|
||||
// register allocation.
|
||||
MI.setDesc(TII.get(AMDGPU::S_OR_B64));
|
||||
return true;
|
||||
}
|
||||
case AMDGPU::S_OR_B32_term: {
|
||||
// This is only a terminator to get the correct spill code placement during
|
||||
// register allocation.
|
||||
|
|
|
@ -205,9 +205,11 @@ body: |
|
|||
; CHECK: dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
|
||||
; CHECK: [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
|
||||
; CHECK: bb.2:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
|
||||
; CHECK: $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
|
||||
; CHECK: $exec = S_OR_B64_term $exec, killed [[COPY5]], implicit-def $scc
|
||||
; CHECK: bb.3:
|
||||
; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: S_SLEEP 1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
|
|
|
@ -1,18 +1,17 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=liveintervals,si-lower-control-flow,si-lower-control-flow -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
# Check that assert is not triggered
|
||||
|
||||
...
|
||||
---
|
||||
name: si-lower-control-flow
|
||||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: si-lower-control-flow
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
|
||||
; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0
|
||||
; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 16, 0, 0
|
||||
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 [[S_LOAD_DWORD_IMM]], 255, implicit-def $scc
|
||||
; GCN: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
|
||||
; GCN: dead %3:sreg_32_xm0 = S_AND_B32 65535, [[S_AND_B32_]], implicit-def $scc
|
||||
; GCN: S_ENDPGM 0
|
||||
%0:sgpr_64 = COPY $sgpr4_sgpr5
|
||||
%1:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0
|
||||
|
@ -51,3 +50,324 @@ body: |
|
|||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
# We need to split the block for SI_END_CF, but
|
||||
---
|
||||
name: end_cf_split_block_end
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_end
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
||||
%6:sreg_64_xexec = COPY %5
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: end_cf_split_block_physreg_livein
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_physreg_livein
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
|
||||
; GCN: S_NOP 0
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GCN: S_SLEEP 3
|
||||
; GCN: S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
%6:sreg_64_xexec = COPY %5
|
||||
S_NOP 0
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
S_SLEEP 3
|
||||
S_NOP 0, implicit $vgpr0, implicit $sgpr4_sgpr5
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: end_cf_split_block_physreg_livein_liveout
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_physreg_livein_liveout
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9
|
||||
; GCN: S_SLEEP 3
|
||||
; GCN: S_NOP 0
|
||||
; GCN: bb.2:
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x0000000000000003
|
||||
; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
|
||||
|
||||
%6:sreg_64_xexec = COPY %5
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
S_SLEEP 3
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9_sgpr10_sgpr11:0x00000003
|
||||
S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5, implicit $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: end_cf_split_block_physreg_liveout
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_physreg_liveout
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: liveins: $vgpr3
|
||||
; GCN: $vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: $sgpr4_sgpr5 = S_MOV_B64 32
|
||||
; GCN: bb.2:
|
||||
; GCN: liveins: $vgpr3, $sgpr4_sgpr5
|
||||
; GCN: S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
||||
%6:sreg_64_xexec = COPY %5
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
$sgpr4_sgpr5 = S_MOV_B64 32
|
||||
|
||||
bb.2:
|
||||
liveins: $vgpr3, $sgpr4_sgpr5
|
||||
S_ENDPGM 0, implicit $vgpr3, implicit $sgpr4_sgpr5
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: end_cf_split_block_physreg_live_across_split
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_physreg_live_across_split
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
|
||||
; GCN: S_BRANCH %bb.2
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term]]
|
||||
; GCN: $sgpr4_sgpr5 = S_MOV_B64 32
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GCN: S_SLEEP 3, implicit $sgpr4_sgpr5
|
||||
; GCN: S_NOP 0
|
||||
; GCN: bb.2:
|
||||
; GCN: liveins: $vgpr0, $sgpr4_sgpr5
|
||||
; GCN: S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31, $sgpr4_sgpr5
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64_term %4, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
|
||||
%6:sreg_64_xexec = COPY %5
|
||||
$sgpr4_sgpr5 = S_MOV_B64 32
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
S_SLEEP 3, implicit $sgpr4_sgpr5
|
||||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
liveins: $vgpr0, $sgpr4_sgpr5
|
||||
S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
name: end_cf_split_block_process_next_inst
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: end_cf_split_block_process_next_inst
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[V_CMP_EQ_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY2]], implicit $exec
|
||||
; GCN: dead %5:sreg_64_xexec = S_MOV_B64 0
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.3(0x80000000)
|
||||
; GCN: [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[V_CMP_EQ_U32_e64_]]
|
||||
; GCN: $exec = S_OR_B64_term $exec, [[COPY3]], implicit-def $scc
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.2(0x80000000)
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_EQ_U32_e64_1]], implicit-def dead $scc
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY4]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term killed [[S_AND_B64_]]
|
||||
; GCN: dead %8:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
|
||||
; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2
|
||||
|
||||
%0:vgpr_32 = COPY killed $vgpr0
|
||||
%1:vgpr_32 = COPY killed $vgpr1
|
||||
%2:vgpr_32 = COPY killed $vgpr2
|
||||
%3:sreg_64_xexec = V_CMP_EQ_U32_e64 %0, killed %1, implicit $exec
|
||||
%4:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %2, implicit $exec
|
||||
%5:sreg_64_xexec = S_MOV_B64 0
|
||||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
|
||||
%6:sreg_64_xexec = COPY %3
|
||||
SI_END_CF killed %6, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%7:sreg_64_xexec = SI_IF %4, %bb.2, implicit-def $exec, implicit-def dead $scc, implicit $exec
|
||||
%8:sreg_64_xexec = S_MOV_B64_term %7, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue