forked from OSchip/llvm-project
AMDGPU: Fix verifier error with kill intrinsic
Don't create a terminator in the middle of the block. We should probably get rid of this intrinsic. llvm-svn: 275203
This commit is contained in:
parent
b9f8e29290
commit
657f871a4e
|
@ -76,7 +76,7 @@ private:
|
|||
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
|
||||
|
||||
void Skip(MachineInstr &From, MachineOperand &To);
|
||||
void SkipIfDead(MachineInstr &MI);
|
||||
bool skipIfDead(MachineInstr &MI);
|
||||
|
||||
void If(MachineInstr &MI);
|
||||
void Else(MachineInstr &MI, bool ExecModified);
|
||||
|
@ -89,12 +89,16 @@ private:
|
|||
void Kill(MachineInstr &MI);
|
||||
void Branch(MachineInstr &MI);
|
||||
|
||||
void splitBlockLiveIns(const MachineBasicBlock &MBB,
|
||||
const MachineInstr &MI,
|
||||
MachineBasicBlock &LoopBB,
|
||||
MachineBasicBlock &RemainderBB,
|
||||
unsigned SaveReg,
|
||||
const MachineOperand &IdxReg);
|
||||
std::pair<MachineBasicBlock *, MachineBasicBlock *>
|
||||
splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
|
||||
|
||||
void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const MachineInstr &MI,
|
||||
MachineBasicBlock &LoopBB,
|
||||
MachineBasicBlock &RemainderBB,
|
||||
unsigned SaveReg,
|
||||
const MachineOperand &IdxReg);
|
||||
|
||||
void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
|
||||
MachineInstr *MovRel,
|
||||
|
@ -171,7 +175,19 @@ bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
|
|||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
|
||||
return true;
|
||||
|
||||
if (++NumInstr >= SkipThreshold)
|
||||
if (I->isInlineAsm()) {
|
||||
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
|
||||
const char *AsmStr = I->getOperand(0).getSymbolName();
|
||||
|
||||
// inlineasm length estimate is number of bytes assuming the longest
|
||||
// instruction.
|
||||
uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
|
||||
NumInstr += MaxAsmSize / MAI->getMaxInstLength();
|
||||
} else {
|
||||
++NumInstr;
|
||||
}
|
||||
|
||||
if (NumInstr >= SkipThreshold)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -189,36 +205,55 @@ void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
|
|||
.addOperand(To);
|
||||
}
|
||||
|
||||
void SILowerControlFlow::SkipIfDead(MachineInstr &MI) {
|
||||
|
||||
bool SILowerControlFlow::skipIfDead(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
DebugLoc DL = MI.getDebugLoc();
|
||||
|
||||
if (MBB.getParent()->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
|
||||
!shouldSkip(&MBB, &MBB.getParent()->back()))
|
||||
return;
|
||||
return false;
|
||||
|
||||
MachineBasicBlock::iterator Insert = &MI;
|
||||
++Insert;
|
||||
LivePhysRegs RemainderLiveRegs(TRI);
|
||||
RemainderLiveRegs.addLiveOuts(MBB);
|
||||
|
||||
MachineBasicBlock *SkipBB;
|
||||
MachineBasicBlock *RemainderBB;
|
||||
std::tie(SkipBB, RemainderBB) = splitBlock(MBB, MI.getIterator());
|
||||
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
// If the exec mask is non-zero, skip the next two instructions
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addImm(3);
|
||||
BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
|
||||
.addMBB(RemainderBB);
|
||||
|
||||
MBB.addSuccessor(RemainderBB);
|
||||
|
||||
MachineBasicBlock::iterator Insert = SkipBB->begin();
|
||||
|
||||
// Exec mask is zero: Export to NULL target...
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::VGPR0)
|
||||
.addReg(AMDGPU::VGPR0)
|
||||
.addReg(AMDGPU::VGPR0)
|
||||
.addReg(AMDGPU::VGPR0);
|
||||
BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
|
||||
.addImm(0)
|
||||
.addImm(0x09) // V_008DFC_SQ_EXP_NULL
|
||||
.addImm(0)
|
||||
.addImm(1)
|
||||
.addImm(1)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef)
|
||||
.addReg(AMDGPU::VGPR0, RegState::Undef);
|
||||
|
||||
// ... and terminate wavefront
|
||||
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
|
||||
// ... and terminate wavefront.
|
||||
BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
|
||||
|
||||
for (const MachineInstr &Inst : reverse(*RemainderBB))
|
||||
RemainderLiveRegs.stepBackward(Inst);
|
||||
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
for (unsigned Reg : RemainderLiveRegs) {
|
||||
if (MRI.isAllocatable(Reg))
|
||||
RemainderBB->addLiveIn(Reg);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void SILowerControlFlow::If(MachineInstr &MI) {
|
||||
|
@ -386,20 +421,13 @@ void SILowerControlFlow::Kill(MachineInstr &MI) {
|
|||
}
|
||||
|
||||
// All currently live registers must remain so in the remainder block.
|
||||
void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB,
|
||||
const MachineInstr &MI,
|
||||
MachineBasicBlock &LoopBB,
|
||||
MachineBasicBlock &RemainderBB,
|
||||
unsigned SaveReg,
|
||||
const MachineOperand &IdxReg) {
|
||||
LivePhysRegs RemainderLiveRegs(TRI);
|
||||
|
||||
RemainderLiveRegs.addLiveOuts(MBB);
|
||||
for (MachineBasicBlock::const_reverse_iterator I = MBB.rbegin(), E(&MI);
|
||||
I != E; ++I) {
|
||||
RemainderLiveRegs.stepBackward(*I);
|
||||
}
|
||||
|
||||
void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
|
||||
const MachineRegisterInfo &MRI,
|
||||
const MachineInstr &MI,
|
||||
MachineBasicBlock &LoopBB,
|
||||
MachineBasicBlock &RemainderBB,
|
||||
unsigned SaveReg,
|
||||
const MachineOperand &IdxReg) {
|
||||
// Add reg defined in loop body.
|
||||
RemainderLiveRegs.addReg(SaveReg);
|
||||
|
||||
|
@ -410,13 +438,11 @@ void SILowerControlFlow::splitBlockLiveIns(const MachineBasicBlock &MBB,
|
|||
}
|
||||
}
|
||||
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
for (unsigned Reg : RemainderLiveRegs) {
|
||||
if (MRI.isAllocatable(Reg))
|
||||
RemainderBB.addLiveIn(Reg);
|
||||
}
|
||||
|
||||
|
||||
const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
|
||||
if (!Src->isUndef())
|
||||
LoopBB.addLiveIn(Src->getReg());
|
||||
|
@ -469,6 +495,30 @@ void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
|
|||
.addMBB(&LoopBB);
|
||||
}
|
||||
|
||||
std::pair<MachineBasicBlock *, MachineBasicBlock *>
|
||||
SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) {
|
||||
MachineFunction *MF = MBB.getParent();
|
||||
|
||||
// To insert the loop we need to split the block. Move everything after this
|
||||
// point to a new block, and insert a new empty block between the two.
|
||||
MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
|
||||
MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
|
||||
MachineFunction::iterator MBBI(MBB);
|
||||
++MBBI;
|
||||
|
||||
MF->insert(MBBI, LoopBB);
|
||||
MF->insert(MBBI, RemainderBB);
|
||||
|
||||
// Move the rest of the block into a new block.
|
||||
RemainderBB->transferSuccessors(&MBB);
|
||||
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
|
||||
|
||||
MBB.addSuccessor(LoopBB);
|
||||
|
||||
return std::make_pair(LoopBB, RemainderBB);
|
||||
}
|
||||
|
||||
// Returns true if a new block was inserted.
|
||||
bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
|
@ -492,7 +542,6 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs
|
|||
return false;
|
||||
}
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
|
||||
SaveOp->setIsDead(false);
|
||||
unsigned Save = SaveOp->getReg();
|
||||
|
@ -505,25 +554,24 @@ bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offs
|
|||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
// To insert the loop we need to split the block. Move everything after this
|
||||
// point to a new block, and insert a new empty block between the two.
|
||||
MachineBasicBlock *LoopBB = MF.CreateMachineBasicBlock();
|
||||
MachineBasicBlock *RemainderBB = MF.CreateMachineBasicBlock();
|
||||
MachineFunction::iterator MBBI(MBB);
|
||||
++MBBI;
|
||||
LivePhysRegs RemainderLiveRegs(TRI);
|
||||
|
||||
MF.insert(MBBI, LoopBB);
|
||||
MF.insert(MBBI, RemainderBB);
|
||||
RemainderLiveRegs.addLiveOuts(MBB);
|
||||
|
||||
LoopBB->addSuccessor(LoopBB);
|
||||
MachineBasicBlock *LoopBB;
|
||||
MachineBasicBlock *RemainderBB;
|
||||
|
||||
std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
|
||||
|
||||
for (const MachineInstr &Inst : reverse(*RemainderBB))
|
||||
RemainderLiveRegs.stepBackward(Inst);
|
||||
|
||||
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
LoopBB->addSuccessor(RemainderBB);
|
||||
LoopBB->addSuccessor(LoopBB);
|
||||
|
||||
splitBlockLiveIns(MBB, MI, *LoopBB, *RemainderBB, Save, *Idx);
|
||||
|
||||
// Move the rest of the block into a new block.
|
||||
RemainderBB->transferSuccessors(&MBB);
|
||||
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
|
||||
MBB.addSuccessor(LoopBB);
|
||||
splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
|
||||
*RemainderBB, Save, *Idx);
|
||||
|
||||
emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
|
||||
|
||||
|
@ -695,16 +743,25 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
case AMDGPU::SI_END_CF:
|
||||
if (--Depth == 0 && HaveKill) {
|
||||
SkipIfDead(MI);
|
||||
HaveKill = false;
|
||||
|
||||
if (skipIfDead(MI)) {
|
||||
NextBB = std::next(BI);
|
||||
BE = MF.end();
|
||||
Next = MBB.end();
|
||||
}
|
||||
}
|
||||
EndCf(MI);
|
||||
break;
|
||||
|
||||
case AMDGPU::SI_KILL:
|
||||
if (Depth == 0)
|
||||
SkipIfDead(MI);
|
||||
else
|
||||
if (Depth == 0) {
|
||||
if (skipIfDead(MI)) {
|
||||
NextBB = std::next(BI);
|
||||
BE = MF.end();
|
||||
Next = MBB.end();
|
||||
}
|
||||
} else
|
||||
HaveKill = true;
|
||||
Kill(MI);
|
||||
break;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
|
||||
# CHECK: bb.1:
|
||||
# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
|
||||
|
||||
# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
|
||||
|
@ -103,7 +103,7 @@ body: |
|
|||
|
||||
# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
|
||||
# CHECK: bb.1:
|
||||
# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
|
||||
|
||||
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
|
||||
|
@ -159,7 +159,7 @@ body: |
|
|||
|
||||
# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
|
||||
# CHECK: bb.1:
|
||||
# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
|
||||
|
||||
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
|
||||
|
@ -215,7 +215,7 @@ body: |
|
|||
|
||||
# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
|
||||
# CHECK: bb.1:
|
||||
# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
|
||||
|
||||
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
|
||||
|
@ -272,7 +272,7 @@ body: |
|
|||
|
||||
# CHECK-LABEL: insert_undef_value_offset_vgpr{{$}}
|
||||
# CHECK: bb.1:
|
||||
# CHECK: successors: %bb.1(0x40000000 / 0x80000000 = 50.00%), %bb.2(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
|
||||
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}
|
||||
|
||||
# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_pos:
|
||||
; CHECK-NEXT: ; BB#0:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
|
||||
call void @llvm.AMDGPU.kill(float 0.0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_depth_0_imm_neg:
|
||||
; CHECK-NEXT: ; BB#0:
|
||||
; CHECK-NEXT: s_mov_b64 exec, 0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
|
||||
call void @llvm.AMDGPU.kill(float -0.0)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_depth_var:
|
||||
; CHECK-NEXT: ; BB#0:
|
||||
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
|
||||
call void @llvm.AMDGPU.kill(float %x)
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: why does the skip depend on the asm length in the same block?
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_control_flow:
|
||||
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; CHECK: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK: ; BB#1:
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
|
||||
; CHECK: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: ; BB#3:
|
||||
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
|
||||
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v7
|
||||
; CHECK-NEXT: {{^}}BB{{[0-9]+_[0-9]+}}:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test_kill_control_flow(i32 inreg %arg) #0 {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %arg, 0
|
||||
br i1 %cmp, label %bb, label %exit
|
||||
|
||||
bb:
|
||||
%var = call float asm sideeffect "
|
||||
v_mov_b32_e64 v7, -1
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64", "={VGPR7}"()
|
||||
call void @llvm.AMDGPU.kill(float %var)
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_kill_control_flow_remainder:
|
||||
; CHECK: s_cmp_lg_i32 s{{[0-9]+}}, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 [[RETURN_BB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK-NEXT: ; BB#1: ; %bb
|
||||
; CHECK: v_mov_b32_e64 v7, -1
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: v_nop_e64
|
||||
; CHECK: ;;#ASMEND
|
||||
; CHECK: v_mov_b32_e64 v8, -1
|
||||
; CHECK: ;;#ASMEND
|
||||
; CHECK-NEXT: s_cbranch_execnz [[SPLIT_BB:BB[0-9]+_[0-9]+]]
|
||||
|
||||
; CHECK-NEXT: ; BB#3:
|
||||
; CHECK-NEXT: exp 0, 9, 0, 1, 1, v0, v0, v0, v0
|
||||
; CHECK-NEXT: s_endpgm
|
||||
|
||||
; CHECK-NEXT: {{^}}[[SPLIT_BB]]:
|
||||
; CHECK-NEXT: v_cmpx_le_f32_e32 vcc, 0, v7
|
||||
; CHECK: buffer_store_dword v8
|
||||
; CHECK: v_mov_b32_e64 v9, -2
|
||||
|
||||
; CHECK: {{^}}BB{{[0-9]+_[0-9]+}}:
|
||||
; CHECK: buffer_store_dword v9
|
||||
; CHECK-NEXT: s_endpgm
|
||||
define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
|
||||
entry:
|
||||
%cmp = icmp eq i32 %arg, 0
|
||||
br i1 %cmp, label %bb, label %exit
|
||||
|
||||
bb:
|
||||
%var = call float asm sideeffect "
|
||||
v_mov_b32_e64 v7, -1
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64
|
||||
v_nop_e64", "={VGPR7}"()
|
||||
%live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={VGPR8}"()
|
||||
call void @llvm.AMDGPU.kill(float %var)
|
||||
store volatile float %live.across, float addrspace(1)* undef
|
||||
%live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={VGPR9}"()
|
||||
br label %exit
|
||||
|
||||
exit:
|
||||
%phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
|
||||
store float %phi, float addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.AMDGPU.kill(float) #0
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue