forked from OSchip/llvm-project
AMDGPU/GlobalISel: Fix branch targets when emitting SI_IF
The branch target needs to be changed depending on whether there is an unconditional branch or not. Loops also need to be similarly fixed, but compiling a simple testcase end to end requires another set of patches that aren't upstream yet.
This commit is contained in:
parent
7d9b0a61c3
commit
ca19d7a399
|
@@ -1805,14 +1805,26 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
|
|||
|
||||
// Return the use branch instruction, otherwise null if the usage is invalid.
|
||||
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI) {
|
||||
MachineRegisterInfo &MRI,
|
||||
MachineInstr *&Br) {
|
||||
Register CondDef = MI.getOperand(0).getReg();
|
||||
if (!MRI.hasOneNonDBGUse(CondDef))
|
||||
return nullptr;
|
||||
|
||||
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
|
||||
return UseMI.getParent() == MI.getParent() &&
|
||||
UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
|
||||
if (UseMI.getParent() != MI.getParent() ||
|
||||
UseMI.getOpcode() != AMDGPU::G_BRCOND)
|
||||
return nullptr;
|
||||
|
||||
// Make sure the cond br is followed by a G_BR
|
||||
MachineBasicBlock::iterator Next = std::next(UseMI.getIterator());
|
||||
if (Next != MI.getParent()->end()) {
|
||||
if (Next->getOpcode() != AMDGPU::G_BR)
|
||||
return nullptr;
|
||||
Br = &*Next;
|
||||
}
|
||||
|
||||
return &UseMI;
|
||||
}
|
||||
|
||||
Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
|
||||
|
@@ -2341,7 +2353,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
switch (IntrID) {
|
||||
case Intrinsic::amdgcn_if:
|
||||
case Intrinsic::amdgcn_else: {
|
||||
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
|
||||
MachineInstr *Br = nullptr;
|
||||
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
|
||||
const SIRegisterInfo *TRI
|
||||
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
|
||||
|
||||
|
@@ -2349,19 +2362,26 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
Register Def = MI.getOperand(1).getReg();
|
||||
Register Use = MI.getOperand(3).getReg();
|
||||
|
||||
MachineBasicBlock *BrTarget = BrCond->getOperand(1).getMBB();
|
||||
if (Br)
|
||||
BrTarget = Br->getOperand(0).getMBB();
|
||||
|
||||
if (IntrID == Intrinsic::amdgcn_if) {
|
||||
B.buildInstr(AMDGPU::SI_IF)
|
||||
.addDef(Def)
|
||||
.addUse(Use)
|
||||
.addMBB(BrCond->getOperand(1).getMBB());
|
||||
.addMBB(BrTarget);
|
||||
} else {
|
||||
B.buildInstr(AMDGPU::SI_ELSE)
|
||||
.addDef(Def)
|
||||
.addUse(Use)
|
||||
.addMBB(BrCond->getOperand(1).getMBB())
|
||||
.addMBB(BrTarget)
|
||||
.addImm(0);
|
||||
}
|
||||
|
||||
if (Br)
|
||||
Br->getOperand(0).setMBB(BrCond->getOperand(1).getMBB());
|
||||
|
||||
MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
|
||||
MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
|
||||
MI.eraseFromParent();
|
||||
|
@@ -2372,11 +2392,14 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
|
|||
return false;
|
||||
}
|
||||
case Intrinsic::amdgcn_loop: {
|
||||
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
|
||||
MachineInstr *Br = nullptr;
|
||||
if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI, Br)) {
|
||||
const SIRegisterInfo *TRI
|
||||
= static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
|
||||
|
||||
B.setInstr(*BrCond);
|
||||
|
||||
// FIXME: Need to adjust branch targets based on unconditional branch.
|
||||
Register Reg = MI.getOperand(2).getReg();
|
||||
B.buildInstr(AMDGPU::SI_LOOP)
|
||||
.addUse(Reg)
|
||||
|
|
|
@@ -0,0 +1,61 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
|
||||
|
||||
; Make sure the branch targets are correct after lowering llvm.amdgcn.if
|
||||
|
||||
; Conditional branch where the G_BRCOND targets the "if" block and falls
; through to "endif"; SI_IF must keep the G_BRCOND's own target.
define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order0:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    ; mask branch BB0_2
; CHECK-NEXT:    s_cbranch_execz BB0_2
; CHECK-NEXT:  BB0_1: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:  BB0_2: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v
}
|
||||
|
||||
; Same CFG as order0 but with block order swapped, so the conditional branch
; is followed by an unconditional G_BR; SI_IF must take its target from the
; trailing G_BR instead.
define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
; CHECK-LABEL: divergent_if_swap_brtarget_order1:
; CHECK:       ; %bb.0: ; %entry
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; CHECK-NEXT:    ; implicit-def: $vgpr0
; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; CHECK-NEXT:    ; mask branch BB1_2
; CHECK-NEXT:  BB1_1: ; %endif
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_setpc_b64 s[30:31]
; CHECK-NEXT:  BB1_2: ; %if.true
; CHECK-NEXT:    global_load_dword v0, v[0:1], off
; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
entry:
  %c = icmp ne i32 %value, 0
  br i1 %c, label %if.true, label %endif

endif:
  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
  ret i32 %v

if.true:
  %val = load volatile i32, i32 addrspace(1)* undef
  br label %endif
}
|
Loading…
Reference in New Issue