forked from OSchip/llvm-project
[AMDGPU][GlobalISel] Insert an and with exec before s_cbranch_vccnz if necessary
While v_cmp will AND inactive lanes with 0, that is not the case for logical operations. This fixes a Vulkan CTS test that would hang otherwise. Differential Revision: https://reviews.llvm.org/D105709
This commit is contained in:
parent
d81a843846
commit
971f4173f8
|
@ -2465,6 +2465,27 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
|
|||
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
|
||||
}
|
||||
|
||||
/// Return true if \p Reg is known to hold the result of a vector compare.
///
/// Per the commit description, v_cmp ANDs inactive lanes with 0 while logical
/// operations do not, so the caller uses this to decide whether a branch
/// condition still needs an explicit AND with exec.
///
/// The walk looks through COPYs, accepts G_AND/G_OR/G_XOR only when both
/// inputs qualify, and treats G_ICMP, G_FCMP and the amdgcn.class intrinsic as
/// compare producers. Everything else, including physical registers and
/// virtual registers without exactly one defining instruction, is rejected.
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
  if (Reg.isPhysical())
    return false;

  // getUniqueVRegDef yields null when the register has no def or more than
  // one. Answering "not a compare" is the conservative choice: the caller then
  // simply inserts the AND with exec.
  const MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
  if (!Def)
    return false;

  switch (Def->getOpcode()) {
  case AMDGPU::COPY:
    // A copy is as good as whatever it copies from.
    return isVCmpResult(Def->getOperand(1).getReg(), MRI);

  case AMDGPU::G_AND:
  case AMDGPU::G_OR:
  case AMDGPU::G_XOR:
    // A logical op only qualifies when both of its inputs do.
    return isVCmpResult(Def->getOperand(1).getReg(), MRI) &&
           isVCmpResult(Def->getOperand(2).getReg(), MRI);

  case TargetOpcode::G_INTRINSIC:
    return Def->getIntrinsicID() == Intrinsic::amdgcn_class;

  case AMDGPU::G_ICMP:
  case AMDGPU::G_FCMP:
    return true;

  default:
    return false;
  }
}
|
||||
|
||||
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
|
||||
MachineBasicBlock *BB = I.getParent();
|
||||
MachineOperand &CondOp = I.getOperand(0);
|
||||
|
@ -2488,11 +2509,22 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
|
|||
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
|
||||
ConstrainRC = &AMDGPU::SReg_32RegClass;
|
||||
} else {
|
||||
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
|
||||
// We sort of know that a VCC producer based on the register bank, that ands
|
||||
// inactive lanes with 0. What if there was a logical operation with vcc
|
||||
// producers in different blocks/with different exec masks?
|
||||
// FIXME: Should scc->vcc copies and with exec?
|
||||
|
||||
// Unless the value of CondReg is a result of a V_CMP* instruction then we
|
||||
// need to insert an and with exec.
|
||||
if (!isVCmpResult(CondReg, *MRI)) {
|
||||
const bool Is64 = STI.isWave64();
|
||||
const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
|
||||
const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
|
||||
|
||||
Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
|
||||
BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
|
||||
.addReg(CondReg)
|
||||
.addReg(Exec);
|
||||
CondReg = TmpReg;
|
||||
}
|
||||
|
||||
CondPhysReg = TRI.getVCC();
|
||||
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
|
||||
ConstrainRC = TRI.getBoolRC();
|
||||
|
|
|
@ -174,3 +174,138 @@ body: |
|
|||
bb.1:
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# The branch condition comes straight from llvm.amdgcn.class; the expected output copies the V_CMP_CLASS result into $vcc with no extra S_AND against $exec.
name: brcond_class_intrinsic
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: brcond_class_intrinsic
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_CLASS_F32_e64_:%[0-9]+]]:sreg_64 = V_CMP_CLASS_F32_e64 0, [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: $vcc = COPY [[V_CMP_CLASS_F32_e64_]]
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.1:
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), %0:vgpr(s32), %1:vgpr(s32)
|
||||
G_BRCOND %2(s1), %bb.1
|
||||
|
||||
bb.1:
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# The condition is a G_AND of a G_ICMP and a G_FCMP; the expected output feeds the S_AND_B64 of the two V_CMP results directly into $vcc, with no S_AND against $exec.
name: brcond_cmp_logic
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: brcond_cmp_logic
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
|
||||
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: %5:sreg_64 = nofpexcept V_CMP_EQ_F32_e64 0, [[COPY2]], 0, [[COPY3]], 0, implicit $mode, implicit $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], %5, implicit-def dead $scc
|
||||
; GCN: $vcc = COPY [[S_AND_B64_]]
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.1:
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
|
||||
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr2
|
||||
%3:vgpr(s32) = COPY $vgpr3
|
||||
%4:vcc(s1) = G_ICMP intpred(eq), %0, %1
|
||||
%5:vcc(s1) = G_FCMP floatpred(oeq), %2, %3
|
||||
%6:vcc(s1) = G_AND %4, %5
|
||||
G_BRCOND %6(s1), %bb.1
|
||||
|
||||
bb.1:
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# One G_AND input is a COPY of an sgpr G_TRUNC rather than a compare, so the expected output adds an S_AND_B64 with $exec before the $vcc copy.
name: brcond_logic
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: brcond_logic
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, [[COPY2]], implicit-def $scc
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 0, [[S_AND_B32_]], implicit $exec
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_AND_B64_]], $exec, implicit-def $scc
|
||||
; GCN: $vcc = COPY [[S_AND_B64_1]]
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.1:
|
||||
bb.0:
|
||||
liveins: $sgpr0, $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:sgpr(s32) = COPY $sgpr0
|
||||
%3:sgpr(s1) = G_TRUNC %2(s32)
|
||||
%4:vcc(s1) = COPY %3(s1)
|
||||
%5:vcc(s1) = G_ICMP intpred(eq), %0, %1
|
||||
%6:vcc(s1) = G_AND %5, %4
|
||||
G_BRCOND %6(s1), %bb.1
|
||||
|
||||
bb.1:
|
||||
|
||||
...
|
||||
|
||||
---
|
||||
|
||||
# The condition is a compare XORed with a constant true copied from sgpr; the expected output adds an S_AND_B64 with $exec before the $vcc copy.
name: brcond_logic_const
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
; GCN-LABEL: name: brcond_logic_const
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
|
||||
; GCN: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1
|
||||
; GCN: [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[V_CMP_EQ_U32_e64_]], [[S_MOV_B64_]], implicit-def dead $scc
|
||||
; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[S_XOR_B64_]], $exec, implicit-def $scc
|
||||
; GCN: $vcc = COPY [[S_AND_B64_]]
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.1:
|
||||
bb.0:
|
||||
liveins: $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = COPY $vgpr1
|
||||
%2:vcc(s1) = G_ICMP intpred(eq), %0, %1
|
||||
%3:sgpr(s1) = G_CONSTANT i1 true
|
||||
%4:vcc(s1) = COPY %3(s1)
|
||||
%5:vcc(s1) = G_XOR %2, %4
|
||||
G_BRCOND %5(s1), %bb.1
|
||||
|
||||
bb.1:
|
||||
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue