AMDGPU/GlobalISel: Implement select for G_ICMP and G_SELECT

Reviewers: arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60640

llvm-svn: 363576
This commit is contained in:
Tom Stellard 2019-06-17 16:27:43 +00:00
parent a8dcd47688
commit 8b1c53b528
5 changed files with 559 additions and 5 deletions

View File

@ -59,11 +59,52 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
static bool isSCC(unsigned Reg, const MachineRegisterInfo &MRI) {
if (Reg == AMDGPU::SCC)
return true;
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return false;
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
const TargetRegisterClass *RC =
RegClassOrBank.dyn_cast<const TargetRegisterClass*>();
if (RC)
return RC->getID() == AMDGPU::SReg_32_XM0RegClassID &&
MRI.getType(Reg).getSizeInBits() == 1;
const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
return RB->getID() == AMDGPU::SCCRegBankID;
}
// Selects a copy instruction: the instruction descriptor is switched to the
// target-independent COPY, and a copy whose source is a virtual SCC value is
// special-cased below.
bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
I.setDesc(TII.get(TargetOpcode::COPY));
// Special case for COPY from the scc register bank. The scc register bank
// is modeled using 32-bit sgprs.
const MachineOperand &Src = I.getOperand(1);
unsigned SrcReg = Src.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(SrcReg) && isSCC(SrcReg, MRI)) {
// NOTE(review): DstReg is initialized with the *bit size* of operand 0's
// register rather than with the register itself, and that size is then
// passed to getRegSizeInBits() again as if it were a register number. This
// looks unintended; confirm what DstSize evaluates to for scc->vcc copies
// before changing it, because the 64-bit check below depends on it.
unsigned DstReg = TRI.getRegSizeInBits(I.getOperand(0).getReg(), MRI);
unsigned DstSize = TRI.getRegSizeInBits(DstReg, MRI);
// We have a copy from a 32-bit to 64-bit register. This happens
// when we are selecting scc->vcc copies.
if (DstSize == 64) {
const DebugLoc &DL = I.getDebugLoc();
// Replace the copy with a compare of the source value against 0 (not-equal),
// written to the original destination register.
BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), I.getOperand(0).getReg())
.addImm(0)
.addReg(SrcReg);
// Give the source register a register class if it does not have one yet.
if (!MRI.getRegClassOrNull(SrcReg))
MRI.setRegClass(SrcReg, TRI.getConstrainedRegClassForOperand(Src, MRI));
I.eraseFromParent();
return true;
}
}
// Physical register operands are skipped by this loop.
for (const MachineOperand &MO : I.operands()) {
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
continue;
@ -262,6 +303,101 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I,
return false;
}
/// Maps an integer comparison predicate to the VALU (VOP3, _e64) compare
/// opcode for operands of \p Size bits. \p Size must be 32 or 64.
static unsigned getV_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  assert(Size == 32 || Size == 64);

  // Chooses between the 32-bit and the 64-bit flavour of one comparison.
  const bool Is32Bit = Size == 32;
  auto Pick = [Is32Bit](unsigned Opc32, unsigned Opc64) -> unsigned {
    return Is32Bit ? Opc32 : Opc64;
  };

  switch (P) {
  // Equality.
  case CmpInst::ICMP_NE:
    return Pick(AMDGPU::V_CMP_NE_U32_e64, AMDGPU::V_CMP_NE_U64_e64);
  case CmpInst::ICMP_EQ:
    return Pick(AMDGPU::V_CMP_EQ_U32_e64, AMDGPU::V_CMP_EQ_U64_e64);
  // Signed ordering.
  case CmpInst::ICMP_SGT:
    return Pick(AMDGPU::V_CMP_GT_I32_e64, AMDGPU::V_CMP_GT_I64_e64);
  case CmpInst::ICMP_SGE:
    return Pick(AMDGPU::V_CMP_GE_I32_e64, AMDGPU::V_CMP_GE_I64_e64);
  case CmpInst::ICMP_SLT:
    return Pick(AMDGPU::V_CMP_LT_I32_e64, AMDGPU::V_CMP_LT_I64_e64);
  case CmpInst::ICMP_SLE:
    return Pick(AMDGPU::V_CMP_LE_I32_e64, AMDGPU::V_CMP_LE_I64_e64);
  // Unsigned ordering.
  case CmpInst::ICMP_UGT:
    return Pick(AMDGPU::V_CMP_GT_U32_e64, AMDGPU::V_CMP_GT_U64_e64);
  case CmpInst::ICMP_UGE:
    return Pick(AMDGPU::V_CMP_GE_U32_e64, AMDGPU::V_CMP_GE_U64_e64);
  case CmpInst::ICMP_ULT:
    return Pick(AMDGPU::V_CMP_LT_U32_e64, AMDGPU::V_CMP_LT_U64_e64);
  case CmpInst::ICMP_ULE:
    return Pick(AMDGPU::V_CMP_LE_U32_e64, AMDGPU::V_CMP_LE_U64_e64);
  default:
    llvm_unreachable("Unknown condition code!");
  }
}
/// Maps an integer comparison predicate to the scalar (SALU) compare opcode.
/// Only 32-bit operands are handled.
static unsigned getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) {
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  switch (P) {
  // Equality.
  case CmpInst::ICMP_EQ:
    return AMDGPU::S_CMP_EQ_U32;
  case CmpInst::ICMP_NE:
    return AMDGPU::S_CMP_LG_U32;
  // Signed ordering.
  case CmpInst::ICMP_SLT:
    return AMDGPU::S_CMP_LT_I32;
  case CmpInst::ICMP_SLE:
    return AMDGPU::S_CMP_LE_I32;
  case CmpInst::ICMP_SGT:
    return AMDGPU::S_CMP_GT_I32;
  case CmpInst::ICMP_SGE:
    return AMDGPU::S_CMP_GE_I32;
  // Unsigned ordering.
  case CmpInst::ICMP_ULT:
    return AMDGPU::S_CMP_LT_U32;
  case CmpInst::ICMP_ULE:
    return AMDGPU::S_CMP_LE_U32;
  case CmpInst::ICMP_UGT:
    return AMDGPU::S_CMP_GT_U32;
  case CmpInst::ICMP_UGE:
    return AMDGPU::S_CMP_GE_U32;
  default:
    llvm_unreachable("Unknown condition code!");
  }
}
/// Selects G_ICMP.
///
/// When the result register is an SCC value (see isSCC), a scalar S_CMP_* is
/// emitted followed by a COPY of $scc into the result register. Otherwise a
/// VALU V_CMP_*_e64 is emitted that writes the result register directly.
///
/// \returns true only if every newly built instruction could be constrained.
bool AMDGPUInstructionSelector::selectG_ICMP(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  // I stays alive until the eraseFromParent() calls below, so a reference to
  // its debug location is sufficient.
  const DebugLoc &DL = I.getDebugLoc();

  unsigned SrcReg = I.getOperand(2).getReg();
  unsigned Size = RBI.getSizeInBits(SrcReg, MRI, TRI);
  // FIXME: VI supports 64-bit compares.
  assert(Size == 32);

  const auto Pred =
      static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());

  unsigned CCReg = I.getOperand(0).getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned Opcode = getS_CMPOpcode(Pred, Size);
    // The scalar compare has no explicit def; its result is read back from
    // $scc by the COPY built right after it.
    MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode))
                             .add(I.getOperand(2))
                             .add(I.getOperand(3));
    MachineInstr *Copy = BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), CCReg)
                             .addReg(AMDGPU::SCC);

    // Constrain both instructions unconditionally and in a fixed order, then
    // report success only if both succeeded. Combining the results with '|'
    // would hide a failure of one of them and leaves the call order
    // unspecified.
    const bool ICmpOk = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
    const bool CopyOk = constrainSelectedInstRegOperands(*Copy, TII, TRI, RBI);
    I.eraseFromParent();
    return ICmpOk && CopyOk;
  }

  // Redundant with the stricter 32-bit assert above for now.
  assert(Size == 32 || Size == 64);
  unsigned Opcode = getV_CMPOpcode(Pred, Size);
  MachineInstr *ICmp = BuildMI(*BB, &I, DL, TII.get(Opcode),
                               I.getOperand(0).getReg())
                           .add(I.getOperand(2))
                           .add(I.getOperand(3));
  // Constrain the compare result to SReg_64 before constraining the
  // remaining operands.
  RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
                               AMDGPU::SReg_64RegClass, MRI);
  bool Ret = constrainSelectedInstRegOperands(*ICmp, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
static MachineInstr *
buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
unsigned Reg0, unsigned Reg1, unsigned Reg2, unsigned Reg3,
@ -325,6 +461,53 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return false;
}
/// Selects G_SELECT.
///
/// When the condition is an SCC value (see isSCC), it is copied into $scc and
/// an S_CSELECT_B32/B64 is emitted. Otherwise a 32-bit V_CNDMASK_B32_e64 is
/// emitted that uses the condition register as its mask operand.
///
/// \returns true only if every newly built instruction could be constrained.
bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
  MachineBasicBlock *BB = I.getParent();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const DebugLoc &DL = I.getDebugLoc();

  unsigned DstReg = I.getOperand(0).getReg();
  unsigned Size = RBI.getSizeInBits(DstReg, MRI, TRI);
  assert(Size == 32 || Size == 64);

  const MachineOperand &CCOp = I.getOperand(1);
  unsigned CCReg = CCOp.getReg();
  if (isSCC(CCReg, MRI)) {
    unsigned SelectOpcode = Size == 32 ? AMDGPU::S_CSELECT_B32
                                       : AMDGPU::S_CSELECT_B64;
    // S_CSELECT reads the condition from $scc, so move it there first.
    MachineInstr *CopySCC =
        BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
            .addReg(CCReg);

    // The generic constrainSelectedInstRegOperands doesn't work for the scc
    // register bank, because it does not cover the register class that is
    // used to represent it. So the register class is set manually here.
    if (!MRI.getRegClassOrNull(CCReg))
      MRI.setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, MRI));

    MachineInstr *Select = BuildMI(*BB, &I, DL, TII.get(SelectOpcode), DstReg)
                               .add(I.getOperand(2))
                               .add(I.getOperand(3));

    // Constrain both instructions unconditionally and in a fixed order, then
    // report success only if both succeeded. Combining the results with '|'
    // would hide a failure of one of them and leaves the call order
    // unspecified.
    const bool SelectOk =
        constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
    const bool CopyOk =
        constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
    I.eraseFromParent();
    return SelectOk && CopyOk;
  }

  assert(Size == 32);
  // FIXME: Support 64-bit select
  // Operand order: G_SELECT operand 3 (false value) is added before
  // operand 2 (true value), each preceded by an immediate 0, and the
  // condition register comes last.
  MachineInstr *Select =
      BuildMI(*BB, &I, DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
          .addImm(0)
          .add(I.getOperand(3))
          .addImm(0)
          .add(I.getOperand(2))
          .add(I.getOperand(1));
  bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
  I.eraseFromParent();
  return Ret;
}
bool AMDGPUInstructionSelector::selectG_STORE(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineFunction *MF = BB->getParent();
@ -573,10 +756,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
return selectG_INTRINSIC(I, CoverageInfo);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectG_INTRINSIC_W_SIDE_EFFECTS(I, CoverageInfo);
case TargetOpcode::G_ICMP:
return selectG_ICMP(I);
case TargetOpcode::G_LOAD:
if (selectImpl(I, CoverageInfo))
return true;
return selectG_LOAD(I);
case TargetOpcode::G_SELECT:
return selectG_SELECT(I);
case TargetOpcode::G_STORE:
return selectG_STORE(I);
}

View File

@ -72,11 +72,13 @@ private:
bool selectG_INTRINSIC(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I,
CodeGenCoverage &CoverageInfo) const;
bool selectG_ICMP(MachineInstr &I) const;
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
bool selectG_LOAD(MachineInstr &I) const;
bool selectG_SELECT(MachineInstr &I) const;
bool selectG_STORE(MachineInstr &I) const;
InstructionSelector::ComplexRendererFns

View File

@ -1688,6 +1688,10 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
Size = PowerOf2Ceil(Size);
switch (Size) {
case 1:
if (RB->getID() == AMDGPU::SCCRegBankID)
return &AMDGPU::SReg_32_XM0RegClass;
break;
case 32:
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass :
&AMDGPU::SReg_32_XM0RegClass;
@ -1710,8 +1714,9 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
return RB->getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass :
&AMDGPU::SReg_512RegClass;
default:
llvm_unreachable("not implemented");
break;
}
llvm_unreachable("not implemented");
}
unsigned SIRegisterInfo::getVCC() const {

View File

@ -1,9 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
--- |
define amdgpu_kernel void @copy(i32 addrspace(1)* %global0) {ret void}
...
---
name: copy
@ -22,6 +19,60 @@ body: |
%0:sgpr(p1) = COPY $sgpr2_sgpr3
%1:vgpr(p1) = COPY %0
%2:vgpr(s32) = G_IMPLICIT_DEF
G_STORE %2, %1 :: (store 4 into %ir.global0)
G_STORE %2, %1 :: (store 4, addrspace 1)
...
---
name: copy_vcc_scc
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
; GCN-LABEL: name: copy_vcc_scc
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:scc(s1) = COPY $scc
%4:vcc(s1) = COPY %3
%5:vgpr(s32) = G_SELECT %4, %1, %2
G_STORE %5, %0 :: (store 4, addrspace 1)
...
---
name: copy_vcc_scc_2_uses
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $scc
; GCN-LABEL: name: copy_vcc_scc_2_uses
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GCN: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $scc
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_]], implicit $exec
; GCN: [[V_CMP_NE_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 0, [[COPY3]], implicit $exec
; GCN: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_CNDMASK_B32_e64_]], 0, [[COPY1]], [[V_CMP_NE_U32_e64_1]], implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY]], [[V_CNDMASK_B32_e64_1]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:scc(s1) = COPY $scc
%4:vcc(s1) = COPY %3
%5:vgpr(s32) = G_SELECT %4, %1, %2
%6:vcc(s1) = COPY %3
%7:vgpr(s32) = G_SELECT %6, %1, %5
G_STORE %7, %0 :: (store 4, addrspace 1)
...
---

View File

@ -0,0 +1,309 @@
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
---
name: icmp_s_mix
legalized: true
regBankSelected: true
# GCN: name: icmp_s_mix
# GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
# GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
# GCN: [[SGPR2:%[0-9]+]]:sreg_32 = COPY $sgpr2
# GCN: [[SGPR3:%[0-9]+]]:sreg_32 = COPY $sgpr3
# GCN: [[SGPR4:%[0-9]+]]:sreg_32 = COPY $sgpr4
# GCN: [[SGPR5:%[0-9]+]]:sreg_32 = COPY $sgpr5
# GCN: [[SGPR6:%[0-9]+]]:sreg_32 = COPY $sgpr6
# GCN: [[SGPR7:%[0-9]+]]:sreg_32 = COPY $sgpr7
# GCN: S_CMP_LG_U32 [[SGPR0]], [[SGPR1]], implicit-def $scc
# GCN-NEXT: [[COND0:%[0-9]+]]:sreg_32_xm0 = COPY $scc
# GCN: S_CMP_LG_U32 [[SGPR4]], [[SGPR5]], implicit-def $scc
# GCN-NEXT: [[COND1:%[0-9]+]]:sreg_32_xm0 = COPY $scc
# GCN: $scc = COPY [[COND0]]
# GCN-NEXT: S_CSELECT_B32 [[SGPR6]], [[SGPR7]], implicit $scc
# GCN: $scc = COPY [[COND1]]
# GCN-NEXT: S_CSELECT_B32 [[SGPR2]], [[SGPR3]], implicit $scc
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:sgpr(s32) = COPY $sgpr0
%2:sgpr(s32) = COPY $sgpr1
%3:sgpr(s32) = COPY $sgpr2
%4:sgpr(s32) = COPY $sgpr3
%5:sgpr(s32) = COPY $sgpr4
%6:sgpr(s32) = COPY $sgpr5
%7:sgpr(s32) = COPY $sgpr6
%8:sgpr(s32) = COPY $sgpr7
%9:scc(s1) = G_ICMP intpred(ne), %1, %2
%10:scc(s1) = G_ICMP intpred(ne), %5, %6
%11:sgpr(s32) = G_SELECT %9, %7, %8
%12:sgpr(s32) = G_SELECT %10, %3, %4
%13:vgpr(s32) = COPY %11
G_STORE %13, %0 :: (volatile store 4, addrspace 1)
%14:vgpr(s32) = COPY %12
G_STORE %14, %0 :: (volatile store 4, addrspace 1)
...
---
name: icmp_salu
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_salu
# GCN: S_CMP_LG_U32
# GCN: S_CMP_EQ_U32
# GCN: S_CMP_GT_I32
# GCN: S_CMP_GE_I32
# GCN: S_CMP_LT_I32
# GCN: S_CMP_LE_I32
# GCN: S_CMP_GT_U32
# GCN: S_CMP_GE_U32
# GCN: S_CMP_LT_U32
# GCN: S_CMP_LE_U32
body: |
bb.0:
liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:sgpr(s32) = COPY $sgpr0
%2:sgpr(s32) = COPY $sgpr1
%3:sgpr(s32) = COPY $sgpr2
%4:sgpr(s32) = COPY $sgpr3
%5:scc(s1) = G_ICMP intpred(ne), %1, %2
%6:scc(s1) = G_ICMP intpred(eq), %1, %2
%7:scc(s1) = G_ICMP intpred(sgt), %1, %2
%8:scc(s1) = G_ICMP intpred(sge), %1, %2
%9:scc(s1) = G_ICMP intpred(slt), %1, %2
%10:scc(s1) = G_ICMP intpred(sle), %1, %2
%11:scc(s1) = G_ICMP intpred(ugt), %1, %2
%12:scc(s1) = G_ICMP intpred(uge), %1, %2
%13:scc(s1) = G_ICMP intpred(ult), %1, %2
%14:scc(s1) = G_ICMP intpred(ule), %1, %2
%15:sgpr(s32) = G_SELECT %5, %3, %4
%16:sgpr(s32) = G_SELECT %6, %3, %4
%17:sgpr(s32) = G_SELECT %7, %3, %4
%18:sgpr(s32) = G_SELECT %8, %3, %4
%19:sgpr(s32) = G_SELECT %9, %3, %4
%20:sgpr(s32) = G_SELECT %10, %3, %4
%21:sgpr(s32) = G_SELECT %11, %3, %4
%22:sgpr(s32) = G_SELECT %12, %3, %4
%23:sgpr(s32) = G_SELECT %13, %3, %4
%24:sgpr(s32) = G_SELECT %14, %3, %4
%25:vgpr(s32) = COPY %15
G_STORE %25, %0 :: (volatile store 4, addrspace 1)
%26:vgpr(s32) = COPY %16
G_STORE %26, %0 :: (volatile store 4, addrspace 1)
%27:vgpr(s32) = COPY %17
G_STORE %27, %0 :: (volatile store 4, addrspace 1)
%28:vgpr(s32) = COPY %18
G_STORE %28, %0 :: (volatile store 4, addrspace 1)
%29:vgpr(s32) = COPY %19
G_STORE %29, %0 :: (volatile store 4, addrspace 1)
%30:vgpr(s32) = COPY %20
G_STORE %30, %0 :: (volatile store 4, addrspace 1)
%31:vgpr(s32) = COPY %21
G_STORE %31, %0 :: (volatile store 4, addrspace 1)
%32:vgpr(s32) = COPY %22
G_STORE %32, %0 :: (volatile store 4, addrspace 1)
%33:vgpr(s32) = COPY %23
G_STORE %33, %0 :: (volatile store 4, addrspace 1)
%34:vgpr(s32) = COPY %24
G_STORE %34, %0 :: (volatile store 4, addrspace 1)
...
---
name: icmp_v_mix
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_v_mix
# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
# GCN: [[VGPR4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
# GCN: [[VGPR5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
# GCN: [[VGPR6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
# GCN: [[VGPR7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
# GCN: [[VGPR8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
# GCN: [[VGPR9:%[0-9]+]]:vgpr_32 = COPY $vgpr9
# GCN: [[COND0:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
# GCN: [[COND1:%[0-9]+]]:sreg_64_xexec = V_CMP_NE_U32_e64 [[VGPR6]], [[VGPR7]]
# GCN: V_CNDMASK_B32_e64 0, [[VGPR9]], 0, [[VGPR8]], [[COND0]]
# GCN: V_CNDMASK_B32_e64 0, [[VGPR5]], 0, [[VGPR4]], [[COND1]]
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:vgpr(s32) = COPY $vgpr5
%5:vgpr(s32) = COPY $vgpr6
%6:vgpr(s32) = COPY $vgpr7
%7:vgpr(s32) = COPY $vgpr8
%8:vgpr(s32) = COPY $vgpr9
%9:sgpr(s1) = G_ICMP intpred(ne), %1, %2
%10:sgpr(s1) = G_ICMP intpred(ne), %5, %6
%11:vgpr(s32) = G_SELECT %9, %7, %8
%12:vgpr(s32) = G_SELECT %10, %3, %4
G_STORE %11, %0 :: (volatile store 4, addrspace 1)
G_STORE %12, %0 :: (volatile store 4, addrspace 1)
...
---
name: icmp_valu
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_valu
# GCN: V_CMP_NE_U32_e64
# GCN: V_CMP_EQ_U32_e64
# GCN: V_CMP_GT_I32_e64
# GCN: V_CMP_GE_I32_e64
# GCN: V_CMP_LT_I32_e64
# GCN: V_CMP_LE_I32_e64
# GCN: V_CMP_GT_U32_e64
# GCN: V_CMP_GE_U32_e64
# GCN: V_CMP_LT_U32_e64
# GCN: V_CMP_LE_U32_e64
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:vgpr(s32) = COPY $vgpr5
%5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
%6:sgpr(s1) = G_ICMP intpred(eq), %1, %2
%7:sgpr(s1) = G_ICMP intpred(sgt), %1, %2
%8:sgpr(s1) = G_ICMP intpred(sge), %1, %2
%9:sgpr(s1) = G_ICMP intpred(slt), %1, %2
%10:sgpr(s1) = G_ICMP intpred(sle), %1, %2
%11:sgpr(s1) = G_ICMP intpred(ugt), %1, %2
%12:sgpr(s1) = G_ICMP intpred(uge), %1, %2
%13:sgpr(s1) = G_ICMP intpred(ult), %1, %2
%14:sgpr(s1) = G_ICMP intpred(ule), %1, %2
%15:vgpr(s32) = G_SELECT %5, %3, %4
%16:vgpr(s32) = G_SELECT %6, %3, %4
%17:vgpr(s32) = G_SELECT %7, %3, %4
%18:vgpr(s32) = G_SELECT %8, %3, %4
%19:vgpr(s32) = G_SELECT %9, %3, %4
%20:vgpr(s32) = G_SELECT %10, %3, %4
%21:vgpr(s32) = G_SELECT %11, %3, %4
%22:vgpr(s32) = G_SELECT %12, %3, %4
%23:vgpr(s32) = G_SELECT %13, %3, %4
%24:vgpr(s32) = G_SELECT %14, %3, %4
G_STORE %15, %0 :: (volatile store 4, addrspace 1)
G_STORE %16, %0 :: (volatile store 4, addrspace 1)
G_STORE %17, %0 :: (volatile store 4, addrspace 1)
G_STORE %18, %0 :: (volatile store 4, addrspace 1)
G_STORE %19, %0 :: (volatile store 4, addrspace 1)
G_STORE %20, %0 :: (volatile store 4, addrspace 1)
G_STORE %21, %0 :: (volatile store 4, addrspace 1)
G_STORE %22, %0 :: (volatile store 4, addrspace 1)
G_STORE %23, %0 :: (volatile store 4, addrspace 1)
G_STORE %24, %0 :: (volatile store 4, addrspace 1)
...
---
name: icmp_vv
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_vv
# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
# GCN: [[VGPR3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[VGPR3]]
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:vgpr(s32) = COPY $vgpr5
%5:sgpr(s1) = G_ICMP intpred(ne), %1, %2
%6:vgpr(s32) = G_SELECT %5, %3, %4
G_STORE %6, %0 :: (store 4, addrspace 1)
...
---
name: icmp_vs
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_vs
# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
# GCN: V_CMP_NE_U32_e64 [[VGPR2]], [[SGPR0]]
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:sgpr(s32) = COPY $sgpr0
%5:sgpr(s1) = G_ICMP intpred(ne), %1, %4
%6:vgpr(s32) = G_SELECT %5, %2, %3
G_STORE %6, %0 :: (store 4, addrspace 1)
...
---
name: icmp_sv
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_sv
# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:sgpr(s32) = COPY $sgpr0
%5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
%6:vgpr(s32) = G_SELECT %5, %2, %3
G_STORE %6, %0 :: (store 4, addrspace 1)
...
---
name: icmp_or_vcc
legalized: true
regBankSelected: true
# GCN-LABEL: name: icmp_or_vcc
# GCN: [[VGPR2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
# GCN: [[SGPR0:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr0
# GCN: V_CMP_NE_U32_e64 [[SGPR0]], [[VGPR2]]
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr0
%0:vgpr(p1) = COPY $vgpr0_vgpr1
%1:vgpr(s32) = COPY $vgpr2
%2:vgpr(s32) = COPY $vgpr3
%3:vgpr(s32) = COPY $vgpr4
%4:sgpr(s32) = COPY $sgpr0
%5:sgpr(s1) = G_ICMP intpred(ne), %4, %1
%6:vgpr(s32) = G_SELECT %5, %2, %3
G_STORE %6, %0 :: (store 4, addrspace 1)
...
---