[AArch64][GlobalISel] Optimize conjunctions of compares to conditional compares.

This is a partial port of the same optimization from AArch64ISelLowering.
The original handles more cases because it also applies when generating
regular compares, whereas this port only applies when selecting G_SELECTs.

For more detailed comments see the original comments for
emitConditionalComparison() in AArch64ISelLowering.

Gives minor code size improvements.

Differential Revision: https://reviews.llvm.org/D117166
This commit is contained in:
Amara Emerson 2022-02-20 01:13:34 -08:00
parent b09e63bad1
commit 2a46450849
3 changed files with 479 additions and 194 deletions

View File

@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#define LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
#include "llvm/IR/Instructions.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@ -226,6 +227,37 @@ public:
}
};
/// Common wrapper for the two generic compare opcodes, G_ICMP and G_FCMP.
class GAnyCmp : public GenericMachineInstr {
public:
  /// Predicate stored in operand 1 of the compare.
  CmpInst::Predicate getCond() const {
    const MachineOperand &PredOp = getOperand(1);
    return static_cast<CmpInst::Predicate>(PredOp.getPredicate());
  }

  /// Register in operand 2 (left-hand side of the compare).
  Register getLHSReg() const { return getReg(2); }

  /// Register in operand 3 (right-hand side of the compare).
  Register getRHSReg() const { return getReg(3); }

  /// isa/dyn_cast support: accepts G_ICMP and G_FCMP only.
  static bool classof(const MachineInstr *MI) {
    switch (MI->getOpcode()) {
    case TargetOpcode::G_ICMP:
    case TargetOpcode::G_FCMP:
      return true;
    default:
      return false;
    }
  }
};
/// Wrapper that matches only the integer compare, G_ICMP.
class GICmp : public GAnyCmp {
public:
  /// isa/dyn_cast support: accepts G_ICMP only.
  static bool classof(const MachineInstr *MI) {
    const unsigned Opc = MI->getOpcode();
    return Opc == TargetOpcode::G_ICMP;
  }
};
/// Wrapper that matches only the floating-point compare, G_FCMP.
class GFCmp : public GAnyCmp {
public:
  /// isa/dyn_cast support: accepts G_FCMP only.
  static bool classof(const MachineInstr *MI) {
    const unsigned Opc = MI->getOpcode();
    return Opc == TargetOpcode::G_FCMP;
  }
};
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H

View File

@ -27,6 +27,7 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@ -63,6 +64,7 @@ namespace {
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET
class AArch64InstructionSelector : public InstructionSelector {
public:
AArch64InstructionSelector(const AArch64TargetMachine &TM,
@ -294,6 +296,20 @@ private:
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
/// In some cases this is even possible with OR operations in the expression.
MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
MachineIRBuilder &MIB) const;
MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
CmpInst::Predicate CC,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
MachineIRBuilder &MIB) const;
MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
bool Negate, Register CCOp,
AArch64CC::CondCode Predicate,
MachineIRBuilder &MIB) const;
/// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
/// \p IsNegative is true if the test should be "not zero".
/// This will also optimize the test bit instruction when possible.
@ -425,7 +441,8 @@ private:
void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
// Optimization methods.
bool tryOptSelect(MachineInstr &MI);
bool tryOptSelect(GSelect &Sel);
bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
MachineOperand &Predicate,
MachineIRBuilder &MIRBuilder) const;
@ -1310,6 +1327,90 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
}
}
/// Translate an IR floating-point predicate into AArch64 condition codes.
///
/// \p CondCode always receives a condition code. \p CondCode2 is set to
/// AArch64CC::AL unless the predicate needs a second code (FCMP_ONE and
/// FCMP_UEQ); in that case the predicate holds when either code holds.
static void changeFPCCToAArch64CC(CmpInst::Predicate CC,
                                  AArch64CC::CondCode &CondCode,
                                  AArch64CC::CondCode &CondCode2) {
  // AL in the second slot means a single condition code is sufficient.
  CondCode2 = AArch64CC::AL;

  switch (CC) {
  // Ordered predicates.
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    return;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    return;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    return;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    return;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    return;
  case CmpInst::FCMP_ONE:
    // Needs two codes: MI or GT.
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    return;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    return;

  // Unordered predicates.
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    return;
  case CmpInst::FCMP_UEQ:
    // Needs two codes: EQ or VS.
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    return;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    return;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    return;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    return;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    return;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    return;

  default:
    llvm_unreachable("Unknown FP condition!");
  }
}
/// Translate an IR floating-point predicate into AArch64 condition codes that
/// are combined with AND.
///
/// Unlike changeFPCCToAArch64CC, whose two result codes are meant to be OR'ed,
/// the pair produced here must both hold. \p CondCode2 is AArch64CC::AL when a
/// single code is enough.
static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC,
                                     AArch64CC::CondCode &CondCode,
                                     AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;

  if (CC == CmpInst::FCMP_ONE) {
    // (a one b)
    //   == ((a olt b) || (a ogt b))
    //   == ((a ord b) && (a une b))
    CondCode = AArch64CC::VC;
    CondCode2 = AArch64CC::NE;
    return;
  }

  if (CC == CmpInst::FCMP_UEQ) {
    // (a ueq b)
    //   == ((a uno b) || (a oeq b))
    //   == ((a ule b) && (a uge b))
    CondCode = AArch64CC::PL;
    CondCode2 = AArch64CC::LE;
    return;
  }

  // Every other predicate maps to a single code, so the OR-form helper can be
  // reused as-is.
  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
  assert(CondCode2 == AArch64CC::AL);
}
/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
@ -3292,17 +3393,18 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectCopy(I, TII, MRI, TRI, RBI);
case TargetOpcode::G_SELECT: {
if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
auto &Sel = cast<GSelect>(I);
if (MRI.getType(Sel.getCondReg()) != LLT::scalar(1)) {
LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
<< ", expected: " << LLT::scalar(1) << '\n');
return false;
}
const Register CondReg = I.getOperand(1).getReg();
const Register TReg = I.getOperand(2).getReg();
const Register FReg = I.getOperand(3).getReg();
const Register CondReg = Sel.getCondReg();
const Register TReg = Sel.getTrueReg();
const Register FReg = Sel.getFalseReg();
if (tryOptSelect(I))
if (tryOptSelect(Sel))
return true;
// Make sure to use an unused vreg instead of wzr, so that the peephole
@ -3311,9 +3413,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
return false;
I.eraseFromParent();
Sel.eraseFromParent();
return true;
}
case TargetOpcode::G_ICMP: {
@ -4702,7 +4804,263 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
}
}
bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
/// Returns true if \p Val is a tree of AND/OR/CMP operations that can be
/// expressed as a conjunction.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
///                  changing the conditions on the CMP tests.
///                  (this means we can call emitConjunctionRec() with
///                   Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
///                    cannot do the negation naturally. We are required to
///                    emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
///                   subexpression must be negated. This happens when the
///                   outer expression is an OR. We can use this fact to know
///                   that we have a double negation (or (or ...) ...) that
///                   can be implemented for free.
/// \param MRI Register info used to look up definitions and use counts.
/// \param Depth Current recursion depth; the walk gives up on inner nodes
///              once it exceeds 6.
static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
                               bool WillNegate, MachineRegisterInfo &MRI,
                               unsigned Depth = 0) {
  if (!MRI.hasOneNonDBGUse(Val))
    return false;
  MachineInstr *ValDef = MRI.getVRegDef(Val);
  unsigned Opcode = ValDef->getOpcode();
  if (Opcode == TargetOpcode::G_TRUNC) {
    // Look through a trunc.
    Val = ValDef->getOperand(1).getReg();
    ValDef = MRI.getVRegDef(Val);
    Opcode = ValDef->getOpcode();
  }
  if (isa<GAnyCmp>(ValDef)) {
    CanNegate = true;
    MustBeFirst = false;
    return true;
  }
  // Protect against exponential runtime and stack overflow.
  if (Depth > 6)
    return false;
  if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
    // Val may have changed when looking through a trunc above, so the use
    // count checked at entry does not cover it. emitConjunctionRec() asserts
    // that an inner AND/OR node has a single use, so reject the tree here
    // rather than tripping that assertion later.
    if (!MRI.hasOneNonDBGUse(Val))
      return false;
    bool IsOR = Opcode == TargetOpcode::G_OR;
    Register O0 = ValDef->getOperand(1).getReg();
    Register O1 = ValDef->getOperand(2).getReg();
    bool CanNegateL;
    bool MustBeFirstL;
    if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
      return false;
    bool CanNegateR;
    bool MustBeFirstR;
    if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
      return false;
    // Only one sub-tree can be emitted first.
    if (MustBeFirstL && MustBeFirstR)
      return false;
    if (IsOR) {
      // For an OR expression we need to be able to naturally negate at least
      // one side or we cannot do the transformation at all.
      if (!CanNegateL && !CanNegateR)
        return false;
      // If the result of the OR will be negated and we can naturally negate
      // the leaves, then this sub-tree as a whole negates naturally.
      CanNegate = WillNegate && CanNegateL && CanNegateR;
      // If we cannot naturally negate the whole sub-tree, then this must be
      // emitted first.
      MustBeFirst = !CanNegate;
    } else {
      assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
      // We cannot naturally negate an AND operation.
      CanNegate = false;
      MustBeFirst = MustBeFirstL || MustBeFirstR;
    }
    return true;
  }
  return false;
}
/// Emit a conditional compare (CCMP for integer predicates, FCCMP for
/// floating-point predicates) of \p LHS against \p RHS.
///
/// \param CC        IR predicate of the compare; only used here to choose
///                  between the integer and floating-point opcodes.
/// \param Predicate Condition code added as the instruction's condition
///                  operand.
/// \param OutCC     Condition code the caller will test afterwards. The NZCV
///                  immediate is chosen to satisfy the inverse of this code.
/// \returns the emitted instruction, or nullptr for a floating-point operand
///          size other than 16, 32 or 64 bits.
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
    Register LHS, Register RHS, CmpInst::Predicate CC,
    AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
    MachineIRBuilder &MIB) const {
  // TODO: emit CMN as an optimization.
  auto &MRI = *MIB.getMRI();
  const unsigned OpSize = MRI.getType(LHS).getSizeInBits();
  unsigned CCmpOpc;
  if (CmpInst::isIntPredicate(CC)) {
    // Only the integer forms are limited to 32/64 bits. Asserting this for
    // every compare would make the 16-bit FP case below unreachable in
    // asserts builds.
    assert((OpSize == 32 || OpSize == 64) &&
           "Expected a 32 or 64 bit integer conditional compare");
    CCmpOpc = OpSize == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
  } else {
    switch (OpSize) {
    case 16:
      CCmpOpc = AArch64::FCCMPHrr;
      break;
    case 32:
      CCmpOpc = AArch64::FCCMPSrr;
      break;
    case 64:
      CCmpOpc = AArch64::FCCMPDrr;
      break;
    default:
      return nullptr;
    }
  }
  // The flags immediate is picked so that it satisfies the inverse of OutCC.
  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
  auto CCmp =
      MIB.buildInstr(CCmpOpc, {}, {LHS, RHS}).addImm(NZCV).addImm(Predicate);
  constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
  return &*CCmp;
}
/// Recursively emit the compare chain for the AND/OR/compare tree rooted at
/// \p Val. The tree is expected to have been accepted by canEmitConjunction().
///
/// \param Val       Root of the (sub-)tree; a G_TRUNC at the root is looked
///                  through.
/// \param OutCC     Receives the condition code to test after the returned
///                  instruction.
/// \param Negate    If true, the predicate of a leaf compare is inverted.
/// \param CCOp      Invalid (Register()) when this is the first compare in the
///                  chain, in which case a normal compare is produced;
///                  otherwise a conditional compare is produced.
/// \param Predicate Condition code passed on to the conditional compare.
/// \returns the last compare instruction emitted for this sub-tree.
///
/// NOTE(review): emitConditionalComparison() can return nullptr for an
/// unsupported FP size; the results are dereferenced here without a check.
MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
// If we're at a tree leaf, produce a (conditional) comparison operation.
auto &MRI = *MIB.getMRI();
MachineInstr *ValDef = MRI.getVRegDef(Val);
unsigned Opcode = ValDef->getOpcode();
if (Opcode == TargetOpcode::G_TRUNC) {
// Look through a trunc.
Val = ValDef->getOperand(1).getReg();
ValDef = MRI.getVRegDef(Val);
Opcode = ValDef->getOpcode();
}
if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
Register LHS = Cmp->getLHSReg();
Register RHS = Cmp->getRHSReg();
CmpInst::Predicate CC = Cmp->getCond();
if (Negate)
CC = CmpInst::getInversePredicate(CC);
// Integer compares map to a single AArch64 condition code.
if (isa<GICmp>(Cmp)) {
OutCC = changeICMPPredToAArch64CC(CC);
} else {
// Handle special FP cases.
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
MachineInstr *ExtraCmp;
if (!CCOp)
ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
else
ExtraCmp =
emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
// The extra compare becomes the previous link in the chain, so the compare
// emitted below is conditional on ExtraCC.
CCOp = ExtraCmp->getOperand(0).getReg();
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp) {
auto Dst = MRI.cloneVirtualRegister(LHS);
if (isa<GICmp>(Cmp))
return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB);
return emitFPCompare(Cmp->getOperand(2).getReg(),
Cmp->getOperand(3).getReg(), MIB);
}
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
}
// Not a leaf: this must be a single-use G_AND/G_OR node.
assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == TargetOpcode::G_OR;
// Re-derive the negation properties of both operands.
Register LHS = ValDef->getOperand(1).getReg();
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
Register RHS = ValDef->getOperand(2).getReg();
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == TargetOpcode::G_OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the right sub-tree naturally if possible, otherwise invert the
// condition code it produces.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == TargetOpcode::G_AND &&
"Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees. The right sub-tree is emitted first; the left sub-tree is
// then chained onto its result, predicated on RHSCC.
AArch64CC::CondCode RHSCC;
MachineInstr *CmpR =
emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
MachineInstr *CmpL = emitConjunctionRec(
LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Entry point for conjunction emission: checks that the tree rooted at
/// \p Val can be emitted and, if so, emits it.
///
/// \param OutCC Receives the condition code to test after the returned
///              instruction.
/// \returns the last emitted compare, or nullptr if the tree was rejected by
///          canEmitConjunction().
MachineInstr *AArch64InstructionSelector::emitConjunction(
    Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
  // The negation properties of the root are not needed here.
  bool IgnoredCanNegate;
  bool IgnoredMustBeFirst;
  const bool Emittable =
      canEmitConjunction(Val, IgnoredCanNegate, IgnoredMustBeFirst,
                         /*WillNegate=*/false, *MIB.getMRI());
  if (Emittable)
    return emitConjunctionRec(Val, OutCC, /*Negate=*/false,
                              /*CCOp=*/Register(), AArch64CC::AL, MIB);
  return nullptr;
}
/// Try to select \p SelI by emitting its condition as a chain of
/// (conditional) compares followed by a select on the resulting flags.
///
/// \param SelI   The G_SELECT being selected; erased on success.
/// \param CondMI Currently unused by this function.
/// \returns true if the select was replaced, false if the condition could not
///          be emitted as a conjunction or the select could not be emitted.
bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
                                                         MachineInstr &CondMI) {
  AArch64CC::CondCode CC;
  MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), CC, MIB);
  if (!ConjMI)
    return false;
  // Go through emitSelect() like the regular G_SELECT path does, rather than
  // choosing between CSELWr and CSELXr from the destination size alone.
  if (!emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), CC,
                  MIB))
    return false;
  SelI.eraseFromParent();
  return true;
}
bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
MachineRegisterInfo &MRI = *MIB.getMRI();
// We want to recognize this pattern:
//
@ -4755,8 +5113,11 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
return false;
unsigned CondOpc = CondDef->getOpcode();
if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
if (tryOptSelectConjunction(I, *CondDef))
return true;
return false;
}
AArch64CC::CondCode CondCode;
if (CondOpc == TargetOpcode::G_ICMP) {

View File

@ -569,14 +569,10 @@ define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
;
; GISEL-LABEL: select_and:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
; GISEL-NEXT: cset w8, lt
; GISEL-NEXT: mov w9, #5
; GISEL-NEXT: cmp w9, w1
; GISEL-NEXT: cset w9, ne
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel x0, x2, x3, ne
; GISEL-NEXT: mov w8, #5
; GISEL-NEXT: cmp w8, w1
; GISEL-NEXT: ccmp w0, w1, #0, ne
; GISEL-NEXT: csel x0, x2, x3, lt
; GISEL-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
@ -595,14 +591,10 @@ define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) {
;
; GISEL-LABEL: select_or:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
; GISEL-NEXT: cset w8, lt
; GISEL-NEXT: mov w9, #5
; GISEL-NEXT: cmp w9, w1
; GISEL-NEXT: cset w9, ne
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel x0, x2, x3, ne
; GISEL-NEXT: mov w8, #5
; GISEL-NEXT: cmp w8, w1
; GISEL-NEXT: ccmp w0, w1, #8, eq
; GISEL-NEXT: csel x0, x2, x3, lt
; GISEL-NEXT: ret
%1 = icmp slt i32 %w0, %w1
%2 = icmp ne i32 5, %w1
@ -623,17 +615,13 @@ define i64 @gccbug(i64 %x0, i64 %x1) {
;
; GISEL-LABEL: gccbug:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp x1, #0
; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: mov w9, #2
; GISEL-NEXT: mov w8, #2
; GISEL-NEXT: mov w9, #4
; GISEL-NEXT: mov w10, #1
; GISEL-NEXT: cmp x0, #2
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: cmp x0, #4
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w10, w11, w10
; GISEL-NEXT: and w8, w10, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csinc x0, x9, xzr, ne
; GISEL-NEXT: ccmp x0, x9, #4, ne
; GISEL-NEXT: ccmp x1, xzr, #0, eq
; GISEL-NEXT: csel x0, x8, x10, eq
; GISEL-NEXT: ret
%cmp0 = icmp eq i64 %x1, 0
%cmp1 = icmp eq i64 %x0, 2
@ -658,19 +646,13 @@ define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) {
;
; GISEL-LABEL: select_ororand:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, #0
; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: cmp w1, #13
; GISEL-NEXT: cset w9, hi
; GISEL-NEXT: cmp w2, #2
; GISEL-NEXT: cset w10, lt
; GISEL-NEXT: mov w8, #13
; GISEL-NEXT: mov w9, #2
; GISEL-NEXT: cmp w3, #4
; GISEL-NEXT: cset w11, gt
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: and w9, w10, w11
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w3, wzr, ne
; GISEL-NEXT: ccmp w2, w9, #0, gt
; GISEL-NEXT: ccmp w1, w8, #2, ge
; GISEL-NEXT: ccmp w0, wzr, #4, ls
; GISEL-NEXT: csel w0, w3, wzr, eq
; GISEL-NEXT: ret
%c0 = icmp eq i32 %w0, 0
%c1 = icmp ugt i32 %w1, 13
@ -694,16 +676,10 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
;
; GISEL-LABEL: select_andor:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, w1
; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: cmp w1, w2
; GISEL-NEXT: cset w9, ge
; GISEL-NEXT: cmp w0, #0
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: orr w9, w10, w9
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: ccmp w0, wzr, #4, lt
; GISEL-NEXT: ccmp w0, w1, #0, eq
; GISEL-NEXT: csel w0, w0, w1, eq
; GISEL-NEXT: ret
%c0 = icmp eq i32 %v1, %v2
%c1 = icmp sge i32 %v2, %v3
@ -872,14 +848,9 @@ define i32 @select_and_olt_one(double %v0, double %v1, double %v2, double %v3, i
; GISEL-LABEL: select_and_olt_one:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: cset w10, gt
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #4, mi
; GISEL-NEXT: fccmp d2, d3, #1, ne
; GISEL-NEXT: csel w0, w0, w1, vc
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp one double %v2, %v3
@ -900,14 +871,9 @@ define i32 @select_and_one_olt(double %v0, double %v1, double %v2, double %v3, i
; GISEL-LABEL: select_and_one_olt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: cset w9, gt
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d0, d1, #1, ne
; GISEL-NEXT: fccmp d2, d3, #0, vc
; GISEL-NEXT: csel w0, w0, w1, mi
; GISEL-NEXT: ret
%c0 = fcmp one double %v0, %v1
%c1 = fcmp olt double %v2, %v3
@ -928,14 +894,9 @@ define i32 @select_and_olt_ueq(double %v0, double %v1, double %v2, double %v3, i
; GISEL-LABEL: select_and_olt_ueq:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, eq
; GISEL-NEXT: cset w10, vs
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #0, mi
; GISEL-NEXT: fccmp d2, d3, #8, le
; GISEL-NEXT: csel w0, w0, w1, pl
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp ueq double %v2, %v3
@ -956,14 +917,9 @@ define i32 @select_and_ueq_olt(double %v0, double %v1, double %v2, double %v3, i
; GISEL-LABEL: select_and_ueq_olt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: cset w9, vs
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d0, d1, #8, le
; GISEL-NEXT: fccmp d2, d3, #0, pl
; GISEL-NEXT: csel w0, w0, w1, mi
; GISEL-NEXT: ret
%c0 = fcmp ueq double %v0, %v1
%c1 = fcmp olt double %v2, %v3
@ -984,14 +940,9 @@ define i32 @select_or_olt_one(double %v0, double %v1, double %v2, double %v3, i3
; GISEL-LABEL: select_or_olt_one:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: cset w10, gt
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #0, pl
; GISEL-NEXT: fccmp d2, d3, #8, le
; GISEL-NEXT: csel w0, w0, w1, mi
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp one double %v2, %v3
@ -1012,14 +963,9 @@ define i32 @select_or_one_olt(double %v0, double %v1, double %v2, double %v3, i3
; GISEL-LABEL: select_or_one_olt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: cset w9, gt
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d0, d1, #8, le
; GISEL-NEXT: fccmp d2, d3, #8, pl
; GISEL-NEXT: csel w0, w0, w1, mi
; GISEL-NEXT: ret
%c0 = fcmp one double %v0, %v1
%c1 = fcmp olt double %v2, %v3
@ -1040,14 +986,9 @@ define i32 @select_or_olt_ueq(double %v0, double %v1, double %v2, double %v3, i3
; GISEL-LABEL: select_or_olt_ueq:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, eq
; GISEL-NEXT: cset w10, vs
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #4, pl
; GISEL-NEXT: fccmp d2, d3, #1, ne
; GISEL-NEXT: csel w0, w0, w1, vs
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp ueq double %v2, %v3
@ -1068,14 +1009,9 @@ define i32 @select_or_ueq_olt(double %v0, double %v1, double %v2, double %v3, i3
; GISEL-LABEL: select_or_ueq_olt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, eq
; GISEL-NEXT: cset w9, vs
; GISEL-NEXT: orr w8, w8, w9
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d0, d1, #1, ne
; GISEL-NEXT: fccmp d2, d3, #8, vc
; GISEL-NEXT: csel w0, w0, w1, mi
; GISEL-NEXT: ret
%c0 = fcmp ueq double %v0, %v1
%c1 = fcmp olt double %v2, %v3
@ -1097,17 +1033,10 @@ define i32 @select_or_olt_ogt_ueq(double %v0, double %v1, double %v2, double %v3
; GISEL-LABEL: select_or_olt_ogt_ueq:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, gt
; GISEL-NEXT: fcmp d4, d5
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: cset w11, vs
; GISEL-NEXT: orr w10, w10, w11
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: orr w8, w10, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #0, pl
; GISEL-NEXT: fccmp d4, d5, #4, le
; GISEL-NEXT: fccmp d4, d5, #1, ne
; GISEL-NEXT: csel w0, w0, w1, vs
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp ogt double %v2, %v3
@ -1131,17 +1060,10 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3
; GISEL-LABEL: select_or_olt_ueq_ogt:
; GISEL: ; %bb.0:
; GISEL-NEXT: fcmp d0, d1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcmp d2, d3
; GISEL-NEXT: cset w9, eq
; GISEL-NEXT: cset w10, vs
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: fcmp d4, d5
; GISEL-NEXT: cset w10, gt
; GISEL-NEXT: orr w8, w9, w8
; GISEL-NEXT: orr w8, w10, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp d2, d3, #4, pl
; GISEL-NEXT: fccmp d2, d3, #1, ne
; GISEL-NEXT: fccmp d4, d5, #0, vc
; GISEL-NEXT: csel w0, w0, w1, gt
; GISEL-NEXT: ret
%c0 = fcmp olt double %v0, %v1
%c1 = fcmp ueq double %v2, %v3
@ -1170,15 +1092,11 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32
; GISEL: ; %bb.0:
; GISEL-NEXT: fcvt s0, h0
; GISEL-NEXT: fcvt s1, h1
; GISEL-NEXT: fcvt s2, h2
; GISEL-NEXT: fcvt s3, h3
; GISEL-NEXT: fcmp s0, s1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcvt s0, h2
; GISEL-NEXT: fcvt s1, h3
; GISEL-NEXT: fcmp s0, s1
; GISEL-NEXT: cset w9, ge
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp s2, s3, #8, mi
; GISEL-NEXT: csel w0, w0, w1, ge
; GISEL-NEXT: ret
%c0 = fcmp olt half %v0, %v1
%c1 = fcmp oge half %v2, %v3
@ -1204,17 +1122,12 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32
; GISEL: ; %bb.0:
; GISEL-NEXT: fcvt s0, h0
; GISEL-NEXT: fcvt s1, h1
; GISEL-NEXT: fcvt s2, h2
; GISEL-NEXT: fcvt s3, h3
; GISEL-NEXT: fcmp s0, s1
; GISEL-NEXT: cset w8, mi
; GISEL-NEXT: fcvt s0, h2
; GISEL-NEXT: fcvt s1, h3
; GISEL-NEXT: fcmp s0, s1
; GISEL-NEXT: cset w9, mi
; GISEL-NEXT: cset w10, gt
; GISEL-NEXT: orr w9, w9, w10
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: csel w0, w0, w1, ne
; GISEL-NEXT: fccmp s2, s3, #4, mi
; GISEL-NEXT: fccmp s2, s3, #1, ne
; GISEL-NEXT: csel w0, w0, w1, vc
; GISEL-NEXT: ret
%c0 = fcmp olt half %v0, %v1
%c1 = fcmp one half %v2, %v3
@ -1294,18 +1207,11 @@ define i32 @deep_or(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
;
; GISEL-LABEL: deep_or:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, #0
; GISEL-NEXT: cset w8, ne
; GISEL-NEXT: cmp w1, #0
; GISEL-NEXT: cset w9, ne
; GISEL-NEXT: cmp w2, #15
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: mov w8, #15
; GISEL-NEXT: cmp w2, #20
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w10, w10, w11
; GISEL-NEXT: and w9, w10, w9
; GISEL-NEXT: and w8, w9, w8
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: ccmp w2, w8, #4, ne
; GISEL-NEXT: ccmp w1, wzr, #4, eq
; GISEL-NEXT: ccmp w0, wzr, #4, ne
; GISEL-NEXT: csel w0, w4, w5, ne
; GISEL-NEXT: ret
%c0 = icmp ne i32 %a0, 0
@ -1333,18 +1239,11 @@ define i32 @deep_or1(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
;
; GISEL-LABEL: deep_or1:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, #0
; GISEL-NEXT: cset w8, ne
; GISEL-NEXT: cmp w1, #0
; GISEL-NEXT: cset w9, ne
; GISEL-NEXT: cmp w2, #15
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: mov w8, #15
; GISEL-NEXT: cmp w2, #20
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w10, w10, w11
; GISEL-NEXT: and w8, w8, w10
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: ccmp w2, w8, #4, ne
; GISEL-NEXT: ccmp w0, wzr, #4, eq
; GISEL-NEXT: ccmp w1, wzr, #4, ne
; GISEL-NEXT: csel w0, w4, w5, ne
; GISEL-NEXT: ret
%c0 = icmp ne i32 %a0, 0
@ -1372,18 +1271,11 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
;
; GISEL-LABEL: deep_or2:
; GISEL: ; %bb.0:
; GISEL-NEXT: cmp w0, #0
; GISEL-NEXT: cset w8, ne
; GISEL-NEXT: cmp w1, #0
; GISEL-NEXT: cset w9, ne
; GISEL-NEXT: cmp w2, #15
; GISEL-NEXT: cset w10, eq
; GISEL-NEXT: mov w8, #15
; GISEL-NEXT: cmp w2, #20
; GISEL-NEXT: cset w11, eq
; GISEL-NEXT: orr w10, w10, w11
; GISEL-NEXT: and w8, w8, w9
; GISEL-NEXT: and w8, w8, w10
; GISEL-NEXT: tst w8, #0x1
; GISEL-NEXT: ccmp w2, w8, #4, ne
; GISEL-NEXT: ccmp w1, wzr, #4, eq
; GISEL-NEXT: ccmp w0, wzr, #4, ne
; GISEL-NEXT: csel w0, w4, w5, ne
; GISEL-NEXT: ret
%c0 = icmp ne i32 %a0, 0