[ARM] Rewrite how VCMP are lowered, using a single node

This removes the VCEQ/VCNE/VCGE/VCEQZ/etc nodes, just using two nodes called
VCMP and VCMPZ with an extra operand as the condition code. I believe this will make
some combines simpler, allowing us to just look at these codes and not the
operands. It also helps fill in a missing VCGTUZ MVE selection without adding
extra nodes for it.

Differential Revision: https://reviews.llvm.org/D65072

llvm-svn: 366934
This commit is contained in:
David Green 2019-07-24 17:36:47 +00:00
parent 7d318b2bb1
commit cd7a6fa314
9 changed files with 286 additions and 257 deletions

View File

@ -1519,20 +1519,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST"; case ARMISD::PREDICATE_CAST: return "ARMISD::PREDICATE_CAST";
case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCMP: return "ARMISD::VCMP";
case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCMPZ: return "ARMISD::VCMPZ";
case ARMISD::VCNE: return "ARMISD::VCNE";
case ARMISD::VCNEZ: return "ARMISD::VCNEZ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
case ARMISD::VCLE: return "ARMISD::VCLE";
case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
case ARMISD::VCGT: return "ARMISD::VCGT";
case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
case ARMISD::VCLT: return "ARMISD::VCLT";
case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
case ARMISD::VCGTU: return "ARMISD::VCGTU";
case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VTST: return "ARMISD::VTST";
case ARMISD::VSHLs: return "ARMISD::VSHLs"; case ARMISD::VSHLs: return "ARMISD::VSHLs";
@ -5881,10 +5869,9 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) { const ARMSubtarget *ST) {
SDValue TmpOp0, TmpOp1;
bool Invert = false; bool Invert = false;
bool Swap = false; bool Swap = false;
unsigned Opc = 0; unsigned Opc = ARMCC::AL;
SDValue Op0 = Op.getOperand(0); SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1); SDValue Op1 = Op.getOperand(1);
@ -5940,44 +5927,48 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
case ISD::SETUNE: case ISD::SETUNE:
case ISD::SETNE: case ISD::SETNE:
if (ST->hasMVEFloatOps()) { if (ST->hasMVEFloatOps()) {
Opc = ARMISD::VCNE; break; Opc = ARMCC::NE; break;
} else { } else {
Invert = true; LLVM_FALLTHROUGH; Invert = true; LLVM_FALLTHROUGH;
} }
case ISD::SETOEQ: case ISD::SETOEQ:
case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETEQ: Opc = ARMCC::EQ; break;
case ISD::SETOLT: case ISD::SETOLT:
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT: case ISD::SETOGT:
case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETGT: Opc = ARMCC::GT; break;
case ISD::SETOLE: case ISD::SETOLE:
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE: case ISD::SETOGE:
case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETGE: Opc = ARMCC::GE; break;
case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETONE: case ISD::SETONE: {
// Expand this to (OLT | OGT). // Expand this to (OLT | OGT).
TmpOp0 = Op0; SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
TmpOp1 = Op1; DAG.getConstant(ARMCC::GT, dl, MVT::i32));
Opc = ISD::OR; SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); DAG.getConstant(ARMCC::GT, dl, MVT::i32));
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
break; if (Invert)
case ISD::SETUO: Result = DAG.getNOT(dl, Result, VT);
Invert = true; return Result;
LLVM_FALLTHROUGH; }
case ISD::SETO: case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETO: {
// Expand this to (OLT | OGE). // Expand this to (OLT | OGE).
TmpOp0 = Op0; SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
TmpOp1 = Op1; DAG.getConstant(ARMCC::GT, dl, MVT::i32));
Opc = ISD::OR; SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); DAG.getConstant(ARMCC::GE, dl, MVT::i32));
Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
break; if (Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
}
} }
} else { } else {
// Integer comparisons. // Integer comparisons.
@ -5985,23 +5976,23 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
default: llvm_unreachable("Illegal integer comparison"); default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: case ISD::SETNE:
if (ST->hasMVEIntegerOps()) { if (ST->hasMVEIntegerOps()) {
Opc = ARMISD::VCNE; break; Opc = ARMCC::NE; break;
} else { } else {
Invert = true; LLVM_FALLTHROUGH; Invert = true; LLVM_FALLTHROUGH;
} }
case ISD::SETEQ: Opc = ARMISD::VCEQ; break; case ISD::SETEQ: Opc = ARMCC::EQ; break;
case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = ARMISD::VCGT; break; case ISD::SETGT: Opc = ARMCC::GT; break;
case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGE: Opc = ARMISD::VCGE; break; case ISD::SETGE: Opc = ARMCC::GE; break;
case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: Opc = ARMISD::VCGTU; break; case ISD::SETUGT: Opc = ARMCC::HI; break;
case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: Opc = ARMISD::VCGEU; break; case ISD::SETUGE: Opc = ARMCC::HS; break;
} }
// Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
if (ST->hasNEON() && Opc == ARMISD::VCEQ) { if (ST->hasNEON() && Opc == ARMCC::EQ) {
SDValue AndOp; SDValue AndOp;
if (ISD::isBuildVectorAllZeros(Op1.getNode())) if (ISD::isBuildVectorAllZeros(Op1.getNode()))
AndOp = Op0; AndOp = Op0;
@ -6013,10 +6004,12 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
AndOp = AndOp.getOperand(0); AndOp = AndOp.getOperand(0);
if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
Opc = ARMISD::VTST;
Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0)); Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1)); Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
Invert = !Invert; SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
if (!Invert)
Result = DAG.getNOT(dl, Result, VT);
return Result;
} }
} }
} }
@ -6030,34 +6023,20 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
if (ISD::isBuildVectorAllZeros(Op1.getNode())) if (ISD::isBuildVectorAllZeros(Op1.getNode()))
SingleOp = Op0; SingleOp = Op0;
else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
if (Opc == ARMISD::VCGE) if (Opc == ARMCC::GE)
Opc = ARMISD::VCLEZ; Opc = ARMCC::LE;
else if (Opc == ARMISD::VCGT) else if (Opc == ARMCC::GT)
Opc = ARMISD::VCLTZ; Opc = ARMCC::LT;
SingleOp = Op1; SingleOp = Op1;
} }
SDValue Result; SDValue Result;
if (SingleOp.getNode()) { if (SingleOp.getNode()) {
switch (Opc) { Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
case ARMISD::VCNE: DAG.getConstant(Opc, dl, MVT::i32));
assert(ST->hasMVEIntegerOps() && "Unexpected DAG node");
Result = DAG.getNode(ARMISD::VCNEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCEQ:
Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGE:
Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLEZ:
Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCGT:
Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
case ARMISD::VCLTZ:
Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
default:
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
}
} else { } else {
Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1); Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
DAG.getConstant(Opc, dl, MVT::i32));
} }
Result = DAG.getSExtOrTrunc(Result, dl, VT); Result = DAG.getSExtOrTrunc(Result, dl, VT);
@ -7488,7 +7467,8 @@ static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
// Now return the result of comparing the shuffled vector with zero, // Now return the result of comparing the shuffled vector with zero,
// which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
return DAG.getNode(ARMISD::VCNEZ, dl, VT, Shuffled); return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
} }
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
@ -7830,7 +7810,8 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
// Now return the result of comparing the subvector with zero, // Now return the result of comparing the subvector with zero,
// which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
return DAG.getNode(ARMISD::VCNEZ, dl, VT, ConVec); return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
} }
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
@ -7891,7 +7872,8 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG,
// Now return the result of comparing the subvector with zero, // Now return the result of comparing the subvector with zero,
// which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
return DAG.getNode(ARMISD::VCNEZ, dl, VT, SubVec); return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
DAG.getConstant(ARMCC::NE, dl, MVT::i32));
} }
/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
@ -11826,52 +11808,72 @@ static SDValue PerformORCombineToBFI(SDNode *N,
return SDValue(); return SDValue();
} }
static bool isValidMVECond(unsigned CC, bool IsFloat) {
switch (CC) {
case ARMCC::EQ:
case ARMCC::NE:
case ARMCC::LE:
case ARMCC::GT:
case ARMCC::GE:
case ARMCC::LT:
return true;
case ARMCC::HS:
case ARMCC::HI:
return !IsFloat;
default:
return false;
};
}
static SDValue PerformORCombine_i1(SDNode *N, static SDValue PerformORCombine_i1(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI, TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) { const ARMSubtarget *Subtarget) {
// Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
// together with predicates // together with predicates
struct Codes {
unsigned Opcode;
unsigned Opposite;
} InvertCodes[] = {
{ARMISD::VCEQ, ARMISD::VCNE},
{ARMISD::VCEQZ, ARMISD::VCNEZ},
{ARMISD::VCGE, ARMISD::VCLT},
{ARMISD::VCGEZ, ARMISD::VCLTZ},
{ARMISD::VCGT, ARMISD::VCLE},
{ARMISD::VCGTZ, ARMISD::VCLEZ},
};
EVT VT = N->getValueType(0); EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0); SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1); SDValue N1 = N->getOperand(1);
unsigned Opposite0 = 0; ARMCC::CondCodes CondCode0 = ARMCC::AL;
unsigned Opposite1 = 0; ARMCC::CondCodes CondCode1 = ARMCC::AL;
for (auto Code : InvertCodes) { if (N0->getOpcode() == ARMISD::VCMP)
if (N0->getOpcode() == Code.Opcode) CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(2))
Opposite0 = Code.Opposite; ->getZExtValue();
if (N0->getOpcode() == Code.Opposite) else if (N0->getOpcode() == ARMISD::VCMPZ)
Opposite0 = Code.Opcode; CondCode0 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N0->getOperand(1))
if (N1->getOpcode() == Code.Opcode) ->getZExtValue();
Opposite1 = Code.Opposite; if (N1->getOpcode() == ARMISD::VCMP)
if (N1->getOpcode() == Code.Opposite) CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(2))
Opposite1 = Code.Opcode; ->getZExtValue();
} else if (N1->getOpcode() == ARMISD::VCMPZ)
CondCode1 = (ARMCC::CondCodes)cast<const ConstantSDNode>(N1->getOperand(1))
->getZExtValue();
if (!Opposite0 || !Opposite1) if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL)
return SDValue();
unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0);
unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1);
if (!isValidMVECond(Opposite0,
N0->getOperand(0)->getValueType(0).isFloatingPoint()) ||
!isValidMVECond(Opposite1,
N1->getOperand(0)->getValueType(0).isFloatingPoint()))
return SDValue(); return SDValue();
SmallVector<SDValue, 4> Ops0; SmallVector<SDValue, 4> Ops0;
for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) Ops0.push_back(N0->getOperand(0));
Ops0.push_back(N0->getOperand(i)); if (N0->getOpcode() == ARMISD::VCMP)
Ops0.push_back(N0->getOperand(1));
Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32));
SmallVector<SDValue, 4> Ops1; SmallVector<SDValue, 4> Ops1;
for (unsigned i = 0, e = N1->getNumOperands(); i != e; ++i) Ops1.push_back(N1->getOperand(0));
Ops1.push_back(N1->getOperand(i)); if (N1->getOpcode() == ARMISD::VCMP)
Ops1.push_back(N1->getOperand(1));
Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32));
SDValue NewN0 = DCI.DAG.getNode(Opposite0, SDLoc(N0), VT, Ops0); SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0);
SDValue NewN1 = DCI.DAG.getNode(Opposite1, SDLoc(N1), VT, Ops1); SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1);
SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1); SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And, return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
DCI.DAG.getAllOnesConstant(SDLoc(N), VT)); DCI.DAG.getAllOnesConstant(SDLoc(N), VT));

View File

@ -131,20 +131,8 @@ class VectorType;
PREDICATE_CAST, // Predicate cast for MVE i1 types PREDICATE_CAST, // Predicate cast for MVE i1 types
VCEQ, // Vector compare equal. VCMP, // Vector compare.
VCEQZ, // Vector compare equal to zero. VCMPZ, // Vector compare to zero.
VCNE, // Vector compare not equal (MVE)
VCNEZ, // Vector compare not equal to zero (MVE)
VCGE, // Vector compare greater than or equal.
VCGEZ, // Vector compare greater than or equal to zero.
VCLE, // Vector compare less than or equal.
VCLEZ, // Vector compare less than or equal to zero.
VCGEU, // Vector compare unsigned greater than or equal.
VCGT, // Vector compare greater than.
VCGTZ, // Vector compare greater than zero.
VCLT, // Vector compare less than.
VCLTZ, // Vector compare less than zero.
VCGTU, // Vector compare unsigned greater than.
VTST, // Vector test bits. VTST, // Vector test bits.
// Vector shift by vector // Vector shift by vector

View File

@ -265,23 +265,12 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>; def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>; def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; SDTCisInt<3>]>;
def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
def ARMvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>;
def ARMvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>;
def ARMvcne : SDNode<"ARMISD::VCNE", SDTARMVCMP>;
def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
def ARMvcle : SDNode<"ARMISD::VCLE", SDTARMVCMP>;
def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
def ARMvclt : SDNode<"ARMISD::VCLT", SDTARMVCMP>;
def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>; def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>; def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;

View File

@ -2982,118 +2982,120 @@ def MVE_VCMPs8r : MVE_VCMPqrs<"s8", 0b00>;
def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>;
def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>;
multiclass unpred_vcmp_z<SDPatternOperator opnode, string suffix, int fc> { multiclass unpred_vcmp_z<string suffix, int fc> {
def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1))), def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>;
def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1))), def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>;
def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1))), def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1))))), def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
} }
multiclass unpred_vcmp_r<SDPatternOperator opnode, string suffix, int fc> { multiclass unpred_vcmp_r<string suffix, int fc> {
def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>;
def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>;
def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>;
def i8r : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)))), def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>;
def i16r : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)))), def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>;
def i32r : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)))), def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))))), def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)))))), def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))),
(v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))),
(v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))),
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
} }
multiclass unpred_vcmpf_z<SDPatternOperator opnode, int fc> { multiclass unpred_vcmpf_z<int fc> {
def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1))), def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1))), def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))),
(v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
} }
multiclass unpred_vcmpf_r<SDPatternOperator opnode, int fc> { multiclass unpred_vcmpf_r<int fc> {
def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>;
def f16r : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)))), def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>;
def f32r : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)))), def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
(v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
(v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)))))), def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))),
(v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)))))), def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
} }
let Predicates = [HasMVEInt] in { let Predicates = [HasMVEInt] in {
defm MVE_VCEQZ : unpred_vcmp_z<ARMvceqz, "i", 0>; defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>;
defm MVE_VCNEZ : unpred_vcmp_z<ARMvcnez, "i", 1>; defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>;
defm MVE_VCGEZ : unpred_vcmp_z<ARMvcgez, "s", 10>; defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>;
defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>; defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>;
defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>; defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>;
defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>; defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>;
defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;
defm MVE_VCEQ : unpred_vcmp_r<ARMvceq, "i", 0>; defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
defm MVE_VCNE : unpred_vcmp_r<ARMvcne, "i", 1>; defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
defm MVE_VCGE : unpred_vcmp_r<ARMvcge, "s", 10>; defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
defm MVE_VCLT : unpred_vcmp_r<ARMvclt, "s", 11>; defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>; defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
defm MVE_VCLE : unpred_vcmp_r<ARMvcle, "s", 13>; defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
defm MVE_VCGTU : unpred_vcmp_r<ARMvcgtu, "u", 8>; defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>; defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
} }
let Predicates = [HasMVEFloat] in { let Predicates = [HasMVEFloat] in {
defm MVE_VFCEQZ : unpred_vcmpf_z<ARMvceqz, 0>; defm MVE_VFCEQZ : unpred_vcmpf_z<0>;
defm MVE_VFCNEZ : unpred_vcmpf_z<ARMvcnez, 1>; defm MVE_VFCNEZ : unpred_vcmpf_z<1>;
defm MVE_VFCGEZ : unpred_vcmpf_z<ARMvcgez, 10>; defm MVE_VFCGEZ : unpred_vcmpf_z<10>;
defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>; defm MVE_VFCLTZ : unpred_vcmpf_z<11>;
defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>; defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>; defm MVE_VFCLEZ : unpred_vcmpf_z<13>;
defm MVE_VFCGE : unpred_vcmpf_r<ARMvcge, 10>; defm MVE_VFCEQ : unpred_vcmpf_r<0>;
defm MVE_VFCLT : unpred_vcmpf_r<ARMvclt, 11>; defm MVE_VFCNE : unpred_vcmpf_r<1>;
defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>; defm MVE_VFCGE : unpred_vcmpf_r<10>;
defm MVE_VFCLE : unpred_vcmpf_r<ARMvcle, 13>; defm MVE_VFCLT : unpred_vcmpf_r<11>;
defm MVE_VFCEQ : unpred_vcmpf_r<ARMvceq, 0>; defm MVE_VFCGT : unpred_vcmpf_r<12>;
defm MVE_VFCNE : unpred_vcmpf_r<ARMvcne, 1>; defm MVE_VFCLE : unpred_vcmpf_r<13>;
} }

View File

@ -478,7 +478,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
// NEON-specific DAG Nodes. // NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>;
// Types for vector shift by immediates. The "SHX" version is for long and // Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different // narrow operations where the source and destination vectors have different
@ -3313,30 +3314,30 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// source operand element sizes of 8, 16 and 32 bits: // source operand element sizes of 8, 16 and 32 bits:
multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
bits<5> op11_7, bit op4, string opc, string Dt, bits<5> op11_7, bit op4, string opc, string Dt,
string asm, SDNode OpNode> { string asm, int fc> {
// 64-bit vector types. // 64-bit vector types.
def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4, def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "", opc, !strconcat(Dt, "8"), asm, "",
[(set DPR:$Vd, (v8i8 (OpNode (v8i8 DPR:$Vm))))]>; [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>;
def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "", opc, !strconcat(Dt, "16"), asm, "",
[(set DPR:$Vd, (v4i16 (OpNode (v4i16 DPR:$Vm))))]>; [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>;
def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "", opc, !strconcat(Dt, "32"), asm, "",
[(set DPR:$Vd, (v2i32 (OpNode (v2i32 DPR:$Vm))))]>; [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>;
def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4, def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f32", asm, "", opc, "f32", asm, "",
[(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> { [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> {
let Inst{10} = 1; // overwrite F = 1 let Inst{10} = 1; // overwrite F = 1
} }
def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4, def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
opc, "f16", asm, "", opc, "f16", asm, "",
[(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>, [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>,
Requires<[HasNEON,HasFullFP16]> { Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1 let Inst{10} = 1; // overwrite F = 1
} }
@ -3345,30 +3346,83 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4, def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "8"), asm, "", opc, !strconcat(Dt, "8"), asm, "",
[(set QPR:$Vd, (v16i8 (OpNode (v16i8 QPR:$Vm))))]>; [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>;
def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "16"), asm, "", opc, !strconcat(Dt, "16"), asm, "",
[(set QPR:$Vd, (v8i16 (OpNode (v8i16 QPR:$Vm))))]>; [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>;
def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, !strconcat(Dt, "32"), asm, "", opc, !strconcat(Dt, "32"), asm, "",
[(set QPR:$Vd, (v4i32 (OpNode (v4i32 QPR:$Vm))))]>; [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>;
def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4, def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f32", asm, "", opc, "f32", asm, "",
[(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> { [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> {
let Inst{10} = 1; // overwrite F = 1 let Inst{10} = 1; // overwrite F = 1
} }
def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4, def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vm), NoItinerary, (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
opc, "f16", asm, "", opc, "f16", asm, "",
[(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>, [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>,
Requires<[HasNEON,HasFullFP16]> { Requires<[HasNEON,HasFullFP16]> {
let Inst{10} = 1; // overwrite F = 1 let Inst{10} = 1; // overwrite F = 1
} }
} }
// Neon 3-register comparisons.
class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), (i32 fc))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, int fc, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
[(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), (i32 fc))))]> {
// All of these have a two-operand InstAlias.
let TwoOperandAliasConstraint = "$Vn = $Vd";
let isCommutable = Commutable;
}
multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
int fc, bit Commutable = 0> {
// 64-bit vector types.
def v8i8 : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, fc, Commutable>;
def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, fc, Commutable>;
def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, fc, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, fc, Commutable>;
def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, fc, Commutable>;
def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, fc, Commutable>;
}
// Neon 2-register vector intrinsics, // Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits: // element sizes of 8, 16 and 32 bits:
@ -5013,67 +5067,67 @@ def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
// Vector Comparisons. // Vector Comparisons.
// VCEQ : Vector Compare Equal // VCEQ : Vector Compare Equal
defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vceq", "i", ARMvceq, 1>; IIC_VSUBi4Q, "vceq", "i", 0, 1>;
def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
ARMvceq, 1>; 0, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
ARMvceq, 1>; 0, 1>;
def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
ARMvceq, 1>, 0, 1>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
ARMvceq, 1>, 0, 1>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in let TwoOperandAliasConstraint = "$Vm = $Vd" in
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
"$Vd, $Vm, #0", ARMvceqz>; "$Vd, $Vm, #0", 0>;
// VCGE : Vector Compare Greater Than or Equal // VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "s", ARMvcge, 0>; IIC_VSUBi4Q, "vcge", "s", 10, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcge", "u", ARMvcgeu, 0>; IIC_VSUBi4Q, "vcge", "u", 2, 0>;
def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
ARMvcge, 0>; 10, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
ARMvcge, 0>; 10, 0>;
def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
ARMvcge, 0>, 10, 0>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
ARMvcge, 0>, 10, 0>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in { let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
"$Vd, $Vm, #0", ARMvcgez>; "$Vd, $Vm, #0", 10>;
defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
"$Vd, $Vm, #0", ARMvclez>; "$Vd, $Vm, #0", 13>;
} }
// VCGT : Vector Compare Greater Than // VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcgt", "s", ARMvcgt, 0>; IIC_VSUBi4Q, "vcgt", "s", 12, 0>;
defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
IIC_VSUBi4Q, "vcgt", "u", ARMvcgtu, 0>; IIC_VSUBi4Q, "vcgt", "u", 8, 0>;
def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
ARMvcgt, 0>; 12, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
ARMvcgt, 0>; 12, 0>;
def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
ARMvcgt, 0>, 12, 0>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
ARMvcgt, 0>, 12, 0>,
Requires<[HasNEON, HasFullFP16]>; Requires<[HasNEON, HasFullFP16]>;
let TwoOperandAliasConstraint = "$Vm = $Vd" in { let TwoOperandAliasConstraint = "$Vm = $Vd" in {
defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
"$Vd, $Vm, #0", ARMvcgtz>; "$Vd, $Vm, #0", 12>;
defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$Vd, $Vm, #0", ARMvcltz>; "$Vd, $Vm, #0", 11>;
} }
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)

View File

@ -129,9 +129,8 @@ define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpulez_v4i1: ; CHECK-LABEL: cmpulez_v4i1:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vmov.i32 q2, #0x0
; CHECK-NEXT: vpst ; CHECK-NEXT: vpst
; CHECK-NEXT: vcmpt.u32 cs, q2, q1 ; CHECK-NEXT: vcmpt.u32 cs, q1, zr
; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vpsel q0, q0, q1
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:

View File

@ -158,8 +158,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpulez_v4i1: ; CHECK-LABEL: cmpulez_v4i1:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vcmp.u32 cs, q1, zr
; CHECK-NEXT: vcmp.u32 cs, q2, q1
; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vmrs r1, p0 ; CHECK-NEXT: vmrs r1, p0

View File

@ -151,8 +151,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: cmpulez_v4i1: ; CHECK-LABEL: cmpulez_v4i1:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vcmp.u32 cs, q1, zr
; CHECK-NEXT: vcmp.u32 cs, q2, q1
; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vmrs r0, p0
; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vcmp.i32 eq, q0, zr
; CHECK-NEXT: vmrs r1, p0 ; CHECK-NEXT: vmrs r1, p0

View File

@ -110,8 +110,7 @@ entry:
define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vcmp_ulez_v4i32: ; CHECK-LABEL: vcmp_ulez_v4i32:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q3, #0x0 ; CHECK-NEXT: vcmp.u32 cs, q0, zr
; CHECK-NEXT: vcmp.u32 cs, q3, q0
; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q2
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
@ -230,8 +229,7 @@ entry:
define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: vcmp_ulez_v8i16: ; CHECK-LABEL: vcmp_ulez_v8i16:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q3, #0x0 ; CHECK-NEXT: vcmp.u16 cs, q0, zr
; CHECK-NEXT: vcmp.u16 cs, q3, q0
; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q2
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry:
@ -350,8 +348,7 @@ entry:
define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: vcmp_ulez_v16i8: ; CHECK-LABEL: vcmp_ulez_v16i8:
; CHECK: @ %bb.0: @ %entry ; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.i32 q3, #0x0 ; CHECK-NEXT: vcmp.u8 cs, q0, zr
; CHECK-NEXT: vcmp.u8 cs, q3, q0
; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: vpsel q0, q1, q2
; CHECK-NEXT: bx lr ; CHECK-NEXT: bx lr
entry: entry: