[AArch64] Optimise min/max lowering in ISel

Differential Revision: https://reviews.llvm.org/D106561
This commit is contained in:
Irina Dobrescu 2021-07-22 16:21:48 +01:00
parent 9988ab3989
commit b01417d3c5
7 changed files with 107 additions and 129 deletions

View File

@ -1040,6 +1040,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
for (auto VT : {MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
}
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@ -4825,17 +4831,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
/*OverrideNEON=*/true);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
/*OverrideNEON=*/true);
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
/*OverrideNEON=*/true);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
/*OverrideNEON=*/true);
return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@ -7131,6 +7130,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
/// Custom lowering for ISD::SMAX, ISD::SMIN, ISD::UMAX and ISD::UMIN.
///
/// Scalable vectors, and fixed-length vectors for which
/// useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true) holds, are forwarded
/// to LowerToPredicatedOp with the matching AArch64ISD::*_PRED opcode. Any
/// other type is rewritten as select(setcc(LHS, RHS, CC), LHS, RHS), where CC
/// is the strict signed/unsigned greater-than (max) or less-than (min)
/// condition for the opcode.
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Resolve the comparison and the predicated node kind together so the two
  // mappings cannot drift apart.
  ISD::CondCode CmpCC;
  unsigned PredicatedOpc;
  switch (Op.getOpcode()) {
  case ISD::SMAX:
    CmpCC = ISD::SETGT;
    PredicatedOpc = AArch64ISD::SMAX_PRED;
    break;
  case ISD::SMIN:
    CmpCC = ISD::SETLT;
    PredicatedOpc = AArch64ISD::SMIN_PRED;
    break;
  case ISD::UMAX:
    CmpCC = ISD::SETUGT;
    PredicatedOpc = AArch64ISD::UMAX_PRED;
    break;
  case ISD::UMIN:
    CmpCC = ISD::SETULT;
    PredicatedOpc = AArch64ISD::UMIN_PRED;
    break;
  default:
    llvm_unreachable("Wrong instruction");
  }

  const EVT VT = Op.getValueType();
  const bool LowerViaSVE =
      VT.isScalableVector() ||
      useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true);
  if (LowerViaSVE)
    return LowerToPredicatedOp(Op, DAG, PredicatedOpc, /*OverrideNEON=*/true);

  // Otherwise build the compare + select pair: the result is LHS when the
  // comparison holds and RHS when it does not.
  SDLoc DL(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue Mask = DAG.getSetCC(DL, VT, LHS, RHS, CmpCC);
  return DAG.getSelect(DL, VT, Mask, LHS, RHS);
}
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();

View File

@ -966,6 +966,7 @@ private:
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

View File

@ -220,19 +220,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
case Intrinsic::umin:
case Intrinsic::umax: {
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
// umin(x,y) -> sub(x,usubsat(x,y))
// umax(x,y) -> add(x,usubsat(y,x))
if (LT.second == MVT::v2i64)
return LT.first * 2;
LLVM_FALLTHROUGH;
}
case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
// v2i64 types get converted to cmp+bif hence the cost of 2
if (LT.second == MVT::v2i64)
return LT.first * 2;
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
return LT.first;
break;

View File

@ -96,8 +96,8 @@ define void @reduce_smin() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
@ -135,8 +135,8 @@ define void @reduce_smax() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)

View File

@ -185,13 +185,8 @@ declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smax1i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: cmgt d2, d0, d1
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax1i64:
@ -210,16 +205,8 @@ declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v1.d[1]
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
; CHECK-ISEL-NEXT: fmov x10, d1
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x9, x11, x10, gt
; CHECK-ISEL-NEXT: fmov d0, x9
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smax2i64:
@ -238,26 +225,10 @@ declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smax4i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v2.d[1]
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
; CHECK-ISEL-NEXT: fmov x10, d2
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
; CHECK-ISEL-NEXT: cmp x11, x9
; CHECK-ISEL-NEXT: fmov d0, x10
; CHECK-ISEL-NEXT: fmov x10, d3
; CHECK-ISEL-NEXT: csel x9, x11, x9, gt
; CHECK-ISEL-NEXT: fmov x11, d1
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
; CHECK-ISEL-NEXT: fmov d1, x10
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: mov v1.d[1], x9
; CHECK-ISEL-NEXT: cmgt v4.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: cmgt v5.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@ -457,13 +428,8 @@ declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umax1i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, hi
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: cmhi d2, d0, d1
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax1i64:
@ -482,8 +448,8 @@ declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umax2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v1.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umax2i64:
@ -502,10 +468,10 @@ declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umax4i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v2.2d, v2.2d, v0.2d
; CHECK-ISEL-NEXT: uqsub v3.2d, v3.2d, v1.2d
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: add v1.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: cmhi v4.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: cmhi v5.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@ -705,13 +671,8 @@ declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: smin1i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: cmgt d2, d1, d0
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin1i64:
@ -730,16 +691,8 @@ declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: smin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v1.d[1]
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
; CHECK-ISEL-NEXT: fmov x10, d1
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x9, x11, x10, lt
; CHECK-ISEL-NEXT: fmov d0, x9
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: smin2i64:
@ -758,26 +711,10 @@ declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: smin4i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: mov x8, v2.d[1]
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
; CHECK-ISEL-NEXT: fmov x10, d2
; CHECK-ISEL-NEXT: fmov x11, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
; CHECK-ISEL-NEXT: cmp x11, x9
; CHECK-ISEL-NEXT: fmov d0, x10
; CHECK-ISEL-NEXT: fmov x10, d3
; CHECK-ISEL-NEXT: csel x9, x11, x9, lt
; CHECK-ISEL-NEXT: fmov x11, d1
; CHECK-ISEL-NEXT: cmp x11, x10
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
; CHECK-ISEL-NEXT: fmov d1, x10
; CHECK-ISEL-NEXT: mov v0.d[1], x8
; CHECK-ISEL-NEXT: mov v1.d[1], x9
; CHECK-ISEL-NEXT: cmgt v4.2d, v2.2d, v0.2d
; CHECK-ISEL-NEXT: cmgt v5.2d, v3.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;
@ -977,13 +914,8 @@ declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-ISEL-LABEL: umin1i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-ISEL-NEXT: fmov x8, d1
; CHECK-ISEL-NEXT: fmov x9, d0
; CHECK-ISEL-NEXT: cmp x9, x8
; CHECK-ISEL-NEXT: csel x8, x9, x8, lo
; CHECK-ISEL-NEXT: fmov d0, x8
; CHECK-ISEL-NEXT: cmhi d2, d1, d0
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin1i64:
@ -1002,8 +934,8 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-ISEL-LABEL: umin2i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v1.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v1.2d
; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-ISEL-NEXT: ret
;
; CHECK-GLOBAL-LABEL: umin2i64:
@ -1022,10 +954,10 @@ declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
; CHECK-ISEL-LABEL: umin4i64:
; CHECK-ISEL: // %bb.0:
; CHECK-ISEL-NEXT: uqsub v2.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: uqsub v3.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v2.2d
; CHECK-ISEL-NEXT: sub v1.2d, v1.2d, v3.2d
; CHECK-ISEL-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-ISEL-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
; CHECK-ISEL-NEXT: ret
;

View File

@ -160,10 +160,10 @@ define <2 x i64> @t14(<2 x i64> %a, <2 x i64> %b) {
define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: t15:
; CHECK: // %bb.0:
; CHECK-NEXT: cmhs v4.2d, v3.2d, v1.2d
; CHECK-NEXT: cmhs v5.2d, v2.2d, v0.2d
; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b
; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b
; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d
; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d
; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT: ret
%t1 = icmp ule <4 x i64> %a, %b
%t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b

View File

@ -87,11 +87,11 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
define i64 @test_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: test_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, v0.d[1]
; CHECK-NEXT: fmov x9, d0
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x0, x9, x8, hi
; CHECK-NEXT: ret
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: cmhi d2, d0, d1
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
ret i64 %b
}