forked from OSchip/llvm-project
[AArch64] Optimise min/max lowering in ISel
Differential Revision: https://reviews.llvm.org/D106561
This commit is contained in:
parent
9988ab3989
commit
b01417d3c5
|
@ -1040,6 +1040,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
|
||||
for (auto VT : {MVT::v1i64, MVT::v2i64}) {
|
||||
setOperationAction(ISD::UMAX, VT, Custom);
|
||||
setOperationAction(ISD::SMAX, VT, Custom);
|
||||
setOperationAction(ISD::UMIN, VT, Custom);
|
||||
setOperationAction(ISD::SMIN, VT, Custom);
|
||||
}
|
||||
|
||||
// AArch64 doesn't have MUL.2d:
|
||||
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
|
||||
|
@ -4825,17 +4831,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||
case ISD::UDIV:
|
||||
return LowerDIV(Op, DAG);
|
||||
case ISD::SMIN:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
case ISD::UMIN:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
case ISD::SMAX:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
case ISD::UMAX:
|
||||
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
|
||||
/*OverrideNEON=*/true);
|
||||
return LowerMinMax(Op, DAG);
|
||||
case ISD::SRA:
|
||||
case ISD::SRL:
|
||||
case ISD::SHL:
|
||||
|
@ -7131,6 +7130,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
|
|||
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
|
||||
}
|
||||
|
||||
/// Custom-lower an integer min/max node (ISD::SMAX, SMIN, UMAX or UMIN).
///
/// Scalable vectors, and fixed-length vectors for which
/// useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true) holds, are handed
/// to LowerToPredicatedOp with the matching AArch64ISD::*_PRED opcode. Every
/// other type is rewritten as a compare of the two operands followed by a
/// select between them.
///
/// Any other opcode reaching this function is a caller bug and hits
/// llvm_unreachable.
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Resolve, in a single place, both the comparison under which the first
  // operand is the result and the predicated opcode used by the
  // LowerToPredicatedOp path.
  ISD::CondCode PickFirstCC;
  unsigned PredicatedOpc;
  switch (Op.getOpcode()) {
  case ISD::SMAX:
    PickFirstCC = ISD::SETGT;
    PredicatedOpc = AArch64ISD::SMAX_PRED;
    break;
  case ISD::SMIN:
    PickFirstCC = ISD::SETLT;
    PredicatedOpc = AArch64ISD::SMIN_PRED;
    break;
  case ISD::UMAX:
    PickFirstCC = ISD::SETUGT;
    PredicatedOpc = AArch64ISD::UMAX_PRED;
    break;
  case ISD::UMIN:
    PickFirstCC = ISD::SETULT;
    PredicatedOpc = AArch64ISD::UMIN_PRED;
    break;
  default:
    llvm_unreachable("Wrong instruction");
  }

  EVT ResultVT = Op.getValueType();
  if (VT_IsPredicated:
      ResultVT.isScalableVector() ||
      useSVEForFixedLengthVectorVT(ResultVT, /*OverrideNEON=*/true))
    return LowerToPredicatedOp(Op, DAG, PredicatedOpc, /*OverrideNEON=*/true);

  // Generic form: select(setcc(LHS, RHS, CC), LHS, RHS).
  SDLoc Loc(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue LHSWins = DAG.getSetCC(Loc, ResultVT, LHS, RHS, PickFirstCC);
  return DAG.getSelect(Loc, ResultVT, LHSWins, LHS, RHS);
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
EVT VT = Op.getValueType();
|
||||
|
|
|
@ -966,6 +966,7 @@ private:
|
|||
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
|
||||
// Custom lowering for ISD::SMIN, ISD::SMAX, ISD::UMIN and ISD::UMAX nodes.
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
|
|
@ -220,19 +220,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
|
|||
auto *RetTy = ICA.getReturnType();
|
||||
switch (ICA.getID()) {
|
||||
case Intrinsic::umin:
|
||||
case Intrinsic::umax: {
|
||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
// umin(x,y) -> sub(x,usubsat(x,y))
|
||||
// umax(x,y) -> add(x,usubsat(y,x))
|
||||
if (LT.second == MVT::v2i64)
|
||||
return LT.first * 2;
|
||||
LLVM_FALLTHROUGH;
|
||||
}
|
||||
case Intrinsic::umax:
|
||||
case Intrinsic::smin:
|
||||
case Intrinsic::smax: {
|
||||
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
|
||||
MVT::v8i16, MVT::v2i32, MVT::v4i32};
|
||||
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
|
||||
// v2i64 types get converted to cmp+bif hence the cost of 2
|
||||
if (LT.second == MVT::v2i64)
|
||||
return LT.first * 2;
|
||||
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
|
||||
return LT.first;
|
||||
break;
|
||||
|
|
|
@ -96,8 +96,8 @@ define void @reduce_smin() {
|
|||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
%V1i8 = call <1 x i8> @llvm.smin.v1i8(<1 x i8> undef, <1 x i8> undef)
|
||||
|
@ -135,8 +135,8 @@ define void @reduce_smax() {
|
|||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef)
|
||||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
|
||||
;
|
||||
%V1i8 = call <1 x i8> @llvm.smax.v1i8(<1 x i8> undef, <1 x i8> undef)
|
||||
|
|
|
@ -185,13 +185,8 @@ declare <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
|
|||
define <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: smax1i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-ISEL-NEXT: fmov x8, d1
|
||||
; CHECK-ISEL-NEXT: fmov x9, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
|
||||
; CHECK-ISEL-NEXT: fmov d0, x8
|
||||
; CHECK-ISEL-NEXT: cmgt d2, d0, d1
|
||||
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: smax1i64:
|
||||
|
@ -210,16 +205,8 @@ declare <2 x i64> @llvm.smax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
|
|||
define <2 x i64> @smax2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: smax2i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: mov x8, v1.d[1]
|
||||
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-ISEL-NEXT: fmov x10, d1
|
||||
; CHECK-ISEL-NEXT: fmov x11, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: csel x9, x11, x10, gt
|
||||
; CHECK-ISEL-NEXT: fmov d0, x9
|
||||
; CHECK-ISEL-NEXT: mov v0.d[1], x8
|
||||
; CHECK-ISEL-NEXT: cmgt v2.2d, v0.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: smax2i64:
|
||||
|
@ -238,26 +225,10 @@ declare <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
|
|||
define void @smax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
|
||||
; CHECK-ISEL-LABEL: smax4i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: mov x8, v2.d[1]
|
||||
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-ISEL-NEXT: fmov x10, d2
|
||||
; CHECK-ISEL-NEXT: fmov x11, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, gt
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
|
||||
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
|
||||
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
|
||||
; CHECK-ISEL-NEXT: cmp x11, x9
|
||||
; CHECK-ISEL-NEXT: fmov d0, x10
|
||||
; CHECK-ISEL-NEXT: fmov x10, d3
|
||||
; CHECK-ISEL-NEXT: csel x9, x11, x9, gt
|
||||
; CHECK-ISEL-NEXT: fmov x11, d1
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: csel x10, x11, x10, gt
|
||||
; CHECK-ISEL-NEXT: fmov d1, x10
|
||||
; CHECK-ISEL-NEXT: mov v0.d[1], x8
|
||||
; CHECK-ISEL-NEXT: mov v1.d[1], x9
|
||||
; CHECK-ISEL-NEXT: cmgt v4.2d, v0.2d, v2.2d
|
||||
; CHECK-ISEL-NEXT: cmgt v5.2d, v1.2d, v3.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
|
||||
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
|
||||
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
|
@ -457,13 +428,8 @@ declare <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
|
|||
define <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: umax1i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-ISEL-NEXT: fmov x8, d1
|
||||
; CHECK-ISEL-NEXT: fmov x9, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, hi
|
||||
; CHECK-ISEL-NEXT: fmov d0, x8
|
||||
; CHECK-ISEL-NEXT: cmhi d2, d0, d1
|
||||
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: umax1i64:
|
||||
|
@ -482,8 +448,8 @@ declare <2 x i64> @llvm.umax.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
|
|||
define <2 x i64> @umax2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: umax2i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: uqsub v1.2d, v1.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v2.2d, v0.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: umax2i64:
|
||||
|
@ -502,10 +468,10 @@ declare <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
|
|||
define void @umax4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
|
||||
; CHECK-ISEL-LABEL: umax4i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: uqsub v2.2d, v2.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: uqsub v3.2d, v3.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: add v0.2d, v0.2d, v2.2d
|
||||
; CHECK-ISEL-NEXT: add v1.2d, v1.2d, v3.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v4.2d, v0.2d, v2.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v5.2d, v1.2d, v3.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
|
||||
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
|
||||
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
|
@ -705,13 +671,8 @@ declare <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
|
|||
define <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: smin1i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-ISEL-NEXT: fmov x8, d1
|
||||
; CHECK-ISEL-NEXT: fmov x9, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
|
||||
; CHECK-ISEL-NEXT: fmov d0, x8
|
||||
; CHECK-ISEL-NEXT: cmgt d2, d1, d0
|
||||
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: smin1i64:
|
||||
|
@ -730,16 +691,8 @@ declare <2 x i64> @llvm.smin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
|
|||
define <2 x i64> @smin2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: smin2i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: mov x8, v1.d[1]
|
||||
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-ISEL-NEXT: fmov x10, d1
|
||||
; CHECK-ISEL-NEXT: fmov x11, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: csel x9, x11, x10, lt
|
||||
; CHECK-ISEL-NEXT: fmov d0, x9
|
||||
; CHECK-ISEL-NEXT: mov v0.d[1], x8
|
||||
; CHECK-ISEL-NEXT: cmgt v2.2d, v1.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: smin2i64:
|
||||
|
@ -758,26 +711,10 @@ declare <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
|
|||
define void @smin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
|
||||
; CHECK-ISEL-LABEL: smin4i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: mov x8, v2.d[1]
|
||||
; CHECK-ISEL-NEXT: mov x9, v0.d[1]
|
||||
; CHECK-ISEL-NEXT: fmov x10, d2
|
||||
; CHECK-ISEL-NEXT: fmov x11, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, lt
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: mov x9, v3.d[1]
|
||||
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
|
||||
; CHECK-ISEL-NEXT: mov x11, v1.d[1]
|
||||
; CHECK-ISEL-NEXT: cmp x11, x9
|
||||
; CHECK-ISEL-NEXT: fmov d0, x10
|
||||
; CHECK-ISEL-NEXT: fmov x10, d3
|
||||
; CHECK-ISEL-NEXT: csel x9, x11, x9, lt
|
||||
; CHECK-ISEL-NEXT: fmov x11, d1
|
||||
; CHECK-ISEL-NEXT: cmp x11, x10
|
||||
; CHECK-ISEL-NEXT: csel x10, x11, x10, lt
|
||||
; CHECK-ISEL-NEXT: fmov d1, x10
|
||||
; CHECK-ISEL-NEXT: mov v0.d[1], x8
|
||||
; CHECK-ISEL-NEXT: mov v1.d[1], x9
|
||||
; CHECK-ISEL-NEXT: cmgt v4.2d, v2.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: cmgt v5.2d, v3.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
|
||||
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
|
||||
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
|
@ -977,13 +914,8 @@ declare <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) readnone
|
|||
define <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: umin1i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: // kill: def $d1 killed $d1 def $q1
|
||||
; CHECK-ISEL-NEXT: // kill: def $d0 killed $d0 def $q0
|
||||
; CHECK-ISEL-NEXT: fmov x8, d1
|
||||
; CHECK-ISEL-NEXT: fmov x9, d0
|
||||
; CHECK-ISEL-NEXT: cmp x9, x8
|
||||
; CHECK-ISEL-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-ISEL-NEXT: fmov d0, x8
|
||||
; CHECK-ISEL-NEXT: cmhi d2, d1, d0
|
||||
; CHECK-ISEL-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: umin1i64:
|
||||
|
@ -1002,8 +934,8 @@ declare <2 x i64> @llvm.umin.v2i64(<2 x i64> %a, <2 x i64> %b) readnone
|
|||
define <2 x i64> @umin2i64(<2 x i64> %a, <2 x i64> %b) {
|
||||
; CHECK-ISEL-LABEL: umin2i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: uqsub v1.2d, v0.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v2.2d, v1.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v1.16b, v2.16b
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
; CHECK-GLOBAL-LABEL: umin2i64:
|
||||
|
@ -1022,10 +954,10 @@ declare <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b) readnone
|
|||
define void @umin4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64>* %p) {
|
||||
; CHECK-ISEL-LABEL: umin4i64:
|
||||
; CHECK-ISEL: // %bb.0:
|
||||
; CHECK-ISEL-NEXT: uqsub v2.2d, v0.2d, v2.2d
|
||||
; CHECK-ISEL-NEXT: uqsub v3.2d, v1.2d, v3.2d
|
||||
; CHECK-ISEL-NEXT: sub v0.2d, v0.2d, v2.2d
|
||||
; CHECK-ISEL-NEXT: sub v1.2d, v1.2d, v3.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v4.2d, v2.2d, v0.2d
|
||||
; CHECK-ISEL-NEXT: cmhi v5.2d, v3.2d, v1.2d
|
||||
; CHECK-ISEL-NEXT: bif v0.16b, v2.16b, v4.16b
|
||||
; CHECK-ISEL-NEXT: bif v1.16b, v3.16b, v5.16b
|
||||
; CHECK-ISEL-NEXT: stp q0, q1, [x0]
|
||||
; CHECK-ISEL-NEXT: ret
|
||||
;
|
||||
|
|
|
@ -160,10 +160,10 @@ define <2 x i64> @t14(<2 x i64> %a, <2 x i64> %b) {
|
|||
define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) {
|
||||
; CHECK-LABEL: t15:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: cmhs v4.2d, v3.2d, v1.2d
|
||||
; CHECK-NEXT: cmhs v5.2d, v2.2d, v0.2d
|
||||
; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b
|
||||
; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b
|
||||
; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d
|
||||
; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d
|
||||
; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b
|
||||
; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b
|
||||
; CHECK-NEXT: ret
|
||||
%t1 = icmp ule <4 x i64> %a, %b
|
||||
%t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b
|
||||
|
|
|
@ -87,11 +87,11 @@ define i128 @test_v1i128(<1 x i128> %a) nounwind {
|
|||
define i64 @test_v2i64(<2 x i64> %a) nounwind {
|
||||
; CHECK-LABEL: test_v2i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov x8, v0.d[1]
|
||||
; CHECK-NEXT: fmov x9, d0
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: csel x0, x9, x8, hi
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: cmhi d2, d0, d1
|
||||
; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
|
||||
; CHECK-NEXT: fmov x0, d0
|
||||
; CHECK-NEXT: ret
|
||||
%b = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
|
||||
ret i64 %b
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue