[SVE][CodeGen] Lower scalable integer vector reductions

This patch uses the existing LowerFixedLengthReductionToSVE function to also
lower scalable vector reductions. A separate function has been added to lower
VECREDUCE_AND and VECREDUCE_OR operations on predicate types using ptest.
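
For the predicate (i1) cases the new function maps each reduction onto a
flag-setting SVE operation: VECREDUCE_OR becomes a PTEST checking for any
active lane, VECREDUCE_AND inverts the operand and PTESTs for no active lanes,
and VECREDUCE_XOR counts the active lanes with CNTP and keeps bit 0 (the
parity). The sketch below only models these scalar semantics to make the
mapping easier to follow; it is not the SelectionDAG code from the patch, and
the `lanes` vector and function names are purely illustrative.

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar model of the i1 reduction lowerings: 'lanes' stands in for an SVE
// predicate register, one bool per lane.
bool reduceOr(const std::vector<bool> &lanes) {
  // VECREDUCE_OR -> PTEST(pg, op) with the "any active" condition.
  for (bool b : lanes)
    if (b)
      return true;
  return false;
}

bool reduceAnd(const std::vector<bool> &lanes) {
  // VECREDUCE_AND -> invert the operand (XOR with the all-true predicate),
  // then PTEST with the "none active" condition: true iff no lane was false.
  for (bool b : lanes)
    if (!b)
      return false;
  return true;
}

bool reduceXor(const std::vector<bool> &lanes) {
  // VECREDUCE_XOR -> CNTP to count the active lanes, then keep only bit 0.
  uint64_t count = 0;
  for (bool b : lanes)
    count += b;
  return (count & 1) != 0;
}

int main() {
  std::vector<bool> p = {true, false, true, true};
  assert(reduceOr(p));   // at least one active lane
  assert(!reduceAnd(p)); // one lane is inactive
  assert(reduceXor(p));  // three active lanes -> odd parity
  return 0;
}
```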

Lowering scalable floating-point reductions will be addressed in a follow-up
patch; for now these will hit the report_fatal_error added to expandVecReduce()
in TargetLowering.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D89382
Author: Kerry McLaughlin
Date: 2020-11-04 11:08:10 +00:00
parent f202d32216
commit f2412d372d
10 changed files with 1284 additions and 28 deletions

@@ -20857,7 +20857,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
unsigned Opcode = N->getOpcode();
// VECREDUCE over 1-element vector is just an extract.
if (VT.getVectorNumElements() == 1) {
if (VT.getVectorElementCount().isScalar()) {
SDLoc dl(N);
SDValue Res =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,

@@ -3323,6 +3323,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
// computeKnownBits not yet implemented for scalable vectors.
if (VecVT.isScalableVector())
break;
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
@@ -4809,6 +4812,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VSCALE:
assert(VT == Operand.getValueType() && "Unexpected VT!");
break;
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
if (Operand.getValueType().getScalarType() == MVT::i1)
return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
break;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
if (Operand.getValueType().getScalarType() == MVT::i1)
return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
break;
}
SDNode *N;
@@ -5318,10 +5331,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::MULHS:
case ISD::SDIV:
case ISD::SREM:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::SADDSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
@@ -5330,6 +5339,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
case ISD::SMIN:
case ISD::UMAX:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
return getNode(ISD::OR, DL, VT, N1, N2);
break;
case ISD::SMAX:
case ISD::UMIN:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
return getNode(ISD::AND, DL, VT, N1, N2);
break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:

@@ -8000,6 +8000,10 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op = Node->getOperand(0);
EVT VT = Op.getValueType();
if (VT.isScalableVector())
report_fatal_error(
"Expanding reductions for scalable vectors is undefined.");
// Try to use a shuffle reduction for power of two vectors.
if (VT.isPow2VectorType()) {
while (VT.getVectorNumElements() > 1) {

@@ -1013,6 +1013,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
}
// Illegal unpacked integer vector types.
@@ -1027,6 +1035,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
@@ -9815,30 +9826,35 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerFixedLengthReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerFixedLengthReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerFixedLengthReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerFixedLengthReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
@@ -16333,20 +16349,56 @@ SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
SDValue AArch64TargetLowering::LowerFixedLengthReductionToSVE(unsigned Opcode,
SDValue ScalarOp, SelectionDAG &DAG) const {
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));

@@ -933,8 +933,9 @@ private:
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerFixedLengthReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;
SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;

@@ -0,0 +1,375 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; ANDV
define i1 @reduce_and_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_and_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_and_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; ORV
define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; XORV
define i1 @reduce_xor_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cntp x8, p1, p0.b
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_xor_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: cntp x8, p1, p0.h
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_xor_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cntp x8, p1, p0.s
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_xor_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: cntp x8, p1, p0.d
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; SMAXV
define i1 @reduce_smax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; SMINV
define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; UMAXV
define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
; UMINV
define i1 @reduce_umin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
ret i1 %res
}
define i1 @reduce_umin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
ret i1 %res
}
declare i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)

@@ -0,0 +1,417 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; ANDV
define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: andv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: andv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: andv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: andv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; ORV
define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: orv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: orv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: orv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: orv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; XORV
define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: xorv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: xorv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: xorv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: xorv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; UADDV
define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uaddv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uaddv d0, p0, z0.b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uaddv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uaddv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uaddv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; UMINV
define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umin_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; SMINV
define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smin_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; UMAXV
define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umax_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
; SMAXV
define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smax_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
ret i8 %res
}
define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
ret i16 %res
}
define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
ret i32 %res
}
define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
ret i64 %res
}
declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)
declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

@@ -0,0 +1,145 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; ANDV
define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: andv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-LABEL: andv_nxv64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p4/z, p0.b
; CHECK-NEXT: ptest p4, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
ret i1 %res
}
; ORV
define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: orv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
; XORV
define i1 @xorv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: xorv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: cntp x8, p2, p0.b
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
; SMAXV
define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: smaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
; SMINV
define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: sminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
; UMAXV
define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: umaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
; UMINV
define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: uminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
ret i1 %res
}
declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)
declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1>)

@@ -0,0 +1,233 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; ANDV
define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
ret i8 %res
}
define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
ret i32 %res
}
; ORV
define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
ret i32 %res
}
define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
ret i64 %res
}
; XORV
define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
ret i16 %res
}
define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
ret i32 %res
}
; UADDV
define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
ret i16 %res
}
define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
ret i16 %res
}
define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add z1.s, z1.s, z3.s
; CHECK-NEXT: add z0.s, z0.s, z2.s
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
ret i32 %res
}
; UMINV
define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
ret i32 %res
}
define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
ret i64 %res
}
; SMINV
define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
ret i8 %res
}
define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
ret i32 %res
}
; UMAXV
define i16 @smin_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: smin_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
%res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
ret i16 %res
}
; SMAXV
define i64 @smin_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smin_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
ret i64 %res
}
declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)
declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)
declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)
declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)
declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)
declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)
declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)
declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)

@@ -142,10 +142,14 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-LABEL: test_v4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4h, #1
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w10, v0.h[1]
; CHECK-NEXT: umov w11, v0.h[0]
; CHECK-NEXT: umov w9, v0.h[2]
; CHECK-NEXT: orr w10, w11, w10
; CHECK-NEXT: umov w8, v0.h[3]
; CHECK-NEXT: orr w9, w10, w9
; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)