forked from OSchip/llvm-project
[SVE][CodeGen] Lower scalable integer vector reductions
This patch uses the existing LowerFixedLengthReductionToSVE function to also lower scalable vector reductions. A separate function has been added to lower VECREDUCE_AND & VECREDUCE_OR operations with predicate types using ptest. Lowering scalable floating-point reductions will be addressed in a follow-up patch; for now these will hit the assertion added to expandVecReduce() in TargetLowering.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D89382
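For example (an illustrative snippet mirroring the new tests added below, not part of the original commit message), an OR reduction over a scalable predicate vector such as

define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
  %res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

is now selected to a ptest of the operand against an all-true governing predicate followed by a cset, rather than being expanded.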
parent f202d32216
commit f2412d372d
@@ -20857,7 +20857,7 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
  unsigned Opcode = N->getOpcode();

  // VECREDUCE over 1-element vector is just an extract.
-  if (VT.getVectorNumElements() == 1) {
+  if (VT.getVectorElementCount().isScalar()) {
    SDLoc dl(N);
    SDValue Res =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,

@@ -3323,6 +3323,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
    SDValue InVec = Op.getOperand(0);
    SDValue EltNo = Op.getOperand(1);
    EVT VecVT = InVec.getValueType();
+    // computeKnownBits not yet implemented for scalable vectors.
+    if (VecVT.isScalableVector())
+      break;
    const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
    const unsigned NumSrcElts = VecVT.getVectorNumElements();

@@ -4809,6 +4812,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
  case ISD::VSCALE:
    assert(VT == Operand.getValueType() && "Unexpected VT!");
    break;
+  case ISD::VECREDUCE_SMIN:
+  case ISD::VECREDUCE_UMAX:
+    if (Operand.getValueType().getScalarType() == MVT::i1)
+      return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
+    break;
+  case ISD::VECREDUCE_SMAX:
+  case ISD::VECREDUCE_UMIN:
+    if (Operand.getValueType().getScalarType() == MVT::i1)
+      return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
+    break;
  }

  SDNode *N;

@@ -5318,10 +5331,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
  case ISD::MULHS:
  case ISD::SDIV:
  case ISD::SREM:
-  case ISD::SMIN:
-  case ISD::SMAX:
-  case ISD::UMIN:
-  case ISD::UMAX:
  case ISD::SADDSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:

@@ -5330,6 +5339,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
    assert(N1.getValueType() == N2.getValueType() &&
           N1.getValueType() == VT && "Binary operator types must match!");
    break;
+  case ISD::SMIN:
+  case ISD::UMAX:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+      return getNode(ISD::OR, DL, VT, N1, N2);
+    break;
+  case ISD::SMAX:
+  case ISD::UMIN:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
+      return getNode(ISD::AND, DL, VT, N1, N2);
+    break;
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:

@@ -8000,6 +8000,10 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

+  if (VT.isScalableVector())
+    report_fatal_error(
+        "Expanding reductions for scalable vectors is undefined.");
+
  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {

@@ -1013,6 +1013,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
    }

    // Illegal unpacked integer vector types.

@@ -1027,6 +1035,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
      setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);

      // There are no legal MVT::nxv16f## based types.
      if (VT != MVT::nxv16i1) {

@@ -9815,30 +9826,35 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
                      Op.getOpcode() == ISD::VECREDUCE_FADD ||
                      (Op.getOpcode() != ISD::VECREDUCE_ADD &&
                       SrcVT.getVectorElementType() == MVT::i64);
-  if (useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
+  if (SrcVT.isScalableVector() ||
+      useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {

+    if (SrcVT.getVectorElementType() == MVT::i1)
+      return LowerPredReductionToSVE(Op, DAG);

    switch (Op.getOpcode()) {
    case ISD::VECREDUCE_ADD:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
    case ISD::VECREDUCE_AND:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
    case ISD::VECREDUCE_OR:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
    case ISD::VECREDUCE_SMAX:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
    case ISD::VECREDUCE_SMIN:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
    case ISD::VECREDUCE_UMAX:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
    case ISD::VECREDUCE_UMIN:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
    case ISD::VECREDUCE_XOR:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
    case ISD::VECREDUCE_FADD:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
    case ISD::VECREDUCE_FMAX:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
    case ISD::VECREDUCE_FMIN:
-      return LowerFixedLengthReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
+      return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
    default:
      llvm_unreachable("Unhandled fixed length reduction");
    }

@@ -16333,20 +16349,56 @@ SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}

-SDValue AArch64TargetLowering::LowerFixedLengthReductionToSVE(unsigned Opcode,
-    SDValue ScalarOp, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
+                                                       SelectionDAG &DAG) const {
+  SDLoc DL(ReduceOp);
+  SDValue Op = ReduceOp.getOperand(0);
+  EVT OpVT = Op.getValueType();
+  EVT VT = ReduceOp.getValueType();
+
+  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
+
+  switch (ReduceOp.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::VECREDUCE_OR:
+    return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+  case ISD::VECREDUCE_AND: {
+    Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
+    return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
+  }
+  case ISD::VECREDUCE_XOR: {
+    SDValue ID =
+        DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
+    SDValue Cntp =
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
+    return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
+  }
+  }
+
+  return SDValue();
+}
+
+SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
+                                                   SDValue ScalarOp,
+                                                   SelectionDAG &DAG) const {
  SDLoc DL(ScalarOp);
  SDValue VecOp = ScalarOp.getOperand(0);
  EVT SrcVT = VecOp.getValueType();

-  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
+  if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
    EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
    VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
+  }

  // UADDV always returns an i64 result.
  EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
                  SrcVT.getVectorElementType();

+  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
  SDValue Rdx = DAG.getNode(Opcode, DL, getPackedSVEVectorVT(ResVT), Pg, VecOp);
  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
                            Rdx, DAG.getConstant(0, DL, MVT::i64));

@@ -933,7 +933,8 @@ private:
                                            SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
-  SDValue LowerFixedLengthReductionToSVE(unsigned Opcode, SDValue ScalarOp,
+  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
+  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
                              SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;

@@ -0,0 +1,375 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; ANDV

define i1 @reduce_and_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_and_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_and_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_and_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; ORV

define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_or_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; XORV

define i1 @reduce_xor_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: cntp x8, p1, p0.b
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_xor_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: cntp x8, p1, p0.h
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_xor_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cntp x8, p1, p0.s
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_xor_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_xor_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: cntp x8, p1, p0.d
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; SMAXV

define i1 @reduce_smax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; SMINV

define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_smin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; UMAXV

define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umax_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

; UMINV

define i1 @reduce_umin_nxv16i1(<vscale x 16 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umin_nxv8i1(<vscale x 8 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umin_nxv4i1(<vscale x 4 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
  ret i1 %res
}

define i1 @reduce_umin_nxv2i1(<vscale x 2 x i1> %vec) {
; CHECK-LABEL: reduce_umin_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: not p0.b, p1/z, p0.b
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)
  ret i1 %res
}

declare i1 @llvm.vector.reduce.and.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.and.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.xor.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.xor.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.smax.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.smax.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.umin.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.umin.i1.nxv2i1(<vscale x 2 x i1> %vec)

declare i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
declare i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)

@@ -0,0 +1,417 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; ANDV

define i8 @andv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: andv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: andv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @andv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: andv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @andv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: andv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @andv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: andv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: andv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; ORV

define i8 @orv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: orv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: orv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @orv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: orv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: orv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @orv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: orv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @orv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: orv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; XORV

define i8 @xorv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: xorv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: eorv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @xorv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: xorv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: eorv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @xorv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: xorv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @xorv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: xorv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; UADDV

define i8 @uaddv_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: uaddv_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uaddv d0, p0, z0.b
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @uaddv_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uaddv_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @uaddv_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uaddv_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @uaddv_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uaddv_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uaddv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; UMINV

define i8 @umin_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umin_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @umin_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @umin_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @umin_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; SMINV

define i8 @smin_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smin_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: sminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @smin_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @smin_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @smin_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; UMAXV

define i8 @umax_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: umax_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @umax_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @umax_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @umax_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

; SMAXV

define i8 @smax_nxv16i8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: smax_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: smaxv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8> %a)
  ret i8 %res
}

define i16 @smax_nxv8i16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: smaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16> %a)
  ret i16 %res
}

define i32 @smax_nxv4i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %a)
  ret i32 %res
}

define i64 @smax_nxv2i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.and.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.or.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.or.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.or.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.xor.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.xor.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.xor.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.add.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.add.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.umin.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.umin.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.smin.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.smin.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.umax.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.umax.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.umax.nxv2i64(<vscale x 2 x i64>)

declare i8 @llvm.vector.reduce.smax.nxv16i8(<vscale x 16 x i8>)
declare i16 @llvm.vector.reduce.smax.nxv8i16(<vscale x 8 x i16>)
declare i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32>)
declare i64 @llvm.vector.reduce.smax.nxv2i64(<vscale x 2 x i64>)

@@ -0,0 +1,145 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; ANDV

define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: andv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-LABEL: andv_nxv64i1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p4/z, p0.b
; CHECK-NEXT: ptest p4, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %a)
  ret i1 %res
}

; ORV

define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: orv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; XORV

define i1 @xorv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: xorv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: eor p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: cntp x8, p2, p0.b
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; SMAXV

define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: smaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; SMINV

define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: sminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; UMAXV

define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: umaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: orr p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

; UMINV

define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: uminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
  %res = call i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1> %a)
  ret i1 %res
}

declare i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1>)
declare i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1>)

declare i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.xor.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.smax.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1>)

declare i1 @llvm.vector.reduce.umin.nxv32i1(<vscale x 32 x i1>)

@@ -0,0 +1,233 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; ANDV

define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: andv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}

define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; ORV

define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z1.d, z1.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: orv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

; XORV

define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: eorv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}

define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: eorv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UADDV

define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add z1.s, z1.s, z3.s
; CHECK-NEXT: add z0.s, z0.s, z2.s
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}

; UMINV

define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}

; SMINV

define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}

define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: sminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UMAXV

define i16 @smin_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: smin_nxv16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: umaxv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

; SMAXV

define i64 @smin_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smin_nxv8i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: smaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)

@@ -142,10 +142,14 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
define i1 @test_v4i1(<4 x i1> %a) nounwind {
; CHECK-LABEL: test_v4i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.4h, #1
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: umaxv h0, v0.4h
-; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: umov w11, v0.h[0]
+; CHECK-NEXT: umov w9, v0.h[2]
+; CHECK-NEXT: orr w10, w11, w10
+; CHECK-NEXT: umov w8, v0.h[3]
+; CHECK-NEXT: orr w9, w10, w9
+; CHECK-NEXT: orr w8, w9, w8
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)