forked from OSchip/llvm-project
[SVE][CodeGen] Add simple integer add tests for SVE tuple types
I have added tests to: CodeGen/AArch64/sve-intrinsics-int-arith.ll for doing simple integer add operations on tuple types. Since these tests introduced new warnings due to incorrect use of getVectorNumElements() I have also fixed up these warnings in the same patch. These fixes are: 1. In narrowExtractedVectorBinOp I have changed the code to bail out early for scalable vector types, since we've not yet hit a case that proves the optimisations are profitable for scalable vectors. 2. In DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS I have replaced calls to getVectorNumElements with getVectorMinNumElements in cases that work with scalable vectors. For the other cases I have added asserts that the vector is not scalable because we should not be using shuffle vectors and build vectors in such cases. Differential revision: https://reviews.llvm.org/D84016
This commit is contained in:
parent
85342c27a3
commit
2078771759
|
@@ -19194,7 +19194,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
   // The binop must be a vector type, so we can extract some fraction of it.
   EVT WideBVT = BinOp.getValueType();
-  if (!WideBVT.isVector())
+  // The optimisations below currently assume we are dealing with fixed length
+  // vectors. It is possible to add support for scalable vectors, but at the
+  // moment we've done no analysis to prove whether they are profitable or not.
+  if (!WideBVT.isFixedLengthVector())
     return SDValue();
 
   EVT VT = Extract->getValueType(0);
|
|
@@ -3610,16 +3610,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
   EVT InVT = N->getOperand(0).getValueType();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   SDLoc dl(N);
-  unsigned WidenNumElts = WidenVT.getVectorNumElements();
-  unsigned NumInElts = InVT.getVectorNumElements();
   unsigned NumOperands = N->getNumOperands();
 
   bool InputWidened = false; // Indicates we need to widen the input.
   if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
-    if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) {
+    unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+    unsigned NumInElts = InVT.getVectorMinNumElements();
+    if (WidenNumElts % NumInElts == 0) {
       // Add undef vectors to widen to correct length.
-      unsigned NumConcat = WidenVT.getVectorNumElements() /
-                           InVT.getVectorNumElements();
+      unsigned NumConcat = WidenNumElts / NumInElts;
       SDValue UndefVal = DAG.getUNDEF(InVT);
       SmallVector<SDValue, 16> Ops(NumConcat);
       for (unsigned i=0; i < NumOperands; ++i)
@@ -3643,6 +3642,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
       return GetWidenedVector(N->getOperand(0));
 
     if (NumOperands == 2) {
+      assert(!WidenVT.isScalableVector() &&
+             "Cannot use vector shuffles to widen CONCAT_VECTOR result");
+      unsigned WidenNumElts = WidenVT.getVectorNumElements();
+      unsigned NumInElts = InVT.getVectorNumElements();
+
       // Replace concat of two operands with a shuffle.
       SmallVector<int, 16> MaskOps(WidenNumElts, -1);
       for (unsigned i = 0; i < NumInElts; ++i) {
@@ -3657,6 +3661,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
     }
   }
 
+  assert(!WidenVT.isScalableVector() &&
+         "Cannot use build vectors to widen CONCAT_VECTOR result");
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  unsigned NumInElts = InVT.getVectorNumElements();
+
   // Fall back to use extracts and build vector.
   EVT EltVT = WidenVT.getVectorElementType();
   SmallVector<SDValue, 16> Ops(WidenNumElts);
|
|
@@ -325,6 +325,39 @@ define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
   ret <vscale x 2 x i64> %out
 }
 
+; ADD (tuples)
+
+define <vscale x 4 x i64> @add_i64_tuple2(<vscale x 4 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2) {
+; CHECK-LABEL: add_i64_tuple2
+; CHECK: add z0.d, z0.d, z0.d
+; CHECK: add z1.d, z1.d, z1.d
+  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2)
+  %res = add <vscale x 4 x i64> %tuple, %tuple
+  ret <vscale x 4 x i64> %res
+}
+
+define <vscale x 6 x i64> @add_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
+; CHECK-LABEL: add_i64_tuple3
+; CHECK: add z0.d, z0.d, z0.d
+; CHECK: add z1.d, z1.d, z1.d
+; CHECK: add z2.d, z2.d, z2.d
+  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
+  %res = add <vscale x 6 x i64> %tuple, %tuple
+  ret <vscale x 6 x i64> %res
+}
+
+define <vscale x 8 x i64> @add_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
+; CHECK-LABEL: add_i64_tuple4
+; CHECK: add z0.d, z0.d, z0.d
+; CHECK: add z1.d, z1.d, z1.d
+; CHECK: add z2.d, z2.d, z2.d
+; CHECK: add z3.d, z3.d, z3.d
+  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
+  %res = add <vscale x 8 x i64> %tuple, %tuple
+  ret <vscale x 8 x i64> %res
+}
+
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
|
@@ -366,3 +399,7 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
Loading…
Reference in New Issue