[SelectionDAG] Make WidenVecRes_EXTRACT_SUBVECTOR work for scalable vectors.

The legalizer handles this by breaking up an EXTRACT_SUBVECTOR into
smaller parts, and combines those together, padding the result with
UNDEF vectors, e.g.

  nxv6i64 extract_subvector(nxv12i64, 6)
  <->
  nxv8i64 concat(
    nxv2i64 extract_subvector(nxv16i64, 6)
    nxv2i64 extract_subvector(nxv16i64, 8)
    nxv2i64 extract_subvector(nxv16i64, 10)
    nxv2i64 undef)

Reviewed By: frasercrmck, david-arm

Differential Revision: https://reviews.llvm.org/D110253
This commit is contained in:
Sander de Smalen 2021-09-29 11:33:40 +01:00
parent 87bcbd61b5
commit 6709b193ea
3 changed files with 270 additions and 8 deletions

View File

@ -4061,12 +4061,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
EVT EltVT = VT.getVectorElementType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
auto InOpTypeAction = getTypeAction(InOp.getValueType());
if (InOpTypeAction == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@ -4076,20 +4078,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
if (VT.isScalableVector())
report_fatal_error("Don't know how to widen the result of "
"EXTRACT_SUBVECTOR for scalable vectors");
// Check if we can extract from the vector.
unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned InNumElts = InVT.getVectorNumElements();
unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
unsigned InNumElts = InVT.getVectorMinNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
if (VT.isScalableVector()) {
// Try to split the operation up into smaller extracts and concat the
// results together, e.g.
// nxv6i64 extract_subvector(nxv12i64, 6)
// <->
// nxv8i64 concat(
// nxv2i64 extract_subvector(nxv16i64, 6)
// nxv2i64 extract_subvector(nxv16i64, 8)
// nxv2i64 extract_subvector(nxv16i64, 10)
// undef)
unsigned VTNElts = VT.getVectorMinNumElements();
unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
"down type's element count");
EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
ElementCount::getScalable(GCD));
// Avoid recursion around e.g. nxv1i8.
if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
SmallVector<SDValue> Parts;
unsigned I = 0;
for (; I < VTNElts / GCD; ++I)
Parts.push_back(
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
for (; I < WidenNumElts / GCD; ++I)
Parts.push_back(DAG.getUNDEF(PartVT));
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
}
report_fatal_error("Don't know how to widen the result of "
"EXTRACT_SUBVECTOR for scalable vectors");
}
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i = 0; i < NumElts; ++i)

View File

@ -45,6 +45,58 @@ define <vscale x 8 x i1> @extract_nxv8i1_nxv16i1_8(<vscale x 16 x i1> %in) {
declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
;
; Extract i1 vector that needs widening from one that needs widening.
;
define <vscale x 14 x i1> @extract_nxv14i1_nxv28i1_0(<vscale x 28 x i1> %in) {
; CHECK-LABEL: extract_nxv14i1_nxv28i1_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1> %in, i64 0)
ret <vscale x 14 x i1> %res
}
define <vscale x 14 x i1> @extract_nxv14i1_nxv28i1_14(<vscale x 28 x i1> %in) {
; CHECK-LABEL: extract_nxv14i1_nxv28i1_14:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: punpkhi p2.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: punpkhi p3.h, p1.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: punpkhi p4.h, p2.b
; CHECK-NEXT: punpklo p2.h, p2.b
; CHECK-NEXT: punpkhi p5.h, p3.b
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: punpklo p3.h, p3.b
; CHECK-NEXT: uzp1 p2.s, p5.s, p2.s
; CHECK-NEXT: punpkhi p5.h, p1.b
; CHECK-NEXT: punpklo p1.h, p1.b
; CHECK-NEXT: uzp1 p4.s, p4.s, p0.s
; CHECK-NEXT: uzp1 p3.s, p5.s, p3.s
; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
; CHECK-NEXT: uzp1 p1.h, p2.h, p4.h
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%res = call <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1> %in, i64 14)
ret <vscale x 14 x i1> %res
}
declare <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1>, i64)
;
; Extract half i1 vector that needs promotion from one that needs splitting.
;
@ -451,6 +503,93 @@ define <vscale x 8 x i8> @extract_nxv8i8_nxv16i8_8(<vscale x 16 x i8> %in) {
declare <vscale x 8 x i8> @llvm.experimental.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8>, i64)
;
; Extract i8 vector that needs widening from one that needs widening.
;
define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_0(<vscale x 28 x i8> %in) {
; CHECK-LABEL: extract_nxv14i8_nxv28i8_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 0)
ret <vscale x 14 x i8> %res
}
define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
; CHECK-LABEL: extract_nxv14i8_nxv28i8_14:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpkhi z0.h, z0.b
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z2.h, z1.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpklo z3.s, z2.h
; CHECK-NEXT: uunpklo z5.s, z0.h
; CHECK-NEXT: uunpklo z4.d, z3.s
; CHECK-NEXT: uunpklo z5.d, z5.s
; CHECK-NEXT: uzp1 z4.s, z5.s, z4.s
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z4.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z4.s, z0.h
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uunpkhi z4.d, z4.s
; CHECK-NEXT: uzp1 z3.s, z3.s, z4.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z2.s, z2.h
; CHECK-NEXT: uunpkhi z3.s, z0.h
; CHECK-NEXT: uunpklo z5.d, z2.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uzp1 z3.s, z3.s, z5.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
; CHECK-NEXT: uzp1 z3.b, z0.b, z0.b
; CHECK-NEXT: uunpkhi z3.h, z3.b
; CHECK-NEXT: uunpklo z5.s, z3.h
; CHECK-NEXT: uunpkhi z2.d, z2.s
; CHECK-NEXT: uunpkhi z5.d, z5.s
; CHECK-NEXT: uunpkhi z3.s, z3.h
; CHECK-NEXT: uzp1 z2.s, z2.s, z5.s
; CHECK-NEXT: uzp1 z2.h, z2.h, z3.h
; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z2.h, z2.b
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpklo z3.s, z2.h
; CHECK-NEXT: uunpklo z4.d, z1.s
; CHECK-NEXT: uunpklo z3.d, z3.s
; CHECK-NEXT: uunpkhi z2.s, z2.h
; CHECK-NEXT: uzp1 z3.s, z3.s, z4.s
; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h
; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
; CHECK-NEXT: uunpkhi z2.h, z2.b
; CHECK-NEXT: uunpkhi z3.s, z2.h
; CHECK-NEXT: uunpkhi z1.d, z1.s
; CHECK-NEXT: uunpkhi z3.d, z3.s
; CHECK-NEXT: uunpklo z2.s, z2.h
; CHECK-NEXT: uzp1 z1.s, z1.s, z3.s
; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h
; CHECK-NEXT: uzp1 z1.b, z0.b, z1.b
; CHECK-NEXT: uunpkhi z1.h, z1.b
; CHECK-NEXT: uunpkhi z2.s, z1.h
; CHECK-NEXT: uunpklo z2.d, z2.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b
; CHECK-NEXT: ret
%res = call <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
ret <vscale x 14 x i8> %res
}
declare <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8>, i64)
;
; Extract 1/4th i8 vector that needs promotion from legal type.
;
@ -554,6 +693,35 @@ define <vscale x 4 x half> @extract_nxv4f16_nxv8f16_4(<vscale x 8 x half> %in) {
declare <vscale x 4 x half> @llvm.experimental.vector.extract.nxv4f16.nxv8f16(<vscale x 8 x half>, i64)
;
; Extract f16 vector that needs widening from one that needs widening.
;
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
ret <vscale x 6 x half> %res
}
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_6:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res
}
declare <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half>, i64)
;
; Extract half f16 vector that needs promotion from one that needs splitting.
;
@ -663,6 +831,35 @@ define <vscale x 4 x bfloat> @extract_nxv4bf16_nxv8bf16_4(<vscale x 8 x bfloat>
declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv8bf16(<vscale x 8 x bfloat>, i64)
;
; Extract bf16 vector that needs widening from one that needs widening.
;
define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_0(<vscale x 12 x bfloat> %in) {
; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_0:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%res = call <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat> %in, i64 0)
ret <vscale x 6 x bfloat> %res
}
define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat> %in) {
; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_6:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.s, z1.h
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: uunpkhi z2.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
; CHECK-NEXT: uunpkhi z0.d, z0.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat> %in, i64 6)
ret <vscale x 6 x bfloat> %res
}
declare <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat>, i64)
;
; Extract bf16 vector that needs promotion from one that needs widening.
;

View File

@ -462,6 +462,40 @@ define <vscale x 16 x i1> @extract_nxv16i1_nxv32i1_16(<vscale x 32 x i1> %x) {
ret <vscale x 16 x i1> %c
}
;
; Extract f16 vector that needs widening from one that needs widening.
;
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
; CHECK: # %bb.0:
; CHECK-NEXT: # kill: def $v8m2 killed $v8m2 killed $v8m4
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
ret <vscale x 6 x half> %res
}
define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_6:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 2
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu
; CHECK-NEXT: vslidedown.vx v25, v10, a0
; CHECK-NEXT: vslidedown.vx v26, v9, a0
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
; CHECK-NEXT: vslideup.vi v27, v25, 0
; CHECK-NEXT: add a1, a0, a0
; CHECK-NEXT: vsetvli zero, a1, e16, m1, tu, mu
; CHECK-NEXT: vslideup.vx v27, v25, a0
; CHECK-NEXT: vslideup.vx v26, v10, a0
; CHECK-NEXT: vmv2r.v v8, v26
; CHECK-NEXT: ret
%res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
ret <vscale x 6 x half> %res
}
declare <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half>, i64)
declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, i64 %idx)
declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %vec, i64 %idx)