diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5b658eea372a..044805a3541c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4061,12 +4061,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
 
 SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
   SDValue InOp = N->getOperand(0);
   SDValue Idx = N->getOperand(1);
   SDLoc dl(N);
 
-  if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+  auto InOpTypeAction = getTypeAction(InOp.getValueType());
+  if (InOpTypeAction == TargetLowering::TypeWidenVector)
     InOp = GetWidenedVector(InOp);
 
   EVT InVT = InOp.getValueType();
@@ -4076,20 +4078,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   if (IdxVal == 0 && InVT == WidenVT)
     return InOp;
 
-  if (VT.isScalableVector())
-    report_fatal_error("Don't know how to widen the result of "
-                       "EXTRACT_SUBVECTOR for scalable vectors");
-
   // Check if we can extract from the vector.
-  unsigned WidenNumElts = WidenVT.getVectorNumElements();
-  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+  unsigned InNumElts = InVT.getVectorMinNumElements();
   if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
 
+  if (VT.isScalableVector()) {
+    // Try to split the operation up into smaller extracts and concat the
+    // results together, e.g.
+    //    nxv6i64 extract_subvector(nxv12i64, 6)
+    // <->
+    //  nxv8i64 concat(
+    //    nxv2i64 extract_subvector(nxv16i64, 6)
+    //    nxv2i64 extract_subvector(nxv16i64, 8)
+    //    nxv2i64 extract_subvector(nxv16i64, 10)
+    //    undef)
+    unsigned VTNElts = VT.getVectorMinNumElements();
+    unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+    assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+                                  "down type's element count");
+    EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+                                  ElementCount::getScalable(GCD));
+    // Avoid recursion around e.g. nxv1i8.
+    if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+      SmallVector<SDValue> Parts;
+      unsigned I = 0;
+      for (; I < VTNElts / GCD; ++I)
+        Parts.push_back(
+            DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+                        DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+      for (; I < WidenNumElts / GCD; ++I)
+        Parts.push_back(DAG.getUNDEF(PartVT));
+
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+    }
+
+    report_fatal_error("Don't know how to widen the result of "
+                       "EXTRACT_SUBVECTOR for scalable vectors");
+  }
+
   // We could try widening the input to the right length but for now, extract
   // the original elements, fill the rest with undefs and build a vector.
   SmallVector<SDValue, 16> Ops(WidenNumElts);
-  EVT EltVT = VT.getVectorElementType();
   unsigned NumElts = VT.getVectorNumElements();
   unsigned i;
   for (i = 0; i < NumElts; ++i)
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
index 4367db6f53c6..71a19648413f 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -45,6 +45,58 @@ define <vscale x 8 x i1> @extract_nxv8i1_nxv16i1_8(<vscale x 16 x i1> %in) {
 
 declare <vscale x 8 x i1> @llvm.experimental.vector.extract.nxv8i1.nxv16i1(<vscale x 16 x i1>, i64)
 
+;
+; Extract i1 vector that needs widening from one that needs widening.
+;
+define <vscale x 14 x i1> @extract_nxv14i1_nxv28i1_0(<vscale x 28 x i1> %in) {
+; CHECK-LABEL: extract_nxv14i1_nxv28i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %res = call <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1> %in, i64 0)
+  ret <vscale x 14 x i1> %res
+}
+
+define <vscale x 14 x i1> @extract_nxv14i1_nxv28i1_14(<vscale x 28 x i1> %in) {
+; CHECK-LABEL: extract_nxv14i1_nxv28i1_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    punpkhi p2.h, p1.b
+; CHECK-NEXT:    punpklo p1.h, p1.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p2.h, p2.b
+; CHECK-NEXT:    punpkhi p3.h, p1.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p1.h, p1.b
+; CHECK-NEXT:    punpkhi p4.h, p2.b
+; CHECK-NEXT:    punpklo p2.h, p2.b
+; CHECK-NEXT:    punpkhi p5.h, p3.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpklo p3.h, p3.b
+; CHECK-NEXT:    uzp1 p2.s, p5.s, p2.s
+; CHECK-NEXT:    punpkhi p5.h, p1.b
+; CHECK-NEXT:    punpklo p1.h, p1.b
+; CHECK-NEXT:    uzp1 p4.s, p4.s, p0.s
+; CHECK-NEXT:    uzp1 p3.s, p5.s, p3.s
+; CHECK-NEXT:    uzp1 p0.s, p0.s, p1.s
+; CHECK-NEXT:    uzp1 p1.h, p2.h, p4.h
+; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p3.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %res = call <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1> %in, i64 14)
+  ret <vscale x 14 x i1> %res
+}
+
+declare <vscale x 14 x i1> @llvm.experimental.vector.extract.nxv14i1.nxv28i1(<vscale x 28 x i1>, i64)
+
 ;
 ; Extract half i1 vector that needs promotion from one that needs splitting.
 ;
@@ -451,6 +503,93 @@ define <vscale x 8 x i8> @extract_nxv8i8_nxv16i8_8(<vscale x 16 x i8> %in) {
 
 declare <vscale x 8 x i8> @llvm.experimental.vector.extract.nxv8i8.nxv16i8(<vscale x 16 x i8>, i64)
 
+;
+; Extract i8 vector that needs widening from one that needs widening.
+;
+define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_0(<vscale x 28 x i8> %in) {
+; CHECK-LABEL: extract_nxv14i8_nxv28i8_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %res = call <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 0)
+  ret <vscale x 14 x i8> %res
+}
+
+define <vscale x 14 x i8> @extract_nxv14i8_nxv28i8_14(<vscale x 28 x i8> %in) {
+; CHECK-LABEL: extract_nxv14i8_nxv28i8_14:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uunpklo z2.h, z1.b
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpklo z3.s, z2.h
+; CHECK-NEXT:    uunpklo z5.s, z0.h
+; CHECK-NEXT:    uunpklo z4.d, z3.s
+; CHECK-NEXT:    uunpklo z5.d, z5.s
+; CHECK-NEXT:    uzp1 z4.s, z5.s, z4.s
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uzp1 z0.h, z4.h, z0.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z4.s, z0.h
+; CHECK-NEXT:    uunpkhi z3.d, z3.s
+; CHECK-NEXT:    uunpkhi z4.d, z4.s
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z4.s
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z3.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT:    uunpklo z0.h, z0.b
+; CHECK-NEXT:    uunpkhi z2.s, z2.h
+; CHECK-NEXT:    uunpkhi z3.s, z0.h
+; CHECK-NEXT:    uunpklo z5.d, z2.s
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uunpklo z0.s, z0.h
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z5.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z3.h
+; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
+; CHECK-NEXT:    uunpkhi z3.h, z3.b
+; CHECK-NEXT:    uunpklo z5.s, z3.h
+; CHECK-NEXT:    uunpkhi z2.d, z2.s
+; CHECK-NEXT:    uunpkhi z5.d, z5.s
+; CHECK-NEXT:    uunpkhi z3.s, z3.h
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z5.s
+; CHECK-NEXT:    uzp1 z2.h, z2.h, z3.h
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT:    uunpkhi z1.h, z1.b
+; CHECK-NEXT:    uunpkhi z2.h, z2.b
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpklo z3.s, z2.h
+; CHECK-NEXT:    uunpklo z4.d, z1.s
+; CHECK-NEXT:    uunpklo z3.d, z3.s
+; CHECK-NEXT:    uunpkhi z2.s, z2.h
+; CHECK-NEXT:    uzp1 z3.s, z3.s, z4.s
+; CHECK-NEXT:    uzp1 z2.h, z3.h, z2.h
+; CHECK-NEXT:    uzp1 z2.b, z0.b, z2.b
+; CHECK-NEXT:    uunpkhi z2.h, z2.b
+; CHECK-NEXT:    uunpkhi z3.s, z2.h
+; CHECK-NEXT:    uunpkhi z1.d, z1.s
+; CHECK-NEXT:    uunpkhi z3.d, z3.s
+; CHECK-NEXT:    uunpklo z2.s, z2.h
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z3.s
+; CHECK-NEXT:    uzp1 z1.h, z2.h, z1.h
+; CHECK-NEXT:    uzp1 z1.b, z0.b, z1.b
+; CHECK-NEXT:    uunpkhi z1.h, z1.b
+; CHECK-NEXT:    uunpkhi z2.s, z1.h
+; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z0.s
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT:    uzp1 z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8> %in, i64 14)
+  ret <vscale x 14 x i8> %res
+}
+
+declare <vscale x 14 x i8> @llvm.experimental.vector.extract.nxv14i8.nxv28i8(<vscale x 28 x i8>, i64)
+
 ;
 ; Extract 1/4th i8 vector that needs promotion from legal type.
 ;
@@ -554,6 +693,35 @@ define <vscale x 4 x half> @extract_nxv4f16_nxv8f16_4(<vscale x 8 x half> %in) {
 
 declare <vscale x 4 x half> @llvm.experimental.vector.extract.nxv4f16.nxv8f16(<vscale x 8 x half>, i64)
 
+;
+; Extract f16 vector that needs widening from one that needs widening.
+;
+define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
+; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
+  ret <vscale x 6 x half> %res
+}
+
+define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in) {
+; CHECK-LABEL: extract_nxv6f16_nxv12f16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z2.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z0.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
+  ret <vscale x 6 x half> %res
+}
+
+declare <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half>, i64)
+
 ;
 ; Extract half f16 vector that needs promotion from one that needs splitting.
 ;
@@ -663,6 +831,35 @@ define <vscale x 4 x bfloat> @extract_nxv4bf16_nxv8bf16_4(<vscale x 8 x bfloat>
 
 declare <vscale x 4 x bfloat> @llvm.experimental.vector.extract.nxv4bf16.nxv8bf16(<vscale x 8 x bfloat>, i64)
 
+;
+; Extract bf16 vector that needs widening from one that needs widening.
+;
+define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_0(<vscale x 12 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat> %in, i64 0)
+  ret <vscale x 6 x bfloat> %res
+}
+
+define <vscale x 6 x bfloat> @extract_nxv6bf16_nxv12bf16_6(<vscale x 12 x bfloat> %in) {
+; CHECK-LABEL: extract_nxv6bf16_nxv12bf16_6:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.s, z1.h
+; CHECK-NEXT:    uunpkhi z0.s, z0.h
+; CHECK-NEXT:    uunpkhi z2.d, z1.s
+; CHECK-NEXT:    uunpklo z1.d, z1.s
+; CHECK-NEXT:    uunpkhi z0.d, z0.s
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z0.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat> %in, i64 6)
+  ret <vscale x 6 x bfloat> %res
+}
+
+declare <vscale x 6 x bfloat> @llvm.experimental.vector.extract.nxv6bf16.nxv12bf16(<vscale x 12 x bfloat>, i64)
+
 ;
 ; Extract bf16 vector that needs promotion from one that needs widening.
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index 8161d531a169..79c1b7781aff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -462,6 +462,40 @@ define <vscale x 16 x i1> @extract_nxv16i1_nxv32i1_16(<vscale x 32 x i1> %x) {
   ret <vscale x 16 x i1> %c
 }
 
+;
+; Extract f16 vector that needs widening from one that needs widening.
+;
+define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_0(<vscale x 12 x half> %in) {
; CHECK-LABEL: extract_nxv6f16_nxv12f16_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $v8m2 killed $v8m2 killed $v8m4
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 0)
+  ret <vscale x 6 x half> %res
+}
+
+define <vscale x 6 x half> @extract_nxv6f16_nxv12f16_6(<vscale x 12 x half> %in) {
+; CHECK-LABEL: extract_nxv6f16_nxv12f16_6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 2
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vslidedown.vx v25, v10, a0
+; CHECK-NEXT:    vslidedown.vx v26, v9, a0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vi v27, v25, 0
+; CHECK-NEXT:    add a1, a0, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vslideup.vx v27, v25, a0
+; CHECK-NEXT:    vslideup.vx v26, v10, a0
+; CHECK-NEXT:    vmv2r.v v8, v26
+; CHECK-NEXT:    ret
+  %res = call <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half> %in, i64 6)
+  ret <vscale x 6 x half> %res
+}
+
+declare <vscale x 6 x half> @llvm.experimental.vector.extract.nxv6f16.nxv12f16(<vscale x 12 x half>, i64)
+
 declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv4i8(<vscale x 4 x i8> %vec, i64 %idx)
 declare <vscale x 1 x i8> @llvm.experimental.vector.extract.nxv1i8.nxv8i8(<vscale x 8 x i8> %vec, i64 %idx)
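
Note (not part of the patch): a minimal standalone C++ sketch of the index arithmetic the new scalable-vector path performs, using the nxv6i64-from-nxv12i64 example from the code comment above. The concrete element counts are assumptions taken from that comment, and std::gcd stands in for the patch's greatestCommonDivisor.

// Sketch of the GCD-based splitting: break the extract into GCD-sized
// parts, then pad out to the widened result type with undef parts.
// Assumes nxv6i64 extract_subvector(nxv12i64, 6), where the result type
// widens to nxv8i64 and the input widens to nxv16i64.
#include <cassert>
#include <cstdio>
#include <numeric>

int main() {
  unsigned VTNElts = 6;      // minimum element count of the requested type
  unsigned WidenNumElts = 8; // minimum element count of the widened result
  unsigned IdxVal = 6;       // constant extract index

  unsigned GCD = std::gcd(VTNElts, WidenNumElts); // part size: 2 -> nxv2i64
  assert(IdxVal % GCD == 0 && "index must be a multiple of the part size");

  // Real extracts covering the requested elements (indices 6, 8, 10)...
  for (unsigned I = 0; I < VTNElts / GCD; ++I)
    std::printf("nxv%ui64 extract_subvector(nxv16i64, %u)\n", GCD,
                IdxVal + I * GCD);
  // ...then undef parts filling the rest of the widened type.
  for (unsigned I = VTNElts / GCD; I < WidenNumElts / GCD; ++I)
    std::printf("nxv%ui64 undef\n", GCD);
}

The early bail-out in the patch (checking PartVT's type action) matters because a part type like nxv1i8 would itself need widening, and recursing on it could loop forever; in that case the code still falls through to report_fatal_error.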