forked from OSchip/llvm-project
[SVE][CodeGen] Improve codegen for some FP insert_subvector cases
When inserting an unpacked FP subvector into a packed vector, we can simply cast the unpacked value to a packed value, since both types are legal for SVE. We can then use the result as the input to the UZP instruction. This avoids expanding the operation through the stack. Differential Revision: https://reviews.llvm.org/D113270
This commit is contained in:
parent
438437cbb6
commit
8d38c24fb6
|
@ -10912,7 +10912,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
|
|||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
|
||||
if (!isTypeLegal(VT) || !VT.isInteger())
|
||||
if (!isTypeLegal(VT))
|
||||
return SDValue();
|
||||
|
||||
SDValue Vec0 = Op.getOperand(0);
|
||||
|
@ -10922,9 +10922,19 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
|
|||
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
|
||||
return SDValue();
|
||||
|
||||
// Extend elements of smaller vector...
|
||||
EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
|
||||
SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
|
||||
EVT WideVT;
|
||||
SDValue ExtVec;
|
||||
|
||||
if (VT.isFloatingPoint()) {
|
||||
// The InVT type should be legal. We can safely cast the unpacked
|
||||
// subvector from InVT -> VT.
|
||||
WideVT = VT;
|
||||
ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
|
||||
} else {
|
||||
// Extend elements of smaller vector...
|
||||
WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
|
||||
ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
|
||||
}
|
||||
|
||||
if (Idx == 0) {
|
||||
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
|
||||
|
|
|
@ -424,14 +424,7 @@ define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32_2(<vscale x 3 x i32> %sv0, <vs
|
|||
define <vscale x 3 x float> @insert_nxv3f32_nxv2f32(<vscale x 2 x float> %sv0) nounwind {
|
||||
; CHECK-LABEL: insert_nxv3f32_nxv2f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-1
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1w { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
|
||||
; CHECK-NEXT: addvl sp, sp, #1
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
%v0 = call <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float> undef, <vscale x 2 x float> %sv0, i64 0)
|
||||
ret <vscale x 3 x float> %v0
|
||||
|
|
Loading…
Reference in New Issue