[SelectionDAG] Implement PromoteIntRes_INSERT_SUBVECTOR
Inserting into a smaller-than-legal scalable vector would result in an
internal compiler error. For example, inserting a <vscale x 4 x i8> into
a <vscale x 8 x i8> (both illegal vector types for SVE) would cause a
crash.

This crash was happening because there was no code to promote (legalise)
the result of an INSERT_SUBVECTOR node.

This patch implements PromoteIntRes_INSERT_SUBVECTOR, which legalises the
ISD node. This is currently done by going through memory. This is
necessary because of the requirement that the SubVec parameter of the
INSERT_SUBVECTOR node must be smaller than the Vec parameter, which means
that INSERT_SUBVECTOR cannot always have legal result/operand types.

Co-Authored-by: Joe Ellis <joe.ellis@arm.com>

Differential Revision: https://reviews.llvm.org/D102766
commit 2668727929
parent 661577e698
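For reference, below is a minimal sketch of IR that previously crashed in
type legalisation, distilled from the tests added in this commit (the
function name @repro is illustrative; the intrinsic declaration and
attribute line are taken from the new test file):

    target triple = "aarch64-unknown-linux-gnu"

    ; Both vector types are illegal for SVE, so the INSERT_SUBVECTOR result
    ; must be promoted; before this patch, that hit an internal compiler error.
    define <vscale x 8 x i8> @repro(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec) #0 {
      %ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 0)
      ret <vscale x 8 x i8> %ins
    }

    declare <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8>, <vscale x 4 x i8>, i64)

    attributes #0 = { nounwind "target-features"="+sve" }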
@@ -97,6 +97,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
   case ISD::EXTRACT_SUBVECTOR:
                          Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::INSERT_SUBVECTOR:
+                         Res = PromoteIntRes_INSERT_SUBVECTOR(N); break;
   case ISD::VECTOR_REVERSE:
                          Res = PromoteIntRes_VECTOR_REVERSE(N); break;
   case ISD::VECTOR_SHUFFLE:
@@ -4729,6 +4731,50 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
   return DAG.getBuildVector(NOutVT, dl, Ops);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_SUBVECTOR(SDNode *N) {
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+
+  SDLoc dl(N);
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+
+  auto *ConstantIdx = cast<ConstantSDNode>(Idx);
+  unsigned IdxN = ConstantIdx->getZExtValue();
+
+  EVT VecVT = Vec.getValueType();
+  EVT SubVecVT = SubVec.getValueType();
+
+  // To insert SubVec into Vec, store the wider vector to memory, overwrite the
+  // appropriate bits with the narrower vector, and reload.
+  Align SmallestAlign = DAG.getReducedAlign(SubVecVT, /*UseABI=*/false);
+
+  SDValue StackPtr =
+      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
+  auto StackPtrVT = StackPtr->getValueType(0);
+  auto &MF = DAG.getMachineFunction();
+  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+                               SmallestAlign);
+
+  SDValue ScaledIdx = Idx;
+  if (SubVecVT.isScalableVector() && IdxN != 0) {
+    APInt IdxAPInt = cast<ConstantSDNode>(Idx)->getAPIntValue();
+    ScaledIdx = DAG.getVScale(dl, StackPtrVT,
+                              IdxAPInt.sextOrSelf(StackPtrVT.getSizeInBits()));
+  }
+
+  SDValue SubVecPtr =
+      TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, ScaledIdx);
+  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, PtrInfo, SmallestAlign);
+  return DAG.getExtLoad(ISD::LoadExtType::EXTLOAD, dl, NOutVT, Store, StackPtr,
+                        PtrInfo, OutVT, SmallestAlign);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
   SDLoc dl(N);
@@ -298,6 +298,7 @@ private:
   SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
   SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue PromoteIntRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
   SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
   SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
@@ -7837,11 +7837,13 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
          "Converting bits to bytes lost precision");
 
-  assert(SubVecVT.isFixedLengthVector() &&
-         SubVecVT.getVectorElementType() == EltVT &&
-         "Sub-vector must be a fixed vector with matching element type");
-  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
-                                  SubVecVT.getVectorNumElements());
+  // Scalable vectors don't need clamping as these are checked at compile time
+  if (SubVecVT.isFixedLengthVector()) {
+    assert(SubVecVT.getVectorElementType() == EltVT &&
+           "Sub-vector must be a fixed vector with matching element type");
+    Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+                                    SubVecVT.getVectorNumElements());
+  }
 
   EVT IdxVT = Index.getValueType();
@@ -17100,6 +17100,10 @@ void AArch64TargetLowering::ReplaceNodeResults(
   case ISD::EXTRACT_SUBVECTOR:
     ReplaceExtractSubVectorResults(N, Results, DAG);
     return;
+  case ISD::INSERT_SUBVECTOR:
+    // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
+    // to common code for result type legalisation
+    return;
   case ISD::INTRINSIC_WO_CHAIN: {
     EVT VT = N->getValueType(0);
     assert((VT == MVT::i8 || VT == MVT::i16) &&
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; SCALABLE INSERTED INTO SCALABLE TESTS
+
+define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_zero_i8(<vscale x 8 x i8>* %a, <vscale x 4 x i8>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.s }, p1/z, [x1]
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1b { z1.s }, p1, [sp, #2, mul vl]
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
+  %subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
+  %ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 0)
+  ret <vscale x 8 x i8> %ins
+}
+
+define <vscale x 8 x i8> @vec_scalable_subvec_scalable_idx_nonzero_i8(<vscale x 8 x i8>* %a, <vscale x 4 x i8>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p1.s
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ld1b { z1.s }, p1/z, [x1]
+; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1b { z1.s }, p1, [x8, #1, mul vl]
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
+  %subvec = load <vscale x 4 x i8>, <vscale x 4 x i8>* %b
+  %ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8> %vec, <vscale x 4 x i8> %subvec, i64 4)
+  ret <vscale x 8 x i8> %ins
+}
+
+define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_zero_i16(<vscale x 4 x i16>* %a, <vscale x 2 x i16>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.d }, p1/z, [x1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1h { z1.d }, p1, [sp, #2, mul vl]
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  %subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
+  %ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 0)
+  ret <vscale x 4 x i16> %ins
+}
+
+define <vscale x 4 x i16> @vec_scalable_subvec_scalable_idx_nonzero_i16(<vscale x 4 x i16>* %a, <vscale x 2 x i16>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ld1h { z1.d }, p1/z, [x1]
+; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1h { z1.d }, p1, [x8, #1, mul vl]
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  %subvec = load <vscale x 2 x i16>, <vscale x 2 x i16>* %b
+  %ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16> %vec, <vscale x 2 x i16> %subvec, i64 2)
+  ret <vscale x 4 x i16> %ins
+}
+
+; FIXED INSERTED INTO SCALABLE TESTS
+
+define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_zero_i8(<vscale x 8 x i8>* %a, <8 x i8>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    str d1, [x8]
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
+  %subvec = load <8 x i8>, <8 x i8>* %b
+  %ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 0)
+  ret <vscale x 8 x i8> %ins
+}
+
+define <vscale x 8 x i8> @vec_scalable_subvec_fixed_idx_nonzero_i8(<vscale x 8 x i8>* %a, <8 x i8>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cnth x9
+; CHECK-NEXT:    addpl x10, sp, #4
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    sub x9, x9, #8 // =8
+; CHECK-NEXT:    mov w8, #8
+; CHECK-NEXT:    cmp x9, #8 // =8
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    st1b { z0.h }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    str d1, [x10, x8]
+; CHECK-NEXT:    ld1b { z0.h }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a
+  %subvec = load <8 x i8>, <8 x i8>* %b
+  %ins = call <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8> %vec, <8 x i8> %subvec, i64 8)
+  ret <vscale x 8 x i8> %ins
+}
+
+define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_zero_i16(<vscale x 4 x i16>* %a, <4 x i16>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    str d1, [x8]
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  %subvec = load <4 x i16>, <4 x i16>* %b
+  %ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 0)
+  ret <vscale x 4 x i16> %ins
+}
+
+define <vscale x 4 x i16> @vec_scalable_subvec_fixed_idx_nonzero_i16(<vscale x 4 x i16>* %a, <4 x i16>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    sub x9, x9, #4 // =4
+; CHECK-NEXT:    mov w8, #4
+; CHECK-NEXT:    cmp x9, #4 // =4
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    addpl x9, sp, #4
+; CHECK-NEXT:    lsl x8, x8, #1
+; CHECK-NEXT:    st1h { z0.s }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    str d1, [x9, x8]
+; CHECK-NEXT:    ld1h { z0.s }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a
+  %subvec = load <4 x i16>, <4 x i16>* %b
+  %ins = call <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16> %vec, <4 x i16> %subvec, i64 4)
+  ret <vscale x 4 x i16> %ins
+}
+
+define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_zero_i32(<vscale x 2 x i32>* %a, <2 x i32>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    addpl x8, sp, #4
+; CHECK-NEXT:    str d1, [x8]
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
+  %subvec = load <2 x i32>, <2 x i32>* %b
+  %ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 0)
+  ret <vscale x 2 x i32> %ins
+}
+
+define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_i32(<vscale x 2 x i32>* %a, <2 x i32>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ldr d1, [x1]
+; CHECK-NEXT:    sub x9, x9, #2 // =2
+; CHECK-NEXT:    mov w8, #2
+; CHECK-NEXT:    cmp x9, #2 // =2
+; CHECK-NEXT:    csel x8, x9, x8, lo
+; CHECK-NEXT:    addpl x9, sp, #4
+; CHECK-NEXT:    lsl x8, x8, #2
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    str d1, [x9, x8]
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp, #1, mul vl]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
+  %subvec = load <2 x i32>, <2 x i32>* %b
+  %ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32> %vec, <2 x i32> %subvec, i64 2)
+  ret <vscale x 2 x i32> %ins
+}
+
+define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(<vscale x 2 x i32>* %a, <8 x i32>* %b) #0 {
+; CHECK-LABEL: vec_scalable_subvec_fixed_idx_nonzero_large_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    subs x8, x8, #8 // =8
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT:    ldp q1, q2, [x1]
+; CHECK-NEXT:    csel x8, xzr, x8, lo
+; CHECK-NEXT:    mov w9, #8
+; CHECK-NEXT:    cmp x8, #8 // =8
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    mov x9, sp
+; CHECK-NEXT:    add x8, x9, x8, lsl #2
+; CHECK-NEXT:    st1w { z0.d }, p0, [sp]
+; CHECK-NEXT:    stp q1, q2, [x8]
+; CHECK-NEXT:    ld1w { z0.d }, p0/z, [sp]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a
+  %subvec = load <8 x i32>, <8 x i32>* %b
+  %ins = call <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> %vec, <8 x i32> %subvec, i64 8)
+  ret <vscale x 2 x i32> %ins
+}
+
+declare <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.nxv4i8(<vscale x 8 x i8>, <vscale x 4 x i8>, i64)
+declare <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.nxv2i16(<vscale x 4 x i16>, <vscale x 2 x i16>, i64)
+
+declare <vscale x 8 x i8> @llvm.experimental.vector.insert.nxv8i8.v8i8(<vscale x 8 x i8>, <8 x i8>, i64)
+declare <vscale x 4 x i16> @llvm.experimental.vector.insert.nxv4i16.v4i16(<vscale x 4 x i16>, <4 x i16>, i64)
+declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v2i32(<vscale x 2 x i32>, <2 x i32>, i64)
+
+declare <vscale x 2 x i32> @llvm.experimental.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32>, <8 x i32>, i64)
+
+attributes #0 = { nounwind "target-features"="+sve" }