[SelectionDAG] Add llvm.vector.{extract,insert} intrinsics

This commit adds two new intrinsics.

- llvm.experimental.vector.insert: used to insert a vector into another
  vector starting at a given index.

- llvm.experimental.vector.extract: used to extract a subvector from a
  larger vector starting from a given index.

The codegen work for these intrinsics has already been completed; this
commit is simply exposing the existing ISD nodes to LLVM IR.
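
As an illustrative sketch (the types are arbitrary, not taken from the diff
below), the two intrinsics compose as follows:

    %vec = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> %sub, i64 0)
    %sub2 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)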

Reviewed By: cameron.mcinally

Differential Revision: https://reviews.llvm.org/D91362
Author: Joe Ellis
Date:   2020-12-09 11:05:51 +00:00
parent 4167a0259e
commit 80c33de2d3
11 changed files with 847 additions and 0 deletions

@@ -16095,6 +16095,81 @@ Arguments:
""""""""""
The argument to this intrinsic must be a vector of floating-point values.

'``llvm.experimental.vector.insert``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic. You can use ``llvm.experimental.vector.insert``
to insert a fixed-width vector into a scalable vector, but not the other way
around.

::

      declare <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> %vec, <4 x float> %subvec, i64 %idx)
      declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> %vec, <2 x double> %subvec, i64 %idx)

Overview:
"""""""""
The '``llvm.experimental.vector.insert.*``' intrinsics insert a vector into
another vector starting at a given index. The return type matches the type of
the vector being inserted into. Conceptually, this can be used to build a
scalable vector out of non-scalable vectors.

Arguments:
""""""""""
``vec`` is the vector into which ``subvec`` will be inserted.

``subvec`` is the vector to insert.

``idx`` represents the starting element number at which ``subvec`` will be
inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum
vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by
the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at
``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` +
num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition
cannot be determined statically but is false at runtime, the result vector is
undefined.
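
A hedged usage sketch (the types and the ``undef`` seed are illustrative, not
taken from this patch): building a scalable vector out of two fixed-width
halves. The insert at index 4 is only well-defined when the runtime vector
length covers elements 4 through 7, i.e. when ``vscale >= 2``.

::

      %lo = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %a, i64 0)
      %hi = call <vscale x 4 x float> @llvm.experimental.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> %lo, <4 x float> %b, i64 4)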

'``llvm.experimental.vector.extract``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic. You can use
``llvm.experimental.vector.extract`` to extract a fixed-width vector from a
scalable vector, but not the other way around.

::

      declare <4 x float> @llvm.experimental.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %vec, i64 %idx)
      declare <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %vec, i64 %idx)

Overview:
"""""""""
The '``llvm.experimental.vector.extract.*``' intrinsics extract a subvector
from within another vector starting at a given index. The return type must be
explicitly specified. Conceptually, this can be used to decompose a scalable
vector into non-scalable parts.

Arguments:
""""""""""
``vec`` is the vector from which the subvector is extracted.

``idx`` specifies the starting element number within ``vec`` from which the
subvector is extracted. ``idx`` must be a constant multiple of the
known-minimum vector length of the result type. If the result type is a
scalable vector, ``idx`` is first scaled by the result type's runtime scaling
factor. Elements ``idx`` through (``idx`` + num_elements(result_type) - 1)
must be valid vector indices. If this condition cannot be determined
statically but is false at runtime, the result vector is undefined. ``idx``
must be a constant of vector index type (for most targets this is a
pointer-sized integer type).
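
A hedged sketch (illustrative types, not taken from this patch): decomposing
a scalable vector into its first two fixed-width slices. The extraction at
index 2 is only well-defined when ``vscale >= 2``.

::

      %lo = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %vec, i64 0)
      %hi = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %vec, i64 2)
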
Matrix Intrinsics
-----------------

@@ -1614,6 +1614,15 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
//===---------- Intrinsics to query properties of scalable vectors --------===//
def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;

//===---------- Intrinsics to perform subvector insertion/extraction ------===//
def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                                           [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty],
                                                           [IntrNoMem, ImmArg<ArgIndex<2>>]>;

def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                                            [llvm_anyvector_ty, llvm_i64_ty],
                                                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
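
Since both the result and the (sub)vector operand are overloaded
(llvm_anyvector_ty), the intrinsic name is mangled with both types, and the
index operand must be an immediate due to ImmArg. A sketch of the declarations
these definitions give rise to (types arbitrary):

    declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64 immarg)
    declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64 immarg)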

@@ -6932,6 +6932,27 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              SetCC));
    return;
  }
  case Intrinsic::experimental_vector_insert: {
    auto DL = getCurSDLoc();
    SDValue Vec = getValue(I.getOperand(0));
    SDValue SubVec = getValue(I.getOperand(1));
    SDValue Index = getValue(I.getOperand(2));
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
                             Index));
    return;
  }
  case Intrinsic::experimental_vector_extract: {
    auto DL = getCurSDLoc();
    SDValue Vec = getValue(I.getOperand(0));
    SDValue Index = getValue(I.getOperand(1));
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
    return;
  }
  }
}
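
The mapping is deliberately 1:1: each call becomes a single ISD node with its
operands passed straight through, so the pre-existing subvector lowering does
the rest. As a hedged sketch (names arbitrary), an IR call such as

    %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %sub, i64 0)

builds the node INSERT_SUBVECTOR(%vec, %sub, Constant:i64<0>).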

@@ -5138,6 +5138,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
    break;
  }
  case Intrinsic::experimental_vector_insert: {
    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());

    Assert(VecTy->getElementType() == SubVecTy->getElementType(),
           "experimental_vector_insert parameters must have the same element "
           "type.",
           &Call);
    break;
  }
  case Intrinsic::experimental_vector_extract: {
    VectorType *ResultTy = cast<VectorType>(Call.getType());
    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());

    Assert(ResultTy->getElementType() == VecTy->getElementType(),
           "experimental_vector_extract result must have the same element "
           "type as the input vector.",
           &Call);
    break;
  }
  };
}

@@ -1652,6 +1652,102 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
    }
    break;
  }
  case Intrinsic::experimental_vector_insert: {
    Value *Vec = II->getArgOperand(0);
    Value *SubVec = II->getArgOperand(1);
    Value *Idx = II->getArgOperand(2);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // The result of this call is undefined if IdxN is not a constant
      // multiple of the SubVec's minimum vector length OR the insertion
      // overruns Vec.
      if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
        return eraseInstFromFunction(CI);
      }

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts) {
        replaceInstUsesWith(CI, SubVec);
        return eraseInstFromFunction(CI);
      }

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(UndefMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(
          SubVec, llvm::UndefValue::get(SubVecTy), WidenMask);

      // Blend the widened SubVec into Vec: take the first IdxN elements from
      // Vec, the next SubVecNumElts elements from the widened SubVec (whose
      // elements begin at index DstNumElts in the shuffle), and the tail from
      // Vec.
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
      replaceInstUsesWith(CI, Shuffle);
      return eraseInstFromFunction(CI);
    }
    break;
  }
  case Intrinsic::experimental_vector_extract: {
    Value *Vec = II->getArgOperand(0);
    Value *Idx = II->getArgOperand(1);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());

    // Only canonicalize if the destination vector and Vec are fixed
    // vectors.
    if (DstTy && VecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // The result of this call is undefined if IdxN is not a constant
      // multiple of the result type's minimum vector length OR the
      // extraction overruns Vec.
      if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
        return eraseInstFromFunction(CI);
      }

      // Extracting the entirety of Vec is a nop.
      if (VecNumElts == DstNumElts) {
        replaceInstUsesWith(CI, Vec);
        return eraseInstFromFunction(CI);
      }

      // Otherwise, select the DstNumElts elements starting at IdxN.
      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != DstNumElts; ++i)
        Mask.push_back(IdxN + i);

      Value *Shuffle =
          Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask);
      replaceInstUsesWith(CI, Shuffle);
      return eraseInstFromFunction(CI);
    }
    break;
  }
  default: {
    // Handle target specific intrinsics
    Optional<Instruction *> V = targetInstCombineIntrinsic(*II);

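To make the shuffle-based canonicalization above concrete, a hedged
before/after sketch for a fixed-width insert at index 2 (values arbitrary;
the masks match those built by the code, and agree with the tests below):

    ; before
    %r = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %sub, i64 2)
    ; after: widen %sub to <8 x i32>, then blend its elements into lanes 2-3
    %wide = shufflevector <2 x i32> %sub, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    %r2 = shufflevector <8 x i32> %vec, <8 x i32> %wide, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
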
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; Should codegen to a nop, since idx is zero.
define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
; CHECK-LABEL: extract_v2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
ret <2 x i64> %retval
}
; Goes through memory currently; idx != 0.
define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
ret <2 x i64> %retval
}
; Should codegen to a nop, since idx is zero.
define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
; CHECK-LABEL: extract_v4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
ret <4 x i32> %retval
}
; Goes through memory currently; idx != 0.
define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
ret <4 x i32> %retval
}
; Should codegen to a nop, since idx is zero.
define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
; CHECK-LABEL: extract_v8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
ret <8 x i16> %retval
}
; Goes through memory currently; idx != 0.
define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
ret <8 x i16> %retval
}
; Should codegen to a nop, since idx is zero.
define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
; CHECK-LABEL: extract_v16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
%retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
ret <16 x i8> %retval
}
; Goes through memory currently; idx != 0.
define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr q0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
ret <16 x i8> %retval
}
declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)
declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16>, i64)
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8>, i64)

@@ -0,0 +1,184 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #0 // =0
; CHECK-NEXT: csel x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
ret <vscale x 2 x i64> %retval
}
define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntd x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
ret <vscale x 2 x i64> %retval
}
define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #0 // =0
; CHECK-NEXT: csel x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
ret <vscale x 4 x i32> %retval
}
define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cntw x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
ret <vscale x 4 x i32> %retval
}
define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #0 // =0
; CHECK-NEXT: csel x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
ret <vscale x 8 x i16> %retval
}
define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: cnth x8
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
ret <vscale x 8 x i16> %retval
}
define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #0 // =0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: csel x8, x8, xzr, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
ret <vscale x 16 x i8> %retval
}
define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: rdvl x8, #1
; CHECK-NEXT: sub x8, x8, #1 // =1
; CHECK-NEXT: cmp x8, #1 // =1
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: csinc x8, x8, xzr, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
ret <vscale x 16 x i8> %retval
}
declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)

@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; llvm.experimental.vector.extract canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the EXTRACT_SUBVECTOR ISD node.
declare <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 %idx)
; ============================================================================ ;
; Trivial cases
; ============================================================================ ;
; Extracting the entirety of a vector is a nop.
define <8 x i32> @trivial_nop(<8 x i32> %vec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT: ret <8 x i32> [[VEC:%.*]]
;
%1 = call <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 0)
ret <8 x i32> %1
}
; ============================================================================ ;
; Valid canonicalizations
; ============================================================================ ;
define <2 x i32> @valid_extraction_a(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_a(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 0)
ret <2 x i32> %1
}
define <2 x i32> @valid_extraction_b(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_b(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 2)
ret <2 x i32> %1
}
define <2 x i32> @valid_extraction_c(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_c(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 4, i32 5>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 4)
ret <2 x i32> %1
}
define <2 x i32> @valid_extraction_d(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_d(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 6, i32 7>
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
;
%1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v8i32(<8 x i32> %vec, i64 6)
ret <2 x i32> %1
}
define <4 x i32> @valid_extraction_e(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_e(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0)
ret <4 x i32> %1
}
define <4 x i32> @valid_extraction_f(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_f(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
ret <4 x i32> %1
}
define <3 x i32> @valid_extraction_g(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT: ret <3 x i32> [[TMP1]]
;
%1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 0)
ret <3 x i32> %1
}
define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_h(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <3 x i32> [[TMP1]]
;
%1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 3)
ret <3 x i32> %1
}
; ============================================================================ ;
; Invalid canonicalizations
; ============================================================================ ;
; Idx must be a constant multiple of the destination vector's length,
; otherwise the result is undefined.
define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
; CHECK-LABEL: @idx_not_constant_multiple(
; CHECK-NEXT: ret <4 x i32> undef
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
ret <4 x i32> %1
}
; If the extraction overruns the vector, the result is undefined.
define <10 x i32> @extract_overrun(<8 x i32> %vec) {
; CHECK-LABEL: @extract_overrun(
; CHECK-NEXT: ret <10 x i32> undef
;
%1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
ret <10 x i32> %1
}
; ============================================================================ ;
; Scalable cases
; ============================================================================ ;
; Scalable extractions should not be canonicalized. This will be lowered to the
; EXTRACT_SUBVECTOR ISD node later.
define <4 x i32> @scalable_extract(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @scalable_extract(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], i64 0)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
ret <4 x i32> %1
}

@@ -0,0 +1,147 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; llvm.experimental.vector.insert canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the INSERT_SUBVECTOR ISD node.
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 %idx)
; ============================================================================ ;
; Trivial cases
; ============================================================================ ;
; An insert that entirely overwrites an <n x ty> with another <n x ty> is a
; nop.
define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT: ret <8 x i32> [[SUBVEC:%.*]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
ret <8 x i32> %1
}
; ============================================================================ ;
; Valid canonicalizations
; ============================================================================ ;
define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_a(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_b(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_c(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_d(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_e(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_f(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_g(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
ret <8 x i32> %1
}
define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_h(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 6, i32 7>
; CHECK-NEXT: ret <8 x i32> [[TMP2]]
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3)
ret <8 x i32> %1
}
; ============================================================================ ;
; Invalid canonicalizations
; ============================================================================ ;
; Idx must be a constant multiple of the subvector's minimum vector
; length, otherwise the result is undefined.
define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @idx_not_constant_multiple(
; CHECK-NEXT: ret <8 x i32> undef
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
ret <8 x i32> %1
}
; If the insertion overruns the vector, the result is undefined.
define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
; CHECK-LABEL: @insert_overrun(
; CHECK-NEXT: ret <8 x i32> undef
;
%1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
ret <8 x i32> %1
}
; ============================================================================ ;
; Scalable cases
; ============================================================================ ;
; Scalable insertions should not be canonicalized. This will be lowered to the
; INSERT_SUBVECTOR ISD node later.
define <vscale x 4 x i32> @scalable_insert(<vscale x 4 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @scalable_insert(
; CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
;
%1 = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
ret <vscale x 4 x i32> %1
}

@@ -0,0 +1,9 @@
; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
; CHECK: experimental_vector_extract result must have the same element type as the input vector.
define <16 x i16> @invalid_mismatched_element_types(<vscale x 16 x i8> %vec) nounwind {
%retval = call <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
ret <16 x i16> %retval
}
declare <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8(<vscale x 16 x i8>, i64)

@@ -0,0 +1,9 @@
; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s
; CHECK: experimental_vector_insert parameters must have the same element type.
define <vscale x 16 x i8> @invalid_mismatched_element_types(<vscale x 16 x i8> %vec, <4 x i16> %subvec) nounwind {
%retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v4i16(<vscale x 16 x i8> %vec, <4 x i16> %subvec, i64 0)
ret <vscale x 16 x i8> %retval
}
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v4i16(<vscale x 16 x i8>, <4 x i16>, i64)