[SelectionDAG] Add llvm.vector.{extract,insert} intrinsics
This commit adds two new intrinsics.

- llvm.experimental.vector.insert: used to insert a vector into another
  vector starting at a given index.

- llvm.experimental.vector.extract: used to extract a subvector from a
  larger vector starting from a given index.

The codegen work for these intrinsics has already been completed; this
commit is simply exposing the existing ISD nodes to LLVM IR.

Reviewed By: cameron.mcinally

Differential Revision: https://reviews.llvm.org/D91362
commit 80c33de2d3 (parent 4167a0259e)
@@ -16095,6 +16095,81 @@ Arguments:
""""""""""

The argument to this intrinsic must be a vector of floating-point values.


'``llvm.experimental.vector.insert``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic. You can use ``llvm.experimental.vector.insert``
to insert a fixed-width vector into a scalable vector, but not the other way
around.

::

      declare <vscale x 4 x float> @llvm.experimental.vector.insert.v4f32(<vscale x 4 x float> %vec, <4 x float> %subvec, i64 %idx)
      declare <vscale x 2 x double> @llvm.experimental.vector.insert.v2f64(<vscale x 2 x double> %vec, <2 x double> %subvec, i64 %idx)

Overview:
"""""""""

The '``llvm.experimental.vector.insert.*``' intrinsics insert a vector into
another vector starting from a given index. The return type matches the type
of the vector we insert into. Conceptually, this can be used to build a
scalable vector out of non-scalable vectors.

Arguments:
""""""""""

``vec`` is the vector into which ``subvec`` will be inserted.
``subvec`` is the vector that will be inserted.

``idx`` represents the starting element number at which ``subvec`` will be
inserted. ``idx`` must be a constant multiple of ``subvec``'s known minimum
vector length. If ``subvec`` is a scalable vector, ``idx`` is first scaled by
the runtime scaling factor of ``subvec``. The elements of ``vec`` starting at
``idx`` are overwritten with ``subvec``. Elements ``idx`` through (``idx`` +
num_elements(``subvec``) - 1) must be valid ``vec`` indices. If this condition
cannot be determined statically but is false at runtime, then the result vector
is undefined.
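
For example (a hypothetical illustration, not part of the committed text), the
following call overwrites elements 4 through 7 of a ``<vscale x 4 x i32>``
vector. It is well defined whenever ``vscale >= 2``; with ``vscale == 1`` the
insertion overruns ``%vec`` and the result is undefined:

::

      declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)

      define <vscale x 4 x i32> @insert_at_4(<vscale x 4 x i32> %vec, <4 x i32> %subvec) {
        ; idx = 4 is a constant multiple of subvec's known minimum length (4).
        %r = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 4)
        ret <vscale x 4 x i32> %r
      }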

'``llvm.experimental.vector.extract``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""
This is an overloaded intrinsic. You can use
``llvm.experimental.vector.extract`` to extract a fixed-width vector from a
scalable vector, but not the other way around.

::

      declare <4 x float> @llvm.experimental.vector.extract.v4f32(<vscale x 4 x float> %vec, i64 %idx)
      declare <2 x double> @llvm.experimental.vector.extract.v2f64(<vscale x 2 x double> %vec, i64 %idx)

Overview:
"""""""""

The '``llvm.experimental.vector.extract.*``' intrinsics extract a vector from
within another vector starting from a given index. The return type must be
explicitly specified. Conceptually, this can be used to decompose a scalable
vector into non-scalable parts.

Arguments:
""""""""""

``vec`` is the vector from which a subvector will be extracted.

``idx`` specifies the starting element number within ``vec`` from which a
subvector is extracted. ``idx`` must be a constant multiple of the known
minimum vector length of the result type. If the result type is a scalable
vector, ``idx`` is first scaled by the result type's runtime scaling factor.
Elements ``idx`` through (``idx`` + num_elements(result_type) - 1) must be
valid vector indices. If this condition cannot be determined statically but is
false at runtime, then the result vector is undefined. The ``idx`` parameter
must be a vector index constant type (for most targets this will be an integer
pointer type).
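
For instance (again a hypothetical illustration), the following call extracts
elements 2 and 3 of a ``<vscale x 2 x i64>`` vector and is well defined
whenever ``vscale >= 2``:

::

      declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)

      define <2 x i64> @extract_at_2(<vscale x 2 x i64> %vec) {
        ; idx = 2 is a constant multiple of the result's known minimum length (2).
        %r = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 2)
        ret <2 x i64> %r
      }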

Matrix Intrinsics
-----------------
@@ -1614,6 +1614,15 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],

//===---------- Intrinsics to query properties of scalable vectors --------===//
def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;

//===---------- Intrinsics to perform subvector insertion/extraction ------===//
def int_experimental_vector_insert : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                                           [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i64_ty],
                                                           [IntrNoMem, ImmArg<ArgIndex<2>>]>;

def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                                            [llvm_anyvector_ty, llvm_i64_ty],
                                                            [IntrNoMem, ImmArg<ArgIndex<1>>]>;

//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
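Both definitions are overloaded (``llvm_anyvector_ty``), so concrete uses carry
type-mangled names; the tests added below spell them with both the result and
argument vector types, for example:

    declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
    declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)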
@@ -6932,6 +6932,27 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              SetCC));
    return;
  }
  case Intrinsic::experimental_vector_insert: {
    auto DL = getCurSDLoc();

    SDValue Vec = getValue(I.getOperand(0));
    SDValue SubVec = getValue(I.getOperand(1));
    SDValue Index = getValue(I.getOperand(2));
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
    setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
                             Index));
    return;
  }
  case Intrinsic::experimental_vector_extract: {
    auto DL = getCurSDLoc();

    SDValue Vec = getValue(I.getOperand(0));
    SDValue Index = getValue(I.getOperand(1));
    EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());

    setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
    return;
  }
  }
}
@@ -5138,6 +5138,26 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {

    break;
  }
  case Intrinsic::experimental_vector_insert: {
    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
    VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());

    Assert(VecTy->getElementType() == SubVecTy->getElementType(),
           "experimental_vector_insert parameters must have the same element "
           "type.",
           &Call);
    break;
  }
  case Intrinsic::experimental_vector_extract: {
    VectorType *ResultTy = cast<VectorType>(Call.getType());
    VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());

    Assert(ResultTy->getElementType() == VecTy->getElementType(),
           "experimental_vector_extract result must have the same element "
           "type as the input vector.",
           &Call);
    break;
  }
  };
}
@@ -1652,6 +1652,102 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
    }
    break;
  }
  case Intrinsic::experimental_vector_insert: {
    Value *Vec = II->getArgOperand(0);
    Value *SubVec = II->getArgOperand(1);
    Value *Idx = II->getArgOperand(2);
    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
    auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());

    // Only canonicalize if the destination vector, Vec, and SubVec are all
    // fixed vectors.
    if (DstTy && VecTy && SubVecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned SubVecNumElts = SubVecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // The result of this call is undefined if IdxN is not a constant
      // multiple of the SubVec's minimum vector length OR the insertion
      // overruns Vec.
      if (IdxN % SubVecNumElts != 0 || IdxN + SubVecNumElts > VecNumElts) {
        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
        return eraseInstFromFunction(CI);
      }

      // An insert that entirely overwrites Vec with SubVec is a nop.
      if (VecNumElts == SubVecNumElts) {
        replaceInstUsesWith(CI, SubVec);
        return eraseInstFromFunction(CI);
      }

      // Widen SubVec into a vector of the same width as Vec, since
      // shufflevector requires the two input vectors to be the same width.
      // Elements beyond the bounds of SubVec within the widened vector are
      // undefined.
      SmallVector<int, 8> WidenMask;
      unsigned i;
      for (i = 0; i != SubVecNumElts; ++i)
        WidenMask.push_back(i);
      for (; i != VecNumElts; ++i)
        WidenMask.push_back(UndefMaskElem);

      Value *WidenShuffle = Builder.CreateShuffleVector(
          SubVec, llvm::UndefValue::get(SubVecTy), WidenMask);

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != IdxN; ++i)
        Mask.push_back(i);
      for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
        Mask.push_back(i);
      for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
        Mask.push_back(i);

      Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
      replaceInstUsesWith(CI, Shuffle);
      return eraseInstFromFunction(CI);
    }
    break;
  }
  case Intrinsic::experimental_vector_extract: {
    Value *Vec = II->getArgOperand(0);
    Value *Idx = II->getArgOperand(1);

    auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
    auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());

    // Only canonicalize if the destination vector and Vec are fixed
    // vectors.
    if (DstTy && VecTy) {
      unsigned DstNumElts = DstTy->getNumElements();
      unsigned VecNumElts = VecTy->getNumElements();
      unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();

      // The result of this call is undefined if IdxN is not a constant
      // multiple of the result type's minimum vector length OR the extraction
      // overruns Vec.
      if (IdxN % DstNumElts != 0 || IdxN + DstNumElts > VecNumElts) {
        replaceInstUsesWith(CI, UndefValue::get(CI.getType()));
        return eraseInstFromFunction(CI);
      }

      // Extracting the entirety of Vec is a nop.
      if (VecNumElts == DstNumElts) {
        replaceInstUsesWith(CI, Vec);
        return eraseInstFromFunction(CI);
      }

      SmallVector<int, 8> Mask;
      for (unsigned i = 0; i != DstNumElts; ++i)
        Mask.push_back(IdxN + i);

      Value *Shuffle =
          Builder.CreateShuffleVector(Vec, UndefValue::get(VecTy), Mask);
      replaceInstUsesWith(CI, Shuffle);
      return eraseInstFromFunction(CI);
    }
    break;
  }
  default: {
    // Handle target specific intrinsics
    Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
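In the fixed-width case, the canonicalization above rewrites an insert as a
widening shuffle followed by a blending shuffle. Sketched in LLVM IR for an
insert of <2 x i32> into <8 x i32> at index 2 (the same pattern the
InstCombine tests below check):

    ; before
    %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)

    ; after: widen %subvec to 8 lanes, then blend lanes 2..3 from it
    %widen = shufflevector <2 x i32> %subvec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
    %1 = shufflevector <8 x i32> %vec, <8 x i32> %widen, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>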
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; Should codegen to a nop, since idx is zero.
define <2 x i64> @extract_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind {
; CHECK-LABEL: extract_v2i64_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 0)
  ret <2 x i64> %retval
}

; Goes through memory currently; idx != 0.
define <2 x i64> @extract_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec) nounwind {
; CHECK-LABEL: extract_v2i64_nxv2i64_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    ldr q0, [x9, x8]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> %vec, i64 1)
  ret <2 x i64> %retval
}

; Should codegen to a nop, since idx is zero.
define <4 x i32> @extract_v4i32_nxv4i32(<vscale x 4 x i32> %vec) nounwind {
; CHECK-LABEL: extract_v4i32_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
  ret <4 x i32> %retval
}

; Goes through memory currently; idx != 0.
define <4 x i32> @extract_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec) nounwind {
; CHECK-LABEL: extract_v4i32_nxv4i32_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    ldr q0, [x9, x8]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 1)
  ret <4 x i32> %retval
}

; Should codegen to a nop, since idx is zero.
define <8 x i16> @extract_v8i16_nxv8i16(<vscale x 8 x i16> %vec) nounwind {
; CHECK-LABEL: extract_v8i16_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 0)
  ret <8 x i16> %retval
}

; Goes through memory currently; idx != 0.
define <8 x i16> @extract_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec) nounwind {
; CHECK-LABEL: extract_v8i16_nxv8i16_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    ldr q0, [x9, x8]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> %vec, i64 1)
  ret <8 x i16> %retval
}

; Should codegen to a nop, since idx is zero.
define <16 x i8> @extract_v16i8_nxv16i8(<vscale x 16 x i8> %vec) nounwind {
; CHECK-LABEL: extract_v16i8_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
  ret <16 x i8> %retval
}

; Goes through memory currently; idx != 0.
define <16 x i8> @extract_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec) nounwind {
; CHECK-LABEL: extract_v16i8_nxv16i8_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    ldr q0, [x9, x8]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %vec, i64 1)
  ret <16 x i8> %retval
}

declare <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64>, i64)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32>, i64)
declare <8 x i16> @llvm.experimental.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16>, i64)
declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8>, i64)
@@ -0,0 +1,184 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

define <vscale x 2 x i64> @insert_v2i64_nxv2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 0)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 2 x i64> @insert_v2i64_nxv2i64_idx1(<vscale x 2 x i64> %vec, <2 x i64> %subvec) nounwind {
; CHECK-LABEL: insert_v2i64_nxv2i64_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    lsl x8, x8, #3
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> %vec, <2 x i64> %subvec, i64 1)
  ret <vscale x 2 x i64> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 4 x i32> @insert_v4i32_nxv4i32_idx1(<vscale x 4 x i32> %vec, <4 x i32> %subvec) nounwind {
; CHECK-LABEL: insert_v4i32_nxv4i32_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    lsl x8, x8, #2
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 1)
  ret <vscale x 4 x i32> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 0)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 8 x i16> @insert_v8i16_nxv8i16_idx1(<vscale x 8 x i16> %vec, <8 x i16> %subvec) nounwind {
; CHECK-LABEL: insert_v8i16_nxv8i16_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    lsl x8, x8, #1
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> %vec, <8 x i16> %subvec, i64 1)
  ret <vscale x 8 x i16> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #0 // =0
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csel x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 0)
  ret <vscale x 16 x i8> %retval
}

define <vscale x 16 x i8> @insert_v16i8_nxv16i8_idx1(<vscale x 16 x i8> %vec, <16 x i8> %subvec) nounwind {
; CHECK-LABEL: insert_v16i8_nxv16i8_idx1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    sub x8, x8, #1 // =1
; CHECK-NEXT:    cmp x8, #1 // =1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    csinc x8, x8, xzr, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p0, [sp]
; CHECK-NEXT:    str q1, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> %vec, <16 x i8> %subvec, i64 1)
  ret <vscale x 16 x i8> %retval
}

declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
@@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; llvm.experimental.vector.extract canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the EXTRACT_SUBVECTOR ISD node.

declare <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 %idx)
declare <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 %idx)
declare <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 %idx)

; ============================================================================ ;
; Trivial cases
; ============================================================================ ;

; Extracting the entirety of a vector is a nop.
define <8 x i32> @trivial_nop(<8 x i32> %vec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT:    ret <8 x i32> [[VEC:%.*]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.extract.v8i32.v8i32(<8 x i32> %vec, i64 0)
  ret <8 x i32> %1
}

; ============================================================================ ;
; Valid canonicalizations
; ============================================================================ ;

define <2 x i32> @valid_extraction_a(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_a(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 0, i32 1>
; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
;
  %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 0)
  ret <2 x i32> %1
}

define <2 x i32> @valid_extraction_b(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_b(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
;
  %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 2)
  ret <2 x i32> %1
}

define <2 x i32> @valid_extraction_c(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_c(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 4, i32 5>
; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
;
  %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 4)
  ret <2 x i32> %1
}

define <2 x i32> @valid_extraction_d(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_d(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <2 x i32> <i32 6, i32 7>
; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
;
  %1 = call <2 x i32> @llvm.experimental.vector.extract.v2i32.v4i32(<8 x i32> %vec, i64 6)
  ret <2 x i32> %1
}

define <4 x i32> @valid_extraction_e(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_e(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 0)
  ret <4 x i32> %1
}

define <4 x i32> @valid_extraction_f(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_f(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 4)
  ret <4 x i32> %1
}

define <3 x i32> @valid_extraction_g(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_g(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
; CHECK-NEXT:    ret <3 x i32> [[TMP1]]
;
  %1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 0)
  ret <3 x i32> %1
}

define <3 x i32> @valid_extraction_h(<8 x i32> %vec) {
; CHECK-LABEL: @valid_extraction_h(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> undef, <3 x i32> <i32 3, i32 4, i32 5>
; CHECK-NEXT:    ret <3 x i32> [[TMP1]]
;
  %1 = call <3 x i32> @llvm.experimental.vector.extract.v3i32.v8i32(<8 x i32> %vec, i64 3)
  ret <3 x i32> %1
}

; ============================================================================ ;
; Invalid canonicalizations
; ============================================================================ ;

; Idx must be a constant multiple of the destination vector's length,
; otherwise the result is undefined.
define <4 x i32> @idx_not_constant_multiple(<8 x i32> %vec) {
; CHECK-LABEL: @idx_not_constant_multiple(
; CHECK-NEXT:    ret <4 x i32> undef
;
  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.v8i32(<8 x i32> %vec, i64 1)
  ret <4 x i32> %1
}

; If the extraction overruns the vector, the result is undefined.
define <10 x i32> @extract_overrun(<8 x i32> %vec) {
; CHECK-LABEL: @extract_overrun(
; CHECK-NEXT:    ret <10 x i32> undef
;
  %1 = call <10 x i32> @llvm.experimental.vector.extract.v10i32.v8i32(<8 x i32> %vec, i64 0)
  ret <10 x i32> %1
}

; ============================================================================ ;
; Scalable cases
; ============================================================================ ;

; Scalable extractions should not be canonicalized. This will be lowered to the
; EXTRACT_SUBVECTOR ISD node later.
define <4 x i32> @scalable_extract(<vscale x 4 x i32> %vec) {
; CHECK-LABEL: @scalable_extract(
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], i64 0)
; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
;
  %1 = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0)
  ret <4 x i32> %1
}
@@ -0,0 +1,147 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; llvm.experimental.vector.insert canonicalizes to shufflevector in the fixed case. In the
; scalable case, we lower to the INSERT_SUBVECTOR ISD node.

declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 %idx)
declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 %idx)
declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 %idx)

; ============================================================================ ;
; Trivial cases
; ============================================================================ ;

; An insert that entirely overwrites an <n x ty> with another <n x ty> is a
; nop.
define <8 x i32> @trivial_nop(<8 x i32> %vec, <8 x i32> %subvec) {
; CHECK-LABEL: @trivial_nop(
; CHECK-NEXT:    ret <8 x i32> [[SUBVEC:%.*]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 0)
  ret <8 x i32> %1
}

; ============================================================================ ;
; Valid canonicalizations
; ============================================================================ ;

define <8 x i32> @valid_insertion_a(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_a(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 0)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_b(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_b(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 2)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_c(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_c(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 4)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_d(<8 x i32> %vec, <2 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_d(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[SUBVEC:%.*]], <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v2i32(<8 x i32> %vec, <2 x i32> %subvec, i64 6)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_e(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_e(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_f(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_f(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[SUBVEC:%.*]], <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 4)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_g(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_g(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[VEC:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 0)
  ret <8 x i32> %1
}

define <8 x i32> @valid_insertion_h(<8 x i32> %vec, <3 x i32> %subvec) {
; CHECK-LABEL: @valid_insertion_h(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[SUBVEC:%.*]], <3 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[VEC:%.*]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 10, i32 6, i32 7>
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v3i32(<8 x i32> %vec, <3 x i32> %subvec, i64 3)
  ret <8 x i32> %1
}

; ============================================================================ ;
; Invalid canonicalizations
; ============================================================================ ;

; Idx must be a constant multiple of the subvector's minimum vector
; length, otherwise the result is undefined.
define <8 x i32> @idx_not_constant_multiple(<8 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @idx_not_constant_multiple(
; CHECK-NEXT:    ret <8 x i32> undef
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %vec, <4 x i32> %subvec, i64 2)
  ret <8 x i32> %1
}

; If the insertion overruns the vector, the result is undefined.
define <8 x i32> @insert_overrun(<8 x i32> %vec, <8 x i32> %subvec) {
; CHECK-LABEL: @insert_overrun(
; CHECK-NEXT:    ret <8 x i32> undef
;
  %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v8i32(<8 x i32> %vec, <8 x i32> %subvec, i64 4)
  ret <8 x i32> %1
}

; ============================================================================ ;
; Scalable cases
; ============================================================================ ;

; Scalable insertions should not be canonicalized. This will be lowered to the
; INSERT_SUBVECTOR ISD node later.
define <vscale x 4 x i32> @scalable_insert(<vscale x 4 x i32> %vec, <4 x i32> %subvec) {
; CHECK-LABEL: @scalable_insert(
; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> [[VEC:%.*]], <4 x i32> [[SUBVEC:%.*]], i64 0)
; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
;
  %1 = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %vec, <4 x i32> %subvec, i64 0)
  ret <vscale x 4 x i32> %1
}
@@ -0,0 +1,9 @@
; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s

; CHECK: experimental_vector_extract result must have the same element type as the input vector.
define <16 x i16> @invalid_mismatched_element_types(<vscale x 16 x i8> %vec) nounwind {
  %retval = call <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8(<vscale x 16 x i8> %vec, i64 0)
  ret <16 x i16> %retval
}

declare <16 x i16> @llvm.experimental.vector.extract.v16i16.nxv16i8(<vscale x 16 x i8>, i64)
@@ -0,0 +1,9 @@
; RUN: not opt -verify -S < %s 2>&1 >/dev/null | FileCheck %s

; CHECK: experimental_vector_insert parameters must have the same element type.
define <vscale x 16 x i8> @invalid_mismatched_element_types(<vscale x 16 x i8> %vec, <4 x i16> %subvec) nounwind {
  %retval = call <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v4i16(<vscale x 16 x i8> %vec, <4 x i16> %subvec, i64 0)
  ret <vscale x 16 x i8> %retval
}

declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v4i16(<vscale x 16 x i8>, <4 x i16>, i64)