forked from OSchip/llvm-project
[AArch64][SVE] Implement vector tuple intrinsics
Summary: This patch adds the following intrinsics for creating two-tuple, three-tuple and four-tuple scalable vectors: * llvm.aarch64.sve.tuple.create2 * llvm.aarch64.sve.tuple.create3 * llvm.aarch64.sve.tuple.create4 As well as: * llvm.aarch64.sve.tuple.get * llvm.aarch64.sve.tuple.set For extracting and inserting scalable vectors from vector tuples. These intrinsics are intended to be used by the ACLE functions svcreate<n>, svget and svset. This patch also includes calling convention support for passing and returning tuples of scalable vectors to/from functions. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D75674
This commit is contained in:
parent
94b0c32a0b
commit
3ebbe35363
|
@ -789,6 +789,31 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
|
|||
|
||||
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
|
||||
|
||||
class AdvSIMD_SVE_Create_2Vector_Tuple
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>],
|
||||
[IntrReadMem]>;
|
||||
|
||||
class AdvSIMD_SVE_Create_3Vector_Tuple
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
|
||||
[IntrReadMem]>;
|
||||
|
||||
class AdvSIMD_SVE_Create_4Vector_Tuple
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
|
||||
LLVMMatchType<1>],
|
||||
[IntrReadMem]>;
|
||||
|
||||
class AdvSIMD_SVE_Set_Vector_Tuple
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
|
||||
[IntrReadMem, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
class AdvSIMD_SVE_Get_Vector_Tuple
|
||||
: Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
|
||||
[IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
class AdvSIMD_1Vec_PredLoad_Intrinsic
|
||||
: Intrinsic<[llvm_anyvector_ty],
|
||||
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
|
||||
|
@ -1300,6 +1325,21 @@ class SVE_MatMul_Intrinsic
|
|||
[LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
//
|
||||
// Vector tuple creation intrinsics (ACLE)
|
||||
//
|
||||
|
||||
def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple;
|
||||
def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple;
|
||||
def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple;
|
||||
|
||||
//
|
||||
// Vector tuple insertion/extraction intrinsics (ACLE)
|
||||
//
|
||||
|
||||
def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple;
|
||||
def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple;
|
||||
|
||||
//
|
||||
// Loads
|
||||
//
|
||||
|
|
|
@ -13657,6 +13657,73 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
/*OnlyPackedOffsets=*/false);
|
||||
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
|
||||
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM);
|
||||
case Intrinsic::aarch64_sve_tuple_get: {
|
||||
SDLoc DL(N);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Src1 = N->getOperand(2);
|
||||
SDValue Idx = N->getOperand(3);
|
||||
|
||||
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
if (IdxConst > Src1->getNumOperands() - 1)
|
||||
report_fatal_error("index larger than expected");
|
||||
|
||||
EVT ResVT = N->getValueType(0);
|
||||
uint64_t NumLanes = ResVT.getVectorElementCount().Min;
|
||||
SDValue Val =
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1,
|
||||
DAG.getConstant(IdxConst * NumLanes, DL, MVT::i32));
|
||||
return DAG.getMergeValues({Val, Chain}, DL);
|
||||
}
|
||||
case Intrinsic::aarch64_sve_tuple_set: {
|
||||
SDLoc DL(N);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
SDValue Tuple = N->getOperand(2);
|
||||
SDValue Idx = N->getOperand(3);
|
||||
SDValue Vec = N->getOperand(4);
|
||||
|
||||
EVT TupleVT = Tuple.getValueType();
|
||||
uint64_t TupleLanes = TupleVT.getVectorElementCount().Min;
|
||||
|
||||
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
uint64_t NumLanes = Vec.getValueType().getVectorElementCount().Min;
|
||||
|
||||
if ((TupleLanes % NumLanes) != 0)
|
||||
report_fatal_error("invalid tuple vector!");
|
||||
|
||||
uint64_t NumVecs = TupleLanes / NumLanes;
|
||||
|
||||
SmallVector<SDValue, 4> Opnds;
|
||||
for (unsigned I = 0; I < NumVecs; ++I) {
|
||||
if (I == IdxConst)
|
||||
Opnds.push_back(Vec);
|
||||
else {
|
||||
Opnds.push_back(
|
||||
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Vec.getValueType(), Tuple,
|
||||
DAG.getConstant(I * NumLanes, DL, MVT::i32)));
|
||||
}
|
||||
}
|
||||
SDValue Concat =
|
||||
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
|
||||
return DAG.getMergeValues({Concat, Chain}, DL);
|
||||
}
|
||||
case Intrinsic::aarch64_sve_tuple_create2:
|
||||
case Intrinsic::aarch64_sve_tuple_create3:
|
||||
case Intrinsic::aarch64_sve_tuple_create4: {
|
||||
SDLoc DL(N);
|
||||
SDValue Chain = N->getOperand(0);
|
||||
|
||||
SmallVector<SDValue, 4> Opnds;
|
||||
for (unsigned I = 2; I < N->getNumOperands(); ++I)
|
||||
Opnds.push_back(N->getOperand(I));
|
||||
|
||||
EVT VT = Opnds[0].getValueType();
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
|
||||
VT.getVectorElementCount() *
|
||||
(N->getNumOperands() - 2));
|
||||
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
|
||||
return DAG.getMergeValues({Concat, Chain}, DL);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,499 @@
|
|||
; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; svint8x2_t
|
||||
;
|
||||
|
||||
define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svint8x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
|
||||
ret <vscale x 32 x i8> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
|
||||
; CHECK-LABEL: call_svint8x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svint8x2_t
|
||||
%tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
|
||||
call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint16x2_t
|
||||
;
|
||||
|
||||
define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svint16x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
|
||||
ret <vscale x 16 x i16> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
|
||||
; CHECK-LABEL: call_svint16x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svint16x2_t
|
||||
%tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
|
||||
call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint32x2_t
|
||||
;
|
||||
|
||||
define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svint32x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
ret <vscale x 8 x i32> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
|
||||
; CHECK-LABEL: call_svint32x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svint32x2_t
|
||||
%tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
|
||||
call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint64x2_t
|
||||
;
|
||||
|
||||
define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svint64x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
|
||||
ret <vscale x 4 x i64> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
|
||||
; CHECK-LABEL: call_svint64x2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svint64x2_t
|
||||
%tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z3)
|
||||
call void @callee_svint64x2_t(<vscale x 4 x i64> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svfloatx2_t
|
||||
;
|
||||
|
||||
define <vscale x 8 x float> @ret_svfloatx2_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svfloatx2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
|
||||
ret <vscale x 8 x float> %tuple
|
||||
}
|
||||
|
||||
define void @call_svfloatx2_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %dummy_z2, <vscale x 4 x float> %z3) #0 {
|
||||
; CHECK-LABEL: call_svfloatx2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svfloatx2_t
|
||||
%tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z3)
|
||||
call void @callee_svfloatx2_t(<vscale x 8 x float> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svdoublex2_t
|
||||
;
|
||||
|
||||
define <vscale x 4 x double> @ret_svdoublex2_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
|
||||
; CHECK-LABEL: ret_svdoublex2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
|
||||
ret <vscale x 4 x double> %tuple
|
||||
}
|
||||
|
||||
define void @call_svdoublex2_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %dummy_z2, <vscale x 2 x double> %z3) #0 {
|
||||
; CHECK-LABEL: call_svdoublex2_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z3.d
|
||||
; CHECK-NEXT: bl callee_svdoublex2_t
|
||||
%tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z3)
|
||||
call void @callee_svdoublex2_t(<vscale x 4 x double> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint8x3_t
|
||||
;
|
||||
|
||||
define <vscale x 48 x i8> @ret_svint8x3_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svint8x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
|
||||
ret <vscale x 48 x i8> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint8x3_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4) #0 {
|
||||
; CHECK-LABEL: call_svint8x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint8x3_t
|
||||
%tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4)
|
||||
call void @callee_svint8x3_t(<vscale x 48 x i8> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint16x3_t
|
||||
;
|
||||
|
||||
define <vscale x 24 x i16> @ret_svint16x3_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svint16x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
|
||||
ret <vscale x 24 x i16> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint16x3_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4) #0 {
|
||||
; CHECK-LABEL: call_svint16x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint16x3_t
|
||||
%tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4)
|
||||
call void @callee_svint16x3_t(<vscale x 24 x i16> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint32x3_t
|
||||
;
|
||||
|
||||
define <vscale x 12 x i32> @ret_svint32x3_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svint32x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
ret <vscale x 12 x i32> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint32x3_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4) #0 {
|
||||
; CHECK-LABEL: call_svint32x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint32x3_t
|
||||
%tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4)
|
||||
call void @callee_svint32x3_t(<vscale x 12 x i32> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint64x3_t
|
||||
;
|
||||
|
||||
define <vscale x 6 x i64> @ret_svint64x3_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svint64x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
|
||||
ret <vscale x 6 x i64> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint64x3_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4) #0 {
|
||||
; CHECK-LABEL: call_svint64x3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint64x3_t
|
||||
%tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4)
|
||||
call void @callee_svint64x3_t(<vscale x 6 x i64> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svfloatx3_t
|
||||
;
|
||||
|
||||
define <vscale x 12 x float> @ret_svfloatx3_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svfloatx3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
|
||||
ret <vscale x 12 x float> %tuple
|
||||
}
|
||||
|
||||
define void @call_svfloatx3_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4) #0 {
|
||||
; CHECK-LABEL: call_svfloatx3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svfloatx3_t
|
||||
%tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4)
|
||||
call void @callee_svfloatx3_t(<vscale x 12 x float> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svdoublex3_t
|
||||
;
|
||||
|
||||
define <vscale x 6 x double> @ret_svdoublex3_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
|
||||
; CHECK-LABEL: ret_svdoublex3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
|
||||
ret <vscale x 6 x double> %tuple
|
||||
}
|
||||
|
||||
define void @call_svdoublex3_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4) #0 {
|
||||
; CHECK-LABEL: call_svdoublex3_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svdoublex3_t
|
||||
%tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4)
|
||||
call void @callee_svdoublex3_t(<vscale x 6 x double> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint8x4_t
|
||||
;
|
||||
|
||||
define <vscale x 64 x i8> @ret_svint8x4_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svint8x4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
|
||||
ret <vscale x 64 x i8> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint8x4_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5) #0 {
|
||||
; CHECK-LABEL: call_svint8x4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint8x4_t
|
||||
%tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5)
|
||||
call void @callee_svint8x4_t(<vscale x 64 x i8> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint16x4_t
|
||||
;
|
||||
|
||||
define <vscale x 32 x i16> @ret_svint16x4_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svint16x4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4)
|
||||
ret <vscale x 32 x i16> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint16x4_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5) #0 {
|
||||
; CHECK-LABEL: call_svint16x4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint16x4_t
|
||||
%tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5)
|
||||
call void @callee_svint16x4_t(<vscale x 32 x i16> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint32x4_t
|
||||
;
|
||||
|
||||
define <vscale x 16 x i32> @ret_svint32x4_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svint32x4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4)
|
||||
ret <vscale x 16 x i32> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint32x4_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: call_svint32x4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint32x4_t
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5)
|
||||
call void @callee_svint32x4_t(<vscale x 16 x i32> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svint64x4_t
|
||||
;
|
||||
|
||||
define <vscale x 8 x i64> @ret_svint64x4_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svint64x4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4)
|
||||
ret <vscale x 8 x i64> %tuple
|
||||
}
|
||||
|
||||
define void @call_svint64x4_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5) #0 {
|
||||
; CHECK-LABEL: call_svint64x4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svint64x4_t
|
||||
%tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5)
|
||||
call void @callee_svint64x4_t(<vscale x 8 x i64> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svfloatx4_t
|
||||
;
|
||||
|
||||
define <vscale x 16 x float> @ret_svfloatx4_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svfloatx4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4)
|
||||
ret <vscale x 16 x float> %tuple
|
||||
}
|
||||
|
||||
define void @call_svfloatx4_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5) #0 {
|
||||
; CHECK-LABEL: call_svfloatx4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svfloatx4_t
|
||||
%tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5)
|
||||
call void @callee_svfloatx4_t(<vscale x 16 x float> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
;
|
||||
; svdoublex4_t
|
||||
;
|
||||
|
||||
define <vscale x 8 x double> @ret_svdoublex4_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4) #0 {
|
||||
; CHECK-LABEL: ret_svdoublex4_t
|
||||
; CHECK: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z3.d
|
||||
; CHECK-NEXT: mov z3.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4)
|
||||
ret <vscale x 8 x double> %tuple
|
||||
}
|
||||
|
||||
define void @call_svdoublex4_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5) #0 {
|
||||
; CHECK-LABEL: call_svdoublex4_t
|
||||
; CHECK: mov z3.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: mov z2.d, z4.d
|
||||
; CHECK-NEXT: bl callee_svdoublex4_t
|
||||
%tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5)
|
||||
call void @callee_svdoublex4_t(<vscale x 8 x double> %tuple)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
||||
|
||||
declare void @callee_svint8x2_t(<vscale x 32 x i8>)
|
||||
declare void @callee_svint16x2_t(<vscale x 16 x i16>)
|
||||
declare void @callee_svint32x2_t(<vscale x 8 x i32>)
|
||||
declare void @callee_svint64x2_t(<vscale x 4 x i64>)
|
||||
declare void @callee_svfloatx2_t(<vscale x 8 x float>)
|
||||
declare void @callee_svdoublex2_t(<vscale x 4 x double>)
|
||||
|
||||
declare void @callee_svint8x3_t(<vscale x 48 x i8>)
|
||||
declare void @callee_svint16x3_t(<vscale x 24 x i16>)
|
||||
declare void @callee_svint32x3_t(<vscale x 12 x i32>)
|
||||
declare void @callee_svint64x3_t(<vscale x 6 x i64>)
|
||||
declare void @callee_svfloatx3_t(<vscale x 12 x float>)
|
||||
declare void @callee_svdoublex3_t(<vscale x 6 x double>)
|
||||
|
||||
declare void @callee_svint8x4_t(<vscale x 64 x i8>)
|
||||
declare void @callee_svint16x4_t(<vscale x 32 x i16>)
|
||||
declare void @callee_svint32x4_t(<vscale x 16 x i32>)
|
||||
declare void @callee_svint64x4_t(<vscale x 8 x i64>)
|
||||
declare void @callee_svfloatx4_t(<vscale x 16 x float>)
|
||||
declare void @callee_svdoublex4_t(<vscale x 8 x double>)
|
||||
|
||||
|
||||
; x2
|
||||
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
; x3
|
||||
declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
|
||||
; x4
|
||||
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
|
|
@ -0,0 +1,706 @@
|
|||
; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
|
||||
|
||||
;
|
||||
; SVCREATE2 (i8)
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s8_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %tuple, i32 0)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s8_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %tuple, i32 1)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (i16)
|
||||
;
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %tuple, i32 0)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s16_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %tuple, i32 1)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (half)
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %tuple, i32 0)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f16_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %tuple, i32 1)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (i32)
|
||||
;
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i32 0)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s32_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (float)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %tuple, i32 0)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f32_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %tuple, i32 1)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (i64)
|
||||
;
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %tuple, i32 0)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_s64_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %tuple, i32 1)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE2 (double)
|
||||
;
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %tuple, i32 0)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate2_f64_vec1:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %tuple, i32 1)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE3 (i8)
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s8_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %tuple, i32 0)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s8_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %tuple, i32 2)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE3 (i16)
|
||||
;
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 0)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s16_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 2)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
;
|
||||
; SVCREATE3 (half)
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %tuple, i32 0)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f16_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %tuple, i32 2)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
|
||||
;
|
||||
; SVCREATE3 (i32)
|
||||
;
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %tuple, i32 0)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s32_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE3 (float)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %tuple, i32 0)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f32_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %tuple, i32 2)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE3 (i64)
|
||||
;
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %tuple, i32 0)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_s64_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %tuple, i32 2)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE3 (double)
|
||||
;
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %tuple, i32 0)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate3_f64_vec2:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %tuple, i32 2)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (i8)
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s8_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %tuple, i32 0)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s8_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 16 x i8> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %tuple, i32 3)
|
||||
ret <vscale x 16 x i8> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (i16)
|
||||
;
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %tuple, i32 0)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s16_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x i16> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %tuple, i32 3)
|
||||
ret <vscale x 8 x i16> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (half)
|
||||
;
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f16_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i32 0)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f16_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 8 x half> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i32 3)
|
||||
ret <vscale x 8 x half> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (i32)
|
||||
;
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %tuple, i32 0)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s32_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x i32> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
|
||||
ret <vscale x 4 x i32> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (float)
|
||||
;
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f32_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %tuple, i32 0)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f32_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 4 x float> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %tuple, i32 3)
|
||||
ret <vscale x 4 x float> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (i64)
|
||||
;
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %tuple, i32 0)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_s64_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x i64> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %tuple, i32 3)
|
||||
ret <vscale x 2 x i64> %extract
|
||||
}
|
||||
|
||||
;
|
||||
; SVCREATE4 (double)
|
||||
;
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f64_vec0:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %tuple, i32 0)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: test_svcreate4_f64_vec3:
|
||||
; CHECK: // %L2
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
|
||||
br i1 %p, label %L1, label %L2
|
||||
L1:
|
||||
ret <vscale x 2 x double> undef
|
||||
L2:
|
||||
%extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %tuple, i32 3)
|
||||
ret <vscale x 2 x double> %extract
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
||||
|
||||
declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
|
||||
declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
|
||||
declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
|
||||
declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
|
||||
declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
|
||||
declare <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
|
||||
declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
|
||||
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
|
||||
declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
|
||||
|
||||
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8>, i32 immarg)
|
||||
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8>, i32 immarg)
|
||||
declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32 immarg)
|
||||
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16>, i32 immarg)
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16>, i32 immarg)
|
||||
declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16>, i32 immarg)
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32>, i32 immarg)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32>, i32 immarg)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32>, i32 immarg)
|
||||
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64>, i32 immarg)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64>, i32 immarg)
|
||||
declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64>, i32 immarg)
|
||||
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half>, i32 immarg)
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half>, i32 immarg)
|
||||
declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half>, i32 immarg)
|
||||
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float>, i32 immarg)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float>, i32 immarg)
|
||||
declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float>, i32 immarg)
|
||||
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double>, i32 immarg)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double>, i32 immarg)
|
||||
declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double>, i32 immarg)
|
|
@ -0,0 +1,243 @@
|
|||
; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
|
||||
|
||||
; All these tests create a vector tuple, insert z5 into one of the elements,
|
||||
; and finally extracts that element from the wide vector to return it. These
|
||||
; checks ensure that z5 is always the value that is returned.
|
||||
|
||||
;
|
||||
; Insert into two element tuples
|
||||
;
|
||||
|
||||
; tuple: { tuple2.res0, tuple2.res1 }
|
||||
; insert z5: { z5 , tuple2.res1 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
|
||||
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple2.res0, tuple2.res1 }
|
||||
; insert z5: { tuple2.res0, z5 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
|
||||
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; This test checks the elements _not_ being set aren't changed.
|
||||
|
||||
; tuple: { tuple2.res0, tuple2.res1 }
|
||||
; insert z5: { tuple2.res0, z5 }
|
||||
; extract z0: ^^
|
||||
define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
|
||||
%ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; Test extract of tuple passed into function
|
||||
define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) #0 {
|
||||
; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
;
|
||||
; Insert into three element tuples
|
||||
;
|
||||
|
||||
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
|
||||
; insert z5: { z5 , tuple3.res0, tuple3.res2 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
|
||||
; insert z5: { tuple3.res0, z5 , tuple3.res2 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
|
||||
; insert z5: { tuple3.res0, tuple3.res1, z5 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; This test checks the elements _not_ being set aren't changed.
|
||||
|
||||
; tuple: { tuple3.res0, tuple3.res1, tuple3.res2 }
|
||||
; insert z5: { tuple3.res0, z5 , tuple3.res2 }
|
||||
; extract z2: ^^
|
||||
define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
|
||||
%ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; Test extract of tuple passed into function
|
||||
define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) #0 {
|
||||
; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
;
|
||||
; Insert into four element tuples
|
||||
;
|
||||
|
||||
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; insert z5: { z5 , tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; insert z5: { tuple4.res0, z5 , tuple4.res2, tuple4.res3 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; insert z5: { tuple4.res0, tuple4.res1, z5 , tuple4.res3 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
|
||||
; extract z5: ^^
|
||||
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; This test checks the elements _not_ being set aren't changed.
|
||||
|
||||
; tuple: { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
|
||||
; insert z5: { tuple4.res0, tuple4.res1, tuple4.res2, z5 }
|
||||
; extract z2: ^^
|
||||
define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1,
|
||||
<vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3,
|
||||
<vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
|
||||
; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
|
||||
; CHECK-NEXT: mov z0.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
|
||||
%ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
; Test extract of tuple passed into function
|
||||
define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) #0 {
|
||||
; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
|
||||
; CHECK-NEXT: mov z0.d, z3.d
|
||||
; CHECK-NEXT: ret
|
||||
%ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
|
||||
ret <vscale x 4 x i32> %ext
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
||||
|
||||
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
|
||||
|
||||
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
|
||||
|
||||
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
|
||||
declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
|
||||
declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)
|
Loading…
Reference in New Issue