forked from OSchip/llvm-project
[SveEmitter] Add builtins for struct loads/stores (ld2/ld3/etc)
The struct store intrinsics in LLVM IR take the individual parts as arguments, so this patch uses the intrinsics used for `svget` to break the tuples into individual parts. Reviewers: c-rhodes, efriedma, ctetreau, david-arm Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D81466
This commit is contained in:
parent
0dc28af219
commit
ad828e3f4d
|
@ -480,6 +480,16 @@ def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoa
|
|||
// Load one quadword and replicate (scalar base)
|
||||
def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">;
|
||||
|
||||
// Load N-element structure into N vectors (scalar base)
|
||||
def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld2", [IsStructLoad]>;
|
||||
def SVLD3 : SInst<"svld3[_{2}]", "3Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld3", [IsStructLoad]>;
|
||||
def SVLD4 : SInst<"svld4[_{2}]", "4Pc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld4", [IsStructLoad]>;
|
||||
|
||||
// Load N-element structure into N vectors (scalar base, VL displacement)
|
||||
def SVLD2_VNUM : SInst<"svld2_vnum[_{2}]", "2Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld2", [IsStructLoad]>;
|
||||
def SVLD3_VNUM : SInst<"svld3_vnum[_{2}]", "3Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld3", [IsStructLoad]>;
|
||||
def SVLD4_VNUM : SInst<"svld4_vnum[_{2}]", "4Pcl", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld4", [IsStructLoad]>;
|
||||
|
||||
// Load one octoword and replicate (scalar base)
|
||||
let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in {
|
||||
def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">;
|
||||
|
@ -577,6 +587,16 @@ def SVST1_SCATTER_INDEX_S : MInst<"svst1_scatter[_{2}base]_index[_{d}]", "v
|
|||
def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_st1_scatter_scalar_offset">;
|
||||
def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">;
|
||||
|
||||
// Store N vectors into N-element structure (scalar base)
|
||||
def SVST2 : SInst<"svst2[_{d}]", "vPp2", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st2", [IsStructStore]>;
|
||||
def SVST3 : SInst<"svst3[_{d}]", "vPp3", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st3", [IsStructStore]>;
|
||||
def SVST4 : SInst<"svst4[_{d}]", "vPp4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st4", [IsStructStore]>;
|
||||
|
||||
// Store N vectors into N-element structure (scalar base, VL displacement)
|
||||
def SVST2_VNUM : SInst<"svst2_vnum[_{d}]", "vPpl2", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st2", [IsStructStore]>;
|
||||
def SVST3_VNUM : SInst<"svst3_vnum[_{d}]", "vPpl3", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st3", [IsStructStore]>;
|
||||
def SVST4_VNUM : SInst<"svst4_vnum[_{d}]", "vPpl4", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_st4", [IsStructStore]>;
|
||||
|
||||
// Store one vector, with no truncation, non-temporal (scalar base)
|
||||
def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">;
|
||||
|
||||
|
|
|
@ -7904,6 +7904,84 @@ Value *CodeGenFunction::EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
|
|||
return Builder.CreateCall(F, Ops);
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitSVEStructLoad(SVETypeFlags TypeFlags,
|
||||
SmallVectorImpl<Value*> &Ops,
|
||||
unsigned IntID) {
|
||||
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
|
||||
auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
|
||||
auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
|
||||
|
||||
unsigned N;
|
||||
switch (IntID) {
|
||||
case Intrinsic::aarch64_sve_ld2:
|
||||
N = 2;
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_ld3:
|
||||
N = 3;
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_ld4:
|
||||
N = 4;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unknown intrinsic!");
|
||||
}
|
||||
auto RetTy = llvm::VectorType::get(VTy->getElementType(),
|
||||
VTy->getElementCount() * N);
|
||||
|
||||
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
|
||||
Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy);
|
||||
Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0);
|
||||
BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
|
||||
BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
|
||||
|
||||
Function *F = CGM.getIntrinsic(IntID, {RetTy, Predicate->getType()});
|
||||
return Builder.CreateCall(F, { Predicate, BasePtr });
|
||||
}
|
||||
|
||||
Value *CodeGenFunction::EmitSVEStructStore(SVETypeFlags TypeFlags,
|
||||
SmallVectorImpl<Value*> &Ops,
|
||||
unsigned IntID) {
|
||||
llvm::ScalableVectorType *VTy = getSVEType(TypeFlags);
|
||||
auto VecPtrTy = llvm::PointerType::getUnqual(VTy);
|
||||
auto EltPtrTy = llvm::PointerType::getUnqual(VTy->getElementType());
|
||||
|
||||
unsigned N;
|
||||
switch (IntID) {
|
||||
case Intrinsic::aarch64_sve_st2:
|
||||
N = 2;
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_st3:
|
||||
N = 3;
|
||||
break;
|
||||
case Intrinsic::aarch64_sve_st4:
|
||||
N = 4;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("unknown intrinsic!");
|
||||
}
|
||||
auto TupleTy =
|
||||
llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N);
|
||||
|
||||
Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy);
|
||||
Value *BasePtr = Builder.CreateBitCast(Ops[1], VecPtrTy);
|
||||
Value *Offset = Ops.size() > 3 ? Ops[2] : Builder.getInt32(0);
|
||||
Value *Val = Ops.back();
|
||||
BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset);
|
||||
BasePtr = Builder.CreateBitCast(BasePtr, EltPtrTy);
|
||||
|
||||
// The llvm.aarch64.sve.st2/3/4 intrinsics take legal part vectors, so we
|
||||
// need to break up the tuple vector.
|
||||
SmallVector<llvm::Value*, 5> Operands;
|
||||
Function *FExtr =
|
||||
CGM.getIntrinsic(Intrinsic::aarch64_sve_tuple_get, {VTy, TupleTy});
|
||||
for (unsigned I = 0; I < N; ++I)
|
||||
Operands.push_back(Builder.CreateCall(FExtr, {Val, Builder.getInt32(I)}));
|
||||
Operands.append({Predicate, BasePtr});
|
||||
|
||||
Function *F = CGM.getIntrinsic(IntID, { VTy });
|
||||
return Builder.CreateCall(F, Operands);
|
||||
}
|
||||
|
||||
// SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and
|
||||
// svpmullt_pair intrinsics, with the exception that their results are bitcast
|
||||
// to a wider type.
|
||||
|
@ -8114,6 +8192,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
|
|||
return EmitSVEPrefetchLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
|
||||
else if (TypeFlags.isGatherPrefetch())
|
||||
return EmitSVEGatherPrefetch(TypeFlags, Ops, Builtin->LLVMIntrinsic);
|
||||
else if (TypeFlags.isStructLoad())
|
||||
return EmitSVEStructLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic);
|
||||
else if (TypeFlags.isStructStore())
|
||||
return EmitSVEStructStore(TypeFlags, Ops, Builtin->LLVMIntrinsic);
|
||||
else if (TypeFlags.isUndef())
|
||||
return UndefValue::get(Ty);
|
||||
else if (Builtin->LLVMIntrinsic != 0) {
|
||||
|
|
|
@ -3990,6 +3990,11 @@ public:
|
|||
llvm::Value *EmitSVEGatherPrefetch(SVETypeFlags TypeFlags,
|
||||
SmallVectorImpl<llvm::Value *> &Ops,
|
||||
unsigned IntID);
|
||||
llvm::Value *EmitSVEStructLoad(SVETypeFlags TypeFlags,
|
||||
SmallVectorImpl<llvm::Value *> &Ops, unsigned IntID);
|
||||
llvm::Value *EmitSVEStructStore(SVETypeFlags TypeFlags,
|
||||
SmallVectorImpl<llvm::Value *> &Ops,
|
||||
unsigned IntID);
|
||||
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
|
||||
|
||||
llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
|
||||
|
|
|
@ -0,0 +1,227 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_s8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 32 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_s8,,)(pg, base);
|
||||
}
|
||||
|
||||
svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_s16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_s16,,)(pg, base);
|
||||
}
|
||||
|
||||
svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_s32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_s32,,)(pg, base);
|
||||
}
|
||||
|
||||
svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_s64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 4 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_s64,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_u8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 32 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_u8,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_u16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_u16,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_u32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_u32,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_u64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 4 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_u64,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_f16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_f16,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_f32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_f32,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_f64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret <vscale x 4 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base);
|
||||
}
|
||||
|
||||
svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_s8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 32 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_s8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_s16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_s16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_s32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_s32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_s64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 4 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_s64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_u8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 32 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_u8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_u16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i16> @llvm.aarch64.sve.ld2.nxv16i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_u16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_u32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i32> @llvm.aarch64.sve.ld2.nxv8i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_u32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_u64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x i64> @llvm.aarch64.sve.ld2.nxv4i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 4 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_u64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_f16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x half> @llvm.aarch64.sve.ld2.nxv16f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_f16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_f32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x float> @llvm.aarch64.sve.ld2.nxv8f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_f32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld2_vnum_f64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 4 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum);
|
||||
}
|
|
@ -0,0 +1,227 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_s8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 48 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_s8,,)(pg, base);
|
||||
}
|
||||
|
||||
svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_s16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 24 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_s16,,)(pg, base);
|
||||
}
|
||||
|
||||
svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_s32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 12 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_s32,,)(pg, base);
|
||||
}
|
||||
|
||||
svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_s64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 6 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_s64,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_u8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 48 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_u8,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_u16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 24 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_u16,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_u32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 12 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_u32,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_u64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 6 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_u64,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_f16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret <vscale x 24 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_f16,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_f32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret <vscale x 12 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_f32,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_f64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret <vscale x 6 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base);
|
||||
}
|
||||
|
||||
svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_s8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 48 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_s8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_s16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 24 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_s16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_s32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 12 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_s32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_s64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 6 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_s64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_u8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 48 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_u8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_u16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x i16> @llvm.aarch64.sve.ld3.nxv24i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 24 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_u16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_u32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 12 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_u32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_u64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x i64> @llvm.aarch64.sve.ld3.nxv6i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 6 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_u64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_f16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 24 x half> @llvm.aarch64.sve.ld3.nxv24f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 24 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_f16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_f32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 12 x float> @llvm.aarch64.sve.ld3.nxv12f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 12 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_f32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld3_vnum_f64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 6 x double> @llvm.aarch64.sve.ld3.nxv6f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 6 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum);
|
||||
}
|
|
@ -0,0 +1,227 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_s8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 64 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_s8,,)(pg, base);
|
||||
}
|
||||
|
||||
svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_s16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 32 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_s16,,)(pg, base);
|
||||
}
|
||||
|
||||
svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_s32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_s32,,)(pg, base);
|
||||
}
|
||||
|
||||
svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_s64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_s64,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_u8
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret <vscale x 64 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_u8,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_u16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret <vscale x 32 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_u16,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_u32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_u32,,)(pg, base);
|
||||
}
|
||||
|
||||
svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_u64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_u64,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_f16
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret <vscale x 32 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_f16,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_f32
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret <vscale x 16 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_f32,,)(pg, base);
|
||||
}
|
||||
|
||||
svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_f64
|
||||
// CHECK: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret <vscale x 8 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base);
|
||||
}
|
||||
|
||||
svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_s8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 64 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_s8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_s16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 32 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_s16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_s32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_s32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_s64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_s64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_u8
|
||||
// CHECK: %[[BASE:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1(<vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 64 x i8> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_u8,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_u16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x i16> @llvm.aarch64.sve.ld4.nxv32i16.nxv8i1(<vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 32 x i16> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_u16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_u32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x i32> @llvm.aarch64.sve.ld4.nxv16i32.nxv4i1(<vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x i32> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_u32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_u64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x i64> @llvm.aarch64.sve.ld4.nxv8i64.nxv2i1(<vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x i64> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_u64,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_f16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 32 x half> @llvm.aarch64.sve.ld4.nxv32f16.nxv8i1(<vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 32 x half> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_f16,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_f32
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 16 x float> @llvm.aarch64.sve.ld4.nxv16f32.nxv4i1(<vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 16 x float> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_f32,,)(pg, base, vnum);
|
||||
}
|
||||
|
||||
svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
|
||||
{
|
||||
// CHECK-LABEL: test_svld4_vnum_f64
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[BASE:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BASE]], i64 %vnum, i64 0
|
||||
// CHECK: %[[LOAD:.*]] = call <vscale x 8 x double> @llvm.aarch64.sve.ld4.nxv8f64.nxv2i1(<vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret <vscale x 8 x double> %[[LOAD]]
|
||||
return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum);
|
||||
}
|
|
@ -0,0 +1,271 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_s8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 1)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_s8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_s16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_s16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_s32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_s32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_s64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_s64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_u8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 1)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_u8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_u16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_u16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_u32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_u32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_u64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_u64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_f16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_f16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_f32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_f32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_f64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_s8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 1)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_s8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_s16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_s16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_s32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_s32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_s64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_s64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_u8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %data, i32 1)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_u8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_u16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_u16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_u32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_u32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_u64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_u64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_f16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_f16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_f32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_f32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst2_vnum_f64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data);
|
||||
}
|
|
@ -0,0 +1,293 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_s8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 2)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_s8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_s16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_s16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_s32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_s32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_s64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_s64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_u8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 2)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_u8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_u16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_u16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_u32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_u32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_u64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_u64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_f16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_f16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_f32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_f32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_f64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_s8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 2)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_s8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_s16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_s16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_s32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_s32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_s64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_s64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_u8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %data, i32 2)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_u8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_u16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_u16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_u32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_u32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_u64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_u64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_f16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_f16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_f32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_f32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst3_vnum_f64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %data, i32 2)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data);
|
||||
}
|
|
@ -0,0 +1,315 @@
|
|||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef SVE_OVERLOADED_FORMS
|
||||
// A simple used,unused... macro, long enough to represent any SVE builtin.
|
||||
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
|
||||
#else
|
||||
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
|
||||
#endif
|
||||
|
||||
void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_s8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_s8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_s16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_s16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_s32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_s32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_s64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_s64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_u8
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_u8,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_u16
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_u16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_u32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_u32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_u64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_u64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_f16
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 3)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x half> %[[V3]], <vscale x 8 x i1> %[[PG]], half* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_f16,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_f32
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x float> %[[V3]], <vscale x 4 x i1> %[[PG]], float* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_f32,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_f64
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x double> %[[V3]], <vscale x 2 x i1> %[[PG]], double* %base)
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_s8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_s8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_s16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_s16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_s32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_s32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_s64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_s64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_u8
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i8* %base to <vscale x 16 x i8>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %data, i32 3)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %[[V0]], <vscale x 16 x i8> %[[V1]], <vscale x 16 x i8> %[[V2]], <vscale x 16 x i8> %[[V3]], <vscale x 16 x i1> %pg, i8* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_u8,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_u16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i16* %base to <vscale x 8 x i16>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> %[[V0]], <vscale x 8 x i16> %[[V1]], <vscale x 8 x i16> %[[V2]], <vscale x 8 x i16> %[[V3]], <vscale x 8 x i1> %[[PG]], i16* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_u16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_u32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i32* %base to <vscale x 4 x i32>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %[[V0]], <vscale x 4 x i32> %[[V1]], <vscale x 4 x i32> %[[V2]], <vscale x 4 x i32> %[[V3]], <vscale x 4 x i1> %[[PG]], i32* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_u32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_u64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast i64* %base to <vscale x 2 x i64>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> %[[V0]], <vscale x 2 x i64> %[[V1]], <vscale x 2 x i64> %[[V2]], <vscale x 2 x i64> %[[V3]], <vscale x 2 x i1> %[[PG]], i64* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_u64,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_f16
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast half* %base to <vscale x 8 x half>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> %[[V0]], <vscale x 8 x half> %[[V1]], <vscale x 8 x half> %[[V2]], <vscale x 8 x half> %[[V3]], <vscale x 8 x i1> %[[PG]], half* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_f16,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_f32
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast float* %base to <vscale x 4 x float>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> %[[V0]], <vscale x 4 x float> %[[V1]], <vscale x 4 x float> %[[V2]], <vscale x 4 x float> %[[V3]], <vscale x 4 x i1> %[[PG]], float* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_f32,,)(pg, base, vnum, data);
|
||||
}
|
||||
|
||||
void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data)
|
||||
{
|
||||
// CHECK-LABEL: test_svst4_vnum_f64
|
||||
// CHECK-DAG: %[[BITCAST:.*]] = bitcast double* %base to <vscale x 2 x double>*
|
||||
// CHECK-DAG: %[[GEP:.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %[[BITCAST]], i64 %vnum, i64 0
|
||||
// CHECK-DAG: %[[V0:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 0)
|
||||
// CHECK-DAG: %[[V1:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 1)
|
||||
// CHECK-DAG: %[[V2:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 2)
|
||||
// CHECK-DAG: %[[V3:.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %data, i32 3)
|
||||
// CHECK-DAG: %[[PG:.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %pg)
|
||||
// CHECK: call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> %[[V0]], <vscale x 2 x double> %[[V1]], <vscale x 2 x double> %[[V2]], <vscale x 2 x double> %[[V3]], <vscale x 2 x i1> %[[PG]], double* %[[GEP]])
|
||||
// CHECK-NEXT: ret
|
||||
return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data);
|
||||
}
|
Loading…
Reference in New Issue