[AArch64][SVE] Instcombine SVE LD1/ST1 to stock LLVM IR

InstCombine AArch64 LD1/ST1 intrinsics to llvm.masked.load/llvm.masked.store,
and to plain load/store when the predicate operand is a ptrue-all
pattern.

This allows existing IR optimizations such as dead-load removal to
occur.

Differential Revision: https://reviews.llvm.org/D113489
Matt Devereau 2021-11-04 16:10:55 +00:00
parent 3f3d4e8a15
commit f526c600c0
15 changed files with 780 additions and 458 deletions
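The diff below only updates the FileCheck expectations in the SVE ACLE tests. As a rough standalone sketch of the rewrite itself (not taken from the patch; the function names are made up, and the typed-pointer IR matches the era of these tests), the combine turns an SVE ld1 intrinsic into the generic masked-load intrinsic, and into an ordinary load once the predicate is a ptrue-all pattern:

; Before: target-specific intrinsic, opaque to generic IR passes.
define <vscale x 4 x i32> @before(<vscale x 4 x i1> %pg, i32* %base) {
  %v = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, i32* %base)
  ret <vscale x 4 x i32> %v
}

; After: the same load expressed with the target-independent masked load.
define <vscale x 4 x i32> @after(<vscale x 4 x i1> %pg, i32* %base) {
  %ptr = bitcast i32* %base to <vscale x 4 x i32>*
  %v = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* %ptr, i32 1, <vscale x 4 x i1> %pg, <vscale x 4 x i32> zeroinitializer)
  ret <vscale x 4 x i32> %v
}

; With a ptrue-all predicate the masked load can become a plain load, which
; generic optimizations such as dead-load removal already understand.
define <vscale x 4 x i32> @after_ptrue_all(i32* %base) {
  %ptr = bitcast i32* %base to <vscale x 4 x i32>*
  %v = load <vscale x 4 x i32>, <vscale x 4 x i32>* %ptr, align 1
  ret <vscale x 4 x i32> %v
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>*, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)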


@@ -19,14 +19,16 @@
// CHECK-LABEL: @test_svld1_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z15test_svld1_bf16u10__SVBool_tPKu6__bf16(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP2]]
//
svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
{
@@ -39,16 +41,18 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z20test_svld1_vnum_bf16u10__SVBool_tPKu6__bf16l(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP4]]
//
svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
{


@@ -16,13 +16,15 @@
// CHECK-LABEL: @test_svld1_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_s8u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
{
@@ -32,14 +34,16 @@ svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s16u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
{
@@ -49,14 +53,16 @@ svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
// CHECK-LABEL: @test_svld1_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s32u10__SVBool_tPKi(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
{
@@ -66,14 +72,16 @@ svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
// CHECK-LABEL: @test_svld1_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_s64u10__SVBool_tPKl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
{
@@ -82,13 +90,15 @@ svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
// CHECK-LABEL: @test_svld1_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
// CPP-CHECK-LABEL: @_Z13test_svld1_u8u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP1]]
//
svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
{
@@ -98,14 +108,16 @@ svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u16u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
//
svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
{
@@ -115,14 +127,16 @@ svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
// CHECK-LABEL: @test_svld1_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u32u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
//
svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
{
@@ -132,14 +146,16 @@ svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
// CHECK-LABEL: @test_svld1_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_u64u10__SVBool_tPKm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
//
svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
{
@@ -149,14 +165,16 @@ svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
// CHECK-LABEL: @test_svld1_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f16u10__SVBool_tPKDh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
//
svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
{
@@ -166,14 +184,16 @@ svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
// CHECK-LABEL: @test_svld1_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f32u10__SVBool_tPKf(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
//
svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
{
@@ -183,14 +203,16 @@ svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
// CHECK-LABEL: @test_svld1_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
//
// CPP-CHECK-LABEL: @_Z14test_svld1_f64u10__SVBool_tPKd(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
//
svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
{
@@ -201,15 +223,17 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_s8u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -221,16 +245,18 @@ svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s16u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
{
@@ -242,16 +268,18 @@ svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s32u10__SVBool_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
{
@@ -263,16 +291,18 @@ svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_s64u10__SVBool_tPKll(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
{
@@ -283,15 +313,17 @@ svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z18test_svld1_vnum_u8u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP3]]
//
svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@@ -303,16 +335,18 @@ svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u16u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
//
svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
{
@@ -324,16 +358,18 @@ svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u32u10__SVBool_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
//
svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
{
@@ -345,16 +381,18 @@ svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_u64u10__SVBool_tPKml(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0nxv2i64(<vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
//
svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
{
@@ -366,16 +404,18 @@ svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f16u10__SVBool_tPKDhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0nxv8f16(<vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP4]]
//
svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
{
@@ -387,16 +427,18 @@ svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f32u10__SVBool_tPKfl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP4]]
//
svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
{
@@ -408,16 +450,18 @@ svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP3]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP4]]
//
// CPP-CHECK-LABEL: @_Z19test_svld1_vnum_f64u10__SVBool_tPKdl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP3]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP4]]
//
svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
{


@@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1sb_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_s16u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
{
@@ -36,16 +38,18 @@ svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1sb_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_s32u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
{
@@ -55,16 +59,18 @@ svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1sb_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_s64u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
{
@@ -74,16 +80,18 @@ svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1sb_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_u16u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
{
@@ -93,16 +101,18 @@ svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1sb_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_u32u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
{
@@ -112,16 +122,18 @@ svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
// CHECK-LABEL: @test_svld1sb_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sb_u64u10__SVBool_tPKa(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
{
@@ -133,18 +145,20 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s16u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -156,18 +170,20 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s32u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -179,18 +195,20 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_s64u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -202,18 +220,20 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u16u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -225,18 +245,20 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u32u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
{
@@ -248,18 +270,20 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sb_vnum_u64u10__SVBool_tPKal(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
{

View File

@@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1sh_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sh_s32u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
{
@@ -36,16 +38,18 @@ svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
// CHECK-LABEL: @test_svld1sh_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sh_s64u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
{
@@ -55,16 +59,18 @@ svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
// CHECK-LABEL: @test_svld1sh_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sh_u32u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
{
@@ -74,16 +80,18 @@ svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
// CHECK-LABEL: @test_svld1sh_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sh_u64u10__SVBool_tPKs(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
{
@@ -95,18 +103,20 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s32u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
{
@@ -118,18 +128,20 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_s64u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
{
@@ -141,18 +153,20 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u32u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
{
@@ -164,18 +178,20 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sh_vnum_u64u10__SVBool_tPKsl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
{

View File

@@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1sw_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sw_s64u10__SVBool_tPKi(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
{
@@ -36,16 +38,18 @@ svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
// CHECK-LABEL: @test_svld1sw_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1sw_u64u10__SVBool_tPKi(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = sext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
{
@@ -57,18 +61,20 @@ svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_s64u10__SVBool_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
{
@@ -80,18 +86,20 @@ svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1sw_vnum_u64u10__SVBool_tPKil(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = sext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
{

View File

@@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1ub_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_s16u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
{
@@ -36,16 +38,18 @@ svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1ub_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_s32u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
{
@@ -55,16 +59,18 @@ svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1ub_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_s64u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
{
@@ -74,16 +80,18 @@ svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1ub_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_u16u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 8 x i8> [[TMP2]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP3]]
//
svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
{
@@ -93,16 +101,18 @@ svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1ub_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_u32u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i8> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
{
@@ -112,16 +122,18 @@ svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
// CHECK-LABEL: @test_svld1ub_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1ub_u64u10__SVBool_tPKh(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i8> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
{
@@ -133,18 +145,20 @@ svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s16u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@@ -156,18 +170,20 @@ svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s32u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@@ -179,18 +195,20 @@ svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_s64u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@@ -202,18 +220,20 @@ svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u16u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 8 x i8> [[TMP3]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 8 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0nxv8i8(<vscale x 8 x i8>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP5]]
//
svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@@ -225,18 +245,20 @@ svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u32u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i8> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 4 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0nxv4i8(<vscale x 4 x i8>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
{
@ -248,18 +270,20 @@ svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1ub_vnum_u64u10__SVBool_tPKhl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> [[TMP0]], i8* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i8> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <vscale x 2 x i8>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0nxv2i8(<vscale x 2 x i8>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i8> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
{

@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1uh_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uh_s32u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
{
@ -36,16 +38,18 @@ svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
// CHECK-LABEL: @test_svld1uh_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uh_s64u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
{
@ -55,16 +59,18 @@ svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
// CHECK-LABEL: @test_svld1uh_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uh_u32u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 4 x i16> [[TMP2]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
//
svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
{
@ -74,16 +80,18 @@ svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
// CHECK-LABEL: @test_svld1uh_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uh_u64u10__SVBool_tPKt(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i16> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
{
@ -95,18 +103,20 @@ svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_s32u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
{
@ -118,18 +128,20 @@ svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_s64u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
{
@ -141,18 +153,20 @@ svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_u32u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 4 x i16> [[TMP3]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 4 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0nxv4i16(<vscale x 4 x i16>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP5]]
//
svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
{
@ -164,18 +178,20 @@ svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uh_vnum_u64u10__SVBool_tPKtl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i16> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 2 x i16>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i16> @llvm.masked.load.nxv2i16.p0nxv2i16(<vscale x 2 x i16>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i16> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
{

@ -17,16 +17,18 @@
// CHECK-LABEL: @test_svld1uw_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uw_s64u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
{
@ -36,16 +38,18 @@ svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
// CHECK-LABEL: @test_svld1uw_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
// CPP-CHECK-LABEL: @_Z16test_svld1uw_u64u10__SVBool_tPKj(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP2]]
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP3:%.*]] = zext <vscale x 2 x i32> [[TMP2]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP3]]
//
svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
{
@ -57,18 +61,20 @@ svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uw_vnum_s64u10__SVBool_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
{
@ -80,18 +86,20 @@ svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
// CPP-CHECK-LABEL: @_Z21test_svld1uw_vnum_u64u10__SVBool_tPKjl(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP4:%.*]] = zext <vscale x 2 x i32> [[TMP3]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP4]]
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 2 x i32>*
// CPP-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 2 x i32> @llvm.masked.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i32> zeroinitializer)
// CPP-CHECK-NEXT: [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
// CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP5]]
//
svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
{

@ -19,13 +19,15 @@
// CHECK-LABEL: @test_svst1_bf16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z15test_svst1_bf16u10__SVBool_tPu6__bf16u14__SVBFloat16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
@ -39,7 +41,8 @@ void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z20test_svst1_vnum_bf16u10__SVBool_tPu6__bf16lu14__SVBFloat16_t(
@ -47,7 +50,8 @@ void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast bfloat* [[BASE:%.*]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast bfloat* [[TMP2]] to <vscale x 8 x bfloat>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8bf16.p0nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x bfloat>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data)

@ -16,12 +16,14 @@
// CHECK-LABEL: @test_svst1_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z13test_svst1_s8u10__SVBool_tPau10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
@ -32,13 +34,15 @@ void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
// CHECK-LABEL: @test_svst1_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_s16u10__SVBool_tPsu11__SVInt16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
@ -49,13 +53,15 @@ void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
// CHECK-LABEL: @test_svst1_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_s32u10__SVBool_tPiu11__SVInt32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
@ -66,13 +72,15 @@ void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
// CHECK-LABEL: @test_svst1_s64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_s64u10__SVBool_tPlu11__SVInt64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
@ -82,12 +90,14 @@ void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
// CHECK-LABEL: @test_svst1_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z13test_svst1_u8u10__SVBool_tPhu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP0]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
@ -98,13 +108,15 @@ void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
// CHECK-LABEL: @test_svst1_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_u16u10__SVBool_tPtu12__SVUint16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
@ -115,13 +127,15 @@ void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
// CHECK-LABEL: @test_svst1_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_u32u10__SVBool_tPju12__SVUint32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
@ -132,13 +146,15 @@ void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
// CHECK-LABEL: @test_svst1_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_u64u10__SVBool_tPmu12__SVUint64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
@ -149,13 +165,15 @@ void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
// CHECK-LABEL: @test_svst1_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_f16u10__SVBool_tPDhu13__SVFloat16_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP1]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
@ -166,13 +184,15 @@ void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
// CHECK-LABEL: @test_svst1_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_f32u10__SVBool_tPfu13__SVFloat32_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP1]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
@ -183,13 +203,15 @@ void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
// CHECK-LABEL: @test_svst1_f64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z14test_svst1_f64u10__SVBool_tPdu13__SVFloat64_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[BASE:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP1]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
@ -201,14 +223,16 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z18test_svst1_vnum_s8u10__SVBool_tPalu10__SVInt8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
@ -221,7 +245,8 @@ void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s16u10__SVBool_tPslu11__SVInt16_t(
@ -229,7 +254,8 @@ void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
@ -242,7 +268,8 @@ void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t dat
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s32u10__SVBool_tPilu11__SVInt32_t(
@ -250,7 +277,8 @@ void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t dat
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
@ -263,7 +291,8 @@ void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t dat
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_s64u10__SVBool_tPllu11__SVInt64_t(
@ -271,7 +300,8 @@ void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t dat
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
@ -283,14 +313,16 @@ void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t dat
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z18test_svst1_vnum_u8u10__SVBool_tPhlu11__SVUint8_t(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* [[TMP0]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8* [[TMP1]])
// CPP-CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <vscale x 16 x i8>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8>* [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
@ -303,7 +335,8 @@ void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u16u10__SVBool_tPtlu12__SVUint16_t(
@ -311,7 +344,8 @@ void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], i16* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <vscale x 8 x i16>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
@ -324,7 +358,8 @@ void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t d
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u32u10__SVBool_tPjlu12__SVUint32_t(
@ -332,7 +367,8 @@ void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t d
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], i32* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <vscale x 4 x i32>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
@ -345,7 +381,8 @@ void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t d
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_u64u10__SVBool_tPmlu12__SVUint64_t(
@ -353,7 +390,8 @@ void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t d
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], i64* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <vscale x 2 x i64>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
@ -366,7 +404,8 @@ void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t d
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f16u10__SVBool_tPDhlu13__SVFloat16_t(
@ -374,7 +413,8 @@ void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t d
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], half* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast half* [[TMP2]] to <vscale x 8 x half>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv8f16.p0nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x half>* [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
@ -387,7 +427,8 @@ void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f32u10__SVBool_tPflu13__SVFloat32_t(
@ -395,7 +436,8 @@ void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], float* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <vscale x 4 x float>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x float>* [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
@ -408,7 +450,8 @@ void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
// CPP-CHECK-LABEL: @_Z19test_svst1_vnum_f64u10__SVBool_tPdlu13__SVFloat64_t(
@ -416,7 +459,8 @@ void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t
// CPP-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CPP-CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CPP-CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], double* [[TMP2]])
// CPP-CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[TMP2]] to <vscale x 2 x double>*
// CPP-CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x double>* [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CPP-CHECK-NEXT: ret void
//
void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)

View File

@ -16,7 +16,8 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8>* [[TMP2]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
@ -28,7 +29,8 @@ void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i8>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
@ -40,7 +42,8 @@ void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i8>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
@ -52,7 +55,8 @@ void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP1]], <vscale x 8 x i8>* [[TMP2]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
@ -64,7 +68,8 @@ void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP1]], <vscale x 4 x i8>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
@ -76,7 +81,8 @@ void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i8* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP1]], <vscale x 2 x i8>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
@ -90,7 +96,8 @@ void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 8 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data)
@ -104,7 +111,8 @@ void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t dat
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 4 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i8>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data)
@ -118,7 +126,8 @@ void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t dat
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 2 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i8>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data)
@ -132,7 +141,8 @@ void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t dat
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 8 x i16> [[DATA:%.*]] to <vscale x 8 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 8 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0nxv8i8(<vscale x 8 x i8> [[TMP2]], <vscale x 8 x i8>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data)
@ -146,7 +156,8 @@ void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t d
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 4 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0nxv4i8(<vscale x 4 x i8> [[TMP2]], <vscale x 4 x i8>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data)
@ -160,7 +171,8 @@ void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t d
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i8>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i8* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to <vscale x 2 x i8>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0nxv2i8(<vscale x 2 x i8> [[TMP2]], <vscale x 2 x i8>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data)

View File

@ -16,7 +16,8 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i16>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
@ -28,7 +29,8 @@ void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i16>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
@ -40,7 +42,8 @@ void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP1]], <vscale x 4 x i16>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
@ -52,7 +55,8 @@ void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i16* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP1]], <vscale x 2 x i16>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
@ -66,7 +70,8 @@ void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i16* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 4 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i16>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data)
@ -80,7 +85,8 @@ void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t da
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i16* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 2 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i16>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data)
@ -94,7 +100,8 @@ void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t da
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i1> [[TMP0]], i16* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 4 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv4i16.p0nxv4i16(<vscale x 4 x i16> [[TMP2]], <vscale x 4 x i16>* [[TMP4]], i32 1, <vscale x 4 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data)
@ -108,7 +115,8 @@ void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i16>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i16* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <vscale x 2 x i16>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i16.p0nxv2i16(<vscale x 2 x i16> [[TMP2]], <vscale x 2 x i16>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data)

View File

@ -1,7 +1,7 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s 2>&1 | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s 2>&1 | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o - -emit-llvm %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s
#include <arm_sve.h>
@ -16,7 +16,8 @@
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
@ -28,7 +29,8 @@ void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i1> [[TMP0]], i32* [[BASE:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP1]], <vscale x 2 x i32>* [[TMP2]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
@ -42,7 +44,8 @@ void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i32* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <vscale x 2 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i32>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data)
@ -56,7 +59,8 @@ void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t da
// CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
// CHECK-NEXT: [[TMP2:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* [[TMP1]], i64 [[VNUM:%.*]], i64 0
// CHECK-NEXT: call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i1> [[TMP0]], i32* [[TMP3]])
// CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <vscale x 2 x i32>*
// CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0nxv2i32(<vscale x 2 x i32> [[TMP2]], <vscale x 2 x i32>* [[TMP4]], i32 1, <vscale x 2 x i1> [[TMP0]])
// CHECK-NEXT: ret void
//
void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data)

View File

@ -725,6 +725,50 @@ static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, FMLA);
}
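
// Canonicalise aarch64.sve.ld1 to a plain unpredicated load when the
// governing predicate is an all-active ptrue, and to llvm.masked.load
// otherwise, so that generic IR optimisations can see through it.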
static Optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);

  Value *Pred = II.getOperand(0);
  Value *PtrOp = II.getOperand(1);
  Type *VecTy = II.getType();
  Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());

  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                      m_ConstantInt<AArch64SVEPredPattern::all>()))) {
    LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
    return IC.replaceInstUsesWith(II, Load);
  }

  CallInst *MaskedLoad =
      Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
                               Pred, ConstantAggregateZero::get(VecTy));
  return IC.replaceInstUsesWith(II, MaskedLoad);
}
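
// Likewise for aarch64.sve.st1: an all-active ptrue predicate becomes a
// plain store, any other predicate becomes llvm.masked.store.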
static Optional<Instruction *>
instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
  IRBuilder<> Builder(II.getContext());
  Builder.SetInsertPoint(&II);

  Value *VecOp = II.getOperand(0);
  Value *Pred = II.getOperand(1);
  Value *PtrOp = II.getOperand(2);
  Value *VecPtr =
      Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());

  if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
                      m_ConstantInt<AArch64SVEPredPattern::all>()))) {
    Builder.CreateStore(VecOp, VecPtr);
    return IC.eraseInstFromFunction(II);
  }

  Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
                            Pred);
  return IC.eraseInstFromFunction(II);
}

static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
  switch (Intrinsic) {
  case Intrinsic::aarch64_sve_fmul:
@ -1025,6 +1069,10 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
    return instCombineLD1GatherIndex(IC, II);
  case Intrinsic::aarch64_sve_st1_scatter_index:
    return instCombineST1ScatterIndex(IC, II);
  case Intrinsic::aarch64_sve_ld1:
    return instCombineSVELD1(IC, II, DL);
  case Intrinsic::aarch64_sve_st1:
    return instCombineSVEST1(IC, II, DL);
  }

  return None;

View File

@ -0,0 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s
target triple = "aarch64-unknown-linux-gnu"
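
; Folding of aarch64.sve.ld1/st1: a ptrue-all predicate should become a plain
; load/store, while any other predicate should become a masked load/store.
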
define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
; CHECK-LABEL: @combine_ld1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP2]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
; CHECK-LABEL: @combine_ld1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: ret <vscale x 4 x i32> [[TMP3]]
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
%2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr)
ret <vscale x 4 x i32> %2
}

define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
; CHECK-LABEL: @combine_st1(
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
ret void
}

define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
; CHECK-LABEL: @combine_st1_masked(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]])
; CHECK-NEXT: ret void
;
%1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr)
ret void
}

declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)

attributes #0 = { "target-features"="+sve" }