[clang][AArch64][SVE] Handle PRValue under VLAT <-> VLST cast

This change fixes a crash where the operand of a VLAT <-> VLST cast
is a PRValue, which EmitLValue cannot handle. Such operands are now
spilled to a temporary alloca and reloaded at the destination type.
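
A reduced reproducer, patterned after the test_cast test added in this
commit (the function name repro and the 512-bit width are illustrative):

    #include <arm_sve.h>

    typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

    svint32_t repro(svbool_t pg, svint32_t vec) {
      fixed_int32_t xx = {1, 2, 3, 4};
      fixed_int32_t yy = {2, 5, 4, 6};
      // xx + yy is a PRValue of the fixed-length (VLST) type; the implicit
      // VLST -> VLAT cast of this operand used to reach EmitLValue, which
      // cannot handle PRValues, and crashed.
      return svadd_m(pg, vec, xx + yy);
    }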

Differential Revision: https://reviews.llvm.org/D105097
Jun Ma 2021-06-29 18:04:28 +08:00
parent ae5433945f
commit 3afbf89804
5 changed files with 172 additions and 116 deletions


@@ -2095,24 +2095,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) {
          isa<llvm::ScalableVectorType>(DstTy)) ||
         (isa<llvm::ScalableVectorType>(SrcTy) &&
          isa<llvm::FixedVectorType>(DstTy))) {
-      if (const CallExpr *CE = dyn_cast<CallExpr>(E)) {
-        // Call expressions can't have a scalar return unless the return type
-        // is a reference type so an lvalue can't be emitted. Create a temp
-        // alloca to store the call, bitcast the address then load.
-        QualType RetTy = CE->getCallReturnType(CGF.getContext());
-        Address Addr =
-            CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-call-rvalue");
-        LValue LV = CGF.MakeAddrLValue(Addr, RetTy);
-        CGF.EmitStoreOfScalar(Src, LV);
-        Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
-                                            "castFixedSve");
-        LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
-        DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
-        return EmitLoadOfLValue(DestLV, CE->getExprLoc());
-      }
-      Address Addr = EmitLValue(E).getAddress(CGF);
-      Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy));
+      Address Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-value");
+      LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
+      CGF.EmitStoreOfScalar(Src, LV);
+      Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
+                                          "castFixedSve");
       LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
       DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
       return EmitLoadOfLValue(DestLV, CE->getExprLoc());
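
Read as one piece, the new path spills the already-evaluated value
unconditionally, so it no longer matters whether E is a call, an lvalue,
or any other PRValue. Below is an annotated copy of the new lines; the
comments are added here for exposition, and the claim that Src comes from
visiting E is my reading of the surrounding VisitCastExpr:

      // Src was produced when VisitCastExpr visited E, so it can be stored
      // directly; no EmitLValue on E is required.
      Address Addr = CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-value");
      LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
      // Spill the value ...
      CGF.EmitStoreOfScalar(Src, LV);
      // ... retype the slot ...
      Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy),
                                          "castFixedSve");
      // ... and reload at the destination type. The fixed and scalable views
      // of the slot alias, hence the may-alias TBAA tag.
      LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy);
      DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo());
      return EmitLoadOfLValue(DestLV, CE->getExprLoc());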


@@ -30,21 +30,21 @@ DEFINE_STRUCT(bool)
// CHECK-128-LABEL: @read_int64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_int64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v4i64(<vscale x 2 x i64> undef, <4 x i64> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_int64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA6:!tbaa !.*]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
@@ -56,21 +56,21 @@ svint64_t read_int64(struct struct_int64 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_int64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], <4 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], <4 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_int64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_int64(struct struct_int64 *s, svint64_t x) {
@@ -84,21 +84,21 @@ void write_int64(struct struct_int64 *s, svint64_t x) {
// CHECK-128-LABEL: @read_float64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_float64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v4f64(<vscale x 2 x double> undef, <4 x double> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_float64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x double>, <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x double> [[CASTSCALABLESVE]]
//
@@ -110,21 +110,21 @@ svfloat64_t read_float64(struct struct_float64 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x double> @llvm.experimental.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], <2 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], <2 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_float64(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <4 x double> @llvm.experimental.vector.extract.v4f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], <4 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], <4 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_float64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x double> @llvm.experimental.vector.extract.v8f64.nxv2f64(<vscale x 2 x double> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-512-NEXT: store <8 x double> [[CASTFIXEDSVE]], <8 x double>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_float64(struct struct_float64 *s, svfloat64_t x) {
@@ -138,21 +138,21 @@ void write_float64(struct struct_float64 *s, svfloat64_t x) {
// CHECK-128-LABEL: @read_bfloat16(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-256-LABEL: @read_bfloat16(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-256-NEXT: [[TMP0:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v16bf16(<vscale x 8 x bfloat> undef, <16 x bfloat> [[TMP0]], i64 0)
// CHECK-256-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_bfloat16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> undef, <32 x bfloat> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
@@ -164,21 +164,21 @@ svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) {
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_bfloat16(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x bfloat> @llvm.experimental.vector.extract.v16bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: store <16 x bfloat> [[CASTFIXEDSVE]], <16 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-256-NEXT: store <16 x bfloat> [[CASTFIXEDSVE]], <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_bfloat16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <32 x bfloat> @llvm.experimental.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[X:%.*]], i64 0)
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* [[ARRAYIDX]], align 16, [[TBAA6]]
// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
@@ -191,23 +191,32 @@ void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) {
// CHECK-128-LABEL: @read_bool(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-256-LABEL: @read_bool(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <4 x i8>, align 16
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-256-NEXT: store <4 x i8> [[TMP0]], <4 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <4 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-256-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-512-LABEL: @read_bool(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[ARRAYIDX]] to <vscale x 16 x i1>*
// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP0]], align 2, [[TBAA6]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t read_bool(struct struct_bool *s) {
@@ -216,32 +225,32 @@ svbool_t read_bool(struct struct_bool *s) {
// CHECK-128-LABEL: @write_bool(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-128-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <2 x i8>*
// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]]
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-128-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-256-LABEL: @write_bool(
// CHECK-256-NEXT: entry:
// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <4 x i8>*
// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 16, [[TBAA6]]
// CHECK-256-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-256-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <4 x i8>*
// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i8>, <4 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-256-NEXT: store <4 x i8> [[TMP1]], <4 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
// CHECK-256-NEXT: store <4 x i8> [[TMP0]], <4 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-256-NEXT: ret void
//
// CHECK-512-LABEL: @write_bool(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[X_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[X_ADDR]] to <8 x i8>*
// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[X:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0
// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[ARRAYIDX]], align 2, [[TBAA6]]
// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[ARRAYIDX]], align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_bool(struct struct_bool *s, svbool_t x) {
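
Note how the bool cases above differ from the int64/float64/bfloat16
ones: data vectors are cast with the llvm.experimental.vector.insert and
llvm.experimental.vector.extract intrinsics, which require the fixed and
scalable element types to match, whereas predicates are
<vscale x 16 x i1> on the scalable side but <N x i8> on the fixed side,
so they must round-trip through a stack slot as in the CHECK lines above.
A sketch of the two source-level shapes, assuming -msve-vector-bits=512
(the typedef and function names are illustrative):

    #include <arm_sve.h>

    typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(512)));
    typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(512)));

    // Element types match (i64 on both sides), so this lowers to
    // llvm.experimental.vector.insert with no memory round-trip.
    svint64_t widen_data(fixed_int64_t v) { return v; }

    // Element types differ (i1 vs. i8), so this lowers to the
    // store/bitcast/load sequence seen in read_bool/write_bool.
    svbool_t widen_pred(fixed_bool_t v) { return v; }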


@@ -79,9 +79,9 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[OP2:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[OP2_ADDR:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[SAVED_VALUE3:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[SAVED_VALUE5:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
@@ -89,20 +89,20 @@ fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP2]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP2_COERCE:%.*]], <vscale x 16 x i1>* [[TMP1]], align 16
// CHECK-NEXT: [[OP22:%.*]] = load <8 x i8>, <8 x i8>* [[OP2]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[OP2_ADDR]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP2]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP4]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1> [[TMP5]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
// CHECK-NEXT: [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP22]], <8 x i8>* [[SAVED_VALUE3]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[CASTFIXEDSVE4:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE3]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP3:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE4]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP3]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP4]], <vscale x 16 x i1>* [[SAVED_VALUE5]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-NEXT: [[CASTFIXEDSVE6:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE5]] to <8 x i8>*
// CHECK-NEXT: [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE6]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
// CHECK-NEXT: store <8 x i8> [[TMP7]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP8]]
// CHECK-NEXT: store <8 x i8> [[TMP5]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
// CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP6]]
//
fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) {
return svsel(pg, op1, op2);
@@ -135,23 +135,23 @@ fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op
// CHECK-LABEL: @call_bool_fs(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
// CHECK-NEXT: [[SAVED_VALUE2:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to <vscale x 16 x i1>*
// CHECK-NEXT: store <vscale x 16 x i1> [[OP1_COERCE:%.*]], <vscale x 16 x i1>* [[TMP0]], align 16
// CHECK-NEXT: [[OP11:%.*]] = load <8 x i8>, <8 x i8>* [[OP1]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[OP1_ADDR]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[TMP1]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[OP2:%.*]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP3]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: store <8 x i8> [[OP11]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[TMP1]], <vscale x 16 x i1> [[OP2:%.*]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1>* [[SAVED_VALUE2]], align 16, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE2]] to <8 x i8>*
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE3]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
// CHECK-NEXT: store <8 x i8> [[TMP4]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP5]]
// CHECK-NEXT: store <8 x i8> [[TMP3]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[RETVAL_COERCE]], align 16
// CHECK-NEXT: ret <vscale x 16 x i1> [[TMP4]]
//
fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) {
return svsel(pg, op1, op2);
@@ -183,11 +183,11 @@ fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) {
// CHECK-LABEL: @call_bool_ss(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.sel.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]], align 16, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_CALL_RVALUE]] to <8 x i8>*
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9]]
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast <vscale x 16 x i1>* [[RETVAL_COERCE]] to <8 x i8>*
// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL_0__SROA_CAST]], align 16


@@ -17,13 +17,19 @@ fixed_int32_t global_vec;
// CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
// CHECK-NEXT: [[SAVED_VALUE1:%.*]] = alloca <8 x i8>, align 8
// CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_pred to <vscale x 16 x i1>*), align 2
// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_pred to <vscale x 16 x i1>*), align 2
// CHECK-NEXT: store <8 x i8> [[TMP3]], <8 x i8>* [[SAVED_VALUE1]], align 8
// CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP4:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP4]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP5]], <vscale x 16 x i1>* [[PG]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
@@ -32,11 +38,11 @@ fixed_int32_t global_vec;
// CHECK-NEXT: [[TMP8:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP6]])
// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP8]])
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE]], <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP10]], i64 0)
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[TMP11:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE1]]
// CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP11]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
//
fixed_int32_t foo(svbool_t pred, svint32_t vec) {
svbool_t pg = svand_z(pred, global_pred, global_pred);
@@ -103,3 +109,49 @@ fixed_bool_t address_of_array_idx() {
parr = &arr[0];
return *parr;
}
// CHECK-LABEL: @test_cast(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16
// CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 4 x i32>, align 16
// CHECK-NEXT: [[XX:%.*]] = alloca <16 x i32>, align 16
// CHECK-NEXT: [[YY:%.*]] = alloca <16 x i32>, align 16
// CHECK-NEXT: [[PG:%.*]] = alloca <vscale x 16 x i1>, align 2
// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 8
// CHECK-NEXT: [[SAVED_VALUE1:%.*]] = alloca <16 x i32>, align 64
// CHECK-NEXT: store <vscale x 16 x i1> [[PRED:%.*]], <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: store <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[XX]], align 16
// CHECK-NEXT: store <16 x i32> <i32 2, i32 5, i32 4, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32>* [[YY]], align 16
// CHECK-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PRED_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2
// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[SAVED_VALUE]], align 8
// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[XX]], align 16
// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[YY]], align 16
// CHECK-NEXT: [[ADD:%.*]] = add <16 x i32> [[TMP3]], [[TMP4]]
// CHECK-NEXT: store <16 x i32> [[ADD]], <16 x i32>* [[SAVED_VALUE1]], align 64
// CHECK-NEXT: [[CASTFIXEDSVE2:%.*]] = bitcast <16 x i32>* [[SAVED_VALUE1]] to <vscale x 16 x i1>*
// CHECK-NEXT: [[TMP5:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE2]], align 64
// CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[TMP0]], <vscale x 16 x i1> [[TMP2]], <vscale x 16 x i1> [[TMP5]])
// CHECK-NEXT: store <vscale x 16 x i1> [[TMP6]], <vscale x 16 x i1>* [[PG]], align 2
// CHECK-NEXT: [[TMP7:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[PG]], align 2
// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* @global_vec, align 16
// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP8]], i64 0)
// CHECK-NEXT: [[TMP9:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[VEC_ADDR]], align 16
// CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[TMP7]])
// CHECK-NEXT: [[TMP11:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP10]], <vscale x 4 x i32> [[CASTSCALABLESVE]], <vscale x 4 x i32> [[TMP9]])
// CHECK-NEXT: [[CASTFIXEDSVE3:%.*]] = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[TMP11]], i64 0)
// CHECK-NEXT: store <16 x i32> [[CASTFIXEDSVE3]], <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[TMP12:%.*]] = load <16 x i32>, <16 x i32>* [[RETVAL]], align 16
// CHECK-NEXT: [[CASTSCALABLESVE4:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[TMP12]], i64 0)
// CHECK-NEXT: ret <vscale x 4 x i32> [[CASTSCALABLESVE4]]
//
fixed_int32_t test_cast(svbool_t pred, svint32_t vec) {
fixed_int32_t xx = {1, 2, 3, 4};
fixed_int32_t yy = {2, 5, 4, 6};
svbool_t pg = svand_z(pred, global_pred, xx + yy);
return svadd_m(pg, global_vec, vec);
}


@@ -22,13 +22,13 @@ fixed_bool_t global_bool;
// CHECK-128-LABEL: @write_global_i64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <2 x i64> @llvm.experimental.vector.extract.v2i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* @global_i64, align 16, [[TBAA6:!tbaa !.*]]
// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], <2 x i64>* @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_i64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i64> @llvm.experimental.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[V:%.*]], i64 0)
// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* @global_i64, align 16, [[TBAA6:!tbaa !.*]]
// CHECK-512-NEXT: store <8 x i64> [[CASTFIXEDSVE]], <8 x i64>* @global_i64, align 16, !tbaa [[TBAA6:![0-9]+]]
// CHECK-512-NEXT: ret void
//
void write_global_i64(svint64_t v) { global_i64 = v; }
@@ -36,33 +36,33 @@ void write_global_i64(svint64_t v) { global_i64 = v; }
// CHECK-128-LABEL: @write_global_bf16(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x bfloat> @llvm.experimental.vector.extract.v8bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* @global_bf16, align 16, [[TBAA6]]
// CHECK-128-NEXT: store <8 x bfloat> [[CASTFIXEDSVE]], <8 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_bf16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = call <32 x bfloat> @llvm.experimental.vector.extract.v32bf16.nxv8bf16(<vscale x 8 x bfloat> [[V:%.*]], i64 0)
// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* @global_bf16, align 16, [[TBAA6]]
// CHECK-512-NEXT: store <32 x bfloat> [[CASTFIXEDSVE]], <32 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_global_bf16(svbfloat16_t v) { global_bf16 = v; }
// CHECK-128-LABEL: @write_global_bool(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-128-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <2 x i8>*
// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, [[TBAA6]]
// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, [[TBAA6]]
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-128-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <2 x i8>*
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret void
//
// CHECK-512-LABEL: @write_global_bool(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[V_ADDR]], align 16, [[TBAA9:!tbaa !.*]]
// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <vscale x 16 x i1>* [[V_ADDR]] to <8 x i8>*
// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 16, [[TBAA6]]
// CHECK-512-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* @global_bool, align 2, [[TBAA6]]
// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <vscale x 16 x i1>, align 16
// CHECK-512-NEXT: store <vscale x 16 x i1> [[V:%.*]], <vscale x 16 x i1>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA9:![0-9]+]]
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <vscale x 16 x i1>* [[SAVED_VALUE]] to <8 x i8>*
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret void
//
void write_global_bool(svbool_t v) { global_bool = v; }
@@ -73,13 +73,13 @@ void write_global_bool(svbool_t v) { global_bool = v; }
// CHECK-128-LABEL: @read_global_i64(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @global_i64, align 16, [[TBAA6]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* @global_i64, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_global_i64(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* @global_i64, align 16, [[TBAA6]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* @global_i64, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 2 x i64> [[CASTSCALABLESVE]]
//
@@ -87,13 +87,13 @@ svint64_t read_global_i64() { return global_i64; }
// CHECK-128-LABEL: @read_global_bf16(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* @global_bf16, align 16, [[TBAA6]]
// CHECK-128-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, <8 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> [[TMP0]], i64 0)
// CHECK-128-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
// CHECK-512-LABEL: @read_global_bf16(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* @global_bf16, align 16, [[TBAA6]]
// CHECK-512-NEXT: [[TMP0:%.*]] = load <32 x bfloat>, <32 x bfloat>* @global_bf16, align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v32bf16(<vscale x 8 x bfloat> undef, <32 x bfloat> [[TMP0]], i64 0)
// CHECK-512-NEXT: ret <vscale x 8 x bfloat> [[CASTSCALABLESVE]]
//
@@ -101,12 +101,20 @@ svbfloat16_t read_global_bf16() { return global_bf16; }
// CHECK-128-LABEL: @read_global_bool(
// CHECK-128-NEXT: entry:
// CHECK-128-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<2 x i8>* @global_bool to <vscale x 16 x i1>*), align 2, [[TBAA6]]
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP0]]
// CHECK-128-NEXT: [[SAVED_VALUE:%.*]] = alloca <2 x i8>, align 16
// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i8>, <2 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-128-NEXT: store <2 x i8> [[TMP0]], <2 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <2 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-128-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-128-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
// CHECK-512-LABEL: @read_global_bool(
// CHECK-512-NEXT: entry:
// CHECK-512-NEXT: [[TMP0:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* bitcast (<8 x i8>* @global_bool to <vscale x 16 x i1>*), align 2, [[TBAA6]]
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP0]]
// CHECK-512-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i8>, align 16
// CHECK-512-NEXT: [[TMP0:%.*]] = load <8 x i8>, <8 x i8>* @global_bool, align 2, !tbaa [[TBAA6]]
// CHECK-512-NEXT: store <8 x i8> [[TMP0]], <8 x i8>* [[SAVED_VALUE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <8 x i8>* [[SAVED_VALUE]] to <vscale x 16 x i1>*
// CHECK-512-NEXT: [[TMP1:%.*]] = load <vscale x 16 x i1>, <vscale x 16 x i1>* [[CASTFIXEDSVE]], align 16, !tbaa [[TBAA6]]
// CHECK-512-NEXT: ret <vscale x 16 x i1> [[TMP1]]
//
svbool_t read_global_bool() { return global_bool; }