[AArch64][SVE] Fill out missing unpredicated load/store patterns.

The set of patterns for unpredicated load/store was incomplete: it only
included non-extending loads and non-truncating stores.  Fill out the
remaining patterns for extending loads and truncating stores, and add the
corresponding support to frame offset lowering.

Differential Revision: https://reviews.llvm.org/D80349
This commit is contained in:
Eli Friedman 2020-05-20 17:52:36 -07:00
parent e79d002309
commit f09d220c71
3 changed files with 329 additions and 24 deletions

View File

@ -1831,6 +1831,24 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
case AArch64::LD1B_H_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
@ -2289,6 +2307,45 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_H_IMM:
case AArch64::LD1SB_H_IMM:
case AArch64::LD1H_S_IMM:
case AArch64::LD1SH_S_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
// A half vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(8);
Width = SVEMaxBytesPerVector / 2;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_S_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
// A quarter vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(4);
Width = SVEMaxBytesPerVector / 4;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
// An eighth vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(2);
Width = SVEMaxBytesPerVector / 8;
MinOffset = -8;
MaxOffset = 7;
break;
case AArch64::ST2GOffset:
case AArch64::STZ2GOffset:
Scale = TypeSize::Fixed(16);

View File

@ -1560,37 +1560,69 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR, STNT1W_ZRI, am_sve_regreg_lsl2>;
defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR, STNT1D_ZRI, am_sve_regreg_lsl3>;
multiclass unpred_store<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
def _fi : Pat<(store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
multiclass unpred_store<PatFrag Store, ValueType Ty, Instruction RegImmInst,
Instruction PTrue> {
def : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
def _default : Pat<(store (Ty ZPR:$val), GPR64:$base),
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
}
defm Pat_ST1B : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
defm Pat_ST1H : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
defm Pat_ST1W : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
defm Pat_ST1D : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
defm Pat_ST1H_float16: unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
defm Pat_ST1W_float : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
defm Pat_ST1D_double : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;
// Unpredicated store patterns. Integer vectors get a plain store for the
// full-width type and truncating stores for the narrower memory element
// sizes. Each floating-point vector type gets a plain store using the ST1
// form and PTRUE whose element size matches that type's element count
// (e.g. nxv2f32 uses the .d form, mirroring the nxv2f32 load pattern).
defm : unpred_store< store, nxv16i8, ST1B_IMM, PTRUE_B>;
defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H_IMM, PTRUE_H>;
defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S_IMM, PTRUE_S>;
defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv8i16, ST1H_IMM, PTRUE_H>;
defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S_IMM, PTRUE_S>;
defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv4i32, ST1W_IMM, PTRUE_S>;
defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv2i64, ST1D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv8f16, ST1H_IMM, PTRUE_H>;
defm : unpred_store< store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
defm : unpred_store< store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv4f32, ST1W_IMM, PTRUE_S>;
defm : unpred_store< store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
defm : unpred_store< store, nxv2f64, ST1D_IMM, PTRUE_D>;
multiclass unpred_load<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
Instruction PTrue> {
def : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
def _default : Pat<(Ty (load GPR64:$base)),
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
def : Pat<(Ty (Load GPR64:$base)),
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
}
defm Pat_LD1B : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
defm Pat_LD1H : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
defm Pat_LD1W : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
defm Pat_LD1D : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
defm Pat_LD1H_float16: unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
defm Pat_LD1W_float : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
defm Pat_LD1D_double : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv16i8, LD1B_IMM, PTRUE_B>;
defm : unpred_load< zextloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
defm : unpred_load< zextloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
defm : unpred_load< zextloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
defm : unpred_load< extloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
defm : unpred_load< extloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
defm : unpred_load< extloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
defm : unpred_load< sextloadvi8, nxv8i16, LD1SB_H_IMM, PTRUE_H>;
defm : unpred_load< sextloadvi8, nxv4i32, LD1SB_S_IMM, PTRUE_S>;
defm : unpred_load< sextloadvi8, nxv2i64, LD1SB_D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv8i16, LD1H_IMM, PTRUE_H>;
defm : unpred_load<zextloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
defm : unpred_load<zextloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
defm : unpred_load< extloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
defm : unpred_load< extloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
defm : unpred_load<sextloadvi16, nxv4i32, LD1SH_S_IMM, PTRUE_S>;
defm : unpred_load<sextloadvi16, nxv2i64, LD1SH_D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv4i32, LD1W_IMM, PTRUE_S>;
defm : unpred_load<zextloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
defm : unpred_load< extloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
defm : unpred_load<sextloadvi32, nxv2i64, LD1SW_D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv2i64, LD1D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv8f16, LD1H_IMM, PTRUE_H>;
defm : unpred_load< load, nxv4f16, LD1H_S_IMM, PTRUE_S>;
defm : unpred_load< load, nxv2f16, LD1H_D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv4f32, LD1W_IMM, PTRUE_S>;
defm : unpred_load< load, nxv2f32, LD1W_D_IMM, PTRUE_D>;
defm : unpred_load< load, nxv2f64, LD1D_IMM, PTRUE_D>;
multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),

View File

@ -16,6 +16,81 @@ define void @fill_nxv16i8() {
ret void
}
define void @fill_nxv8i8() {
; CHECK-LABEL: fill_nxv8i8
; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp]
; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 8 x i8>
%local1 = alloca <vscale x 8 x i8>
load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local0
load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local1
ret void
}
define <vscale x 8 x i16> @fill_signed_nxv8i8() {
; CHECK-LABEL: fill_signed_nxv8i8
; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp]
; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 8 x i8>
%local1 = alloca <vscale x 8 x i8>
%a = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local0
%a_ext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
%b = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local1
%b_ext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
%sum = add <vscale x 8 x i16> %a_ext, %b_ext
ret <vscale x 8 x i16> %sum
}
define void @fill_nxv4i8() {
; CHECK-LABEL: fill_nxv4i8
; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #3, mul vl]
; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #2, mul vl]
%local0 = alloca <vscale x 4 x i8>
%local1 = alloca <vscale x 4 x i8>
load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local0
load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local1
ret void
}
define <vscale x 4 x i32> @fill_signed_nxv4i8() {
; CHECK-LABEL: fill_signed_nxv4i8
; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #3, mul vl]
; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #2, mul vl]
%local0 = alloca <vscale x 4 x i8>
%local1 = alloca <vscale x 4 x i8>
%a = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local0
%a_ext = sext <vscale x 4 x i8> %a to <vscale x 4 x i32>
%b = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local1
%b_ext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
%sum = add <vscale x 4 x i32> %a_ext, %b_ext
ret <vscale x 4 x i32> %sum
}
define void @fill_nxv2i8() {
; CHECK-LABEL: fill_nxv2i8
; CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #7, mul vl]
; CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #6, mul vl]
%local0 = alloca <vscale x 2 x i8>
%local1 = alloca <vscale x 2 x i8>
load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local0
load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local1
ret void
}
define <vscale x 2 x i64> @fill_signed_nxv2i8() {
; CHECK-LABEL: fill_signed_nxv2i8
; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #7, mul vl]
; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #6, mul vl]
%local0 = alloca <vscale x 2 x i8>
%local1 = alloca <vscale x 2 x i8>
%a = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local0
%a_ext = sext <vscale x 2 x i8> %a to <vscale x 2 x i64>
%b = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local1
%b_ext = sext <vscale x 2 x i8> %b to <vscale x 2 x i64>
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
ret <vscale x 2 x i64> %sum
}
define void @fill_nxv8i16() {
; CHECK-LABEL: fill_nxv8i16
; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
@ -27,6 +102,56 @@ define void @fill_nxv8i16() {
ret void
}
define void @fill_nxv4i16() {
; CHECK-LABEL: fill_nxv4i16
; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp]
; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 4 x i16>
%local1 = alloca <vscale x 4 x i16>
load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local0
load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local1
ret void
}
define <vscale x 4 x i32> @fill_signed_nxv4i16() {
; CHECK-LABEL: fill_signed_nxv4i16
; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp]
; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 4 x i16>
%local1 = alloca <vscale x 4 x i16>
%a = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local0
%a_ext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
%b = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local1
%b_ext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
%sum = add <vscale x 4 x i32> %a_ext, %b_ext
ret <vscale x 4 x i32> %sum
}
define void @fill_nxv2i16() {
; CHECK-LABEL: fill_nxv2i16
; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #3, mul vl]
; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #2, mul vl]
%local0 = alloca <vscale x 2 x i16>
%local1 = alloca <vscale x 2 x i16>
load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local0
load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local1
ret void
}
define <vscale x 2 x i64> @fill_signed_nxv2i16() {
; CHECK-LABEL: fill_signed_nxv2i16
; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #3, mul vl]
; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #2, mul vl]
%local0 = alloca <vscale x 2 x i16>
%local1 = alloca <vscale x 2 x i16>
%a = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local0
%a_ext = sext <vscale x 2 x i16> %a to <vscale x 2 x i64>
%b = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local1
%b_ext = sext <vscale x 2 x i16> %b to <vscale x 2 x i64>
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
ret <vscale x 2 x i64> %sum
}
define void @fill_nxv4i32() {
; CHECK-LABEL: fill_nxv4i32
; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
@ -38,6 +163,31 @@ define void @fill_nxv4i32() {
ret void
}
define void @fill_nxv2i32() {
; CHECK-LABEL: fill_nxv2i32
; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp]
; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 2 x i32>
%local1 = alloca <vscale x 2 x i32>
load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local0
load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local1
ret void
}
define <vscale x 2 x i64> @fill_signed_nxv2i32() {
; CHECK-LABEL: fill_signed_nxv2i32
; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp]
; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
%local0 = alloca <vscale x 2 x i32>
%local1 = alloca <vscale x 2 x i32>
%a = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local0
%a_ext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
%b = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local1
%b_ext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
ret <vscale x 2 x i64> %sum
}
define void @fill_nxv2i64() {
; CHECK-LABEL: fill_nxv2i64
; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
@ -63,6 +213,39 @@ define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
ret void
}
define void @spill_nxv8i8(<vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1) {
; CHECK-LABEL: spill_nxv8i8
; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp]
; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp, #1, mul vl]
%local0 = alloca <vscale x 8 x i8>
%local1 = alloca <vscale x 8 x i8>
store volatile <vscale x 8 x i8> %v0, <vscale x 8 x i8>* %local0
store volatile <vscale x 8 x i8> %v1, <vscale x 8 x i8>* %local1
ret void
}
define void @spill_nxv4i8(<vscale x 4 x i8> %v0, <vscale x 4 x i8> %v1) {
; CHECK-LABEL: spill_nxv4i8
; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #3, mul vl]
; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #2, mul vl]
%local0 = alloca <vscale x 4 x i8>
%local1 = alloca <vscale x 4 x i8>
store volatile <vscale x 4 x i8> %v0, <vscale x 4 x i8>* %local0
store volatile <vscale x 4 x i8> %v1, <vscale x 4 x i8>* %local1
ret void
}
define void @spill_nxv2i8(<vscale x 2 x i8> %v0, <vscale x 2 x i8> %v1) {
; CHECK-LABEL: spill_nxv2i8
; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #7, mul vl]
; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #6, mul vl]
%local0 = alloca <vscale x 2 x i8>
%local1 = alloca <vscale x 2 x i8>
store volatile <vscale x 2 x i8> %v0, <vscale x 2 x i8>* %local0
store volatile <vscale x 2 x i8> %v1, <vscale x 2 x i8>* %local1
ret void
}
define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
; CHECK-LABEL: spill_nxv8i16
; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
@ -74,6 +257,28 @@ define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
ret void
}
define void @spill_nxv4i16(<vscale x 4 x i16> %v0, <vscale x 4 x i16> %v1) {
; CHECK-LABEL: spill_nxv4i16
; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp]
; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp, #1, mul vl]
%local0 = alloca <vscale x 4 x i16>
%local1 = alloca <vscale x 4 x i16>
store volatile <vscale x 4 x i16> %v0, <vscale x 4 x i16>* %local0
store volatile <vscale x 4 x i16> %v1, <vscale x 4 x i16>* %local1
ret void
}
define void @spill_nxv2i16(<vscale x 2 x i16> %v0, <vscale x 2 x i16> %v1) {
; CHECK-LABEL: spill_nxv2i16
; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #3, mul vl]
; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #2, mul vl]
%local0 = alloca <vscale x 2 x i16>
%local1 = alloca <vscale x 2 x i16>
store volatile <vscale x 2 x i16> %v0, <vscale x 2 x i16>* %local0
store volatile <vscale x 2 x i16> %v1, <vscale x 2 x i16>* %local1
ret void
}
define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
; CHECK-LABEL: spill_nxv4i32
; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
@ -85,6 +290,17 @@ define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
ret void
}
define void @spill_nxv2i32(<vscale x 2 x i32> %v0, <vscale x 2 x i32> %v1) {
; CHECK-LABEL: spill_nxv2i32
; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp]
; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp, #1, mul vl]
%local0 = alloca <vscale x 2 x i32>
%local1 = alloca <vscale x 2 x i32>
store volatile <vscale x 2 x i32> %v0, <vscale x 2 x i32>* %local0
store volatile <vscale x 2 x i32> %v1, <vscale x 2 x i32>* %local1
ret void
}
define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
; CHECK-LABEL: spill_nxv2i64
; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]