forked from OSchip/llvm-project
[AArch64][SVE] Fill out missing unpredicated load/store patterns.
The set of patterns for unpredicated load/store was incomplete: it only included non-extending loads and non-truncating stores. Fill out the remaining patterns for extending loads and truncating stores, and add the corresponding support to frame offset lowering. Differential Revision: https://reviews.llvm.org/D80349
This commit is contained in:
parent
e79d002309
commit
f09d220c71
|
@ -1831,6 +1831,24 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
|
|||
case AArch64::ST1H_IMM:
|
||||
case AArch64::ST1W_IMM:
|
||||
case AArch64::ST1D_IMM:
|
||||
case AArch64::LD1B_H_IMM:
|
||||
case AArch64::LD1SB_H_IMM:
|
||||
case AArch64::LD1H_S_IMM:
|
||||
case AArch64::LD1SH_S_IMM:
|
||||
case AArch64::LD1W_D_IMM:
|
||||
case AArch64::LD1SW_D_IMM:
|
||||
case AArch64::ST1B_H_IMM:
|
||||
case AArch64::ST1H_S_IMM:
|
||||
case AArch64::ST1W_D_IMM:
|
||||
case AArch64::LD1B_S_IMM:
|
||||
case AArch64::LD1SB_S_IMM:
|
||||
case AArch64::LD1H_D_IMM:
|
||||
case AArch64::LD1SH_D_IMM:
|
||||
case AArch64::ST1B_S_IMM:
|
||||
case AArch64::ST1H_D_IMM:
|
||||
case AArch64::LD1B_D_IMM:
|
||||
case AArch64::LD1SB_D_IMM:
|
||||
case AArch64::ST1B_D_IMM:
|
||||
return 3;
|
||||
case AArch64::ADDG:
|
||||
case AArch64::STGOffset:
|
||||
|
@ -2289,6 +2307,45 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
MinOffset = -8;
|
||||
MaxOffset = 7;
|
||||
break;
|
||||
case AArch64::LD1B_H_IMM:
|
||||
case AArch64::LD1SB_H_IMM:
|
||||
case AArch64::LD1H_S_IMM:
|
||||
case AArch64::LD1SH_S_IMM:
|
||||
case AArch64::LD1W_D_IMM:
|
||||
case AArch64::LD1SW_D_IMM:
|
||||
case AArch64::ST1B_H_IMM:
|
||||
case AArch64::ST1H_S_IMM:
|
||||
case AArch64::ST1W_D_IMM:
|
||||
// A half vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(8);
|
||||
Width = SVEMaxBytesPerVector / 2;
|
||||
MinOffset = -8;
|
||||
MaxOffset = 7;
|
||||
break;
|
||||
case AArch64::LD1B_S_IMM:
|
||||
case AArch64::LD1SB_S_IMM:
|
||||
case AArch64::LD1H_D_IMM:
|
||||
case AArch64::LD1SH_D_IMM:
|
||||
case AArch64::ST1B_S_IMM:
|
||||
case AArch64::ST1H_D_IMM:
|
||||
// A quarter vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(4);
|
||||
Width = SVEMaxBytesPerVector / 4;
|
||||
MinOffset = -8;
|
||||
MaxOffset = 7;
|
||||
break;
|
||||
case AArch64::LD1B_D_IMM:
|
||||
case AArch64::LD1SB_D_IMM:
|
||||
case AArch64::ST1B_D_IMM:
|
||||
// An eighth vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(2);
|
||||
Width = SVEMaxBytesPerVector / 8;
|
||||
MinOffset = -8;
|
||||
MaxOffset = 7;
|
||||
break;
|
||||
case AArch64::ST2GOffset:
|
||||
case AArch64::STZ2GOffset:
|
||||
Scale = TypeSize::Fixed(16);
|
||||
|
|
|
@ -1560,37 +1560,69 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
defm : pred_store<nxv4i32, nxv4i1, non_temporal_store, STNT1W_ZRR, STNT1W_ZRI, am_sve_regreg_lsl2>;
|
||||
defm : pred_store<nxv2i64, nxv2i1, non_temporal_store, STNT1D_ZRR, STNT1D_ZRI, am_sve_regreg_lsl3>;
|
||||
|
||||
multiclass unpred_store<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
|
||||
def _fi : Pat<(store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
multiclass unpred_store<PatFrag Store, ValueType Ty, Instruction RegImmInst,
|
||||
Instruction PTrue> {
|
||||
def : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
|
||||
def _default : Pat<(store (Ty ZPR:$val), GPR64:$base),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
}
|
||||
|
||||
defm Pat_ST1B : unpred_store<nxv16i8, ST1B_IMM, PTRUE_B>;
|
||||
defm Pat_ST1H : unpred_store<nxv8i16, ST1H_IMM, PTRUE_H>;
|
||||
defm Pat_ST1W : unpred_store<nxv4i32, ST1W_IMM, PTRUE_S>;
|
||||
defm Pat_ST1D : unpred_store<nxv2i64, ST1D_IMM, PTRUE_D>;
|
||||
defm Pat_ST1H_float16: unpred_store<nxv8f16, ST1H_IMM, PTRUE_H>;
|
||||
defm Pat_ST1W_float : unpred_store<nxv4f32, ST1W_IMM, PTRUE_S>;
|
||||
defm Pat_ST1D_double : unpred_store<nxv2f64, ST1D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv16i8, ST1B_IMM, PTRUE_B>;
|
||||
defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H_IMM, PTRUE_H>;
|
||||
defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S_IMM, PTRUE_S>;
|
||||
defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv8i16, ST1H_IMM, PTRUE_H>;
|
||||
defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S_IMM, PTRUE_S>;
|
||||
defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv4i32, ST1W_IMM, PTRUE_S>;
|
||||
defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv2i64, ST1D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv8f16, ST1H_IMM, PTRUE_H>;
|
||||
defm : unpred_store< store, nxv4f16, ST1H_S_IMM, PTRUE_S>;
|
||||
defm : unpred_store< store, nxv2f16, ST1H_D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv4f32, ST1W_IMM, PTRUE_S>;
|
||||
// Unpacked two-element float vector: uses the D-element form of ST1W,
// mirroring unpred_load<load, nxv2f32, LD1W_D_IMM, PTRUE_D>.
defm : unpred_store< store, nxv2f32, ST1W_D_IMM, PTRUE_D>;
|
||||
defm : unpred_store< store, nxv2f64, ST1D_IMM, PTRUE_D>;
|
||||
|
||||
multiclass unpred_load<ValueType Ty, Instruction RegImmInst, Instruction PTrue> {
|
||||
def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
|
||||
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegImmInst,
|
||||
Instruction PTrue> {
|
||||
def : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
|
||||
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
|
||||
def _default : Pat<(Ty (load GPR64:$base)),
|
||||
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
def : Pat<(Ty (Load GPR64:$base)),
|
||||
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
}
|
||||
|
||||
defm Pat_LD1B : unpred_load<nxv16i8, LD1B_IMM, PTRUE_B>;
|
||||
defm Pat_LD1H : unpred_load<nxv8i16, LD1H_IMM, PTRUE_H>;
|
||||
defm Pat_LD1W : unpred_load<nxv4i32, LD1W_IMM, PTRUE_S>;
|
||||
defm Pat_LD1D : unpred_load<nxv2i64, LD1D_IMM, PTRUE_D>;
|
||||
defm Pat_LD1H_float16: unpred_load<nxv8f16, LD1H_IMM, PTRUE_H>;
|
||||
defm Pat_LD1W_float : unpred_load<nxv4f32, LD1W_IMM, PTRUE_S>;
|
||||
defm Pat_LD1D_double : unpred_load<nxv2f64, LD1D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv16i8, LD1B_IMM, PTRUE_B>;
|
||||
defm : unpred_load< zextloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
|
||||
defm : unpred_load< zextloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load< zextloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< extloadvi8, nxv8i16, LD1B_H_IMM, PTRUE_H>;
|
||||
defm : unpred_load< extloadvi8, nxv4i32, LD1B_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load< extloadvi8, nxv2i64, LD1B_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< sextloadvi8, nxv8i16, LD1SB_H_IMM, PTRUE_H>;
|
||||
defm : unpred_load< sextloadvi8, nxv4i32, LD1SB_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load< sextloadvi8, nxv2i64, LD1SB_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv8i16, LD1H_IMM, PTRUE_H>;
|
||||
defm : unpred_load<zextloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load<zextloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< extloadvi16, nxv4i32, LD1H_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load< extloadvi16, nxv2i64, LD1H_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load<sextloadvi16, nxv4i32, LD1SH_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load<sextloadvi16, nxv2i64, LD1SH_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv4i32, LD1W_IMM, PTRUE_S>;
|
||||
defm : unpred_load<zextloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< extloadvi32, nxv2i64, LD1W_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load<sextloadvi32, nxv2i64, LD1SW_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv2i64, LD1D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv8f16, LD1H_IMM, PTRUE_H>;
|
||||
defm : unpred_load< load, nxv4f16, LD1H_S_IMM, PTRUE_S>;
|
||||
defm : unpred_load< load, nxv2f16, LD1H_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv4f32, LD1W_IMM, PTRUE_S>;
|
||||
defm : unpred_load< load, nxv2f32, LD1W_D_IMM, PTRUE_D>;
|
||||
defm : unpred_load< load, nxv2f64, LD1D_IMM, PTRUE_D>;
|
||||
|
||||
multiclass unpred_store_predicate<ValueType Ty, Instruction Store> {
|
||||
def _fi : Pat<(store (Ty PPR:$val), (am_sve_fi GPR64sp:$base, simm9:$offset)),
|
||||
|
|
|
@ -16,6 +16,81 @@ define void @fill_nxv16i8() {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @fill_nxv8i8() {
|
||||
; CHECK-LABEL: fill_nxv8i8
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 8 x i8>
|
||||
%local1 = alloca <vscale x 8 x i8>
|
||||
load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local0
|
||||
load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @fill_signed_nxv8i8() {
|
||||
; CHECK-LABEL: fill_signed_nxv8i8
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.h }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 8 x i8>
|
||||
%local1 = alloca <vscale x 8 x i8>
|
||||
%a = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local0
|
||||
%a_ext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
|
||||
%b = load volatile <vscale x 8 x i8>, <vscale x 8 x i8>* %local1
|
||||
%b_ext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
|
||||
%sum = add <vscale x 8 x i16> %a_ext, %b_ext
|
||||
ret <vscale x 8 x i16> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv4i8() {
|
||||
; CHECK-LABEL: fill_nxv4i8
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.s }, p0/z, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i8>
|
||||
%local1 = alloca <vscale x 4 x i8>
|
||||
load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local0
|
||||
load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fill_signed_nxv4i8() {
|
||||
; CHECK-LABEL: fill_signed_nxv4i8
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.s }, p0/z, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i8>
|
||||
%local1 = alloca <vscale x 4 x i8>
|
||||
%a = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local0
|
||||
%a_ext = sext <vscale x 4 x i8> %a to <vscale x 4 x i32>
|
||||
%b = load volatile <vscale x 4 x i8>, <vscale x 4 x i8>* %local1
|
||||
%b_ext = sext <vscale x 4 x i8> %b to <vscale x 4 x i32>
|
||||
%sum = add <vscale x 4 x i32> %a_ext, %b_ext
|
||||
ret <vscale x 4 x i32> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv2i8() {
|
||||
; CHECK-LABEL: fill_nxv2i8
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #7, mul vl]
|
||||
; CHECK-DAG: ld1b { z{{[01]}}.d }, p0/z, [sp, #6, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i8>
|
||||
%local1 = alloca <vscale x 2 x i8>
|
||||
load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local0
|
||||
load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fill_signed_nxv2i8() {
|
||||
; CHECK-LABEL: fill_signed_nxv2i8
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #7, mul vl]
|
||||
; CHECK-DAG: ld1sb { z{{[01]}}.d }, p0/z, [sp, #6, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i8>
|
||||
%local1 = alloca <vscale x 2 x i8>
|
||||
%a = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local0
|
||||
%a_ext = sext <vscale x 2 x i8> %a to <vscale x 2 x i64>
|
||||
%b = load volatile <vscale x 2 x i8>, <vscale x 2 x i8>* %local1
|
||||
%b_ext = sext <vscale x 2 x i8> %b to <vscale x 2 x i64>
|
||||
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
|
||||
ret <vscale x 2 x i64> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv8i16() {
|
||||
; CHECK-LABEL: fill_nxv8i16
|
||||
; CHECK-DAG: ld1h { z{{[01]}}.h }, p0/z, [sp]
|
||||
|
@ -27,6 +102,56 @@ define void @fill_nxv8i16() {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @fill_nxv4i16() {
|
||||
; CHECK-LABEL: fill_nxv4i16
|
||||
; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1h { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i16>
|
||||
%local1 = alloca <vscale x 4 x i16>
|
||||
load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local0
|
||||
load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @fill_signed_nxv4i16() {
|
||||
; CHECK-LABEL: fill_signed_nxv4i16
|
||||
; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1sh { z{{[01]}}.s }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i16>
|
||||
%local1 = alloca <vscale x 4 x i16>
|
||||
%a = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local0
|
||||
%a_ext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
|
||||
%b = load volatile <vscale x 4 x i16>, <vscale x 4 x i16>* %local1
|
||||
%b_ext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
|
||||
%sum = add <vscale x 4 x i32> %a_ext, %b_ext
|
||||
ret <vscale x 4 x i32> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv2i16() {
|
||||
; CHECK-LABEL: fill_nxv2i16
|
||||
; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-DAG: ld1h { z{{[01]}}.d }, p0/z, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i16>
|
||||
%local1 = alloca <vscale x 2 x i16>
|
||||
load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local0
|
||||
load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fill_signed_nxv2i16() {
|
||||
; CHECK-LABEL: fill_signed_nxv2i16
|
||||
; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-DAG: ld1sh { z{{[01]}}.d }, p0/z, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i16>
|
||||
%local1 = alloca <vscale x 2 x i16>
|
||||
%a = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local0
|
||||
%a_ext = sext <vscale x 2 x i16> %a to <vscale x 2 x i64>
|
||||
%b = load volatile <vscale x 2 x i16>, <vscale x 2 x i16>* %local1
|
||||
%b_ext = sext <vscale x 2 x i16> %b to <vscale x 2 x i64>
|
||||
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
|
||||
ret <vscale x 2 x i64> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv4i32() {
|
||||
; CHECK-LABEL: fill_nxv4i32
|
||||
; CHECK-DAG: ld1w { z{{[01]}}.s }, p0/z, [sp]
|
||||
|
@ -38,6 +163,31 @@ define void @fill_nxv4i32() {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @fill_nxv2i32() {
|
||||
; CHECK-LABEL: fill_nxv2i32
|
||||
; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1w { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i32>
|
||||
%local1 = alloca <vscale x 2 x i32>
|
||||
load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local0
|
||||
load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @fill_signed_nxv2i32() {
|
||||
; CHECK-LABEL: fill_signed_nxv2i32
|
||||
; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp]
|
||||
; CHECK-DAG: ld1sw { z{{[01]}}.d }, p0/z, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i32>
|
||||
%local1 = alloca <vscale x 2 x i32>
|
||||
%a = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local0
|
||||
%a_ext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
|
||||
%b = load volatile <vscale x 2 x i32>, <vscale x 2 x i32>* %local1
|
||||
%b_ext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
|
||||
%sum = add <vscale x 2 x i64> %a_ext, %b_ext
|
||||
ret <vscale x 2 x i64> %sum
|
||||
}
|
||||
|
||||
define void @fill_nxv2i64() {
|
||||
; CHECK-LABEL: fill_nxv2i64
|
||||
; CHECK-DAG: ld1d { z{{[01]}}.d }, p0/z, [sp]
|
||||
|
@ -63,6 +213,39 @@ define void @spill_nxv16i8(<vscale x 16 x i8> %v0, <vscale x 16 x i8> %v1) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv8i8(<vscale x 8 x i8> %v0, <vscale x 8 x i8> %v1) {
|
||||
; CHECK-LABEL: spill_nxv8i8
|
||||
; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp]
|
||||
; CHECK-DAG: st1b { z{{[01]}}.h }, p0, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 8 x i8>
|
||||
%local1 = alloca <vscale x 8 x i8>
|
||||
store volatile <vscale x 8 x i8> %v0, <vscale x 8 x i8>* %local0
|
||||
store volatile <vscale x 8 x i8> %v1, <vscale x 8 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv4i8(<vscale x 4 x i8> %v0, <vscale x 4 x i8> %v1) {
|
||||
; CHECK-LABEL: spill_nxv4i8
|
||||
; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-DAG: st1b { z{{[01]}}.s }, p0, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i8>
|
||||
%local1 = alloca <vscale x 4 x i8>
|
||||
store volatile <vscale x 4 x i8> %v0, <vscale x 4 x i8>* %local0
|
||||
store volatile <vscale x 4 x i8> %v1, <vscale x 4 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv2i8(<vscale x 2 x i8> %v0, <vscale x 2 x i8> %v1) {
|
||||
; CHECK-LABEL: spill_nxv2i8
|
||||
; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #7, mul vl]
|
||||
; CHECK-DAG: st1b { z{{[01]}}.d }, p0, [sp, #6, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i8>
|
||||
%local1 = alloca <vscale x 2 x i8>
|
||||
store volatile <vscale x 2 x i8> %v0, <vscale x 2 x i8>* %local0
|
||||
store volatile <vscale x 2 x i8> %v1, <vscale x 2 x i8>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
|
||||
; CHECK-LABEL: spill_nxv8i16
|
||||
; CHECK-DAG: st1h { z{{[01]}}.h }, p0, [sp]
|
||||
|
@ -74,6 +257,28 @@ define void @spill_nxv8i16(<vscale x 8 x i16> %v0, <vscale x 8 x i16> %v1) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv4i16(<vscale x 4 x i16> %v0, <vscale x 4 x i16> %v1) {
|
||||
; CHECK-LABEL: spill_nxv4i16
|
||||
; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp]
|
||||
; CHECK-DAG: st1h { z{{[01]}}.s }, p0, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 4 x i16>
|
||||
%local1 = alloca <vscale x 4 x i16>
|
||||
store volatile <vscale x 4 x i16> %v0, <vscale x 4 x i16>* %local0
|
||||
store volatile <vscale x 4 x i16> %v1, <vscale x 4 x i16>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv2i16(<vscale x 2 x i16> %v0, <vscale x 2 x i16> %v1) {
|
||||
; CHECK-LABEL: spill_nxv2i16
|
||||
; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #3, mul vl]
|
||||
; CHECK-DAG: st1h { z{{[01]}}.d }, p0, [sp, #2, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i16>
|
||||
%local1 = alloca <vscale x 2 x i16>
|
||||
store volatile <vscale x 2 x i16> %v0, <vscale x 2 x i16>* %local0
|
||||
store volatile <vscale x 2 x i16> %v1, <vscale x 2 x i16>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
|
||||
; CHECK-LABEL: spill_nxv4i32
|
||||
; CHECK-DAG: st1w { z{{[01]}}.s }, p0, [sp]
|
||||
|
@ -85,6 +290,17 @@ define void @spill_nxv4i32(<vscale x 4 x i32> %v0, <vscale x 4 x i32> %v1) {
|
|||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv2i32(<vscale x 2 x i32> %v0, <vscale x 2 x i32> %v1) {
|
||||
; CHECK-LABEL: spill_nxv2i32
|
||||
; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp]
|
||||
; CHECK-DAG: st1w { z{{[01]}}.d }, p0, [sp, #1, mul vl]
|
||||
%local0 = alloca <vscale x 2 x i32>
|
||||
%local1 = alloca <vscale x 2 x i32>
|
||||
store volatile <vscale x 2 x i32> %v0, <vscale x 2 x i32>* %local0
|
||||
store volatile <vscale x 2 x i32> %v1, <vscale x 2 x i32>* %local1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @spill_nxv2i64(<vscale x 2 x i64> %v0, <vscale x 2 x i64> %v1) {
|
||||
; CHECK-LABEL: spill_nxv2i64
|
||||
; CHECK-DAG: st1d { z{{[01]}}.d }, p0, [sp]
|
||||
|
|
Loading…
Reference in New Issue