[AArch64][SVE] Use TargetFrameIndex in more SVE load/store addressing modes

Add support for generating TargetFrameIndex in the complex patterns for
indexed addressing modes in SVE. Additionally, add the missing loads/stores
to getMemOpInfo and getLoadStoreImmIdx.

Differential Revision: https://reviews.llvm.org/D112617
This commit is contained in:
Bradley Smith 2021-10-26 14:53:53 +00:00
parent 014c6b0736
commit 86972f1114
14 changed files with 3462 additions and 2956 deletions

View File

@ -5050,6 +5050,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Base,
SDValue &OffImm) { SDValue &OffImm) {
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
const DataLayout &DL = CurDAG->getDataLayout();
if (N.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
}
if (MemVT == EVT()) if (MemVT == EVT())
return false; return false;
@ -5073,6 +5081,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
return false; return false;
Base = N.getOperand(0); Base = N.getOperand(0);
if (Base.getOpcode() == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
return true; return true;
} }

View File

@ -2263,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi: case AArch64::STNPSi:
case AArch64::LDG: case AArch64::LDG:
case AArch64::STGPi: case AArch64::STGPi:
case AArch64::LD1B_IMM: case AArch64::LD1B_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
case AArch64::LD1B_H_IMM: case AArch64::LD1B_H_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM: case AArch64::LD1SB_H_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM: case AArch64::LD1H_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM: case AArch64::LD1SH_S_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM: case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM: case AArch64::LD1SW_D_IMM:
case AArch64::LD1D_IMM:
case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM: case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::LD1B_S_IMM:
case AArch64::LD1SB_S_IMM:
case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM: case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM: case AArch64::ST1B_D_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1H_D_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1W_D_IMM:
case AArch64::ST1D_IMM:
case AArch64::LD1RB_IMM: case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM: case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM: case AArch64::LD1RB_S_IMM:
@ -2305,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LD1RW_D_IMM: case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM: case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM: case AArch64::LD1RD_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1SB_D_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1SH_D_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
case AArch64::LDNF1D_IMM:
return 3; return 3;
case AArch64::ADDG: case AArch64::ADDG:
case AArch64::STGOffset: case AArch64::STGOffset:
@ -2855,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1H_IMM: case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM: case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM: case AArch64::LD1D_IMM:
case AArch64::LDNT1B_ZRI:
case AArch64::LDNT1H_ZRI:
case AArch64::LDNT1W_ZRI:
case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM: case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM: case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM: case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM: case AArch64::ST1D_IMM:
case AArch64::STNT1B_ZRI:
case AArch64::STNT1H_ZRI:
case AArch64::STNT1W_ZRI:
case AArch64::STNT1D_ZRI:
case AArch64::LDNF1B_IMM:
case AArch64::LDNF1H_IMM:
case AArch64::LDNF1W_IMM:
case AArch64::LDNF1D_IMM:
// A full vectors worth of data // A full vectors worth of data
// Width = mbytes * elements // Width = mbytes * elements
Scale = TypeSize::Scalable(16); Scale = TypeSize::Scalable(16);
@ -2875,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST1B_H_IMM: case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM: case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM: case AArch64::ST1W_D_IMM:
case AArch64::LDNF1B_H_IMM:
case AArch64::LDNF1SB_H_IMM:
case AArch64::LDNF1H_S_IMM:
case AArch64::LDNF1SH_S_IMM:
case AArch64::LDNF1W_D_IMM:
case AArch64::LDNF1SW_D_IMM:
// A half vector worth of data // A half vector worth of data
// Width = mbytes * elements // Width = mbytes * elements
Scale = TypeSize::Scalable(8); Scale = TypeSize::Scalable(8);
@ -2888,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1SH_D_IMM: case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM: case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM: case AArch64::ST1H_D_IMM:
case AArch64::LDNF1B_S_IMM:
case AArch64::LDNF1SB_S_IMM:
case AArch64::LDNF1H_D_IMM:
case AArch64::LDNF1SH_D_IMM:
// A quarter vector worth of data // A quarter vector worth of data
// Width = mbytes * elements // Width = mbytes * elements
Scale = TypeSize::Scalable(4); Scale = TypeSize::Scalable(4);
@ -2898,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1B_D_IMM: case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM: case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM: case AArch64::ST1B_D_IMM:
case AArch64::LDNF1B_D_IMM:
case AArch64::LDNF1SB_D_IMM:
// A eighth vector worth of data // A eighth vector worth of data
// Width = mbytes * elements // Width = mbytes * elements
Scale = TypeSize::Scalable(2); Scale = TypeSize::Scalable(2);

View File

@ -2200,10 +2200,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)), def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
} }
let AddedComplexity = 3 in {
def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
def : Pat<(Store (Ty ZPR:$val), GPR64:$base), def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>; (RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
@ -2240,10 +2236,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))), def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>; (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
} }
let AddedComplexity = 3 in {
def _fi : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
def : Pat<(Ty (Load GPR64:$base)), def : Pat<(Ty (Load GPR64:$base)),
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>; (RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;

View File

@ -40,14 +40,14 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #256
; CHECK-NEXT: mov w10, #256
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
; CHECK-NEXT: cmp x8, #256 ; CHECK-NEXT: cmp x8, #256
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -81,14 +81,14 @@ define <vscale x 8 x i16> @splice_nxv8i16_clamped_idx(<vscale x 8 x i16> %a, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cnth x8 ; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #128
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -122,14 +122,14 @@ define <vscale x 4 x i32> @splice_nxv4i32_clamped_idx(<vscale x 4 x i32> %a, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntw x8 ; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #64
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -163,14 +163,14 @@ define <vscale x 2 x i64> @splice_nxv2i64_clamped_idx(<vscale x 2 x i64> %a, <vs
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8 ; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #32
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -200,7 +200,7 @@ define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vsc
; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -237,15 +237,15 @@ define <vscale x 2 x half> @splice_nxv2f16_clamped_idx(<vscale x 2 x half> %a, <
; CHECK-NEXT: cntd x8 ; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32 ; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -274,7 +274,7 @@ define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vsc
; CHECK-NEXT: mov x9, #-6 ; CHECK-NEXT: mov x9, #-6
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -311,15 +311,15 @@ define <vscale x 4 x half> @splice_nxv4f16_clamped_idx(<vscale x 4 x half> %a, <
; CHECK-NEXT: cntw x8 ; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov w9, #64 ; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -352,14 +352,14 @@ define <vscale x 8 x half> @splice_nxv8f16_clamped_idx(<vscale x 8 x half> %a, <
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cnth x8 ; CHECK-NEXT: cnth x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #128
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -389,7 +389,7 @@ define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <v
; CHECK-NEXT: mov x9, #-4 ; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -426,15 +426,15 @@ define <vscale x 2 x float> @splice_nxv2f32_clamped_idx(<vscale x 2 x float> %a,
; CHECK-NEXT: cntd x8 ; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov w9, #32 ; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -467,14 +467,14 @@ define <vscale x 4 x float> @splice_nxv4f32_clamped_idx(<vscale x 4 x float> %a,
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntw x8 ; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #64
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #64
; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #64
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -508,14 +508,14 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped_idx(<vscale x 2 x double> %
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: cntd x8 ; CHECK-NEXT: cntd x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #32
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #32
; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #32
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -602,13 +602,13 @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: orr x9, x8, #0x8 ; CHECK-NEXT: orr x8, x8, #0x8
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -623,21 +623,21 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float>
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: addvl sp, sp, #-8
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #16 ; CHECK-NEXT: mov w9, #16
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: cmp x8, #16
; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl]
; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT: add x10, x9, x8, lsl #2 ; CHECK-NEXT: add x10, x9, x8, lsl #2
; CHECK-NEXT: st1w { z7.s }, p0, [x9, #7, mul vl] ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1w { z4.s }, p0, [x9, #4, mul vl] ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT: st1w { z5.s }, p0, [x9, #5, mul vl]
; CHECK-NEXT: st1w { z6.s }, p0, [x9, #6, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl]
@ -662,7 +662,7 @@ define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x
; CHECK-NEXT: mov x9, #-16 ; CHECK-NEXT: mov x9, #-16
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -696,11 +696,11 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped(<vscale x 16 x i8> %a, <vscale
; CHECK-NEXT: mov w10, #17 ; CHECK-NEXT: mov w10, #17
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -717,7 +717,7 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -751,11 +751,11 @@ define <vscale x 8 x i16> @splice_nxv8i16_clamped(<vscale x 8 x i16> %a, <vscale
; CHECK-NEXT: mov w10, #18 ; CHECK-NEXT: mov w10, #18
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -772,7 +772,7 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
; CHECK-NEXT: mov x9, #-4 ; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -806,11 +806,11 @@ define <vscale x 4 x i32> @splice_nxv4i32_clamped(<vscale x 4 x i32> %a, <vscale
; CHECK-NEXT: mov w10, #20 ; CHECK-NEXT: mov w10, #20
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -827,7 +827,7 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i
; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -861,11 +861,11 @@ define <vscale x 2 x i64> @splice_nxv2i64_clamped(<vscale x 2 x i64> %a, <vscale
; CHECK-NEXT: mov w10, #24 ; CHECK-NEXT: mov w10, #24
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -882,7 +882,7 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -916,11 +916,11 @@ define <vscale x 8 x half> @splice_nxv8f16_clamped(<vscale x 8 x half> %a, <vsca
; CHECK-NEXT: mov w10, #18 ; CHECK-NEXT: mov w10, #18
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -937,7 +937,7 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
; CHECK-NEXT: mov x9, #-4 ; CHECK-NEXT: mov x9, #-4
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -971,11 +971,11 @@ define <vscale x 4 x float> @splice_nxv4f32_clamped(<vscale x 4 x float> %a, <vs
; CHECK-NEXT: mov w10, #20 ; CHECK-NEXT: mov w10, #20
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -992,7 +992,7 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -1026,11 +1026,11 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped(<vscale x 2 x double> %a, <
; CHECK-NEXT: mov w10, #24 ; CHECK-NEXT: mov w10, #24
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x9, x9, x10, lo
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: addvl x10, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x8, x9
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -1112,7 +1112,7 @@ define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8>
; CHECK-NEXT: mov x9, #-2 ; CHECK-NEXT: mov x9, #-2
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
@ -1129,16 +1129,16 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: mov x10, #-8 ; CHECK-NEXT: mov x9, #-8
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addvl x9, x8, #2 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
; CHECK-NEXT: sub x11, x9, #32
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] ; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x10, lsl #2] ; CHECK-NEXT: sub x10, x8, #32
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x11, #1, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -1152,26 +1152,26 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped(<vscale x 16 x float> %a,
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: addvl sp, sp, #-8
; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: rdvl x9, #4 ; CHECK-NEXT: rdvl x8, #4
; CHECK-NEXT: cmp x9, #68 ; CHECK-NEXT: cmp x8, #68
; CHECK-NEXT: mov w10, #68 ; CHECK-NEXT: mov w9, #68
; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: addvl x10, x8, #4 ; CHECK-NEXT: addvl x9, x10, #4
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: st1w { z7.s }, p0, [x8, #7, mul vl] ; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
; CHECK-NEXT: st1w { z4.s }, p0, [x8, #4, mul vl] ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
; CHECK-NEXT: st1w { z5.s }, p0, [x8, #5, mul vl] ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
; CHECK-NEXT: st1w { z6.s }, p0, [x8, #6, mul vl] ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x9, #2, mul vl] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #8 ; CHECK-NEXT: addvl sp, sp, #8
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret

View File

@ -12,17 +12,16 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind {
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z16.d }, p0, [sp]
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: bl callee1 ; CHECK-NEXT: bl callee1
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
@ -44,24 +43,27 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] ; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov w1, #1 ; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2 ; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w3, #3 ; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4 ; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5 ; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6 ; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: mov w7, #7 ; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: add x9, sp, #16 ; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: st1d { z16.d }, p0, [x9] ; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2 ; CHECK-NEXT: bl callee2
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
@ -83,17 +85,16 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind {
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: addvl sp, sp, #-3
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0] ; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0]
; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1] ; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1]
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2]
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: fmov s1, #2.00000000 ; CHECK-NEXT: fmov s1, #2.00000000
; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: mov x0, sp
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z16.d }, p0, [sp]
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: bl callee3 ; CHECK-NEXT: bl callee3
; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: addvl sp, sp, #3
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -939,7 +939,6 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 {
; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: mov z2.d, z1.d[1] ; CHECK-NEXT: mov z2.d, z1.d[1]
@ -947,7 +946,7 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 {
; CHECK-NEXT: mov z1.d, z0.d[3] ; CHECK-NEXT: mov z1.d, z0.d[3]
; CHECK-NEXT: mov z0.d, z0.d[2] ; CHECK-NEXT: mov z0.d, z0.d[2]
; CHECK-NEXT: stp d0, d1, [sp] ; CHECK-NEXT: stp d0, d1, [sp]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload

View File

@ -506,18 +506,18 @@ define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val,
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT: sxtw x9, w1 ; CHECK-NEXT: sxtw x9, w1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl]
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl]
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: st1b { z0.b }, p1, [sp] ; CHECK-NEXT: st1b { z0.b }, p1, [sp]
; CHECK-NEXT: strb w0, [x10, x8] ; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: strb w0, [x9, x8]
; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] ; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp]
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x10, #1, mul vl] ; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl]
; CHECK-NEXT: and z0.b, z0.b, #0x1 ; CHECK-NEXT: and z0.b, z0.b, #0x1
; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: and z1.b, z1.b, #0x1
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 ; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0

View File

@ -213,19 +213,18 @@ define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, <vscale x 16
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [sp, #32] ; CHECK-NEXT: str q1, [sp, #32]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8, #2, mul vl] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8, #3, mul vl] ; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp] ; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #3, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
; CHECK-NEXT: st1d { z3.d }, p0, [x0] ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -256,13 +255,12 @@ define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %ou
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: str q0, [sp, #16] ; CHECK-NEXT: str q0, [sp, #16]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [x1, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x1] ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -319,11 +317,10 @@ define <vscale x 8 x half> @insert_nxv8f16_nxv2f16(<vscale x 8 x half> %vec, <vs
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: st1h { z1.d }, p1, [x8, #1, mul vl] ; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -446,15 +443,14 @@ define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vsc
; CHECK: // %bb.0: ; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: uunpklo z2.d, z0.s
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: st1w { z2.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: st1w { z0.s }, p1, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x8, #1, mul vl] ; CHECK-NEXT: st1w { z1.d }, p1, [sp, #2, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret

View File

@ -0,0 +1,277 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
#
# Test that prologepilog works for each of the LDNF1 instructions for stack-based objects.
#
--- |
define void @testcase_positive_offset() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; IR stub for the MIR function of the same name below.
; Reads from %object at offset 7 * readsize (the immediate used by every
; LDNF1 instruction in the MIR body).
ret void
}
define void @testcase_negative_offset() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; IR stub for the MIR function of the same name below.
; Reads from %object at offset -8 * readsize (the immediate used by every
; LDNF1 instruction in the MIR body).
ret void
}
define void @testcase_positive_offset_out_of_range() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; IR stub for the MIR function of the same name below.
; Reads from %object at offset 8 * readsize; the expected output materialises
; the base in a scratch register (one read-size past $sp) and uses immediate 7.
ret void
}
define void @testcase_negative_offset_out_of_range() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; IR stub for the MIR function of the same name below.
; Reads from %object at offset -9 * readsize (presumably; inferred from the
; expected output, which moves the base one read-size below $sp and uses
; immediate -8 -- confirm against the MIR input operands).
ret void
}
...
---
name: testcase_positive_offset
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_positive_offset
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, 7 :: (load (s64) from %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
name: testcase_negative_offset
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_negative_offset
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, -8 :: (load (s64) from %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
name: testcase_positive_offset_out_of_range
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_positive_offset_out_of_range
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
# Every LDNF1* load in this function addresses %stack.1.object with immediate
# -9, the value this test treats as just below the encodable range.
# In the expected output each load is rebased onto $x8, which is computed from
# $sp by an ADDVL_XXI or ADDPL_XXI placed directly before it, and the load
# immediate becomes -8.
# NOTE(review): the ADDVL/ADDPL amount differs per opcode (-1, -4, -2, -1, ...);
# presumably it tracks how much memory one immediate step covers for that
# instruction - confirm against the instruction definitions.
name: testcase_negative_offset_out_of_range
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_negative_offset_out_of_range
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
RET_ReallyLR implicit $z0
...

View File

@ -0,0 +1,203 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
#
# Test that prologepilog works for each of the LDNT1/STNT1 instructions for stack-based objects.
#
--- |
; IR-level stub: it only provides the %dummy and %object allocas that the MIR
; function of the same name refers to (as %stack.N.<name> and %ir.object).
define void @testcase_positive_offset() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; The MIR body both loads from and stores to %object at offset 7 * readsize
ret void
}
; IR-level stub: it only provides the %dummy and %object allocas that the MIR
; function of the same name refers to (as %stack.N.<name> and %ir.object).
define void @testcase_negative_offset() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; The MIR body both loads from and stores to %object at offset -8 * readsize
ret void
}
; IR-level stub: it only provides the %dummy and %object allocas that the MIR
; function of the same name refers to (as %stack.N.<name> and %ir.object).
define void @testcase_positive_offset_out_of_range() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; The MIR body both loads from and stores to %object at offset 8 * readsize
ret void
}
; IR-level stub: it only provides the %dummy and %object allocas that the MIR
; function of the same name refers to (as %stack.N.<name> and %ir.object).
define void @testcase_negative_offset_out_of_range() {
%dummy = alloca <vscale x 2 x i64>, align 8
%object = alloca <vscale x 2 x i64>, align 8
; The MIR body both loads from and stores to %object at offset -9 * readsize
ret void
}
...
---
# In-range positive case: every LDNT1*/STNT1* access addresses
# %stack.1.object with immediate 7.
# The expected output replaces the frame index directly with $sp and leaves
# the immediate at 7; no extra address computation is emitted.
name: testcase_positive_offset
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_positive_offset
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, 7 :: (load (s64) from %ir.object)
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s8) into %ir.object, align 8)
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s16) into %ir.object, align 8)
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s32) into %ir.object, align 8)
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s64) into %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8)
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 1 into %ir.object, align 8)
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 2 into %ir.object, align 8)
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 4 into %ir.object, align 8)
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 8 into %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
# In-range negative case: every LDNT1*/STNT1* access addresses
# %stack.1.object with immediate -8.
# The expected output replaces the frame index directly with $sp and leaves
# the immediate at -8; no extra address computation is emitted.
name: testcase_negative_offset
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_negative_offset
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, -8 :: (load (s32) from %ir.object)
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, -8 :: (load (s64) from %ir.object)
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s8) into %ir.object, align 8)
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s16) into %ir.object, align 8)
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s32) into %ir.object, align 8)
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s64) into %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 4)
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8)
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 1 into %ir.object, align 8)
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 2 into %ir.object, align 8)
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 4 into %ir.object, align 8)
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 8 into %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
# Out-of-range positive case: every LDNT1*/STNT1* access addresses
# %stack.1.object with immediate 8, one past the value used in
# testcase_positive_offset.
# The expected output puts "$x8 = ADDVL_XXI $sp, 1" directly before each
# access, then uses $x8 as the base with immediate 7.
name: testcase_positive_offset_out_of_range
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_positive_offset_out_of_range
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, 7 :: (load (s64) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s8) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s16) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s32) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s64) into %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 4)
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 8 :: (load 8 from %ir.object, align 8)
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 1 into %ir.object, align 8)
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 2 into %ir.object, align 8)
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 4 into %ir.object, align 8)
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 8 into %ir.object, align 8)
RET_ReallyLR implicit $z0
...
---
# Out-of-range negative case: every LDNT1*/STNT1* access addresses
# %stack.1.object with immediate -9, one below the value used in
# testcase_negative_offset.
# The expected output puts "$x8 = ADDVL_XXI $sp, -1" directly before each
# access, then uses $x8 as the base with immediate -8.
name: testcase_negative_offset_out_of_range
tracksRegLiveness: true
stack:
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
body: |
bb.0 (%ir-block.0):
liveins: $p0
; CHECK-LABEL: name: testcase_negative_offset_out_of_range
; CHECK: liveins: $p0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, -8 :: (load (s64) from %ir.object)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s8) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s16) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s32) into %ir.object, align 8)
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s64) into %ir.object)
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
; CHECK-NEXT: RET_ReallyLR implicit $z0
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 4)
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -9 :: (load 8 from %ir.object, align 8)
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 1 into %ir.object, align 8)
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 2 into %ir.object, align 8)
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 4 into %ir.object, align 8)
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 8 into %ir.object, align 8)
RET_ReallyLR implicit $z0
...

View File

@ -26,14 +26,14 @@ define i8 @split_extract_32i8_idx(<vscale x 32 x i8> %a, i32 %idx) {
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x9, w0 ; CHECK-NEXT: sxtw x9, w0
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1b { z1.b }, p0, [x10, #1, mul vl]
; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldrb w0, [x10, x8] ; CHECK-NEXT: ldrb w0, [x9, x8]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -51,14 +51,14 @@ define i16 @split_extract_16i16_idx(<vscale x 16 x i16> %a, i32 %idx) {
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x9, w0 ; CHECK-NEXT: sxtw x9, w0
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl]
; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1] ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -77,13 +77,13 @@ define i32 @split_extract_8i32_idx(<vscale x 8 x i32> %a, i32 %idx) {
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x9, w0 ; CHECK-NEXT: sxtw x9, w0
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] ; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: ldr w0, [x10, x8, lsl #2] ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -98,19 +98,19 @@ define i64 @split_extract_8i64_idx(<vscale x 8 x i64> %a, i32 %idx) {
; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cnth x9 ; CHECK-NEXT: cnth x8
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: sxtw x10, w0 ; CHECK-NEXT: sxtw x9, w0
; CHECK-NEXT: sub x9, x9, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: cmp x10, x9
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x9, x10, x9, lo ; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: st1d { z3.d }, p0, [x8, #3, mul vl] ; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl] ; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ldr x0, [x8, x9, lsl #3] ; CHECK-NEXT: ldr x0, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -146,14 +146,14 @@ define i16 @split_extract_16i16(<vscale x 16 x i16> %a) {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: mov w10, #128
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -170,17 +170,17 @@ define i32 @split_extract_16i32(<vscale x 16 x i32> %a) {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #34464 ; CHECK-NEXT: mov w9, #34464
; CHECK-NEXT: movk w10, #1, lsl #16 ; CHECK-NEXT: movk w9, #1, lsl #16
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: addvl x8, x8, #1
; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl] ; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@ -197,13 +197,13 @@ define i64 @split_extract_4i64(<vscale x 4 x i64> %a) {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: cntw x8 ; CHECK-NEXT: cntw x8
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov w9, #10
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: mov w10, #10
; CHECK-NEXT: cmp x8, #10
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: csel x8, x8, x10, lo ; CHECK-NEXT: cmp x8, #10
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] ; CHECK-NEXT: ldr x0, [x9, x8, lsl #3]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2

View File

@ -26,14 +26,14 @@ define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt,
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
; CHECK-NEXT: cmp x1, x8
; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: st1b { z0.b }, p0, [sp]
; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: cmp x1, x8
; CHECK-NEXT: csel x8, x1, x8, lo ; CHECK-NEXT: csel x8, x1, x8, lo
; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: strb w0, [x9, x8]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl]
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -53,12 +53,12 @@ define <vscale x 8 x float> @split_insert_8f32_idx(<vscale x 8 x float> %a, floa
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cmp x0, x8 ; CHECK-NEXT: cmp x0, x8
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: csel x8, x0, x8, lo ; CHECK-NEXT: csel x8, x0, x8, lo
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str s2, [x9, x8, lsl #2] ; CHECK-NEXT: str s2, [x9, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -78,16 +78,16 @@ define <vscale x 8 x i64> @split_insert_8i64_idx(<vscale x 8 x i64> %a, i64 %elt
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cmp x1, x8 ; CHECK-NEXT: cmp x1, x8
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
; CHECK-NEXT: csel x8, x1, x8, lo ; CHECK-NEXT: csel x8, x1, x8, lo
; CHECK-NEXT: st1d { z3.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
; CHECK-NEXT: st1d { z2.d }, p0, [x9, #2, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str x0, [x9, x8, lsl #3] ; CHECK-NEXT: str x0, [x9, x8, lsl #3]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, #1, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9, #2, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -136,21 +136,21 @@ define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt)
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov x8, #-1 ; CHECK-NEXT: mov x8, #-1
; CHECK-NEXT: mov w10, #128 ; CHECK-NEXT: mov w9, #128
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl]
; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl]
; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: addvl x8, x8, #2
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: st1h { z3.h }, p0, [x9, #3, mul vl]
; CHECK-NEXT: csel x8, x8, x10, lo
; CHECK-NEXT: st1h { z2.h }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: strh w0, [x9, x8, lsl #1] ; CHECK-NEXT: strh w0, [x9, x8, lsl #1]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x9, #1, mul vl]
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x9, #2, mul vl]
; CHECK-NEXT: ld1h { z3.h }, p0/z, [x9, #3, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #3, mul vl]
; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: addvl sp, sp, #4
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret
@ -170,14 +170,14 @@ define <vscale x 8 x i32> @split_insert_8i32(<vscale x 8 x i32> %a, i32 %elt) {
; CHECK-NEXT: movk w9, #15, lsl #16 ; CHECK-NEXT: movk w9, #15, lsl #16
; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: sub x8, x8, #1
; CHECK-NEXT: cmp x8, x9 ; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: mov x10, sp
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str w0, [x10, x8, lsl #2] ; CHECK-NEXT: str w0, [x9, x8, lsl #2]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret ; CHECK-NEXT: ret