forked from OSchip/llvm-project
[AArch64][SVE] Use TargetFrameIndex in more SVE load/store addressing modes
Add support for generating TargetFrameIndex in complex patterns for indexed addressing modes in SVE. Additionally, add missing load/stores to getMemOpInfo and getLoadStoreImmIdx. Differential Revision: https://reviews.llvm.org/D112617
This commit is contained in:
parent
014c6b0736
commit
86972f1114
|
@ -5050,6 +5050,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
|
|||
SDValue &Base,
|
||||
SDValue &OffImm) {
|
||||
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
|
||||
const DataLayout &DL = CurDAG->getDataLayout();
|
||||
|
||||
if (N.getOpcode() == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(N)->getIndex();
|
||||
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
|
||||
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (MemVT == EVT())
|
||||
return false;
|
||||
|
@ -5073,6 +5081,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
|
|||
return false;
|
||||
|
||||
Base = N.getOperand(0);
|
||||
if (Base.getOpcode() == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
|
||||
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
|
||||
}
|
||||
|
||||
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -2263,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
|
|||
case AArch64::STNPSi:
|
||||
case AArch64::LDG:
|
||||
case AArch64::STGPi:
|
||||
|
||||
case AArch64::LD1B_IMM:
|
||||
case AArch64::LD1H_IMM:
|
||||
case AArch64::LD1W_IMM:
|
||||
case AArch64::LD1D_IMM:
|
||||
case AArch64::ST1B_IMM:
|
||||
case AArch64::ST1H_IMM:
|
||||
case AArch64::ST1W_IMM:
|
||||
case AArch64::ST1D_IMM:
|
||||
case AArch64::LD1B_H_IMM:
|
||||
case AArch64::LD1B_S_IMM:
|
||||
case AArch64::LD1B_D_IMM:
|
||||
case AArch64::LD1SB_H_IMM:
|
||||
case AArch64::LD1SB_S_IMM:
|
||||
case AArch64::LD1SB_D_IMM:
|
||||
case AArch64::LD1H_IMM:
|
||||
case AArch64::LD1H_S_IMM:
|
||||
case AArch64::LD1H_D_IMM:
|
||||
case AArch64::LD1SH_S_IMM:
|
||||
case AArch64::LD1SH_D_IMM:
|
||||
case AArch64::LD1W_IMM:
|
||||
case AArch64::LD1W_D_IMM:
|
||||
case AArch64::LD1SW_D_IMM:
|
||||
case AArch64::LD1D_IMM:
|
||||
|
||||
case AArch64::ST1B_IMM:
|
||||
case AArch64::ST1B_H_IMM:
|
||||
case AArch64::ST1H_S_IMM:
|
||||
case AArch64::ST1W_D_IMM:
|
||||
case AArch64::LD1B_S_IMM:
|
||||
case AArch64::LD1SB_S_IMM:
|
||||
case AArch64::LD1H_D_IMM:
|
||||
case AArch64::LD1SH_D_IMM:
|
||||
case AArch64::ST1B_S_IMM:
|
||||
case AArch64::ST1H_D_IMM:
|
||||
case AArch64::LD1B_D_IMM:
|
||||
case AArch64::LD1SB_D_IMM:
|
||||
case AArch64::ST1B_D_IMM:
|
||||
case AArch64::ST1H_IMM:
|
||||
case AArch64::ST1H_S_IMM:
|
||||
case AArch64::ST1H_D_IMM:
|
||||
case AArch64::ST1W_IMM:
|
||||
case AArch64::ST1W_D_IMM:
|
||||
case AArch64::ST1D_IMM:
|
||||
|
||||
case AArch64::LD1RB_IMM:
|
||||
case AArch64::LD1RB_H_IMM:
|
||||
case AArch64::LD1RB_S_IMM:
|
||||
|
@ -2305,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
|
|||
case AArch64::LD1RW_D_IMM:
|
||||
case AArch64::LD1RSW_IMM:
|
||||
case AArch64::LD1RD_IMM:
|
||||
|
||||
case AArch64::LDNT1B_ZRI:
|
||||
case AArch64::LDNT1H_ZRI:
|
||||
case AArch64::LDNT1W_ZRI:
|
||||
case AArch64::LDNT1D_ZRI:
|
||||
case AArch64::STNT1B_ZRI:
|
||||
case AArch64::STNT1H_ZRI:
|
||||
case AArch64::STNT1W_ZRI:
|
||||
case AArch64::STNT1D_ZRI:
|
||||
|
||||
case AArch64::LDNF1B_IMM:
|
||||
case AArch64::LDNF1B_H_IMM:
|
||||
case AArch64::LDNF1B_S_IMM:
|
||||
case AArch64::LDNF1B_D_IMM:
|
||||
case AArch64::LDNF1SB_H_IMM:
|
||||
case AArch64::LDNF1SB_S_IMM:
|
||||
case AArch64::LDNF1SB_D_IMM:
|
||||
case AArch64::LDNF1H_IMM:
|
||||
case AArch64::LDNF1H_S_IMM:
|
||||
case AArch64::LDNF1H_D_IMM:
|
||||
case AArch64::LDNF1SH_S_IMM:
|
||||
case AArch64::LDNF1SH_D_IMM:
|
||||
case AArch64::LDNF1W_IMM:
|
||||
case AArch64::LDNF1W_D_IMM:
|
||||
case AArch64::LDNF1SW_D_IMM:
|
||||
case AArch64::LDNF1D_IMM:
|
||||
return 3;
|
||||
case AArch64::ADDG:
|
||||
case AArch64::STGOffset:
|
||||
|
@ -2855,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
case AArch64::LD1H_IMM:
|
||||
case AArch64::LD1W_IMM:
|
||||
case AArch64::LD1D_IMM:
|
||||
case AArch64::LDNT1B_ZRI:
|
||||
case AArch64::LDNT1H_ZRI:
|
||||
case AArch64::LDNT1W_ZRI:
|
||||
case AArch64::LDNT1D_ZRI:
|
||||
case AArch64::ST1B_IMM:
|
||||
case AArch64::ST1H_IMM:
|
||||
case AArch64::ST1W_IMM:
|
||||
case AArch64::ST1D_IMM:
|
||||
case AArch64::STNT1B_ZRI:
|
||||
case AArch64::STNT1H_ZRI:
|
||||
case AArch64::STNT1W_ZRI:
|
||||
case AArch64::STNT1D_ZRI:
|
||||
case AArch64::LDNF1B_IMM:
|
||||
case AArch64::LDNF1H_IMM:
|
||||
case AArch64::LDNF1W_IMM:
|
||||
case AArch64::LDNF1D_IMM:
|
||||
// A full vector's worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(16);
|
||||
|
@ -2875,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
case AArch64::ST1B_H_IMM:
|
||||
case AArch64::ST1H_S_IMM:
|
||||
case AArch64::ST1W_D_IMM:
|
||||
case AArch64::LDNF1B_H_IMM:
|
||||
case AArch64::LDNF1SB_H_IMM:
|
||||
case AArch64::LDNF1H_S_IMM:
|
||||
case AArch64::LDNF1SH_S_IMM:
|
||||
case AArch64::LDNF1W_D_IMM:
|
||||
case AArch64::LDNF1SW_D_IMM:
|
||||
// A half vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(8);
|
||||
|
@ -2888,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
case AArch64::LD1SH_D_IMM:
|
||||
case AArch64::ST1B_S_IMM:
|
||||
case AArch64::ST1H_D_IMM:
|
||||
case AArch64::LDNF1B_S_IMM:
|
||||
case AArch64::LDNF1SB_S_IMM:
|
||||
case AArch64::LDNF1H_D_IMM:
|
||||
case AArch64::LDNF1SH_D_IMM:
|
||||
// A quarter vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(4);
|
||||
|
@ -2898,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
|
|||
case AArch64::LD1B_D_IMM:
|
||||
case AArch64::LD1SB_D_IMM:
|
||||
case AArch64::ST1B_D_IMM:
|
||||
case AArch64::LDNF1B_D_IMM:
|
||||
case AArch64::LDNF1SB_D_IMM:
|
||||
// An eighth vector worth of data
|
||||
// Width = mbytes * elements
|
||||
Scale = TypeSize::Scalable(2);
|
||||
|
|
|
@ -2200,10 +2200,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
|
|||
def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
}
|
||||
let AddedComplexity = 3 in {
|
||||
def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
}
|
||||
|
||||
def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
|
||||
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
|
@ -2240,10 +2236,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
|
|||
def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))),
|
||||
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
}
|
||||
let AddedComplexity = 3 in {
|
||||
def _fi : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
|
||||
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
|
||||
}
|
||||
|
||||
def : Pat<(Ty (Load GPR64:$base)),
|
||||
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
|
||||
|
|
|
@ -40,14 +40,14 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped_idx(<vscale x 16 x i8> %a, <vs
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w10, #256
|
||||
; CHECK-NEXT: mov w9, #256
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, #256
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -81,14 +81,14 @@ define <vscale x 8 x i16> @splice_nxv8i16_clamped_idx(<vscale x 8 x i16> %a, <vs
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cnth x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #128
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #128
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -122,14 +122,14 @@ define <vscale x 4 x i32> @splice_nxv4i32_clamped_idx(<vscale x 4 x i32> %a, <vs
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cntw x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #64
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #64
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -163,14 +163,14 @@ define <vscale x 2 x i64> @splice_nxv2i64_clamped_idx(<vscale x 2 x i64> %a, <vs
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cntd x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #32
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #32
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -200,7 +200,7 @@ define <vscale x 2 x half> @splice_nxv2f16_neg2_idx(<vscale x 2 x half> %a, <vsc
|
|||
; CHECK-NEXT: mov x9, #-8
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -237,15 +237,15 @@ define <vscale x 2 x half> @splice_nxv2f16_clamped_idx(<vscale x 2 x half> %a, <
|
|||
; CHECK-NEXT: cntd x8
|
||||
; CHECK-NEXT: mov w9, #32
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: lsl x8, x8, #3
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -274,7 +274,7 @@ define <vscale x 4 x half> @splice_nxv4f16_neg3_idx(<vscale x 4 x half> %a, <vsc
|
|||
; CHECK-NEXT: mov x9, #-6
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -311,15 +311,15 @@ define <vscale x 4 x half> @splice_nxv4f16_clamped_idx(<vscale x 4 x half> %a, <
|
|||
; CHECK-NEXT: cntw x8
|
||||
; CHECK-NEXT: mov w9, #64
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: lsl x8, x8, #2
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -352,14 +352,14 @@ define <vscale x 8 x half> @splice_nxv8f16_clamped_idx(<vscale x 8 x half> %a, <
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cnth x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #128
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #128
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -389,7 +389,7 @@ define <vscale x 2 x float> @splice_nxv2f32_neg2_idx(<vscale x 2 x float> %a, <v
|
|||
; CHECK-NEXT: mov x9, #-4
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -426,15 +426,15 @@ define <vscale x 2 x float> @splice_nxv2f32_clamped_idx(<vscale x 2 x float> %a,
|
|||
; CHECK-NEXT: cntd x8
|
||||
; CHECK-NEXT: mov w9, #32
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: lsl x8, x8, #3
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x10, x8]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9, x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -467,14 +467,14 @@ define <vscale x 4 x float> @splice_nxv4f32_clamped_idx(<vscale x 4 x float> %a,
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cntw x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #64
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #64
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #64
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -508,14 +508,14 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped_idx(<vscale x 2 x double> %
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: cntd x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #32
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #32
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: cmp x8, #32
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -602,13 +602,13 @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8
|
|||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: orr x9, x8, #0x8
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: orr x8, x8, #0x8
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -623,21 +623,21 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped_idx(<vscale x 16 x float>
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-8
|
||||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov w10, #16
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #16
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, #16
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
|
||||
; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
|
||||
; CHECK-NEXT: add x10, x9, x8, lsl #2
|
||||
; CHECK-NEXT: st1w { z7.s }, p0, [x9, #7, mul vl]
|
||||
; CHECK-NEXT: st1w { z4.s }, p0, [x9, #4, mul vl]
|
||||
; CHECK-NEXT: st1w { z5.s }, p0, [x9, #5, mul vl]
|
||||
; CHECK-NEXT: st1w { z6.s }, p0, [x9, #6, mul vl]
|
||||
; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
|
||||
; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x10, #2, mul vl]
|
||||
|
@ -662,7 +662,7 @@ define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x
|
|||
; CHECK-NEXT: mov x9, #-16
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8, x9]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -696,11 +696,11 @@ define <vscale x 16 x i8> @splice_nxv16i8_clamped(<vscale x 16 x i8> %a, <vscale
|
|||
; CHECK-NEXT: mov w10, #17
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -717,7 +717,7 @@ define <vscale x 8 x i16> @splice_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i
|
|||
; CHECK-NEXT: mov x9, #-8
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -751,11 +751,11 @@ define <vscale x 8 x i16> @splice_nxv8i16_clamped(<vscale x 8 x i16> %a, <vscale
|
|||
; CHECK-NEXT: mov w10, #18
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -772,7 +772,7 @@ define <vscale x 4 x i32> @splice_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i
|
|||
; CHECK-NEXT: mov x9, #-4
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -806,11 +806,11 @@ define <vscale x 4 x i32> @splice_nxv4i32_clamped(<vscale x 4 x i32> %a, <vscale
|
|||
; CHECK-NEXT: mov w10, #20
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -827,7 +827,7 @@ define <vscale x 2 x i64> @splice_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i
|
|||
; CHECK-NEXT: mov x9, #-2
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -861,11 +861,11 @@ define <vscale x 2 x i64> @splice_nxv2i64_clamped(<vscale x 2 x i64> %a, <vscale
|
|||
; CHECK-NEXT: mov w10, #24
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -882,7 +882,7 @@ define <vscale x 8 x half> @splice_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x
|
|||
; CHECK-NEXT: mov x9, #-8
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -916,11 +916,11 @@ define <vscale x 8 x half> @splice_nxv8f16_clamped(<vscale x 8 x half> %a, <vsca
|
|||
; CHECK-NEXT: mov w10, #18
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -937,7 +937,7 @@ define <vscale x 4 x float> @splice_nxv4f32(<vscale x 4 x float> %a, <vscale x 4
|
|||
; CHECK-NEXT: mov x9, #-4
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -971,11 +971,11 @@ define <vscale x 4 x float> @splice_nxv4f32_clamped(<vscale x 4 x float> %a, <vs
|
|||
; CHECK-NEXT: mov w10, #20
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -992,7 +992,7 @@ define <vscale x 2 x double> @splice_nxv2f64(<vscale x 2 x double> %a, <vscale x
|
|||
; CHECK-NEXT: mov x9, #-2
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -1026,11 +1026,11 @@ define <vscale x 2 x double> @splice_nxv2f64_clamped(<vscale x 2 x double> %a, <
|
|||
; CHECK-NEXT: mov w10, #24
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: addvl x10, x8, #1
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9]
|
||||
; CHECK-NEXT: sub x8, x8, x9
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -1112,7 +1112,7 @@ define <vscale x 2 x i8> @splice_nxv2i8(<vscale x 2 x i8> %a, <vscale x 2 x i8>
|
|||
; CHECK-NEXT: mov x9, #-2
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
@ -1129,16 +1129,16 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
|
|||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: mov x10, #-8
|
||||
; CHECK-NEXT: mov x9, #-8
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: addvl x9, x8, #2
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: sub x11, x9, #32
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x10, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x11, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: sub x10, x8, #32
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -1152,26 +1152,26 @@ define <vscale x 16 x float> @splice_nxv16f32_clamped(<vscale x 16 x float> %a,
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-8
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: rdvl x9, #4
|
||||
; CHECK-NEXT: cmp x9, #68
|
||||
; CHECK-NEXT: mov w10, #68
|
||||
; CHECK-NEXT: csel x9, x9, x10, lo
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: rdvl x8, #4
|
||||
; CHECK-NEXT: cmp x8, #68
|
||||
; CHECK-NEXT: mov w9, #68
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: addvl x10, x8, #4
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: sub x9, x10, x9
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: addvl x9, x10, #4
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: sub x8, x9, x8
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z7.s }, p0, [x8, #7, mul vl]
|
||||
; CHECK-NEXT: st1w { z4.s }, p0, [x8, #4, mul vl]
|
||||
; CHECK-NEXT: st1w { z5.s }, p0, [x8, #5, mul vl]
|
||||
; CHECK-NEXT: st1w { z6.s }, p0, [x8, #6, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl]
|
||||
; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl]
|
||||
; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl]
|
||||
; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #8
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
|
|
@ -12,17 +12,16 @@ define float @foo1(double* %x0, double* %x1, double* %x2) nounwind {
|
|||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: fmov s0, #1.00000000
|
||||
; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
|
||||
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2]
|
||||
; CHECK-NEXT: fmov s0, #1.00000000
|
||||
; CHECK-NEXT: mov x0, sp
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z19.d }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: bl callee1
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
|
@ -44,24 +43,27 @@ define float @foo2(double* %x0, double* %x1) nounwind {
|
|||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: sub sp, sp, #16
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: add x8, sp, #16
|
||||
; CHECK-NEXT: add x9, sp, #16
|
||||
; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1]
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: add x8, sp, #16
|
||||
; CHECK-NEXT: fmov s0, #1.00000000
|
||||
; CHECK-NEXT: mov w0, wzr
|
||||
; CHECK-NEXT: mov w1, #1
|
||||
; CHECK-NEXT: mov w2, #2
|
||||
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
|
||||
; CHECK-NEXT: add x9, sp, #16
|
||||
; CHECK-NEXT: mov w3, #3
|
||||
; CHECK-NEXT: mov w4, #4
|
||||
; CHECK-NEXT: mov w5, #5
|
||||
; CHECK-NEXT: mov w6, #6
|
||||
; CHECK-NEXT: mov w7, #7
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: add x9, sp, #16
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z19.d }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: mov w7, #7
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: add x9, sp, #16
|
||||
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: str x8, [sp]
|
||||
; CHECK-NEXT: bl callee2
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
|
@ -83,17 +85,16 @@ define float @foo3(double* %x0, double* %x1, double* %x2) nounwind {
|
|||
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-3
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: fmov s0, #1.00000000
|
||||
; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1]
|
||||
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2]
|
||||
; CHECK-NEXT: fmov s0, #1.00000000
|
||||
; CHECK-NEXT: fmov s1, #2.00000000
|
||||
; CHECK-NEXT: mov x0, sp
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: st1d { z16.d }, p0, [sp]
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z18.d }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: bl callee3
|
||||
; CHECK-NEXT: addvl sp, sp, #3
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -939,7 +939,6 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 {
|
|||
; CHECK-NEXT: .cfi_offset w30, -8
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: ptrue p0.d, vl4
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
|
||||
; CHECK-NEXT: mov z2.d, z1.d[1]
|
||||
|
@ -947,7 +946,7 @@ define void @shuffle_ext_invalid(<4 x double>* %a, <4 x double>* %b) #0 {
|
|||
; CHECK-NEXT: mov z1.d, z0.d[3]
|
||||
; CHECK-NEXT: mov z0.d, z0.d[2]
|
||||
; CHECK-NEXT: stp d0, d1, [sp]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; CHECK-NEXT: mov sp, x29
|
||||
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
|
||||
|
|
|
@ -506,18 +506,18 @@ define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val,
|
|||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
|
||||
; CHECK-NEXT: sxtw x9, w1
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1
|
||||
; CHECK-NEXT: ptrue p1.b
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: st1b { z0.b }, p1, [sp]
|
||||
; CHECK-NEXT: strb w0, [x10, x8]
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: strb w0, [x9, x8]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp]
|
||||
; CHECK-NEXT: ld1b { z1.b }, p1/z, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: and z0.b, z0.b, #0x1
|
||||
; CHECK-NEXT: and z1.b, z1.b, #0x1
|
||||
; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0
|
||||
|
|
|
@ -213,19 +213,18 @@ define void @insert_v2i64_nxv16i64(<2 x i64> %sv0, <2 x i64> %sv1, <vscale x 16
|
|||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: str q1, [sp, #32]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [x0]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [x0, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -256,13 +255,12 @@ define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %ou
|
|||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: ldr q0, [x0]
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: str q0, [sp, #16]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x1, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x1]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x1, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [x1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -319,11 +317,10 @@ define <vscale x 8 x half> @insert_nxv8f16_nxv2f16(<vscale x 8 x half> %vec, <vs
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-1
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: st1h { z1.d }, p1, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.d }, p1, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
|
||||
; CHECK-NEXT: addvl sp, sp, #1
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -446,15 +443,14 @@ define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vsc
|
|||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: uunpklo z2.d, z0.s
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ptrue p1.s
|
||||
; CHECK-NEXT: st1w { z2.d }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p1, [sp]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p1/z, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp]
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: uunpklo z1.d, z0.s
|
||||
; CHECK-NEXT: ptrue p1.d
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: st1w { z1.d }, p1, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
|
|
@ -0,0 +1,277 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
|
||||
#
|
||||
# Test that prologepilog works for each of the LDNF1 instructions for stack-based objects.
|
||||
#
|
||||
--- |
|
||||
define void @testcase_positive_offset() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Reads from %object at offset 7 * readsize
|
||||
ret void
|
||||
}
|
||||
define void @testcase_negative_offset() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Reads from %object at offset -8 * readsize
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @testcase_positive_offset_out_of_range() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Reads from %object at offset 8 * readsize
|
||||
ret void
|
||||
}
|
||||
define void @testcase_negative_offset_out_of_range() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Reads from %object at offset -9 * readsize
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: testcase_positive_offset
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_positive_offset
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, 7 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_negative_offset
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_negative_offset
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, $sp, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1D_IMM renamable $p0, $sp, -8 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1D_IMM renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_positive_offset_out_of_range
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_positive_offset_out_of_range
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, 4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_negative_offset_out_of_range
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_negative_offset_out_of_range
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1B_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_H_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_S_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SB_D_IMM renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1H_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_S_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -2
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SH_D_IMM renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1W_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDPL_XXI $sp, -4
|
||||
; CHECK-NEXT: renamable $z0 = LDNF1SW_D_IMM renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNF1B_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1B_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_H_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_S_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SB_D_IMM renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1H_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_S_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1SH_D_IMM renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNF1W_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1W_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNF1SW_D_IMM renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
|
@ -0,0 +1,203 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=prologepilog -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
|
||||
#
|
||||
# Test that prologepilog works for each of the LDNT1/STNT1 instructions for stack-based objects.
|
||||
#
|
||||
--- |
|
||||
define void @testcase_positive_offset() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Loads from and stores to %object at offset 7 * readsize
|
||||
ret void
|
||||
}
|
||||
define void @testcase_negative_offset() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Loads from and stores to %object at offset -8 * readsize
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @testcase_positive_offset_out_of_range() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Loads from and stores to %object at offset 8 * readsize (one past the in-range maximum of 7)
|
||||
ret void
|
||||
}
|
||||
define void @testcase_negative_offset_out_of_range() {
|
||||
%dummy = alloca <vscale x 2 x i64>, align 8
|
||||
%object = alloca <vscale x 2 x i64>, align 8
|
||||
; Loads from and stores to %object at offset -9 * readsize (one below the in-range minimum of -8)
|
||||
ret void
|
||||
}
|
||||
...
|
||||
---
|
||||
name: testcase_positive_offset
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_positive_offset
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, 7 :: (load (s32) from %ir.object, align 8)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, 7 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s8) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s16) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s32) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, 7 :: (store (s64) into %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 7 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 7 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 7 :: (load 4 from %ir.object, align 8)
|
||||
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 7 :: (load 8 from %ir.object, align 8)
|
||||
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 1 into %ir.object, align 8)
|
||||
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 2 into %ir.object, align 8)
|
||||
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 4 into %ir.object, align 8)
|
||||
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 7 :: (store 8 into %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_negative_offset
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_negative_offset
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, $sp, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, $sp, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, $sp, -8 :: (load (s32) from %ir.object)
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, $sp, -8 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s8) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s16) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s32) into %ir.object, align 8)
|
||||
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, $sp, -8 :: (store (s64) into %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -8 :: (load 4 from %ir.object, align 4)
|
||||
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -8 :: (load 8 from %ir.object, align 8)
|
||||
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 1 into %ir.object, align 8)
|
||||
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 2 into %ir.object, align 8)
|
||||
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 4 into %ir.object, align 8)
|
||||
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -8 :: (store 8 into %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_positive_offset_out_of_range
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_positive_offset_out_of_range
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, 7 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, 7 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, 7 :: (load (s32) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, 7 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s8) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s16) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s32) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, 1
|
||||
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, 7 :: (store (s64) into %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, 8 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, 8 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, 8 :: (load 4 from %ir.object, align 4)
|
||||
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, 8 :: (load 8 from %ir.object, align 8)
|
||||
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 1 into %ir.object, align 8)
|
||||
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 2 into %ir.object, align 8)
|
||||
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 4 into %ir.object, align 8)
|
||||
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, 8 :: (store 8 into %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
||||
|
||||
---
|
||||
name: testcase_negative_offset_out_of_range
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, name: dummy, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
- { id: 1, name: object, type: default, offset: 0, size: 32, alignment: 16, stack-id: scalable-vector }
|
||||
body: |
|
||||
bb.0 (%ir-block.0):
|
||||
liveins: $p0
|
||||
|
||||
; CHECK-LABEL: name: testcase_negative_offset_out_of_range
|
||||
; CHECK: liveins: $p0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2)
|
||||
; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -4
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22
|
||||
; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1B_ZRI renamable $p0, killed $x8, -8 :: (load (s8) from %ir.object, align 2)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1H_ZRI renamable $p0, killed $x8, -8 :: (load (s16) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1W_ZRI renamable $p0, killed $x8, -8 :: (load (s32) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: renamable $z0 = LDNT1D_ZRI renamable $p0, killed $x8, -8 :: (load (s64) from %ir.object)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: STNT1B_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s8) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: STNT1H_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s16) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: STNT1W_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s32) into %ir.object, align 8)
|
||||
; CHECK-NEXT: $x8 = ADDVL_XXI $sp, -1
|
||||
; CHECK-NEXT: STNT1D_ZRI renamable $z0, renamable $p0, killed $x8, -8 :: (store (s64) into %ir.object)
|
||||
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 4
|
||||
; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2)
|
||||
; CHECK-NEXT: RET_ReallyLR implicit $z0
|
||||
renamable $z0 = LDNT1B_ZRI renamable $p0, %stack.1.object, -9 :: (load 1 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1H_ZRI renamable $p0, %stack.1.object, -9 :: (load 2 from %ir.object, align 2)
|
||||
renamable $z0 = LDNT1W_ZRI renamable $p0, %stack.1.object, -9 :: (load 4 from %ir.object, align 4)
|
||||
renamable $z0 = LDNT1D_ZRI renamable $p0, %stack.1.object, -9 :: (load 8 from %ir.object, align 8)
|
||||
STNT1B_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 1 into %ir.object, align 8)
|
||||
STNT1H_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 2 into %ir.object, align 8)
|
||||
STNT1W_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 4 into %ir.object, align 8)
|
||||
STNT1D_ZRI renamable $z0, renamable $p0, %stack.1.object, -9 :: (store 8 into %ir.object, align 8)
|
||||
RET_ReallyLR implicit $z0
|
||||
...
|
|
@ -26,14 +26,14 @@ define i8 @split_extract_32i8_idx(<vscale x 32 x i8> %a, i32 %idx) {
|
|||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x9, w0
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: ldrb w0, [x10, x8]
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ldrb w0, [x9, x8]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -51,14 +51,14 @@ define i16 @split_extract_16i16_idx(<vscale x 16 x i16> %a, i32 %idx) {
|
|||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x9, w0
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: ldrh w0, [x10, x8, lsl #1]
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -77,13 +77,13 @@ define i32 @split_extract_8i32_idx(<vscale x 8 x i32> %a, i32 %idx) {
|
|||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x9, w0
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: ldr w0, [x10, x8, lsl #2]
|
||||
; CHECK-NEXT: ldr w0, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -98,19 +98,19 @@ define i64 @split_extract_8i64_idx(<vscale x 8 x i64> %a, i32 %idx) {
|
|||
; CHECK-NEXT: addvl sp, sp, #-4
|
||||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: cnth x9
|
||||
; CHECK-NEXT: cnth x8
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
|
||||
; CHECK-NEXT: sxtw x10, w0
|
||||
; CHECK-NEXT: sub x9, x9, #1
|
||||
; CHECK-NEXT: mov x8, sp
|
||||
; CHECK-NEXT: cmp x10, x9
|
||||
; CHECK-NEXT: sxtw x9, w0
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: csel x9, x10, x9, lo
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [x8, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [x8, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x8, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x9, x8
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: csel x8, x9, x8, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: ldr x0, [x8, x9, lsl #3]
|
||||
; CHECK-NEXT: ldr x0, [x9, x8, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -146,14 +146,14 @@ define i16 @split_extract_16i16(<vscale x 16 x i16> %a) {
|
|||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w10, #128
|
||||
; CHECK-NEXT: mov w9, #128
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -170,17 +170,17 @@ define i32 @split_extract_16i32(<vscale x 16 x i32> %a) {
|
|||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov w10, #34464
|
||||
; CHECK-NEXT: movk w10, #1, lsl #16
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #34464
|
||||
; CHECK-NEXT: movk w9, #1, lsl #16
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #1
|
||||
; CHECK-NEXT: cmp x8, x10
|
||||
; CHECK-NEXT: st1w { z3.s }, p0, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, x9
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ldr w0, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
|
@ -197,13 +197,13 @@ define i64 @split_extract_4i64(<vscale x 4 x i64> %a) {
|
|||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: cntw x8
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #10
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: mov w10, #10
|
||||
; CHECK-NEXT: cmp x8, #10
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, #10
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: ldr x0, [x9, x8, lsl #3]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
|
|
|
@ -26,14 +26,14 @@ define <vscale x 32 x i8> @split_insert_32i8_idx(<vscale x 32 x i8> %a, i8 %elt,
|
|||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: ptrue p0.b
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x1, x8
|
||||
; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1b { z0.b }, p0, [sp]
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: cmp x1, x8
|
||||
; CHECK-NEXT: csel x8, x1, x8, lo
|
||||
; CHECK-NEXT: strb w0, [x9, x8]
|
||||
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1b { z1.b }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -53,12 +53,12 @@ define <vscale x 8 x float> @split_insert_8f32_idx(<vscale x 8 x float> %a, floa
|
|||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: cmp x0, x8
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x0, x8, lo
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: str s2, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -78,16 +78,16 @@ define <vscale x 8 x i64> @split_insert_8i64_idx(<vscale x 8 x i64> %a, i64 %elt
|
|||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: cmp x1, x8
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: csel x8, x1, x8, lo
|
||||
; CHECK-NEXT: st1d { z3.d }, p0, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
|
||||
; CHECK-NEXT: str x0, [x9, x8, lsl #3]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1d { z2.d }, p0/z, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1d { z3.d }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -136,21 +136,21 @@ define <vscale x 32 x i16> @split_insert_32i16(<vscale x 32 x i16> %a, i16 %elt)
|
|||
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
|
||||
; CHECK-NEXT: .cfi_offset w29, -16
|
||||
; CHECK-NEXT: mov x8, #-1
|
||||
; CHECK-NEXT: mov w10, #128
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: mov w9, #128
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: addvl x8, x8, #2
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: cmp x8, #128
|
||||
; CHECK-NEXT: st1h { z3.h }, p0, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x10, lo
|
||||
; CHECK-NEXT: st1h { z2.h }, p0, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: st1h { z1.h }, p0, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: strh w0, [x9, x8, lsl #1]
|
||||
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x9, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x9, #2, mul vl]
|
||||
; CHECK-NEXT: ld1h { z3.h }, p0/z, [x9, #3, mul vl]
|
||||
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp, #2, mul vl]
|
||||
; CHECK-NEXT: ld1h { z3.h }, p0/z, [sp, #3, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #4
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
@ -170,14 +170,14 @@ define <vscale x 8 x i32> @split_insert_8i32(<vscale x 8 x i32> %a, i32 %elt) {
|
|||
; CHECK-NEXT: movk w9, #15, lsl #16
|
||||
; CHECK-NEXT: sub x8, x8, #1
|
||||
; CHECK-NEXT: cmp x8, x9
|
||||
; CHECK-NEXT: mov x10, sp
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: csel x8, x8, x9, lo
|
||||
; CHECK-NEXT: mov x9, sp
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
|
||||
; CHECK-NEXT: str w0, [x10, x8, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl]
|
||||
; CHECK-NEXT: str w0, [x9, x8, lsl #2]
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [sp, #1, mul vl]
|
||||
; CHECK-NEXT: addvl sp, sp, #2
|
||||
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
|
|
Loading…
Reference in New Issue